ih264_buf_mgr_size:
   93|   302k|{
   94|   302k|    WORD32 size;
   95|       |
   96|   302k|    size = sizeof(buf_mgr_t);
   97|   302k|    size += ithread_get_mutex_lock_size();
   98|       |
   99|   302k|    return size;
  100|   302k|}
ih264_buf_mgr_lock:
  120|  1.51M|{
  121|  1.51M|    WORD32 retval;
  122|       |
  123|  1.51M|    retval = ithread_mutex_lock(ps_buf_mgr->pv_mutex);
  124|  1.51M|    if(retval)
  ------------------
  |  Branch (124:8): [True: 0, False: 1.51M]
  ------------------
  125|      0|        return IH264_FAIL;
  126|  1.51M|    return IH264_SUCCESS;
  127|  1.51M|}
ih264_buf_mgr_unlock:
  147|  1.51M|{
  148|  1.51M|    WORD32 retval;
  149|       |
  150|  1.51M|    retval = ithread_mutex_unlock(ps_buf_mgr->pv_mutex);
  151|  1.51M|    if(retval)
  ------------------
  |  Branch (151:8): [True: 0, False: 1.51M]
  ------------------
  152|      0|        return IH264_FAIL;
  153|  1.51M|    return IH264_SUCCESS;
  154|  1.51M|}
ih264_buf_mgr_free:
  211|  48.2k|{
  212|  48.2k|    WORD32 ret;
  213|       |
  214|  48.2k|    ret = ithread_mutex_destroy(ps_buf_mgr->pv_mutex);
  215|  48.2k|    if(0 == ret)
  ------------------
  |  Branch (215:8): [True: 48.2k, False: 0]
  ------------------
  216|  48.2k|        return IH264_SUCCESS;
  217|      0|    return IH264_FAIL;
  218|  48.2k|}
ih264_buf_mgr_init:
  238|  48.8k|{
  239|  48.8k|    WORD32 id;
  240|  48.8k|    UWORD8 *pu1_buf = (UWORD8 *)pv_buf;
  241|  48.8k|    buf_mgr_t *ps_buf_mgr = (buf_mgr_t *)pu1_buf;
  242|       |
  243|  48.8k|    pu1_buf += sizeof(buf_mgr_t);
  244|  48.8k|    ps_buf_mgr->pv_mutex = pu1_buf;
  245|       |
  246|  48.8k|    pu1_buf += ithread_get_mutex_lock_size();
  247|  48.8k|    ithread_mutex_init(ps_buf_mgr->pv_mutex);
  248|       |
  249|  48.8k|    ps_buf_mgr->i4_max_buf_cnt = BUF_MGR_MAX_CNT;
  ------------------
  |  |   44|  48.8k|#define BUF_MGR_MAX_CNT 64
  ------------------
  250|  48.8k|    ps_buf_mgr->i4_active_buf_cnt = 0;
  251|  3.17M|    for(id = 0; id < BUF_MGR_MAX_CNT; id++)
  ------------------
  |  |   44|  3.17M|#define BUF_MGR_MAX_CNT 64
  ------------------
  |  Branch (251:17): [True: 3.12M, False: 48.8k]
  ------------------
  252|  3.12M|    {
  253|  3.12M|        ps_buf_mgr->au4_status[id] = 0;
  254|  3.12M|        ps_buf_mgr->apv_ptr[id] = NULL;
  255|  3.12M|    }
  256|       |
  257|  48.8k|    return ps_buf_mgr;
  258|  48.8k|}
ih264_buf_mgr_reset:
  278|   155k|{
  279|   155k|    WORD32 id;
  280|   155k|    buf_mgr_t *ps_buf_mgr;
  281|       |
  282|   155k|    ps_buf_mgr = (buf_mgr_t *) pv_buf_mgr;
  283|       |
  284|  10.1M|    for(id = 0; id < BUF_MGR_MAX_CNT; id++)
  ------------------
  |  |   44|  10.1M|#define BUF_MGR_MAX_CNT 64
  ------------------
  |  Branch (284:17): [True: 9.96M, False: 155k]
  ------------------
  285|  9.96M|    {
  286|  9.96M|        ps_buf_mgr->au4_status[id] = 0;
  287|  9.96M|    }
  288|       |
  289|   155k|    return;
  290|   155k|}
ih264_buf_mgr_add:
  320|   486k|{
  321|   486k|    IH264_ERROR_T ret = IH264_SUCCESS;
  322|       |
  323|   486k|    ret = ih264_buf_mgr_lock(ps_buf_mgr);
  324|   486k|    RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|   486k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 486k]
  |  |  ------------------
  ------------------
  325|       |
  326|       |    /* Check if buffer ID is within allowed range */
  327|   486k|    if(buf_id >= ps_buf_mgr->i4_max_buf_cnt)
  ------------------
  |  Branch (327:8): [True: 0, False: 486k]
  ------------------
  328|      0|    {
  329|      0|        ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  330|      0|        RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|      0|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  331|      0|        return IH264_FAIL;
  332|      0|    }
  333|       |
  334|       |    /* Check if the current ID is being used to hold some other buffer */
  335|   486k|    if((ps_buf_mgr->apv_ptr[buf_id] != NULL) &&
  ------------------
  |  Branch (335:8): [True: 0, False: 486k]
  ------------------
  336|      0|       (ps_buf_mgr->apv_ptr[buf_id] != pv_ptr))
  ------------------
  |  Branch (336:8): [True: 0, False: 0]
  ------------------
  337|      0|    {
  338|      0|        ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  339|      0|        RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|      0|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  340|      0|        return IH264_FAIL;
  341|      0|    }
  342|   486k|    ps_buf_mgr->apv_ptr[buf_id] = pv_ptr;
  343|   486k|    ps_buf_mgr->i4_active_buf_cnt++;
  344|       |
  345|   486k|    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  346|   486k|    RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|   486k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 486k]
  |  |  ------------------
  ------------------
  347|       |
  348|   486k|    return ret;
  349|   486k|}
ih264_buf_mgr_get_next_free:
  374|   268k|{
  375|   268k|    WORD32 id;
  376|   268k|    void *pv_ret_ptr = NULL;
  377|   268k|    IH264_ERROR_T ret = IH264_SUCCESS;
  378|       |
  379|   268k|    ret = ih264_buf_mgr_lock(ps_buf_mgr);
  380|   268k|    RETURN_IF((ret != IH264_SUCCESS), NULL);
  ------------------
  |  |   44|   268k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 268k]
  |  |  ------------------
  ------------------
  381|       |
  382|   586k|    for(id = 0; id < ps_buf_mgr->i4_active_buf_cnt; id++)
  ------------------
  |  Branch (382:17): [True: 584k, False: 1.60k]
  ------------------
  383|   584k|    {
  384|       |        /* Check if the buffer is non-null and status is zero */
  385|   584k|        if((ps_buf_mgr->au4_status[id] == 0) && (ps_buf_mgr->apv_ptr[id]))
  ------------------
  |  Branch (385:12): [True: 267k, False: 317k]
  |  Branch (385:49): [True: 267k, False: 0]
  ------------------
  386|   267k|        {
  387|   267k|            *pi4_buf_id = id;
  388|       |            /* mark buffer as busy before returning */
  389|   267k|            ps_buf_mgr->au4_status[id] = 1;
  390|   267k|            pv_ret_ptr = ps_buf_mgr->apv_ptr[id];
  391|   267k|            break;
  392|   267k|        }
  393|   584k|    }
  394|   268k|    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  395|   268k|    RETURN_IF((ret != IH264_SUCCESS), NULL);
  ------------------
  |  |   44|   268k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 268k]
  |  |  ------------------
  ------------------
  396|       |
  397|   268k|    return pv_ret_ptr;
  398|   268k|}
ih264_buf_mgr_release:
  470|   410k|{
  471|   410k|    IH264_ERROR_T ret = IH264_SUCCESS;
  472|       |
  473|   410k|    ret = ih264_buf_mgr_lock(ps_buf_mgr);
  474|   410k|    RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|   410k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 410k]
  |  |  ------------------
  ------------------
  475|       |
  476|       |    /* If the given id is pointing to an id which is not yet added */
  477|   410k|    if(buf_id >= ps_buf_mgr->i4_active_buf_cnt)
  ------------------
  |  Branch (477:8): [True: 96.9k, False: 313k]
  ------------------
  478|  96.9k|    {
  479|  96.9k|        ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  480|  96.9k|        RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|  96.9k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 96.9k]
  |  |  ------------------
  ------------------
  481|  96.9k|        return IH264_FAIL;
  482|  96.9k|    }
  483|       |
  484|   313k|    ps_buf_mgr->au4_status[buf_id] &= ~mask;
  485|       |
  486|       |    /* If both the REF and DISP are zero, DEC is set to zero */
  487|   313k|    if(ps_buf_mgr->au4_status[buf_id] == 1)
  ------------------
  |  Branch (487:8): [True: 158k, False: 155k]
  ------------------
  488|   158k|    {
  489|   158k|        ps_buf_mgr->au4_status[buf_id] = 0;
  490|   158k|    }
  491|       |
  492|   313k|    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  493|   313k|    RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|   313k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 313k]
  |  |  ------------------
  ------------------
  494|       |
  495|   313k|    return ret;
  496|   313k|}
ih264_buf_mgr_set_status:
  525|   345k|{
  526|   345k|    IH264_ERROR_T ret = IH264_SUCCESS;
  527|       |
  528|   345k|    ret = ih264_buf_mgr_lock(ps_buf_mgr);
  529|   345k|    RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|   345k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 345k]
  |  |  ------------------
  ------------------
  530|       |
  531|   345k|    if(buf_id >= ps_buf_mgr->i4_active_buf_cnt)
  ------------------
  |  Branch (531:8): [True: 0, False: 345k]
  ------------------
  532|      0|    {
  533|      0|        ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  534|      0|        RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|      0|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  535|      0|        return IH264_FAIL;
  536|      0|    }
  537|       |
  538|   345k|    if((ps_buf_mgr->au4_status[buf_id] & mask) != 0)
  ------------------
  |  Branch (538:8): [True: 0, False: 345k]
  ------------------
  539|      0|    {
  540|      0|        ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  541|      0|        RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|      0|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  542|      0|        return IH264_FAIL;
  543|      0|    }
  544|       |
  545|   345k|    ps_buf_mgr->au4_status[buf_id] |= mask;
  546|   345k|    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
  547|   345k|    RETURN_IF((ret != IH264_SUCCESS), ret);
  ------------------
  |  |   44|   345k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 345k]
  |  |  ------------------
  ------------------
  548|       |
  549|   345k|    return ret;
  550|   345k|}

ih264_intra_pred_chroma_8x8_mode_dc:
  118|  83.6k|{
  119|       |    /* availability of left predictors (only for DC) */
  120|  83.6k|    WORD32 left_avail, left_avail1, left_avail2;
  121|       |
  122|       |    /* availability of top predictors (only for DC) */
  123|  83.6k|    WORD32 top_avail;
  124|       |
  125|       |    /* Pointer to start of left predictors */
  126|  83.6k|    UWORD8 *pu1_left = NULL;
  127|       |
  128|       |    /* Pointer to start of top predictors */
  129|  83.6k|    UWORD8 *pu1_top = NULL;
  130|       |
  131|       |    /* temporary variables to store accumulated first left half, second left half,
  132|       |     * first top half, second top half of U and V values*/
  133|  83.6k|    WORD32 val_u_l1 = 0, val_u_l2 = 0, val_u_t1 = 0, val_u_t2 = 0;
  134|  83.6k|    WORD32 val_v_l1 = 0, val_v_l2 = 0, val_v_t1 = 0, val_v_t2 = 0;
  135|  83.6k|    WORD32 val_u1 = 0, val_u2 = 0, val_v1 = 0, val_v2 = 0;
  136|       |
  137|       |    /* temp */
  138|  83.6k|    WORD32 col, row;
  139|       |
  140|  83.6k|    UNUSED(src_strd);
  ------------------
  |  |   45|  83.6k|#define UNUSED(x) ((void)(x))
  ------------------
  141|  83.6k|    left_avail = ngbr_avail & 0x11;
  142|  83.6k|    left_avail1 = ngbr_avail & 1;
  143|  83.6k|    left_avail2 = (ngbr_avail >> 4) & 1;
  144|  83.6k|    top_avail = (ngbr_avail >> 2) & 1;
  145|       |
  146|  83.6k|    pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
  ------------------
  |  |  510|  83.6k|#define BLK8x8SIZE          8
  ------------------
  147|  83.6k|    pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
  ------------------
  |  |  510|  83.6k|#define BLK8x8SIZE          8
  ------------------
  148|       |
  149|  83.6k|    if(left_avail1)
  ------------------
  |  Branch (149:8): [True: 56.4k, False: 27.2k]
  ------------------
  150|  56.4k|    {
  151|       |        /* First 4x4 block */
  152|  56.4k|        val_u_l1 += *pu1_left;
  153|  56.4k|        val_v_l1 += *(pu1_left + 1);
  154|  56.4k|        pu1_left -= 2;
  155|  56.4k|        val_u_l1 += *pu1_left;
  156|  56.4k|        val_v_l1 += *(pu1_left + 1);
  157|  56.4k|        pu1_left -= 2;
  158|  56.4k|        val_u_l1 += *pu1_left;
  159|  56.4k|        val_v_l1 += *(pu1_left + 1);
  160|  56.4k|        pu1_left -= 2;
  161|  56.4k|        val_u_l1 += *pu1_left + 2;
  162|  56.4k|        val_v_l1 += *(pu1_left + 1) + 2;
  163|  56.4k|        pu1_left -= 2;
  164|  56.4k|    }
  165|  27.2k|    else
  166|  27.2k|    {
  167|  27.2k|        pu1_left -= 2 * 4;
  168|  27.2k|    }
  169|       |
  170|  83.6k|    if(left_avail2)
  ------------------
  |  Branch (170:8): [True: 56.4k, False: 27.2k]
  ------------------
  171|  56.4k|    {
  172|       |        /* Second 4x4 block */
  173|  56.4k|        val_u_l2 += *pu1_left;
  174|  56.4k|        val_v_l2 += *(pu1_left + 1);
  175|  56.4k|        pu1_left -= 2;
  176|  56.4k|        val_u_l2 += *pu1_left;
  177|  56.4k|        val_v_l2 += *(pu1_left + 1);
  178|  56.4k|        pu1_left -= 2;
  179|  56.4k|        val_u_l2 += *pu1_left;
  180|  56.4k|        val_v_l2 += *(pu1_left + 1);
  181|  56.4k|        pu1_left -= 2;
  182|  56.4k|        val_u_l2 += *pu1_left + 2;
  183|  56.4k|        val_v_l2 += *(pu1_left + 1) + 2;
  184|  56.4k|        pu1_left -= 2;
  185|  56.4k|    }
  186|  27.2k|    else
  187|  27.2k|    {
  188|  27.2k|        pu1_left -= 2 * 4;
  189|  27.2k|    }
  190|       |
  191|  83.6k|    if(top_avail)
  ------------------
  |  Branch (191:8): [True: 55.4k, False: 28.2k]
  ------------------
  192|  55.4k|    {
  193|  55.4k|        val_u_t1 += *pu1_top + *(pu1_top + 2) + *(pu1_top + 4)
  194|  55.4k|                        + *(pu1_top + 6) + 2;
  195|  55.4k|        val_u_t2 += *(pu1_top + 8) + *(pu1_top + 10) + *(pu1_top + 12)
  196|  55.4k|                        + *(pu1_top + 14) + 2;
  197|  55.4k|        val_v_t1 += *(pu1_top + 1) + *(pu1_top + 3) + *(pu1_top + 5)
  198|  55.4k|                        + *(pu1_top + 7) + 2;
  199|  55.4k|        val_v_t2 += *(pu1_top + 9) + *(pu1_top + 11) + *(pu1_top + 13)
  200|  55.4k|                        + *(pu1_top + 15) + 2;
  201|  55.4k|    }
  202|       |
  203|  83.6k|    if(left_avail + top_avail)
  ------------------
  |  Branch (203:8): [True: 70.3k, False: 13.3k]
  ------------------
  204|  70.3k|    {
  205|  70.3k|        val_u1 = (left_avail1 + top_avail) ?
  ------------------
  |  Branch (205:18): [True: 70.3k, False: 0]
  ------------------
  206|  70.3k|                        ((val_u_l1 + val_u_t1)
  207|  70.3k|                                        >> (1 + left_avail1 + top_avail)) :128;
  208|  70.3k|        val_v1 = (left_avail1 + top_avail) ?
  ------------------
  |  Branch (208:18): [True: 70.3k, False: 0]
  ------------------
  209|  70.3k|                        ((val_v_l1 + val_v_t1)
  210|  70.3k|                                        >> (1 + left_avail1 + top_avail)) :128;
  211|  70.3k|        if(top_avail)
  ------------------
  |  Branch (211:12): [True: 55.4k, False: 14.8k]
  ------------------
  212|  55.4k|        {
  213|  55.4k|            val_u2 = val_u_t2 >> 2;
  214|  55.4k|            val_v2 = val_v_t2 >> 2;
  215|  55.4k|        }
  216|  14.8k|        else if(left_avail1)
  ------------------
  |  Branch (216:17): [True: 14.8k, False: 0]
  ------------------
  217|  14.8k|        {
  218|  14.8k|            val_u2 = val_u_l1 >> 2;
  219|  14.8k|            val_v2 = val_v_l1 >> 2;
  220|  14.8k|        }
  221|      0|        else
  222|      0|        {
  223|      0|            val_u2 = val_v2 = 128;
  224|      0|        }
  225|       |
  226|   351k|        for(row = 0; row < 4; row++)
  ------------------
  |  Branch (226:22): [True: 281k, False: 70.3k]
  ------------------
  227|   281k|        {
  228|       |            /* top left 4x4 block */
  229|  1.40M|            for(col = 0; col < 8; col += 2)
  ------------------
  |  Branch (229:26): [True: 1.12M, False: 281k]
  ------------------
  230|  1.12M|            {
  231|  1.12M|                *(pu1_dst + row * dst_strd + col) = val_u1;
  232|  1.12M|                *(pu1_dst + row * dst_strd + col + 1) = val_v1;
  233|  1.12M|            }
  234|       |            /* top right 4x4 block */
  235|  1.40M|            for(col = 8; col < 16; col += 2)
  ------------------
  |  Branch (235:26): [True: 1.12M, False: 281k]
  ------------------
  236|  1.12M|            {
  237|  1.12M|                *(pu1_dst + row * dst_strd + col) = val_u2;
  238|  1.12M|                *(pu1_dst + row * dst_strd + col + 1) = val_v2;
  239|  1.12M|            }
  240|   281k|        }
  241|       |
  242|  70.3k|        if(left_avail2)
  ------------------
  |  Branch (242:12): [True: 56.4k, False: 13.8k]
  ------------------
  243|  56.4k|        {
  244|  56.4k|            val_u1 = val_u_l2 >> 2;
  245|  56.4k|            val_v1 = val_v_l2 >> 2;
  246|  56.4k|        }
  247|  13.8k|        else if(top_avail)
  ------------------
  |  Branch (247:17): [True: 13.8k, False: 0]
  ------------------
  248|  13.8k|        {
  249|  13.8k|            val_u1 = val_u_t1 >> 2;
  250|  13.8k|            val_v1 = val_v_t1 >> 2;
  251|  13.8k|        }
  252|      0|        else
  253|      0|        {
  254|      0|            val_u1 = val_v1 = 128;
  255|      0|        }
  256|  70.3k|        val_u2 = (left_avail2 + top_avail) ?
  ------------------
  |  Branch (256:18): [True: 70.3k, False: 0]
  ------------------
  257|  70.3k|                        ((val_u_l2 + val_u_t2)
  258|  70.3k|                                        >> (1 + left_avail2 + top_avail)) : 128;
  259|  70.3k|        val_v2 = (left_avail2 + top_avail) ?
  ------------------
  |  Branch (259:18): [True: 70.3k, False: 0]
  ------------------
  260|  70.3k|                        ((val_v_l2 + val_v_t2)
  261|  70.3k|                                        >> (1 + left_avail2 + top_avail)) :  128;
  262|       |
  263|   351k|        for(row = 4; row < 8; row++)
  ------------------
  |  Branch (263:22): [True: 281k, False: 70.3k]
  ------------------
  264|   281k|        {
  265|       |            /* bottom left 4x4 block */
  266|  1.40M|            for(col = 0; col < 8; col += 2)
  ------------------
  |  Branch (266:26): [True: 1.12M, False: 281k]
  ------------------
  267|  1.12M|            {
  268|  1.12M|                *(pu1_dst + row * dst_strd + col) = val_u1;
  269|  1.12M|                *(pu1_dst + row * dst_strd + col + 1) = val_v1;
  270|  1.12M|            }
  271|       |            /* bottom right 4x4 block */
  272|  1.40M|            for(col = 8; col < 16; col += 2)
  ------------------
  |  Branch (272:26): [True: 1.12M, False: 281k]
  ------------------
  273|  1.12M|            {
  274|  1.12M|                *(pu1_dst + row * dst_strd + col) = val_u2;
  275|  1.12M|                *(pu1_dst + row * dst_strd + col + 1) = val_v2;
  276|  1.12M|            }
  277|   281k|        }
  278|  70.3k|    }
  279|  13.3k|    else
  280|  13.3k|    {
  281|       |        /* Both left and top are unavailable, set the block to 128 */
  282|   120k|        for(row = 0; row < 8; row++)
  ------------------
  |  Branch (282:22): [True: 106k, False: 13.3k]
  ------------------
  283|   106k|        {
  284|   106k|            memset(pu1_dst + row * dst_strd, 128, 8 * sizeof(UWORD16));
  285|   106k|        }
  286|  13.3k|    }
  287|  83.6k|}
ih264_intra_pred_chroma_8x8_mode_horz:
  323|  23.8k|{
  324|       |    /* Pointer to start of left predictors */
  325|  23.8k|    UWORD8 *pu1_left = NULL;
  326|       |
  327|       |    /* temp */
  328|  23.8k|    WORD32 rows, cols;
  329|       |
  330|  23.8k|    UNUSED(src_strd);
  ------------------
  |  |   45|  23.8k|#define UNUSED(x) ((void)(x))
  ------------------
  331|  23.8k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  23.8k|#define UNUSED(x) ((void)(x))
  ------------------
  332|  23.8k|    pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
  ------------------
  |  |  510|  23.8k|#define BLK8x8SIZE          8
  ------------------
  333|   214k|    for(rows = 0; rows < 8; rows++)
  ------------------
  |  Branch (333:19): [True: 190k, False: 23.8k]
  ------------------
  334|   190k|    {
  335|  1.71M|        for(cols = 0; cols < 16; cols += 2)
  ------------------
  |  Branch (335:23): [True: 1.52M, False: 190k]
  ------------------
  336|  1.52M|        {
  337|  1.52M|            *(pu1_dst + rows * dst_strd + cols) = *pu1_left;
  338|  1.52M|            *(pu1_dst + rows * dst_strd + cols + 1) = *(pu1_left + 1);
  339|  1.52M|        }
  340|   190k|        pu1_left -= 2;
  341|   190k|    }
  342|  23.8k|}
ih264_intra_pred_chroma_8x8_mode_vert:
  379|  12.9k|{
  380|       |    /* Pointer to start of top predictors */
  381|  12.9k|    UWORD8 *pu1_top = NULL;
  382|       |
  383|       |    /* temp */
  384|  12.9k|    WORD32 row;
  385|       |
  386|  12.9k|    UNUSED(src_strd);
  ------------------
  |  |   45|  12.9k|#define UNUSED(x) ((void)(x))
  ------------------
  387|  12.9k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  12.9k|#define UNUSED(x) ((void)(x))
  ------------------
  388|  12.9k|    pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
  ------------------
  |  |  510|  12.9k|#define BLK8x8SIZE          8
  ------------------
  389|       |
  390|       |    /* 8 bytes are copied from src to dst */
  391|  38.9k|    for(row = 0; row < 2; row++)
  ------------------
  |  Branch (391:18): [True: 25.9k, False: 12.9k]
  ------------------
  392|  25.9k|    {
  393|  25.9k|        memcpy(pu1_dst, pu1_top, 16);
  394|       |
  395|  25.9k|        pu1_dst += dst_strd;
  396|  25.9k|        memcpy(pu1_dst, pu1_top, 16);
  397|       |
  398|  25.9k|        pu1_dst += dst_strd;
  399|  25.9k|        memcpy(pu1_dst, pu1_top, 16);
  400|       |
  401|  25.9k|        pu1_dst += dst_strd;
  402|  25.9k|        memcpy(pu1_dst, pu1_top, 16);
  403|       |
  404|  25.9k|        pu1_dst += dst_strd;
  405|  25.9k|    }
  406|  12.9k|}
ih264_intra_pred_chroma_8x8_mode_plane:
  443|  2.72k|{
  444|       |    /* Pointer to start of left predictors */
  445|  2.72k|    UWORD8 *pu1_left = NULL;
  446|       |
  447|       |    /* Pointer to start of top predictors */
  448|  2.72k|    UWORD8 *pu1_top = NULL;
  449|       |
  450|       |    /* temp */
  451|  2.72k|    WORD32 val = 0;
  452|  2.72k|    WORD32 rows, cols;
  453|       |
  454|       |    /* Implementing section 8.3.4.4. The variables represent the corresponding
  455|       |     * variables in the section */
  456|  2.72k|    WORD32 a_u, b_u, c_u, h_u, v_u;
  457|  2.72k|    WORD32 a_v, b_v, c_v, h_v, v_v;
  458|       |
  459|  2.72k|    UNUSED(src_strd);
  ------------------
  |  |   45|  2.72k|#define UNUSED(x) ((void)(x))
  ------------------
  460|  2.72k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  2.72k|#define UNUSED(x) ((void)(x))
  ------------------
  461|  2.72k|    a_u = b_u = c_u = h_u = v_u = 0;
  462|  2.72k|    a_v = b_v = c_v = h_v = v_v = 0;
  463|       |
  464|  2.72k|    pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
  ------------------
  |  |  510|  2.72k|#define BLK8x8SIZE          8
  ------------------
  465|  2.72k|    pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
  ------------------
  |  |  510|  2.72k|#define BLK8x8SIZE          8
  ------------------
  466|       |
  467|  13.6k|    for(cols = 0; cols < 4; cols++)
  ------------------
  |  Branch (467:19): [True: 10.9k, False: 2.72k]
  ------------------
  468|  10.9k|    {
  469|  10.9k|        h_u += (cols + 1) * (pu1_top[8 + 2 * cols] - pu1_top[4 - 2 * cols]);
  470|  10.9k|        h_v += (cols + 1) * (pu1_top[8 + 2 * cols + 1] - pu1_top[4 - 2 * cols+ 1]);
  471|       |
  472|  10.9k|        v_u += (cols + 1) * (pu1_left[(4 + cols) * (-2)] - pu1_left[(2 - cols) * (-2)]);
  473|  10.9k|        v_v += (cols + 1)  * (pu1_left[(4 + cols) * (-2) + 1]  - pu1_left[(2 - cols) * (-2) + 1]);
  474|  10.9k|    }
  475|  2.72k|    a_u = 16 * (pu1_left[7 * (-2)] + pu1_top[14]);
  476|  2.72k|    a_v = 16 * (pu1_left[7 * (-2) + 1] + pu1_top[15]);
  477|  2.72k|    b_u = (34 * h_u + 32) >> 6;
  478|  2.72k|    b_v = (34 * h_v + 32) >> 6;
  479|  2.72k|    c_u = (34 * v_u + 32) >> 6;
  480|  2.72k|    c_v = (34 * v_v + 32) >> 6;
  481|       |
  482|  24.5k|    for(rows = 0; rows < 8; rows++)
  ------------------
  |  Branch (482:19): [True: 21.8k, False: 2.72k]
  ------------------
  483|  21.8k|    {
  484|   196k|        for(cols = 0; cols < 8; cols++)
  ------------------
  |  Branch (484:23): [True: 174k, False: 21.8k]
  ------------------
  485|   174k|        {
  486|   174k|            val = (a_u + b_u * (cols - 3) + c_u * (rows - 3) );
  487|   174k|            val = (val + 16) >> 5;
  488|   174k|            *(pu1_dst + rows * dst_strd + 2 * cols) = CLIP_U8(val);
  ------------------
  |  |   58|   174k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   174k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 6.49k, False: 168k]
  |  |  |  |  |  Branch (77:54): [True: 2.00k, False: 166k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  489|   174k|            val = (a_v + b_v * (cols - 3) + c_v * (rows - 3) );
  490|   174k|            val = (val + 16) >> 5;
  491|   174k|            *(pu1_dst + rows * dst_strd + 2 * cols + 1) = CLIP_U8(val);
  ------------------
  |  |   58|   174k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   174k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 7.28k, False: 167k]
  |  |  |  |  |  Branch (77:54): [True: 920, False: 166k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  492|   174k|        }
  493|  21.8k|    }
  494|  2.72k|}

ih264_deblk_luma_vert_bs4:
  108|  44.0k|{
  109|  44.0k|    UWORD8 p3, p2, p1, p0, q0, q1, q2, q3;
  110|  44.0k|    WORD32 pos_p3, pos_p2, pos_p1, pos_p0;
  111|  44.0k|    WORD32 pos_q0, pos_q1, pos_q2,pos_q3;
  112|  44.0k|    UWORD8 a_p, a_q; /* threshold variables */
  113|  44.0k|    WORD32 blk_strd = src_strd << 2; /* block_increment = src_strd * 4 */
  114|  44.0k|    UWORD8 *pu1_src_temp;
  115|  44.0k|    WORD8 i = 0, edge;
  116|       |
  117|  44.0k|    pos_q0 = 0;
  118|  44.0k|    pos_q1 = 1;
  119|  44.0k|    pos_q2 = 2;
  120|  44.0k|    pos_q3 = 3;
  121|  44.0k|    pos_p0 = -1;
  122|  44.0k|    pos_p1 = -2;
  123|  44.0k|    pos_p2 = -3;
  124|  44.0k|    pos_p3 = -4;
  125|       |
  126|   220k|    for(edge = 0; edge < 4; edge++, pu1_src += blk_strd)
  ------------------
  |  Branch (126:19): [True: 176k, False: 44.0k]
  ------------------
  127|   176k|    {
  128|   176k|        pu1_src_temp = pu1_src;
  129|   881k|        for(i = 0; i < 4; ++i, pu1_src_temp += src_strd)
  ------------------
  |  Branch (129:20): [True: 704k, False: 176k]
  ------------------
  130|   704k|        {
  131|   704k|            q0 = pu1_src_temp[pos_q0];
  132|   704k|            q1 = pu1_src_temp[pos_q1];
  133|   704k|            p0 = pu1_src_temp[pos_p0];
  134|   704k|            p1 = pu1_src_temp[pos_p1];
  135|       |
  136|       |            /* Filter Decision */
  137|   704k|            if((ABS(p0 - q0) >= alpha) ||
  ------------------
  |  |  100|   704k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 150k, False: 554k]
  |  |  ------------------
  ------------------
  |  Branch (137:16): [True: 150k, False: 554k]
  ------------------
  138|   554k|               (ABS(q1 - q0) >= beta)  ||
  ------------------
  |  |  100|   554k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 34.7k, False: 520k]
  |  |  ------------------
  ------------------
  |  Branch (138:16): [True: 19.1k, False: 535k]
  ------------------
  139|   535k|               (ABS(p1 - p0) >= beta))
  ------------------
  |  |  100|   535k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 26.5k, False: 509k]
  |  |  ------------------
  ------------------
  |  Branch (139:16): [True: 13.3k, False: 522k]
  ------------------
  140|   182k|                continue;
  141|       |
  142|   522k|            p2 = pu1_src_temp[pos_p2];
  143|   522k|            p3 = pu1_src_temp[pos_p3];
  144|   522k|            q2 = pu1_src_temp[pos_q2];
  145|   522k|            q3 = pu1_src_temp[pos_q3];
  146|       |
  147|   522k|            if(ABS(p0 - q0) < ((alpha >> 2) + 2))
  ------------------
  |  |  100|   522k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 106k, False: 415k]
  |  |  ------------------
  ------------------
  |  Branch (147:16): [True: 505k, False: 17.1k]
  ------------------
  148|   505k|            {
  149|       |                /* Threshold Variables */
  150|   505k|                a_p = (UWORD8)ABS(p2 - p0);
  ------------------
  |  |  100|   505k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 34.8k, False: 470k]
  |  |  ------------------
  ------------------
  151|   505k|                a_q = (UWORD8)ABS(q2 - q0);
  ------------------
  |  |  100|   505k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 21.8k, False: 483k]
  |  |  ------------------
  ------------------
  152|       |
  153|   505k|                if(a_p < beta)
  ------------------
  |  Branch (153:20): [True: 501k, False: 3.91k]
  ------------------
  154|   501k|                {
  155|       |                    /* p0', p1', p2' */
  156|   501k|                    pu1_src_temp[pos_p0] = ((p2 + X2(p1) + X2(p0) + X2(q0) + q1
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_p0] = ((p2 + X2(p1) + X2(p0) + X2(q0) + q1
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_p0] = ((p2 + X2(p1) + X2(p0) + X2(q0) + q1
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
  157|   501k|                                    + 4) >> 3);
  158|   501k|                    pu1_src_temp[pos_p1] = ((p2 + p1 + p0 + q0 + 2) >> 2);
  159|   501k|                    pu1_src_temp[pos_p2] =
  160|   501k|                                    ((X2(p3) + X3(p2) + p1 + p0 + q0
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
                                                  ((X2(p3) + X3(p2) + p1 + p0 + q0
  ------------------
  |  |   92|   501k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  161|   501k|                                                    + 4) >> 3);
  162|   501k|                }
  163|  3.91k|                else
  164|  3.91k|                {
  165|       |                    /* p0'*/
  166|  3.91k|                    pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2);
  ------------------
  |  |   91|  3.91k|#define X2(a)   ((a) << 1)
  ------------------
  167|  3.91k|                }
  168|       |
  169|   505k|                if(a_q < beta)
  ------------------
  |  Branch (169:20): [True: 501k, False: 3.83k]
  ------------------
  170|   501k|                {
  171|       |                    /* q0', q1', q2' */
  172|   501k|                    pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1) + q2
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1) + q2
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1) + q2
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
  173|   501k|                                    + 4) >> 3;
  174|   501k|                    pu1_src_temp[pos_q1] = (p0 + q0 + q1 + q2 + 2) >> 2;
  175|   501k|                    pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0 + 4)
  ------------------
  |  |   91|   501k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0 + 4)
  ------------------
  |  |   92|   501k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  176|   501k|                                    >> 3;
  177|   501k|                }
  178|  3.83k|                else
  179|  3.83k|                {
  180|       |                    /* q0'*/
  181|  3.83k|                    pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
  ------------------
  |  |   91|  3.83k|#define X2(a)   ((a) << 1)
  ------------------
  182|  3.83k|                }
  183|   505k|            }
  184|  17.1k|            else
  185|  17.1k|            {
  186|       |                /* p0', q0'*/
  187|  17.1k|                pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2);
  ------------------
  |  |   91|  17.1k|#define X2(a)   ((a) << 1)
  ------------------
  188|  17.1k|                pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
  ------------------
  |  |   91|  17.1k|#define X2(a)   ((a) << 1)
  ------------------
  189|  17.1k|            }
  190|   522k|        }
  191|   176k|    }
  192|  44.0k|}
ih264_deblk_luma_horz_bs4:
  226|  51.5k|{
  227|  51.5k|    UWORD8 p3, p2, p1, p0, q0, q1, q2, q3;
  228|  51.5k|    WORD32 pos_p3, pos_p2, pos_p1, pos_p0, pos_q0, pos_q1,
  229|  51.5k|                    pos_q2, pos_q3;
  230|  51.5k|    UWORD8 a_p, a_q; /* threshold variables */
  231|  51.5k|    UWORD8 *pu1_p3; /* pointer to the src sample p3 */
  232|  51.5k|    UWORD8 *pu1_p3_temp;
  233|  51.5k|    UWORD8 *pu1_src_temp;
  234|  51.5k|    WORD8 i = 0, edge;
  235|       |
  236|  51.5k|    pu1_p3 = pu1_src - (src_strd << 2);
  237|  51.5k|    pos_q0 = 0;
  238|  51.5k|    pos_q1 = src_strd;
  239|  51.5k|    pos_q2 = X2(src_strd);
  ------------------
  |  |   91|  51.5k|#define X2(a)   ((a) << 1)
  ------------------
  240|  51.5k|    pos_q3 = X3(src_strd);
  ------------------
  |  |   92|  51.5k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  241|  51.5k|    pos_p0 = X3(src_strd);
  ------------------
  |  |   92|  51.5k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  242|  51.5k|    pos_p1 = X2(src_strd);
  ------------------
  |  |   91|  51.5k|#define X2(a)   ((a) << 1)
  ------------------
  243|  51.5k|    pos_p2 = src_strd;
  244|  51.5k|    pos_p3 = 0;
  245|       |
  246|   257k|    for(edge = 0; edge < 4; edge++, pu1_src += 4, pu1_p3 += 4)
  ------------------
  |  Branch (246:19): [True: 206k, False: 51.5k]
  ------------------
  247|   206k|    {
  248|   206k|        pu1_src_temp = pu1_src;
  249|   206k|        pu1_p3_temp = pu1_p3;
  250|  1.03M|        for(i = 0; i < 4; ++i, pu1_src_temp++, pu1_p3_temp++)
  ------------------
  |  Branch (250:20): [True: 825k, False: 206k]
  ------------------
  251|   825k|        {
  252|   825k|            q0 = pu1_src_temp[pos_q0];
  253|   825k|            q1 = pu1_src_temp[pos_q1];
  254|   825k|            p0 = pu1_p3_temp[pos_p0];
  255|   825k|            p1 = pu1_p3_temp[pos_p1];
  256|       |
  257|       |            /* Filter Decision */
  258|   825k|            if((ABS(p0 - q0) >= alpha) ||
  ------------------
  |  |  100|   825k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 144k, False: 681k]
  |  |  ------------------
  ------------------
  |  Branch (258:16): [True: 171k, False: 653k]
  ------------------
  259|   653k|               (ABS(q1 - q0) >= beta) ||
  ------------------
  |  |  100|   653k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 33.9k, False: 619k]
  |  |  ------------------
  ------------------
  |  Branch (259:16): [True: 17.1k, False: 636k]
  ------------------
  260|   636k|               (ABS(p1 - p0) >= beta))
  ------------------
  |  |  100|   636k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 23.1k, False: 613k]
  |  |  ------------------
  ------------------
  |  Branch (260:16): [True: 10.2k, False: 626k]
  ------------------
  261|   199k|                continue;
  262|       |
  263|   626k|            p2 = pu1_p3_temp[pos_p2];
  264|   626k|            p3 = pu1_p3_temp[pos_p3];
  265|   626k|            q2 = pu1_src_temp[pos_q2];
  266|   626k|            q3 = pu1_src_temp[pos_q3];
  267|       |
  268|   626k|            if(ABS(p0 - q0) < ((alpha >> 2) + 2))
  ------------------
  |  |  100|   626k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 101k, False: 525k]
  |  |  ------------------
  ------------------
  |  Branch (268:16): [True: 588k, False: 38.0k]
  ------------------
  269|   588k|            {
  270|       |                /* Threshold Variables */
  271|   588k|                a_p = ABS(p2 - p0);
  ------------------
  |  |  100|   588k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 28.7k, False: 559k]
  |  |  ------------------
  ------------------
  272|   588k|                a_q = ABS(q2 - q0);
  ------------------
  |  |  100|   588k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 31.6k, False: 556k]
  |  |  ------------------
  ------------------
  273|       |
  274|   588k|                if((a_p < beta))
  ------------------
  |  Branch (274:20): [True: 581k, False: 6.71k]
  ------------------
  275|   581k|                {
  276|       |                    /* p0', p1', p2' */
  277|   581k|                    pu1_p3_temp[pos_p0] = (p2 + X2(p1) + X2(p0) + X2(q0) + q1
  ------------------
  |  |   91|   581k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_p3_temp[pos_p0] = (p2 + X2(p1) + X2(p0) + X2(q0) + q1
  ------------------
  |  |   91|   581k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_p3_temp[pos_p0] = (p2 + X2(p1) + X2(p0) + X2(q0) + q1
  ------------------
  |  |   91|   581k|#define X2(a)   ((a) << 1)
  ------------------
  278|   581k|                                    + 4) >> 3;
  279|   581k|                    pu1_p3_temp[pos_p1] = (p2 + p1 + p0 + q0 + 2) >> 2;
  280|   581k|                    pu1_p3_temp[pos_p2] =
  281|   581k|                                    (X2(p3) + X3(p2) + p1 + p0 + q0
  ------------------
  |  |   91|   581k|#define X2(a)   ((a) << 1)
  ------------------
                                                  (X2(p3) + X3(p2) + p1 + p0 + q0
  ------------------
  |  |   92|   581k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  282|   581k|                                                    + 4) >> 3;
  283|   581k|                }
  284|  6.71k|                else
  285|  6.71k|                {
  286|       |                    /* p0'*/
  287|  6.71k|                    pu1_p3_temp[pos_p0] = (X2(p1) + p0 + q1 + 2) >> 2;
  ------------------
  |  |   91|  6.71k|#define X2(a)   ((a) << 1)
  ------------------
  288|  6.71k|                }
  289|       |
  290|   588k|                if(a_q < beta)
  ------------------
  |  Branch (290:20): [True: 580k, False: 7.92k]
  ------------------
  291|   580k|                {
  292|       |                    /* q0', q1', q2' */
  293|   580k|                    pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1)
  ------------------
  |  |   91|   580k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1)
  ------------------
  |  |   91|   580k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1)
  ------------------
  |  |   91|   580k|#define X2(a)   ((a) << 1)
  ------------------
  294|   580k|                                    + q2 + 4) >> 3;
  295|   580k|                    pu1_src_temp[pos_q1] = (p0 + q0 + q1 + q2 + 2) >> 2;
  296|   580k|                    pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0
  ------------------
  |  |   91|   580k|#define X2(a)   ((a) << 1)
  ------------------
                                  pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0
  ------------------
  |  |   92|   580k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  297|   580k|                                    + 4) >> 3;
  298|   580k|                }
  299|  7.92k|                else
  300|  7.92k|                {
  301|       |                    /* q0'*/
  302|  7.92k|                    pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
  ------------------
  |  |   91|  7.92k|#define X2(a)   ((a) << 1)
  ------------------
  303|  7.92k|                }
  304|   588k|            }
  305|  38.0k|            else
  306|  38.0k|            {
  307|       |                /* p0', q0'*/
  308|  38.0k|                pu1_p3_temp[pos_p0] = (X2(p1) + p0 + q1 + 2) >> 2;
  ------------------
  |  |   91|  38.0k|#define X2(a)   ((a) << 1)
  ------------------
  309|  38.0k|                pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
  ------------------
  |  |   91|  38.0k|#define X2(a)   ((a) << 1)
  ------------------
  310|  38.0k|            }
  311|   626k|        }
  312|   206k|    }
  313|  51.5k|}
ih264_deblk_luma_vert_bslt4:
  538|   254k|{
  539|   254k|    WORD8 i = 0, edge;
  540|   254k|    UWORD8 p2, p1, p0, q0, q1, q2;
  541|   254k|    WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2;
  542|   254k|    UWORD8 a_p, a_q; /* threshold variables */
  543|   254k|    WORD32 blk_strd = src_strd << 2; /* block_increment = src_strd * 4 */
  544|   254k|    UWORD8 *pu1_src_temp;
  545|   254k|    WORD8 delta;
  546|   254k|    WORD8 tc;
  547|   254k|    WORD16 val;
  548|   254k|    UWORD8 tc0, u1_bs;
  549|       |
  550|   254k|    pos_q0 = 0;
  551|   254k|    pos_q1 = 1;
  552|   254k|    pos_q2 = 2;
  553|   254k|    pos_p0 = -1;
  554|   254k|    pos_p1 = -2;
  555|   254k|    pos_p2 = -3;
  556|       |
  557|  1.27M|    for(edge = 0; edge < 4; edge++, pu1_src += blk_strd)
  ------------------
  |  Branch (557:19): [True: 1.01M, False: 254k]
  ------------------
  558|  1.01M|    {
  559|  1.01M|        pu1_src_temp = pu1_src;
  560|       |        /* Filter Decision */
  561|  1.01M|        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
  562|  1.01M|        if(!u1_bs)
  ------------------
  |  Branch (562:12): [True: 89.2k, False: 929k]
  ------------------
  563|  89.2k|            continue;
  564|       |        /* tc0 */
  565|   929k|        tc0 = pu1_cliptab[u1_bs];
  566|  4.64M|        for(i = 0; i < 4; ++i, pu1_src_temp += src_strd)
  ------------------
  |  Branch (566:20): [True: 3.71M, False: 929k]
  ------------------
  567|  3.71M|        {
  568|  3.71M|            q0 = pu1_src_temp[pos_q0];
  569|  3.71M|            q1 = pu1_src_temp[pos_q1];
  570|  3.71M|            p0 = pu1_src_temp[pos_p0];
  571|  3.71M|            p1 = pu1_src_temp[pos_p1];
  572|       |
  573|       |            /* Filter Decision */
  574|  3.71M|            if((ABS(p0 - q0) >= alpha) ||
  ------------------
  |  |  100|  3.71M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 457k, False: 3.25M]
  |  |  ------------------
  ------------------
  |  Branch (574:16): [True: 510k, False: 3.20M]
  ------------------
  575|  3.20M|               (ABS(q1 - q0) >= beta) ||
  ------------------
  |  |  100|  3.20M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 202k, False: 3.00M]
  |  |  ------------------
  ------------------
  |  Branch (575:16): [True: 118k, False: 3.08M]
  ------------------
  576|  3.08M|               (ABS(p1 - p0) >= beta))
  ------------------
  |  |  100|  3.08M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 221k, False: 2.86M]
  |  |  ------------------
  ------------------
  |  Branch (576:16): [True: 69.5k, False: 3.01M]
  ------------------
  577|   698k|                continue;
  578|       |
  579|  3.01M|            q2 = pu1_src_temp[pos_q2];
  580|  3.01M|            p2 = pu1_src_temp[pos_p2];
  581|       |
  582|  3.01M|            a_p = ABS(p2 - p0);
  ------------------
  |  |  100|  3.01M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 259k, False: 2.75M]
  |  |  ------------------
  ------------------
  583|  3.01M|            a_q = ABS(q2 - q0);
  ------------------
  |  |  100|  3.01M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 158k, False: 2.85M]
  |  |  ------------------
  ------------------
  584|       |
  585|       |            /* tc */
  586|  3.01M|            tc = tc0 + (a_p < beta) + (a_q < beta);
  587|       |
  588|  3.01M|            val = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3);
  589|  3.01M|            delta = CLIP3(-tc, tc, val);
  ------------------
  |  |   77|  3.01M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 30.5k, False: 2.98M]
  |  |  |  Branch (77:54): [True: 35.0k, False: 2.95M]
  |  |  ------------------
  ------------------
  590|       |
  591|       |            /* p0' */
  592|  3.01M|            val = p0 + delta;
  593|  3.01M|            pu1_src_temp[pos_p0] = CLIP_U8(val);
  ------------------
  |  |   58|  3.01M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  3.01M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 2.02k, False: 3.01M]
  |  |  |  |  |  Branch (77:54): [True: 316, False: 3.01M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  594|       |            /* q0' */
  595|  3.01M|            val = q0 - delta;
  596|  3.01M|            pu1_src_temp[pos_q0] = CLIP_U8(val);
  ------------------
  |  |   58|  3.01M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  3.01M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.16k, False: 3.01M]
  |  |  |  |  |  Branch (77:54): [True: 422, False: 3.01M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  597|       |
  598|       |            /* Luma only */
  599|  3.01M|            if(a_p < beta)
  ------------------
  |  Branch (599:16): [True: 2.95M, False: 65.5k]
  ------------------
  600|  2.95M|            {
  601|       |                /* p1' */
  602|  2.95M|                val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1);
  603|  2.95M|                pu1_src_temp[pos_p1] += CLIP3(-tc0, tc0, val);
  ------------------
  |  |   77|  2.95M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 35.4k, False: 2.91M]
  |  |  |  Branch (77:54): [True: 40.7k, False: 2.87M]
  |  |  ------------------
  ------------------
  604|  2.95M|            }
  605|       |
  606|  3.01M|            if(a_q < beta)
  ------------------
  |  Branch (606:16): [True: 2.96M, False: 52.2k]
  ------------------
  607|  2.96M|            {
  608|       |                /* q1' */
  609|  2.96M|                val = ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1);
  610|  2.96M|                pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val);
  ------------------
  |  |   77|  2.96M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 40.4k, False: 2.92M]
  |  |  |  Branch (77:54): [True: 35.3k, False: 2.89M]
  |  |  ------------------
  ------------------
  611|  2.96M|            }
  612|  3.01M|        }
  613|   929k|    }
  614|   254k|}
ih264_deblk_luma_horz_bslt4:
  772|   277k|{
  773|   277k|    UWORD8 p2, p1, p0, q0, q1, q2;
  774|   277k|    WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2;
  775|   277k|    UWORD8 a_p, a_q; /* Threshold variables */
  776|   277k|    UWORD8 *pu1_p2; /* Pointer to the src sample p2 */
  777|   277k|    UWORD8 *pu1_p2_temp;
  778|   277k|    UWORD8 *pu1_src_temp;
  779|   277k|    WORD8 i = 0, edge;
  780|   277k|    WORD8 delta;
  781|   277k|    WORD8 tc;
  782|   277k|    WORD16 val;
  783|   277k|    UWORD8 tc0, u1_bs;
  784|       |
  785|   277k|    pu1_p2 = pu1_src - (src_strd << 2);
  786|   277k|    pos_q0 = 0;
  787|   277k|    pos_q1 = src_strd;
  788|   277k|    pos_q2 = X2(src_strd);
  ------------------
  |  |   91|   277k|#define X2(a)   ((a) << 1)
  ------------------
  789|   277k|    pos_p0 = X3(src_strd);
  ------------------
  |  |   92|   277k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  790|   277k|    pos_p1 = X2(src_strd);
  ------------------
  |  |   91|   277k|#define X2(a)   ((a) << 1)
  ------------------
  791|   277k|    pos_p2 = src_strd;
  792|       |
  793|  1.38M|    for(edge = 0; edge < 4; edge++, pu1_src += 4, pu1_p2 += 4)
  ------------------
  |  Branch (793:19): [True: 1.11M, False: 277k]
  ------------------
  794|  1.11M|    {
  795|  1.11M|        pu1_src_temp = pu1_src;
  796|  1.11M|        pu1_p2_temp = pu1_p2;
  797|       |
  798|       |        /* Filter Decision */
  799|  1.11M|        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
  800|  1.11M|        if(!u1_bs)
  ------------------
  |  Branch (800:12): [True: 95.1k, False: 1.01M]
  ------------------
  801|  95.1k|            continue;
  802|       |        /* tc0 */
  803|  1.01M|        tc0 = pu1_cliptab[u1_bs];
  804|       |
  805|  5.08M|        for(i = 0; i < 4; ++i, pu1_src_temp++, pu1_p2_temp++)
  ------------------
  |  Branch (805:20): [True: 4.06M, False: 1.01M]
  ------------------
  806|  4.06M|        {
  807|  4.06M|            q0 = pu1_src_temp[pos_q0];
  808|  4.06M|            q1 = pu1_src_temp[pos_q1];
  809|  4.06M|            p0 = pu1_p2_temp[pos_p0];
  810|  4.06M|            p1 = pu1_p2_temp[pos_p1];
  811|       |
  812|       |            /* Filter Decision */
  813|  4.06M|            if((ABS(p0 - q0) >= alpha) ||
  ------------------
  |  |  100|  4.06M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 452k, False: 3.61M]
  |  |  ------------------
  ------------------
  |  Branch (813:16): [True: 542k, False: 3.52M]
  ------------------
  814|  3.52M|               (ABS(q1 - q0) >= beta) ||
  ------------------
  |  |  100|  3.52M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 217k, False: 3.30M]
  |  |  ------------------
  ------------------
  |  Branch (814:16): [True: 115k, False: 3.40M]
  ------------------
  815|  3.40M|               (ABS(p1 - p0) >= beta))
  ------------------
  |  |  100|  3.40M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 225k, False: 3.18M]
  |  |  ------------------
  ------------------
  |  Branch (815:16): [True: 74.1k, False: 3.33M]
  ------------------
  816|   732k|                continue;
  817|       |
  818|  3.33M|            q2 = pu1_src_temp[pos_q2];
  819|  3.33M|            p2 = pu1_p2_temp[pos_p2];
  820|       |
  821|  3.33M|            a_p = ABS(p2 - p0);
  ------------------
  |  |  100|  3.33M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 268k, False: 3.06M]
  |  |  ------------------
  ------------------
  822|  3.33M|            a_q = ABS(q2 - q0);
  ------------------
  |  |  100|  3.33M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 165k, False: 3.16M]
  |  |  ------------------
  ------------------
  823|       |
  824|       |            /* tc */
  825|  3.33M|            tc = tc0 + (a_p < beta) + (a_q < beta);
  826|  3.33M|            val = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3);
  827|  3.33M|            delta = CLIP3(-tc, tc, val);
  ------------------
  |  |   77|  3.33M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 22.7k, False: 3.31M]
  |  |  |  Branch (77:54): [True: 26.5k, False: 3.28M]
  |  |  ------------------
  ------------------
  828|       |            /* p0' */
  829|  3.33M|            val = p0 + delta;
  830|  3.33M|            pu1_p2_temp[pos_p0] = CLIP_U8(val);
  ------------------
  |  |   58|  3.33M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  3.33M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.93k, False: 3.33M]
  |  |  |  |  |  Branch (77:54): [True: 595, False: 3.33M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  831|       |            /* q0' */
  832|  3.33M|            val = q0 - delta;
  833|  3.33M|            pu1_src_temp[pos_q0] = CLIP_U8(val);
  ------------------
  |  |   58|  3.33M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  3.33M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.44k, False: 3.33M]
  |  |  |  |  |  Branch (77:54): [True: 685, False: 3.32M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  834|       |
  835|       |            /* Luma */
  836|  3.33M|            if(a_p < beta)
  ------------------
  |  Branch (836:16): [True: 3.27M, False: 60.6k]
  ------------------
  837|  3.27M|            {
  838|       |                /* p1' */
  839|  3.27M|                val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1);
  840|  3.27M|                pu1_p2_temp[pos_p1] += CLIP3(-tc0, tc0, val);
  ------------------
  |  |   77|  3.27M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 39.3k, False: 3.23M]
  |  |  |  Branch (77:54): [True: 31.3k, False: 3.20M]
  |  |  ------------------
  ------------------
  841|  3.27M|            }
  842|       |
  843|  3.33M|            if(a_q < beta)
  ------------------
  |  Branch (843:16): [True: 3.27M, False: 59.3k]
  ------------------
  844|  3.27M|            {
  845|       |                /* q1' */
  846|  3.27M|                val = ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1);
  847|  3.27M|                pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val);
  ------------------
  |  |   77|  3.27M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 33.4k, False: 3.24M]
  |  |  |  Branch (77:54): [True: 31.6k, False: 3.20M]
  |  |  ------------------
  ------------------
  848|  3.27M|            }
  849|  3.33M|        }
  850|  1.01M|    }
  851|   277k|}
ih264_deblk_chroma_vert_bs4:
 1456|  44.0k|{
 1457|  44.0k|    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of U */
 1458|  44.0k|    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of V */
 1459|  44.0k|    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
 1460|  44.0k|    WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * 2*/
 1461|  44.0k|    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
 1462|  44.0k|    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
 1463|  44.0k|    WORD8 i = 0, edge;
 1464|       |
 1465|  44.0k|    pos_q0 = 0;
 1466|  44.0k|    pos_q1 = 2;
 1467|  44.0k|    pos_p0 = -2;
 1468|  44.0k|    pos_p1 = -4;
 1469|       |
 1470|   220k|    for(edge = 0; edge < 4;
  ------------------
  |  Branch (1470:19): [True: 176k, False: 44.0k]
  ------------------
 1471|   176k|                    edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
 1472|   176k|    {
 1473|   176k|        pu1_src_temp_u = pu1_src_u;
 1474|   176k|        pu1_src_temp_v = pu1_src_v;
 1475|   528k|        for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v +=
  ------------------
  |  Branch (1475:20): [True: 352k, False: 176k]
  ------------------
 1476|   352k|                        src_strd)
 1477|   352k|        {
 1478|   352k|            q0_u = pu1_src_temp_u[pos_q0];
 1479|   352k|            q1_u = pu1_src_temp_u[pos_q1];
 1480|   352k|            p0_u = pu1_src_temp_u[pos_p0];
 1481|   352k|            p1_u = pu1_src_temp_u[pos_p1];
 1482|   352k|            q0_v = pu1_src_temp_v[pos_q0];
 1483|   352k|            q1_v = pu1_src_temp_v[pos_q1];
 1484|   352k|            p0_v = pu1_src_temp_v[pos_p0];
 1485|   352k|            p1_v = pu1_src_temp_v[pos_p1];
 1486|       |
 1487|       |            /* Filter Decision */
 1488|   352k|            if((ABS(p0_u - q0_u) < alpha_cb) &&
  ------------------
  |  |  100|   352k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 42.5k, False: 309k]
  |  |  ------------------
  ------------------
  |  Branch (1488:16): [True: 271k, False: 81.2k]
  ------------------
 1489|   271k|               (ABS(q1_u - q0_u) < beta_cb) &&
  ------------------
  |  |  100|   271k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 7.24k, False: 263k]
  |  |  ------------------
  ------------------
  |  Branch (1489:16): [True: 267k, False: 3.34k]
  ------------------
 1490|   267k|               (ABS(p1_u - p0_u) < beta_cb))
  ------------------
  |  |  100|   267k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 8.12k, False: 259k]
  |  |  ------------------
  ------------------
  |  Branch (1490:16): [True: 265k, False: 2.48k]
  ------------------
 1491|   265k|            {
 1492|       |                /* p0' */
 1493|   265k|                pu1_src_temp_u[pos_p0] = ((X2(p1_u) + p0_u + q1_u + 2) >> 2);
  ------------------
  |  |   91|   265k|#define X2(a)   ((a) << 1)
  ------------------
 1494|       |                /* q0' */
 1495|   265k|                pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2;
  ------------------
  |  |   91|   265k|#define X2(a)   ((a) << 1)
  ------------------
 1496|   265k|            }
 1497|       |
 1498|       |            /* Filter Decision */
 1499|   352k|            if((ABS(p0_v - q0_v) < alpha_cr) &&
  ------------------
  |  |  100|   352k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 32.9k, False: 319k]
  |  |  ------------------
  ------------------
  |  Branch (1499:16): [True: 270k, False: 82.3k]
  ------------------
 1500|   270k|               (ABS(q1_v - q0_v) < beta_cr) &&
  ------------------
  |  |  100|   270k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 5.54k, False: 264k]
  |  |  ------------------
  ------------------
  |  Branch (1500:16): [True: 267k, False: 2.45k]
  ------------------
 1501|   267k|               (ABS(p1_v - p0_v) < beta_cr))
  ------------------
  |  |  100|   267k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 5.86k, False: 261k]
  |  |  ------------------
  ------------------
  |  Branch (1501:16): [True: 264k, False: 2.94k]
  ------------------
 1502|   264k|            {
 1503|       |                /* p0' */
 1504|   264k|                pu1_src_temp_v[pos_p0] = ((X2(p1_v) + p0_v + q1_v + 2) >> 2);
  ------------------
  |  |   91|   264k|#define X2(a)   ((a) << 1)
  ------------------
 1505|       |                /* q0' */
 1506|   264k|                pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2;
  ------------------
  |  |   91|   264k|#define X2(a)   ((a) << 1)
  ------------------
 1507|   264k|            }
 1508|   352k|        }
 1509|   176k|    }
 1510|  44.0k|}
ih264_deblk_chroma_horz_bs4:
 1552|  51.5k|{
 1553|  51.5k|    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of U */
 1554|  51.5k|    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of V */
 1555|  51.5k|    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
 1556|  51.5k|    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
 1557|  51.5k|    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
 1558|  51.5k|    UWORD8 *pu1_p1_u; /* Pointer to the src sample p1 of U */
 1559|  51.5k|    UWORD8 *pu1_p1_v; /* Pointer to the src sample p1 of U */
 1560|  51.5k|    UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v;
 1561|  51.5k|    WORD8 i = 0, edge;
 1562|       |
 1563|  51.5k|    pu1_p1_u = pu1_src_u - (src_strd << 1);
 1564|  51.5k|    pu1_p1_v = pu1_src_v - (src_strd << 1);
 1565|  51.5k|    pos_q0 = 0;
 1566|  51.5k|    pos_q1 = src_strd;
 1567|  51.5k|    pos_p0 = src_strd;
 1568|  51.5k|    pos_p1 = 0;
 1569|       |
 1570|   257k|    for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4, pu1_src_v +=
  ------------------
  |  Branch (1570:19): [True: 206k, False: 51.5k]
  ------------------
 1571|   206k|                    4, pu1_p1_v += 4)
 1572|   206k|    {
 1573|   206k|        pu1_src_temp_u = pu1_src_u;
 1574|   206k|        pu1_p1_temp_u = pu1_p1_u;
 1575|   206k|        pu1_src_temp_v = pu1_src_v;
 1576|   206k|        pu1_p1_temp_v = pu1_p1_v;
 1577|   618k|        for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2,
  ------------------
  |  Branch (1577:20): [True: 412k, False: 206k]
  ------------------
 1578|   412k|                       pu1_src_temp_v += 2, pu1_p1_temp_v += 2)
 1579|   412k|        {
 1580|   412k|            q0_u = pu1_src_temp_u[pos_q0];
 1581|   412k|            q1_u = pu1_src_temp_u[pos_q1];
 1582|   412k|            p0_u = pu1_p1_temp_u[pos_p0];
 1583|   412k|            p1_u = pu1_p1_temp_u[pos_p1];
 1584|       |
 1585|   412k|            q0_v = pu1_src_temp_v[pos_q0];
 1586|   412k|            q1_v = pu1_src_temp_v[pos_q1];
 1587|   412k|            p0_v = pu1_p1_temp_v[pos_p0];
 1588|   412k|            p1_v = pu1_p1_temp_v[pos_p1];
 1589|       |
 1590|       |            /* Filter Decision */
 1591|   412k|            if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
  ------------------
  |  |  100|   412k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 55.4k, False: 357k]
  |  |  ------------------
  ------------------
                          if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
  ------------------
  |  |  100|   291k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 6.36k, False: 285k]
  |  |  ------------------
  ------------------
  |  Branch (1591:16): [True: 291k, False: 121k]
  |  Branch (1591:47): [True: 285k, False: 5.96k]
  ------------------
 1592|   285k|                            && ABS(p1_u - p0_u) < beta_cb)
  ------------------
  |  |  100|   285k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 6.55k, False: 278k]
  |  |  ------------------
  ------------------
  |  Branch (1592:32): [True: 283k, False: 2.16k]
  ------------------
 1593|   283k|            {
 1594|       |                /* p0' */
 1595|   283k|                pu1_p1_temp_u[pos_p0] = (X2(p1_u) + p0_u + q1_u + 2) >> 2;
  ------------------
  |  |   91|   283k|#define X2(a)   ((a) << 1)
  ------------------
 1596|       |                /* q0' */
 1597|   283k|                pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2;
  ------------------
  |  |   91|   283k|#define X2(a)   ((a) << 1)
  ------------------
 1598|   283k|            }
 1599|       |
 1600|       |            /* Filter Decision */
 1601|   412k|            if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
  ------------------
  |  |  100|   412k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 54.7k, False: 357k]
  |  |  ------------------
  ------------------
                          if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
  ------------------
  |  |  100|   291k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 5.99k, False: 285k]
  |  |  ------------------
  ------------------
  |  Branch (1601:16): [True: 291k, False: 121k]
  |  Branch (1601:47): [True: 284k, False: 6.72k]
  ------------------
 1602|   284k|                            && ABS(p1_v - p0_v) < beta_cr)
  ------------------
  |  |  100|   284k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 3.65k, False: 280k]
  |  |  ------------------
  ------------------
  |  Branch (1602:32): [True: 282k, False: 2.24k]
  ------------------
 1603|   282k|            {
 1604|       |                /* p0' */
 1605|   282k|                pu1_p1_temp_v[pos_p0] = (X2(p1_v) + p0_v + q1_v + 2) >> 2;
  ------------------
  |  |   91|   282k|#define X2(a)   ((a) << 1)
  ------------------
 1606|       |                /* q0' */
 1607|   282k|                pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2;
  ------------------
  |  |   91|   282k|#define X2(a)   ((a) << 1)
  ------------------
 1608|   282k|            }
 1609|   412k|        }
 1610|   206k|    }
 1611|  51.5k|}
ih264_deblk_chroma_vert_bslt4:
 1665|   137k|{
 1666|   137k|    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
 1667|   137k|    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
 1668|   137k|    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
 1669|   137k|    WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * 2 */
 1670|   137k|    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
 1671|   137k|    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
 1672|   137k|    WORD8 i = 0, edge;
 1673|   137k|    WORD8 delta;
 1674|   137k|    WORD8 tcb, tcr;
 1675|   137k|    WORD16 val;
 1676|   137k|    UWORD8 tcb0, tcr0, u1_bs;
 1677|       |
 1678|   137k|    pos_q0 = 0;
 1679|   137k|    pos_q1 = 2;
 1680|   137k|    pos_p0 = -2;
 1681|   137k|    pos_p1 = -4;
 1682|       |
 1683|   685k|    for(edge = 0; edge < 4;
  ------------------
  |  Branch (1683:19): [True: 548k, False: 137k]
  ------------------
 1684|   548k|                    edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
 1685|   548k|    {
 1686|   548k|        pu1_src_temp_u = pu1_src_u;
 1687|   548k|        pu1_src_temp_v = pu1_src_v;
 1688|       |        /* Filter Decision */
 1689|   548k|        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
 1690|   548k|        if(!u1_bs)
  ------------------
  |  Branch (1690:12): [True: 51.2k, False: 496k]
  ------------------
 1691|  51.2k|            continue;
 1692|       |        /* tc0 */
 1693|   496k|        tcb0 = pu1_cliptab_cb[u1_bs];
 1694|   496k|        tcr0 = pu1_cliptab_cr[u1_bs];
 1695|   496k|        tcb = tcb0 + 1;
 1696|   496k|        tcr = tcr0 + 1;
 1697|  1.49M|        for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v +=
  ------------------
  |  Branch (1697:20): [True: 993k, False: 496k]
  ------------------
 1698|   993k|                        src_strd)
 1699|   993k|        {
 1700|   993k|            q0_u = pu1_src_temp_u[pos_q0];
 1701|   993k|            q1_u = pu1_src_temp_u[pos_q1];
 1702|   993k|            p0_u = pu1_src_temp_u[pos_p0];
 1703|   993k|            p1_u = pu1_src_temp_u[pos_p1];
 1704|       |
 1705|   993k|            q0_v = pu1_src_temp_v[pos_q0];
 1706|   993k|            q1_v = pu1_src_temp_v[pos_q1];
 1707|   993k|            p0_v = pu1_src_temp_v[pos_p0];
 1708|   993k|            p1_v = pu1_src_temp_v[pos_p1];
 1709|       |
 1710|       |            /* Filter Decision */
 1711|   993k|            if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
  ------------------
  |  |  100|   993k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 86.9k, False: 906k]
  |  |  ------------------
  ------------------
                          if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
  ------------------
  |  |  100|   884k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 25.5k, False: 859k]
  |  |  ------------------
  ------------------
  |  Branch (1711:16): [True: 884k, False: 109k]
  |  Branch (1711:47): [True: 871k, False: 13.1k]
  ------------------
 1712|   871k|                            && ABS(p1_u - p0_u) < beta_cb)
  ------------------
  |  |  100|   871k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 17.5k, False: 853k]
  |  |  ------------------
  ------------------
  |  Branch (1712:32): [True: 867k, False: 3.54k]
  ------------------
 1713|   867k|            {
 1714|   867k|                val = ((((q0_u - p0_u) << 2) + (p1_u - q1_u) + 4) >> 3);
 1715|   867k|                delta = CLIP3(-tcb, tcb, val);
  ------------------
  |  |   77|   867k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 6.95k, False: 861k]
  |  |  |  Branch (77:54): [True: 6.04k, False: 854k]
  |  |  ------------------
  ------------------
 1716|       |                /* p0' */
 1717|   867k|                val = p0_u + delta;
 1718|   867k|                pu1_src_temp_u[pos_p0] = CLIP_U8(val);
  ------------------
  |  |   58|   867k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   867k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.28k, False: 866k]
  |  |  |  |  |  Branch (77:54): [True: 552, False: 866k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1719|       |                /* q0' */
 1720|   867k|                val = q0_u - delta;
 1721|   867k|                pu1_src_temp_u[pos_q0] = CLIP_U8(val);
  ------------------
  |  |   58|   867k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   867k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 392, False: 867k]
  |  |  |  |  |  Branch (77:54): [True: 529, False: 867k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1722|   867k|            }
 1723|       |
 1724|       |            /* Filter Decision */
 1725|   993k|            if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
  ------------------
  |  |  100|   993k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 76.6k, False: 917k]
  |  |  ------------------
  ------------------
                          if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
  ------------------
  |  |  100|   883k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 23.7k, False: 859k]
  |  |  ------------------
  ------------------
  |  Branch (1725:16): [True: 883k, False: 110k]
  |  Branch (1725:47): [True: 874k, False: 9.01k]
  ------------------
 1726|   874k|                            && ABS(p1_v - p0_v) < beta_cr)
  ------------------
  |  |  100|   874k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 14.6k, False: 859k]
  |  |  ------------------
  ------------------
  |  Branch (1726:32): [True: 870k, False: 3.61k]
  ------------------
 1727|   870k|            {
 1728|   870k|                val = ((((q0_v - p0_v) << 2) + (p1_v - q1_v) + 4) >> 3);
 1729|   870k|                delta = CLIP3(-tcr, tcr, val);
  ------------------
  |  |   77|   870k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 2.63k, False: 867k]
  |  |  |  Branch (77:54): [True: 2.67k, False: 865k]
  |  |  ------------------
  ------------------
 1730|       |                /* p0' */
 1731|   870k|                val = p0_v + delta;
 1732|   870k|                pu1_src_temp_v[pos_p0] = CLIP_U8(val);
  ------------------
  |  |   58|   870k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   870k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 505, False: 869k]
  |  |  |  |  |  Branch (77:54): [True: 708, False: 869k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1733|       |                /* q0' */
 1734|   870k|                val = q0_v - delta;
 1735|   870k|                pu1_src_temp_v[pos_q0] = CLIP_U8(val);
  ------------------
  |  |   58|   870k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   870k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 455, False: 869k]
  |  |  |  |  |  Branch (77:54): [True: 531, False: 869k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1736|   870k|            }
 1737|   993k|        }
 1738|   496k|    }
 1739|   137k|}
ih264_deblk_chroma_horz_bslt4:
 1793|   160k|{
 1794|   160k|    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
 1795|   160k|    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
 1796|   160k|    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
 1797|   160k|    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
 1798|   160k|    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
 1799|   160k|    UWORD8 *pu1_p1_u; /* Pointer to the src sample p1 of plane U*/
 1800|   160k|    UWORD8 *pu1_p1_v; /* Pointer to the src sample p1 of plane V*/
 1801|   160k|    UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v;
 1802|   160k|    WORD8 i = 0, edge;
 1803|   160k|    WORD8 delta;
 1804|   160k|    WORD8 tcb, tcr;
 1805|   160k|    WORD16 val;
 1806|   160k|    UWORD8 u1_bs;
 1807|   160k|    UWORD8 tcb0, tcr0;
 1808|       |
 1809|   160k|    pu1_p1_u = pu1_src_u - (src_strd << 1);
 1810|   160k|    pu1_p1_v = pu1_src_v - (src_strd << 1);
 1811|   160k|    pos_q0 = 0;
 1812|   160k|    pos_q1 = src_strd;
 1813|   160k|    pos_p0 = src_strd;
 1814|   160k|    pos_p1 = 0;
 1815|       |
 1816|   804k|    for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4,
  ------------------
  |  Branch (1816:19): [True: 643k, False: 160k]
  ------------------
 1817|   643k|                    pu1_src_v += 4, pu1_p1_v += 4)
 1818|   643k|    {
 1819|   643k|        pu1_src_temp_u = pu1_src_u;
 1820|   643k|        pu1_p1_temp_u = pu1_p1_u;
 1821|   643k|        pu1_src_temp_v = pu1_src_v;
 1822|   643k|        pu1_p1_temp_v = pu1_p1_v;
 1823|       |
 1824|       |        /* Filter Decision */
 1825|   643k|        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
 1826|   643k|        if(!u1_bs)
  ------------------
  |  Branch (1826:12): [True: 56.4k, False: 587k]
  ------------------
 1827|  56.4k|            continue;
 1828|       |        /* tc0 */
 1829|   587k|        tcb0 = pu1_cliptab_cb[u1_bs];
 1830|   587k|        tcr0 = pu1_cliptab_cr[u1_bs];
 1831|       |
 1832|  1.76M|        for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2,
  ------------------
  |  Branch (1832:20): [True: 1.17M, False: 587k]
  ------------------
 1833|  1.17M|                       pu1_src_temp_v += 2, pu1_p1_temp_v += 2)
 1834|  1.17M|        {
 1835|  1.17M|            q0_u = pu1_src_temp_u[pos_q0];
 1836|  1.17M|            q1_u = pu1_src_temp_u[pos_q1];
 1837|  1.17M|            p0_u = pu1_p1_temp_u[pos_p0];
 1838|  1.17M|            p1_u = pu1_p1_temp_u[pos_p1];
 1839|       |
 1840|  1.17M|            q0_v = pu1_src_temp_v[pos_q0];
 1841|  1.17M|            q1_v = pu1_src_temp_v[pos_q1];
 1842|  1.17M|            p0_v = pu1_p1_temp_v[pos_p0];
 1843|  1.17M|            p1_v = pu1_p1_temp_v[pos_p1];
 1844|       |
 1845|       |            /* tc */
 1846|  1.17M|            tcb = tcb0 + 1;
 1847|  1.17M|            tcr = tcr0 + 1;
 1848|       |            /* Filter Decision */
 1849|  1.17M|            if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
  ------------------
  |  |  100|  1.17M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 90.7k, False: 1.08M]
  |  |  ------------------
  ------------------
                          if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
  ------------------
  |  |  100|  1.05M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 25.9k, False: 1.03M]
  |  |  ------------------
  ------------------
  |  Branch (1849:16): [True: 1.05M, False: 117k]
  |  Branch (1849:47): [True: 1.04M, False: 14.6k]
  ------------------
 1850|  1.04M|                            && ABS(p1_u - p0_u) < beta_cb)
  ------------------
  |  |  100|  1.04M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 21.8k, False: 1.02M]
  |  |  ------------------
  ------------------
  |  Branch (1850:32): [True: 1.03M, False: 6.20k]
  ------------------
 1851|  1.03M|            {
 1852|  1.03M|                val = ((((q0_u - p0_u) << 2) + (p1_u - q1_u) + 4) >> 3);
 1853|  1.03M|                delta = CLIP3(-tcb, tcb, val);
  ------------------
  |  |   77|  1.03M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 7.65k, False: 1.02M]
  |  |  |  Branch (77:54): [True: 4.53k, False: 1.02M]
  |  |  ------------------
  ------------------
 1854|       |                /* p0' */
 1855|  1.03M|                val = p0_u + delta;
 1856|  1.03M|                pu1_p1_temp_u[pos_p0] = CLIP_U8(val);
  ------------------
  |  |   58|  1.03M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.03M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 257, False: 1.03M]
  |  |  |  |  |  Branch (77:54): [True: 668, False: 1.03M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1857|       |                /* q0' */
 1858|  1.03M|                val = q0_u - delta;
 1859|  1.03M|                pu1_src_temp_u[pos_q0] = CLIP_U8(val);
  ------------------
  |  |   58|  1.03M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.03M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 403, False: 1.03M]
  |  |  |  |  |  Branch (77:54): [True: 838, False: 1.03M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1860|  1.03M|            }
 1861|       |            /* Filter Decision */
 1862|  1.17M|            if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
  ------------------
  |  |  100|  1.17M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 84.8k, False: 1.08M]
  |  |  ------------------
  ------------------
                          if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
  ------------------
  |  |  100|  1.05M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 24.1k, False: 1.03M]
  |  |  ------------------
  ------------------
  |  Branch (1862:16): [True: 1.05M, False: 114k]
  |  Branch (1862:47): [True: 1.05M, False: 9.49k]
  ------------------
 1863|  1.05M|                            && ABS(p1_v - p0_v) < beta_cr)
  ------------------
  |  |  100|  1.05M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 15.2k, False: 1.03M]
  |  |  ------------------
  ------------------
  |  Branch (1863:32): [True: 1.04M, False: 4.09k]
  ------------------
 1864|  1.04M|            {
 1865|  1.04M|                val = ((((q0_v - p0_v) << 2) + (p1_v - q1_v) + 4) >> 3);
 1866|  1.04M|                delta = CLIP3(-tcr, tcr, val);
  ------------------
  |  |   77|  1.04M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 4.88k, False: 1.04M]
  |  |  |  Branch (77:54): [True: 3.65k, False: 1.03M]
  |  |  ------------------
  ------------------
 1867|       |                /* p0' */
 1868|  1.04M|                val = p0_v + delta;
 1869|  1.04M|                pu1_p1_temp_v[pos_p0] = CLIP_U8(val);
  ------------------
  |  |   58|  1.04M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.04M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 344, False: 1.04M]
  |  |  |  |  |  Branch (77:54): [True: 991, False: 1.04M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1870|       |                /* q0' */
 1871|  1.04M|                val = q0_v - delta;
 1872|  1.04M|                pu1_src_temp_v[pos_q0] = CLIP_U8(val);
  ------------------
  |  |   58|  1.04M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.04M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 363, False: 1.04M]
  |  |  |  |  |  Branch (77:54): [True: 588, False: 1.04M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1873|  1.04M|            }
 1874|  1.17M|        }
 1875|   587k|    }
 1876|   160k|}

ih264_disp_mgr_init:
   77|  24.4k|{
   78|  24.4k|    WORD32 id;
   79|       |
   80|  24.4k|    ps_disp_mgr->u4_last_abs_poc = DEFAULT_POC;
  ------------------
  |  |   45|  24.4k|#define DEFAULT_POC 0x7FFFFFFF
  ------------------
   81|       |
   82|  1.58M|    for(id = 0; id < DISP_MGR_MAX_CNT; id++)
  ------------------
  |  |   44|  1.58M|#define DISP_MGR_MAX_CNT 64
  ------------------
  |  Branch (82:17): [True: 1.56M, False: 24.4k]
  ------------------
   83|  1.56M|    {
   84|  1.56M|        ps_disp_mgr->ai4_abs_poc[id] = DEFAULT_POC;
  ------------------
  |  |   45|  1.56M|#define DEFAULT_POC 0x7FFFFFFF
  ------------------
   85|       |        ps_disp_mgr->apv_ptr[id] = NULL;
   86|  1.56M|    }
   87|  24.4k|}
ih264_disp_mgr_add:
  120|  93.6k|{
  121|  93.6k|    if(buf_id >= DISP_MGR_MAX_CNT)
  ------------------
  |  |   44|  93.6k|#define DISP_MGR_MAX_CNT 64
  ------------------
  |  Branch (121:8): [True: 0, False: 93.6k]
  ------------------
  122|      0|    {
  123|      0|        return (-1);
  124|      0|    }
  125|       |
  126|  93.6k|    if(ps_disp_mgr->apv_ptr[buf_id] != NULL)
  ------------------
  |  Branch (126:8): [True: 465, False: 93.1k]
  ------------------
  127|    465|    {
  128|    465|        return (-1);
  129|    465|    }
  130|       |
  131|  93.1k|    ps_disp_mgr->apv_ptr[buf_id] = pv_ptr;
  132|  93.1k|    ps_disp_mgr->ai4_abs_poc[buf_id] = abs_poc;
  133|       |
  134|  93.1k|    return 0;
  135|  93.6k|}
ih264_disp_mgr_get:
  159|   182k|{
  160|   182k|    WORD32 id;
  161|   182k|    void *pv_ret_ptr = NULL;
  162|   182k|    WORD32 i4_min_poc = 0x7FFFFFFF;
  163|   182k|    WORD32 min_poc_id = -1;
  164|       |
  165|       |    /* Find minimum POC */
  166|  11.8M|    for(id = 0; id < DISP_MGR_MAX_CNT; id++)
  ------------------
  |  |   44|  11.8M|#define DISP_MGR_MAX_CNT 64
  ------------------
  |  Branch (166:17): [True: 11.7M, False: 182k]
  ------------------
  167|  11.7M|    {
  168|  11.7M|        if((DEFAULT_POC != ps_disp_mgr->ai4_abs_poc[id]) &&
  ------------------
  |  |   45|  11.7M|#define DEFAULT_POC 0x7FFFFFFF
  ------------------
  |  Branch (168:12): [True: 104k, False: 11.5M]
  ------------------
  169|   104k|           (ps_disp_mgr->ai4_abs_poc[id] <= i4_min_poc))
  ------------------
  |  Branch (169:12): [True: 91.0k, False: 13.6k]
  ------------------
  170|  91.0k|        {
  171|  91.0k|            i4_min_poc = ps_disp_mgr->ai4_abs_poc[id];
  172|  91.0k|            min_poc_id = id;
  173|  91.0k|        }
  174|  11.7M|    }
  175|   182k|    *pi4_buf_id = min_poc_id;
  176|       |    /* If all pocs are still default_poc then return NULL */
  177|   182k|    if(-1 == min_poc_id)
  ------------------
  |  Branch (177:8): [True: 98.5k, False: 84.2k]
  ------------------
  178|  98.5k|    {
  179|  98.5k|        return NULL;
  180|  98.5k|    }
  181|       |
  182|  84.2k|    pv_ret_ptr = ps_disp_mgr->apv_ptr[min_poc_id];
  183|       |
  184|       |    /* Set abs poc to default and apv_ptr to null so that the buffer is not returned again */
  185|  84.2k|    ps_disp_mgr->apv_ptr[min_poc_id] = NULL;
  186|  84.2k|    ps_disp_mgr->ai4_abs_poc[min_poc_id] = DEFAULT_POC;
  ------------------
  |  |   45|  84.2k|#define DEFAULT_POC 0x7FFFFFFF
  ------------------
  187|       |
  188|  84.2k|    return pv_ret_ptr;
  189|   182k|}

ih264_ihadamard_scaling_4x4:
  102|  26.2k|{
  103|  26.2k|    WORD32 i;
  104|  26.2k|    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
  105|  26.2k|    WORD16 *pi2_src_ptr, *pi2_out_ptr;
  106|  26.2k|    WORD32 *pi4_tmp_ptr;
  107|  26.2k|    WORD32 rnd_fact = (u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0;
  ------------------
  |  Branch (107:23): [True: 10.6k, False: 15.5k]
  ------------------
  108|       |
  109|  26.2k|    pi4_tmp_ptr = pi4_tmp;
  110|  26.2k|    pi2_src_ptr = pi2_src;
  111|  26.2k|    pi2_out_ptr = pi2_out;
  112|       |
  113|       |    /* horizontal transform */
  114|   131k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   131k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (114:16): [True: 105k, False: 26.2k]
  ------------------
  115|   105k|    {
  116|   105k|        x4 = pi2_src_ptr[0];
  117|   105k|        x5 = pi2_src_ptr[1];
  118|   105k|        x6 = pi2_src_ptr[2];
  119|   105k|        x7 = pi2_src_ptr[3];
  120|       |
  121|   105k|        x0 = x4 + x7;
  122|   105k|        x1 = x5 + x6;
  123|   105k|        x2 = x5 - x6;
  124|   105k|        x3 = x4 - x7;
  125|       |
  126|   105k|        pi4_tmp_ptr[0] = x0 + x1;
  127|   105k|        pi4_tmp_ptr[1] = x2 + x3;
  128|   105k|        pi4_tmp_ptr[2] = x0 - x1;
  129|   105k|        pi4_tmp_ptr[3] = x3 - x2;
  130|       |
  131|   105k|        pi4_tmp_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   105k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  132|   105k|        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   105k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  133|   105k|    }
  134|       |
  135|       |    /* vertical transform */
  136|  26.2k|    pi4_tmp_ptr = pi4_tmp;
  137|   131k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   131k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (137:16): [True: 105k, False: 26.2k]
  ------------------
  138|   105k|    {
  139|   105k|        x4 = pi4_tmp_ptr[0];
  140|   105k|        x5 = pi4_tmp_ptr[4];
  141|   105k|        x6 = pi4_tmp_ptr[8];
  142|   105k|        x7 = pi4_tmp_ptr[12];
  143|       |
  144|   105k|        x0 = x4 + x7;
  145|   105k|        x1 = x5 + x6;
  146|   105k|        x2 = x5 - x6;
  147|   105k|        x3 = x4 - x7;
  148|       |
  149|   105k|        pi4_tmp_ptr[0] = x0 + x1;
  150|   105k|        pi4_tmp_ptr[4] = x2 + x3;
  151|   105k|        pi4_tmp_ptr[8] = x0 - x1;
  152|   105k|        pi4_tmp_ptr[12] = x3 - x2;
  153|       |
  154|   105k|        pi4_tmp_ptr++;
  155|   105k|    }
  156|  26.2k|    pi4_tmp_ptr = pi4_tmp;
  157|       |
  158|       |    /* scaling */
  159|   446k|    for(i = 0; i < (SUB_BLK_WIDTH_4x4 * SUB_BLK_WIDTH_4x4); i++)
  ------------------
  |  |   48|   446k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
                  for(i = 0; i < (SUB_BLK_WIDTH_4x4 * SUB_BLK_WIDTH_4x4); i++)
  ------------------
  |  |   48|   446k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (159:16): [True: 420k, False: 26.2k]
  ------------------
  160|   420k|    {
  161|   420k|        INV_QUANT(pi4_tmp_ptr[i], pu2_iscal_mat[0], pu2_weigh_mat[0],
  ------------------
  |  |  103|   420k|                {\
  |  |  104|   420k|                    i4_value *= quant_scale;\
  |  |  105|   420k|                    i4_value *= weight_scale;\
  |  |  106|   420k|                    i4_value += rndfactor;\
  |  |  107|   420k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   420k|                    i4_value >>= qbits;\
  |  |  109|   420k|                }
  ------------------
  162|   420k|                  u4_qp_div_6, rnd_fact, 6);
  163|   420k|        pi2_out_ptr[i] = pi4_tmp_ptr[i];
  164|   420k|    }
  165|  26.2k|}

ih264_inter_pred_luma_copy:
  129|  2.95M|{
  130|  2.95M|    WORD32 row, col;
  131|       |
  132|  2.95M|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  2.95M|#define UNUSED(x) ((void)(x))
  ------------------
  133|  2.95M|    UNUSED(dydx);
  ------------------
  |  |   45|  2.95M|#define UNUSED(x) ((void)(x))
  ------------------
  134|  49.7M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (134:18): [True: 46.8M, False: 2.95M]
  ------------------
  135|  46.8M|    {
  136|   792M|        for(col = 0; col < wd; col++)
  ------------------
  |  Branch (136:22): [True: 745M, False: 46.8M]
  ------------------
  137|   745M|        {
  138|   745M|            pu1_dst[col] = pu1_src[col];
  139|   745M|        }
  140|       |
  141|  46.8M|        pu1_src += src_strd;
  142|  46.8M|        pu1_dst += dst_strd;
  143|  46.8M|    }
  144|  2.95M|}
ih264_inter_pred_luma_horz:
  251|  38.1k|{
  252|  38.1k|    WORD32 row, col;
  253|  38.1k|    WORD16 i2_tmp;
  254|       |
  255|  38.1k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  38.1k|#define UNUSED(x) ((void)(x))
  ------------------
  256|  38.1k|    UNUSED(dydx);
  ------------------
  |  |   45|  38.1k|#define UNUSED(x) ((void)(x))
  ------------------
  257|   522k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (257:18): [True: 484k, False: 38.1k]
  ------------------
  258|   484k|    {
  259|  7.28M|        for(col = 0; col < wd; col++)
  ------------------
  |  Branch (259:22): [True: 6.79M, False: 484k]
  ------------------
  260|  6.79M|        {
  261|  6.79M|            i2_tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3])
  262|  6.79M|                     + ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2])
  263|  6.79M|                     + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]);
  264|  6.79M|            i2_tmp = (i2_tmp + 16) >> 5;
  265|  6.79M|            pu1_dst[col] = CLIP_U8(i2_tmp);
  ------------------
  |  |   58|  6.79M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  6.79M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 9.79k, False: 6.78M]
  |  |  |  |  |  Branch (77:54): [True: 198k, False: 6.58M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  266|  6.79M|        }
  267|   484k|        pu1_src += src_strd;
  268|   484k|        pu1_dst += dst_strd;
  269|   484k|    }
  270|  38.1k|}
ih264_inter_pred_luma_vert:
  321|  22.8k|{
  322|  22.8k|    WORD32 row, col;
  323|  22.8k|    WORD16 i2_tmp;
  324|       |
  325|  22.8k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  22.8k|#define UNUSED(x) ((void)(x))
  ------------------
  326|  22.8k|    UNUSED(dydx);
  ------------------
  |  |   45|  22.8k|#define UNUSED(x) ((void)(x))
  ------------------
  327|   352k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (327:18): [True: 329k, False: 22.8k]
  ------------------
  328|   329k|    {
  329|  5.14M|        for(col = 0; col < wd; col++)
  ------------------
  |  Branch (329:22): [True: 4.81M, False: 329k]
  ------------------
  330|  4.81M|        {
  331|  4.81M|            i2_tmp = ih264_g_six_tap[0] * (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
  332|  4.81M|                     + ih264_g_six_tap[1] * (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
  333|  4.81M|                     + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1 * src_strd]);
  334|  4.81M|            i2_tmp = (i2_tmp + 16) >> 5;
  335|  4.81M|            pu1_dst[col] = CLIP_U8(i2_tmp);
  ------------------
  |  |   58|  4.81M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  4.81M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 9.61k, False: 4.80M]
  |  |  |  |  |  Branch (77:54): [True: 26.0k, False: 4.78M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  336|  4.81M|        }
  337|   329k|        pu1_src += src_strd;
  338|   329k|        pu1_dst += dst_strd;
  339|   329k|    }
  340|  22.8k|}
ih264_inter_pred_luma_horz_hpel_vert_hpel:
  400|  22.3k|{
  401|  22.3k|    WORD32 row, col;
  402|  22.3k|    WORD32 tmp;
  403|  22.3k|    WORD16 *pi2_pred1_temp;
  404|  22.3k|    WORD16 *pi2_pred1;
  405|       |
  406|  22.3k|    UNUSED(dydx);
  ------------------
  |  |   45|  22.3k|#define UNUSED(x) ((void)(x))
  ------------------
  407|  22.3k|    pi2_pred1_temp = (WORD16*)pu1_tmp;
  408|  22.3k|    pi2_pred1_temp += 2;
  409|  22.3k|    pi2_pred1 = pi2_pred1_temp;
  410|       |
  411|   266k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (411:18): [True: 244k, False: 22.3k]
  ------------------
  412|   244k|    {
  413|  4.47M|        for(col = -2; col < wd + 3; col++)
  ------------------
  |  Branch (413:23): [True: 4.22M, False: 244k]
  ------------------
  414|  4.22M|        {
  415|  4.22M|            tmp = ih264_g_six_tap[0] * (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
  416|  4.22M|                  + ih264_g_six_tap[1] * (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
  417|  4.22M|                  + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1 * src_strd]);
  418|  4.22M|            pi2_pred1_temp[col] = tmp;
  419|  4.22M|        }
  420|   244k|        pu1_src += src_strd;
  421|   244k|        pi2_pred1_temp = pi2_pred1_temp + wd + 5;
  422|   244k|    }
  423|       |
  424|   266k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (424:18): [True: 244k, False: 22.3k]
  ------------------
  425|   244k|    {
  426|  3.25M|        for(col = 0; col < wd; col++)
  ------------------
  |  Branch (426:22): [True: 3.00M, False: 244k]
  ------------------
  427|  3.00M|        {
  428|  3.00M|            tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2] + pi2_pred1[col + 3])
  429|  3.00M|                  + ih264_g_six_tap[1] * (pi2_pred1[col - 1] + pi2_pred1[col + 2])
  430|  3.00M|                  + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1]);
  431|  3.00M|            tmp = (tmp + 512) >> 10;
  432|  3.00M|            pu1_dst[col] = CLIP_U8(tmp);
  ------------------
  |  |   58|  3.00M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  3.00M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 14.6k, False: 2.99M]
  |  |  |  |  |  Branch (77:54): [True: 5.36k, False: 2.98M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  433|  3.00M|        }
  434|   244k|        pi2_pred1 += (wd + 5);
  435|   244k|        pu1_dst += dst_strd;
  436|   244k|    }
  437|  22.3k|}
ih264_inter_pred_luma_horz_qpel:
  496|  69.7k|{
  497|  69.7k|    WORD32 row, col;
  498|  69.7k|    UWORD8 *pu1_pred1;
  499|  69.7k|    WORD32 x_offset = dydx & 0x3;
  500|  69.7k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  69.7k|#define UNUSED(x) ((void)(x))
  ------------------
  501|  69.7k|    pu1_pred1 = pu1_src + (x_offset >> 1);
  502|       |
  503|   997k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (503:18): [True: 927k, False: 69.7k]
  ------------------
  504|   927k|    {
  505|  14.4M|        for(col = 0; col < wd; col++, pu1_src++, pu1_dst++)
  ------------------
  |  Branch (505:22): [True: 13.4M, False: 927k]
  ------------------
  506|  13.4M|        {
  507|  13.4M|            WORD16 i2_temp;
  508|       |            /* The logic below implements the following equation
  509|       |             i2_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
  510|       |             20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
  511|  13.4M|            i2_temp = pu1_src[-2] + pu1_src[3]
  512|  13.4M|                      - (pu1_src[-1] + pu1_src[2])
  513|  13.4M|                      + ((pu1_src[0] + pu1_src[1] - pu1_src[-1] - pu1_src[2]) << 2)
  514|  13.4M|                      + ((pu1_src[0] + pu1_src[1]) << 4);
  515|  13.4M|            i2_temp = (i2_temp + 16) >> 5;
  516|  13.4M|            i2_temp = CLIP_U8(i2_temp);
  ------------------
  |  |   58|  13.4M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  13.4M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 28.4k, False: 13.4M]
  |  |  |  |  |  Branch (77:54): [True: 28.9k, False: 13.4M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  517|  13.4M|            *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
  518|       |
  519|  13.4M|            pu1_pred1++;
  520|  13.4M|        }
  521|   927k|        pu1_dst += dst_strd - wd;
  522|   927k|        pu1_src += src_strd - wd;
  523|   927k|        pu1_pred1 += src_strd - wd;
  524|   927k|    }
  525|  69.7k|}
ih264_inter_pred_luma_vert_qpel:
  584|  41.0k|{
  585|  41.0k|    WORD32 row, col;
  586|  41.0k|    WORD32 y_offset = dydx >> 2;
  587|  41.0k|    WORD32 off1, off2, off3;
  588|  41.0k|    UWORD8 *pu1_pred1;
  589|  41.0k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  41.0k|#define UNUSED(x) ((void)(x))
  ------------------
  590|  41.0k|    y_offset = y_offset & 0x3;
  591|       |
  592|  41.0k|    off1 = src_strd;
  593|  41.0k|    off2 = src_strd << 1;
  594|  41.0k|    off3 = off1 + off2;
  595|       |
  596|  41.0k|    pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd;
  597|       |
  598|   590k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (598:18): [True: 548k, False: 41.0k]
  ------------------
  599|   548k|    {
  600|  8.28M|        for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++)
  ------------------
  |  Branch (600:22): [True: 7.73M, False: 548k]
  ------------------
  601|  7.73M|        {
  602|  7.73M|            WORD16 i2_temp;
  603|       |            /* The logic below implements the following equation
  604|       |             i16_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
  605|       |             5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd])  +
  606|       |             20 * (puc_pred[0] + puc_pred[src_strd]); */
  607|  7.73M|            i2_temp = pu1_src[-off2] + pu1_src[off3]
  608|  7.73M|                       - (pu1_src[-off1] + pu1_src[off2])
  609|  7.73M|                       + ((pu1_src[0] + pu1_src[off1] - pu1_src[-off1] - pu1_src[off2]) << 2)
  610|  7.73M|                       + ((pu1_src[0] + pu1_src[off1]) << 4);
  611|  7.73M|            i2_temp = (i2_temp + 16) >> 5;
  612|  7.73M|            i2_temp = CLIP_U8(i2_temp);
  ------------------
  |  |   58|  7.73M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  7.73M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 16.7k, False: 7.71M]
  |  |  |  |  |  Branch (77:54): [True: 6.41k, False: 7.70M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  613|       |
  614|  7.73M|            *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
  615|  7.73M|        }
  616|   548k|        pu1_src += src_strd - wd;
  617|   548k|        pu1_pred1 += src_strd - wd;
  618|   548k|        pu1_dst += dst_strd - wd;
  619|   548k|    }
  620|  41.0k|}
ih264_inter_pred_luma_horz_qpel_vert_qpel:
  679|  69.5k|{
  680|  69.5k|    WORD32 row, col;
  681|  69.5k|    WORD32 x_offset = dydx & 0x3;
  682|  69.5k|    WORD32 y_offset = dydx >> 2;
  683|       |
  684|  69.5k|    WORD32 off1, off2, off3;
  685|  69.5k|    UWORD8* pu1_pred_vert, *pu1_pred_horz;
  686|  69.5k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  69.5k|#define UNUSED(x) ((void)(x))
  ------------------
  687|  69.5k|    y_offset = y_offset & 0x3;
  688|       |
  689|  69.5k|    off1 = src_strd;
  690|  69.5k|    off2 = src_strd << 1;
  691|  69.5k|    off3 = off1 + off2;
  692|       |
  693|  69.5k|    pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd;
  694|  69.5k|    pu1_pred_vert = pu1_src + (x_offset >> 1);
  695|       |
  696|  1.00M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (696:18): [True: 937k, False: 69.5k]
  ------------------
  697|   937k|    {
  698|  14.1M|        for(col = 0; col < wd;
  ------------------
  |  Branch (698:22): [True: 13.2M, False: 937k]
  ------------------
  699|  13.2M|                        col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++)
  700|  13.2M|        {
  701|  13.2M|            WORD16 i2_temp_vert, i2_temp_horz;
  702|       |            /* The logic below implements the following equation
  703|       |             i2_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
  704|       |             5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd])  +
  705|       |             20 * (puc_pred[0] + puc_pred[src_strd]); */
  706|  13.2M|            i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3]
  707|  13.2M|                            - (pu1_pred_vert[-off1] + pu1_pred_vert[off2])
  708|  13.2M|                            + ((pu1_pred_vert[0] + pu1_pred_vert[off1]
  709|  13.2M|                                            - pu1_pred_vert[-off1]
  710|  13.2M|                                            - pu1_pred_vert[off2]) << 2)
  711|  13.2M|                            + ((pu1_pred_vert[0] + pu1_pred_vert[off1]) << 4);
  712|  13.2M|            i2_temp_vert = (i2_temp_vert + 16) >> 5;
  713|  13.2M|            i2_temp_vert = CLIP_U8(i2_temp_vert);
  ------------------
  |  |   58|  13.2M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  13.2M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 31.0k, False: 13.2M]
  |  |  |  |  |  Branch (77:54): [True: 191k, False: 13.0M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  714|       |
  715|       |            /* The logic below implements the following equation
  716|       |             i16_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
  717|       |             20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
  718|  13.2M|            i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3]
  719|  13.2M|                            - (pu1_pred_horz[-1] + pu1_pred_horz[2])
  720|  13.2M|                            + ((pu1_pred_horz[0] + pu1_pred_horz[1]
  721|  13.2M|                                            - pu1_pred_horz[-1]
  722|  13.2M|                                            - pu1_pred_horz[2]) << 2)
  723|  13.2M|                            + ((pu1_pred_horz[0] + pu1_pred_horz[1]) << 4);
  724|  13.2M|            i2_temp_horz = (i2_temp_horz + 16) >> 5;
  725|  13.2M|            i2_temp_horz = CLIP_U8(i2_temp_horz);
  ------------------
  |  |   58|  13.2M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  13.2M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 12.3k, False: 13.2M]
  |  |  |  |  |  Branch (77:54): [True: 80.0k, False: 13.1M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  726|  13.2M|            *pu1_dst = (i2_temp_vert + i2_temp_horz + 1) >> 1;
  727|  13.2M|        }
  728|   937k|        pu1_pred_vert += (src_strd - wd);
  729|   937k|        pu1_pred_horz += (src_strd - wd);
  730|   937k|        pu1_dst += (dst_strd - wd);
  731|   937k|    }
  732|  69.5k|}
ih264_inter_pred_luma_horz_qpel_vert_hpel:
  792|  30.2k|{
  793|  30.2k|    WORD32 row, col;
  794|  30.2k|    WORD32 tmp;
  795|  30.2k|    WORD16* pi2_pred1_temp, *pi2_pred1;
  796|  30.2k|    UWORD8* pu1_dst_tmp;
  797|  30.2k|    WORD32 x_offset = dydx & 0x3;
  798|  30.2k|    WORD16 i2_macro;
  799|       |
  800|  30.2k|    pi2_pred1_temp = (WORD16*)pu1_tmp;
  801|  30.2k|    pi2_pred1_temp += 2;
  802|  30.2k|    pi2_pred1 = pi2_pred1_temp;
  803|  30.2k|    pu1_dst_tmp = pu1_dst;
  804|       |
  805|   450k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (805:18): [True: 420k, False: 30.2k]
  ------------------
  806|   420k|    {
  807|  8.59M|        for(col = -2; col < wd + 3; col++)
  ------------------
  |  Branch (807:23): [True: 8.17M, False: 420k]
  ------------------
  808|  8.17M|        {
  809|  8.17M|            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
  810|  8.17M|            tmp = ih264_g_six_tap[0] *
  811|  8.17M|                            (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
  812|  8.17M|                  + ih264_g_six_tap[1] *
  813|  8.17M|                            (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
  814|  8.17M|                  + ih264_g_six_tap[2] *
  815|  8.17M|                            (pu1_src[col] + pu1_src[col + 1 * src_strd]);
  816|  8.17M|            pi2_pred1_temp[col] = tmp;
  817|  8.17M|        }
  818|       |
  819|   420k|        pu1_src += src_strd;
  820|   420k|        pi2_pred1_temp = pi2_pred1_temp + wd + 5;
  821|   420k|    }
  822|       |
  823|  30.2k|    pi2_pred1_temp = pi2_pred1;
  824|   450k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (824:18): [True: 420k, False: 30.2k]
  ------------------
  825|   420k|    {
  826|  6.49M|        for(col = 0; col < wd; col++)
  ------------------
  |  Branch (826:22): [True: 6.07M, False: 420k]
  ------------------
  827|  6.07M|        {
  828|  6.07M|            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
  829|  6.07M|            tmp = ih264_g_six_tap[0] *
  830|  6.07M|                            (pi2_pred1[col - 2] + pi2_pred1[col + 3])
  831|  6.07M|                  + ih264_g_six_tap[1] *
  832|  6.07M|                            (pi2_pred1[col - 1] + pi2_pred1[col + 2])
  833|  6.07M|                  + ih264_g_six_tap[2] *
  834|  6.07M|                            (pi2_pred1[col] + pi2_pred1[col + 1]);
  835|  6.07M|            tmp = (tmp + 512) >> 10;
  836|  6.07M|            pu1_dst[col] = CLIP_U8(tmp);
  ------------------
  |  |   58|  6.07M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  6.07M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 70.3k, False: 6.00M]
  |  |  |  |  |  Branch (77:54): [True: 1.06k, False: 6.00M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  837|  6.07M|        }
  838|   420k|        pi2_pred1 += (wd + 5);
  839|   420k|        pu1_dst += dst_strd;
  840|   420k|    }
  841|       |
  842|  30.2k|    pu1_dst = pu1_dst_tmp;
  843|  30.2k|    pi2_pred1_temp += (x_offset >> 1);
  844|   450k|    for(row = ht; row != 0; row--)
  ------------------
  |  Branch (844:19): [True: 420k, False: 30.2k]
  ------------------
  845|   420k|    {
  846|  6.49M|        for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
  ------------------
  |  Branch (846:23): [True: 6.07M, False: 420k]
  ------------------
  847|  6.07M|        {
  848|  6.07M|            UWORD8 uc_temp;
  849|       |            /* Clipping the output of the six tap filter obtained from the
  850|       |             first stage of the 2d filter stage */
  851|  6.07M|            *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
  852|  6.07M|            i2_macro = (*pi2_pred1_temp);
  853|  6.07M|            uc_temp = CLIP_U8(i2_macro);
  ------------------
  |  |   58|  6.07M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  6.07M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 63.3k, False: 6.01M]
  |  |  |  |  |  Branch (77:54): [True: 1.06k, False: 6.01M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  854|  6.07M|            *pu1_dst = (*pu1_dst + uc_temp + 1) >> 1;
  855|  6.07M|        }
  856|   420k|        pi2_pred1_temp += 5;
  857|   420k|        pu1_dst += dst_strd - wd;
  858|   420k|    }
  859|  30.2k|}
ih264_inter_pred_luma_horz_hpel_vert_qpel:
  957|  51.3k|{
  958|       |
  959|  51.3k|    WORD32 row, col;
  960|  51.3k|    WORD32 tmp;
  961|  51.3k|    WORD32 y_offset = dydx >> 2;
  962|  51.3k|    WORD16* pi2_pred1_temp, *pi2_pred1;
  963|  51.3k|    UWORD8* pu1_dst_tmp;
  964|       |    //WORD32 x_offset = dydx & 0x3;
  965|  51.3k|    WORD16 i2_macro;
  966|       |
  967|  51.3k|    y_offset = y_offset & 0x3;
  968|       |
  969|  51.3k|    pi2_pred1_temp = (WORD16*)pu1_tmp;
  970|  51.3k|    pi2_pred1_temp += 2 * wd;
  971|  51.3k|    pi2_pred1 = pi2_pred1_temp;
  972|  51.3k|    pu1_dst_tmp = pu1_dst;
  973|  51.3k|    pu1_src -= 2 * src_strd;
  974|  1.04M|    for(row = -2; row < ht + 3; row++)
  ------------------
  |  Branch (974:19): [True: 992k, False: 51.3k]
  ------------------
  975|   992k|    {
  976|  15.3M|        for(col = 0; col < wd; col++)
  ------------------
  |  Branch (976:22): [True: 14.4M, False: 992k]
  ------------------
  977|  14.4M|        {
  978|  14.4M|            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
  979|  14.4M|            tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3])
  980|  14.4M|                  + ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2])
  981|  14.4M|                  + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]);
  982|  14.4M|            pi2_pred1_temp[col - 2 * wd] = tmp;
  983|  14.4M|        }
  984|       |
  985|   992k|        pu1_src += src_strd;
  986|   992k|        pi2_pred1_temp += wd;
  987|   992k|    }
  988|  51.3k|    pi2_pred1_temp = pi2_pred1;
  989|   786k|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (989:18): [True: 735k, False: 51.3k]
  ------------------
  990|   735k|    {
  991|  11.5M|        for(col = 0; col < wd; col++)
  ------------------
  |  Branch (991:22): [True: 10.8M, False: 735k]
  ------------------
  992|  10.8M|        {
  993|  10.8M|            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
  994|  10.8M|            tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2 * wd] + pi2_pred1[col + 3 * wd])
  995|  10.8M|                  + ih264_g_six_tap[1] * (pi2_pred1[col - 1 * wd] + pi2_pred1[col + 2 * wd])
  996|  10.8M|                  + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1 * wd]);
  997|  10.8M|            tmp = (tmp + 512) >> 10;
  998|  10.8M|            pu1_dst[col] = CLIP_U8(tmp);
  ------------------
  |  |   58|  10.8M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  10.8M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 19.4k, False: 10.8M]
  |  |  |  |  |  Branch (77:54): [True: 33.3k, False: 10.7M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  999|  10.8M|        }
 1000|   735k|        pi2_pred1 += wd;
 1001|   735k|        pu1_dst += dst_strd;
 1002|   735k|    }
 1003|  51.3k|    pu1_dst = pu1_dst_tmp;
 1004|  51.3k|    pi2_pred1_temp += (y_offset >> 1) * wd;
 1005|   786k|    for(row = ht; row != 0; row--)
  ------------------
  |  Branch (1005:19): [True: 735k, False: 51.3k]
  ------------------
 1006|       |
 1007|   735k|    {
 1008|  11.5M|        for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
  ------------------
  |  Branch (1008:23): [True: 10.8M, False: 735k]
  ------------------
 1009|  10.8M|        {
 1010|  10.8M|            UWORD8 u1_temp;
 1011|       |            /* Clipping the output of the six tap filter obtained from the
 1012|       |             first stage of the 2d filter stage */
 1013|  10.8M|            *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
 1014|  10.8M|            i2_macro = (*pi2_pred1_temp);
 1015|  10.8M|            u1_temp = CLIP_U8(i2_macro);
  ------------------
  |  |   58|  10.8M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  10.8M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 7.98k, False: 10.8M]
  |  |  |  |  |  Branch (77:54): [True: 25.3k, False: 10.7M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1016|  10.8M|            *pu1_dst = (*pu1_dst + u1_temp + 1) >> 1;
 1017|  10.8M|        }
 1018|       |        //pi16_pred1_temp += wd;
 1019|   735k|        pu1_dst += dst_strd - wd;
 1020|   735k|    }
 1021|  51.3k|}
ih264_inter_pred_chroma:
 1141|  3.29M|{
 1142|  3.29M|    WORD32 row, col;
 1143|  3.29M|    WORD16 i2_tmp;
 1144|       |
 1145|  29.0M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (1145:18): [True: 25.7M, False: 3.29M]
  ------------------
 1146|  25.7M|    {
 1147|   431M|        for(col = 0; col < 2 * wd; col++)
  ------------------
  |  Branch (1147:22): [True: 405M, False: 25.7M]
  ------------------
 1148|   405M|        {
 1149|   405M|            i2_tmp = 0; /* applies equation (8-266) in section 8.4.2.2.2 */
 1150|   405M|            i2_tmp = (8 - dx) * (8 - dy) * pu1_src[col]
 1151|   405M|                     + (dx) * (8 - dy) * pu1_src[col + 2]
 1152|   405M|                     + (8 - dx) * (dy) * (pu1_src + src_strd)[col]
 1153|   405M|                     + (dx) * (dy) * (pu1_src + src_strd)[col + 2];
 1154|   405M|            i2_tmp = (i2_tmp + 32) >> 6;
 1155|   405M|            pu1_dst[col] = CLIP_U8(i2_tmp);
  ------------------
  |  |   58|   405M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   405M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 405M]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 405M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1156|   405M|        }
 1157|  25.7M|        pu1_src += src_strd;
 1158|  25.7M|        pu1_dst += dst_strd;
 1159|  25.7M|    }
 1160|  3.29M|}

ih264_iquant_itrans_recon_4x4:
  126|  56.6k|{
  127|  56.6k|    WORD16 *pi2_src_ptr = pi2_src;
  128|  56.6k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
  129|  56.6k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  130|  56.6k|    UWORD8 *pu1_out_ptr = pu1_out;
  131|  56.6k|    WORD16 x0, x1, x2, x3, i;
  132|  56.6k|    WORD32 q0, q1, q2, q3;
  133|  56.6k|    WORD16 i_macro;
  134|  56.6k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (134:23): [True: 8.35k, False: 48.3k]
  ------------------
  135|       |
  136|       |    /* inverse quant */
  137|       |    /* horizontal inverse transform */
  138|   283k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   283k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (138:16): [True: 226k, False: 56.6k]
  ------------------
  139|   226k|    {
  140|   226k|        q0 = pi2_src_ptr[0];
  141|   226k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   226k|                {\
  |  |  104|   226k|                    i4_value *= quant_scale;\
  |  |  105|   226k|                    i4_value *= weight_scale;\
  |  |  106|   226k|                    i4_value += rndfactor;\
  |  |  107|   226k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   226k|                    i4_value >>= qbits;\
  |  |  109|   226k|                }
  ------------------
  142|       |        /* Restoring dc value for intra case */
  143|   226k|        if (i==0 && iq_start_idx == 1)
  ------------------
  |  Branch (143:13): [True: 56.6k, False: 170k]
  |  Branch (143:21): [True: 5.38k, False: 51.2k]
  ------------------
  144|  5.38k|        {
  145|  5.38k|            q0 = pi2_dc_ld_addr[0];
  146|  5.38k|        }
  147|       |
  148|   226k|        q2 = pi2_src_ptr[2];
  149|   226k|        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   226k|                {\
  |  |  104|   226k|                    i4_value *= quant_scale;\
  |  |  105|   226k|                    i4_value *= weight_scale;\
  |  |  106|   226k|                    i4_value += rndfactor;\
  |  |  107|   226k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   226k|                    i4_value >>= qbits;\
  |  |  109|   226k|                }
  ------------------
  150|       |
  151|   226k|        x0 = q0 + q2;
  152|   226k|        x1 = q0 - q2;
  153|       |
  154|   226k|        q1 = pi2_src_ptr[1];
  155|   226k|        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   226k|                {\
  |  |  104|   226k|                    i4_value *= quant_scale;\
  |  |  105|   226k|                    i4_value *= weight_scale;\
  |  |  106|   226k|                    i4_value += rndfactor;\
  |  |  107|   226k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   226k|                    i4_value >>= qbits;\
  |  |  109|   226k|                }
  ------------------
  156|       |
  157|   226k|        q3 = pi2_src_ptr[3];
  158|   226k|        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   226k|                {\
  |  |  104|   226k|                    i4_value *= quant_scale;\
  |  |  105|   226k|                    i4_value *= weight_scale;\
  |  |  106|   226k|                    i4_value += rndfactor;\
  |  |  107|   226k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   226k|                    i4_value >>= qbits;\
  |  |  109|   226k|                }
  ------------------
  159|       |
  160|   226k|        x2 = (q1 >> 1) - q3;
  161|   226k|        x3 = q1 + (q3 >> 1);
  162|       |
  163|   226k|        pi2_tmp_ptr[0] = x0 + x3;
  164|   226k|        pi2_tmp_ptr[1] = x1 + x2;
  165|   226k|        pi2_tmp_ptr[2] = x1 - x2;
  166|   226k|        pi2_tmp_ptr[3] = x0 - x3;
  167|       |
  168|   226k|        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   226k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  169|   226k|        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   226k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  170|   226k|        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   226k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  171|   226k|        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   226k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  172|   226k|    }
  173|       |
  174|       |    /* vertical inverse transform */
  175|  56.6k|    pi2_tmp_ptr = pi2_tmp;
  176|   283k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   283k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (176:16): [True: 226k, False: 56.6k]
  ------------------
  177|   226k|    {
  178|   226k|        pu1_pred_ptr = pu1_pred;
  179|   226k|        pu1_out = pu1_out_ptr;
  180|       |
  181|   226k|        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
  182|   226k|        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
  183|   226k|        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
  184|   226k|        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
  185|       |
  186|       |        /* inverse prediction */
  187|   226k|        i_macro = x0 + x3;
  188|   226k|        i_macro = ((i_macro + 32) >> 6);
  189|   226k|        i_macro += *pu1_pred_ptr;
  190|   226k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   226k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   226k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 66.9k, False: 159k]
  |  |  |  |  |  Branch (77:54): [True: 19.6k, False: 140k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  191|   226k|        pu1_pred_ptr += pred_strd;
  192|   226k|        pu1_out += out_strd;
  193|       |
  194|   226k|        i_macro = x1 + x2;
  195|   226k|        i_macro = ((i_macro + 32) >> 6);
  196|   226k|        i_macro += *pu1_pred_ptr;
  197|   226k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   226k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   226k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 75.6k, False: 151k]
  |  |  |  |  |  Branch (77:54): [True: 22.1k, False: 128k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  198|   226k|        pu1_pred_ptr += pred_strd;
  199|   226k|        pu1_out += out_strd;
  200|       |
  201|   226k|        i_macro = x1 - x2;
  202|   226k|        i_macro = ((i_macro + 32) >> 6);
  203|   226k|        i_macro += *pu1_pred_ptr;
  204|   226k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   226k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   226k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 66.7k, False: 159k]
  |  |  |  |  |  Branch (77:54): [True: 17.7k, False: 142k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  205|   226k|        pu1_pred_ptr += pred_strd;
  206|   226k|        pu1_out += out_strd;
  207|       |
  208|   226k|        i_macro = x0 - x3;
  209|   226k|        i_macro = ((i_macro + 32) >> 6);
  210|   226k|        i_macro += *pu1_pred_ptr;
  211|   226k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   226k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   226k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 77.6k, False: 149k]
  |  |  |  |  |  Branch (77:54): [True: 27.8k, False: 121k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  212|       |
  213|   226k|        pi2_tmp_ptr++;
  214|   226k|        pu1_out_ptr++;
  215|   226k|        pu1_pred++;
  216|   226k|    }
  217|  56.6k|}
ih264_iquant_itrans_recon_4x4_dc:
  280|   371k|{
  281|   371k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  282|   371k|    UWORD8 *pu1_out_ptr = pu1_out;
  283|   371k|    WORD32 q0;
  284|   371k|    WORD16 x, i_macro, i;
  285|   371k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (285:23): [True: 82.3k, False: 289k]
  ------------------
  286|       |
  287|   371k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|   371k|#define UNUSED(x) ((void)(x))
  ------------------
  288|   371k|    if(iq_start_idx == 0)
  ------------------
  |  Branch (288:8): [True: 15.7k, False: 355k]
  ------------------
  289|  15.7k|    {
  290|  15.7k|        q0 = pi2_src[0];
  291|  15.7k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  15.7k|                {\
  |  |  104|  15.7k|                    i4_value *= quant_scale;\
  |  |  105|  15.7k|                    i4_value *= weight_scale;\
  |  |  106|  15.7k|                    i4_value += rndfactor;\
  |  |  107|  15.7k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  15.7k|                    i4_value >>= qbits;\
  |  |  109|  15.7k|                }
  ------------------
  292|  15.7k|    }
  293|   355k|    else
  294|   355k|    {
  295|   355k|        q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case3
  296|   355k|    }
  297|   371k|    i_macro = ((q0 + 32) >> 6);
  298|  1.85M|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  1.85M|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (298:16): [True: 1.48M, False: 371k]
  ------------------
  299|  1.48M|    {
  300|  1.48M|        pu1_pred_ptr = pu1_pred;
  301|  1.48M|        pu1_out = pu1_out_ptr;
  302|       |
  303|       |        /* inverse prediction */
  304|  1.48M|        x = i_macro + *pu1_pred_ptr;
  305|  1.48M|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  1.48M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.48M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 195k, False: 1.28M]
  |  |  |  |  |  Branch (77:54): [True: 216k, False: 1.07M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  306|  1.48M|        pu1_pred_ptr += pred_strd;
  307|  1.48M|        pu1_out += out_strd;
  308|       |
  309|  1.48M|        x = i_macro + *pu1_pred_ptr;
  310|  1.48M|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  1.48M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.48M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 196k, False: 1.28M]
  |  |  |  |  |  Branch (77:54): [True: 216k, False: 1.07M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  311|  1.48M|        pu1_pred_ptr += pred_strd;
  312|  1.48M|        pu1_out += out_strd;
  313|       |
  314|  1.48M|        x = i_macro + *pu1_pred_ptr;
  315|  1.48M|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  1.48M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.48M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 196k, False: 1.28M]
  |  |  |  |  |  Branch (77:54): [True: 217k, False: 1.07M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  316|  1.48M|        pu1_pred_ptr += pred_strd;
  317|  1.48M|        pu1_out += out_strd;
  318|       |
  319|  1.48M|        x = i_macro + *pu1_pred_ptr;
  320|  1.48M|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  1.48M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.48M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 196k, False: 1.28M]
  |  |  |  |  |  Branch (77:54): [True: 218k, False: 1.07M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  321|       |
  322|  1.48M|        pu1_out_ptr++;
  323|  1.48M|        pu1_pred++;
  324|  1.48M|    }
  325|   371k|}
ih264_iquant_itrans_recon_8x8:
  389|  25.1k|{
  390|  25.1k|    WORD32 i;
  391|  25.1k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
  392|  25.1k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  393|  25.1k|    UWORD8 *pu1_out_ptr = pu1_out;
  394|  25.1k|    WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
  395|  25.1k|    WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
  396|  25.1k|    WORD16 i_macro;
  397|  25.1k|    WORD32 q;
  398|  25.1k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (398:23): [True: 17.0k, False: 8.15k]
  ------------------
  399|       |
  400|  25.1k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  25.1k|#define UNUSED(x) ((void)(x))
  ------------------
  401|  25.1k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  25.1k|#define UNUSED(x) ((void)(x))
  ------------------
  402|       |    /*************************************************************/
  403|       |    /* De quantization of coefficients. Will be replaced by SIMD */
  404|       |    /* operations on platform. Note : DC coeff is not scaled     */
  405|       |    /*************************************************************/
  406|  1.63M|    for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
  ------------------
  |  |   53|  1.63M|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
                  for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
  ------------------
  |  |   53|  1.63M|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (406:16): [True: 1.61M, False: 25.1k]
  ------------------
  407|  1.61M|    {
  408|  1.61M|        q = pi2_src[i];
  409|  1.61M|        INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|  1.61M|                {\
  |  |  104|  1.61M|                    i4_value *= quant_scale;\
  |  |  105|  1.61M|                    i4_value *= weight_scale;\
  |  |  106|  1.61M|                    i4_value += rndfactor;\
  |  |  107|  1.61M|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  1.61M|                    i4_value >>= qbits;\
  |  |  109|  1.61M|                }
  ------------------
  410|  1.61M|        pi2_tmp_ptr[i] = q;
  411|  1.61M|    }
  412|       |    /* Perform Inverse transform */
  413|       |    /*--------------------------------------------------------------------*/
  414|       |    /* IDCT [ Horizontal transformation ]                                 */
  415|       |    /*--------------------------------------------------------------------*/
  416|   226k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|   226k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (416:16): [True: 201k, False: 25.1k]
  ------------------
  417|   201k|    {
  418|       |        /*------------------------------------------------------------------*/
  419|       |        /* y0 = w0 + w4                                                     */
  420|       |        /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
  421|       |        /* y2 = w0 - w4                                                     */
  422|       |        /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
  423|       |        /* y4 = (w2 >> 1) - w6                                              */
  424|       |        /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
  425|       |        /* y6 = w2 + (w6 >> 1)                                              */
  426|       |        /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
  427|       |        /*------------------------------------------------------------------*/
  428|   201k|        i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
  429|       |
  430|   201k|        i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
  431|   201k|                        - (pi2_tmp_ptr[7] >> 1));
  432|       |
  433|   201k|        i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
  434|       |
  435|   201k|        i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
  436|   201k|                        - (pi2_tmp_ptr[3] >> 1));
  437|       |
  438|   201k|        i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
  439|       |
  440|   201k|        i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
  441|   201k|                        + (pi2_tmp_ptr[5] >> 1));
  442|       |
  443|   201k|        i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
  444|       |
  445|   201k|        i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
  446|   201k|                        + (pi2_tmp_ptr[1] >> 1));
  447|       |
  448|       |        /*------------------------------------------------------------------*/
  449|       |        /* z0 = y0 + y6                                                     */
  450|       |        /* z1 = y1 + (y7 >> 2)                                              */
  451|       |        /* z2 = y2 + y4                                                     */
  452|       |        /* z3 = y3 + (y5 >> 2)                                              */
  453|       |        /* z4 = y2 - y4                                                     */
  454|       |        /* z5 = (y3 >> 2) - y5                                              */
  455|       |        /* z6 = y0 - y6                                                     */
  456|       |        /* z7 = y7 - (y1 >> 2)                                              */
  457|       |        /*------------------------------------------------------------------*/
  458|   201k|        i_z0 = i_y0 + i_y6;
  459|   201k|        i_z1 = i_y1 + (i_y7 >> 2);
  460|   201k|        i_z2 = i_y2 + i_y4;
  461|   201k|        i_z3 = i_y3 + (i_y5 >> 2);
  462|   201k|        i_z4 = i_y2 - i_y4;
  463|   201k|        i_z5 = (i_y3 >> 2) - i_y5;
  464|   201k|        i_z6 = i_y0 - i_y6;
  465|   201k|        i_z7 = i_y7 - (i_y1 >> 2);
  466|       |
  467|       |        /*------------------------------------------------------------------*/
  468|       |        /* x0 = z0 + z7                                                     */
  469|       |        /* x1 = z2 + z5                                                     */
  470|       |        /* x2 = z4 + z3                                                     */
  471|       |        /* x3 = z6 + z1                                                     */
  472|       |        /* x4 = z6 - z1                                                     */
  473|       |        /* x5 = z4 - z3                                                     */
  474|       |        /* x6 = z2 - z5                                                     */
  475|       |        /* x7 = z0 - z7                                                     */
  476|       |        /*------------------------------------------------------------------*/
  477|   201k|        pi2_tmp_ptr[0] = i_z0 + i_z7;
  478|   201k|        pi2_tmp_ptr[1] = i_z2 + i_z5;
  479|   201k|        pi2_tmp_ptr[2] = i_z4 + i_z3;
  480|   201k|        pi2_tmp_ptr[3] = i_z6 + i_z1;
  481|   201k|        pi2_tmp_ptr[4] = i_z6 - i_z1;
  482|   201k|        pi2_tmp_ptr[5] = i_z4 - i_z3;
  483|   201k|        pi2_tmp_ptr[6] = i_z2 - i_z5;
  484|   201k|        pi2_tmp_ptr[7] = i_z0 - i_z7;
  485|       |
  486|       |        /* move to the next row */
  487|       |        //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
  488|   201k|        pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
  ------------------
  |  |   53|   201k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  489|   201k|    }
  490|       |
  491|       |    /*--------------------------------------------------------------------*/
  492|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  493|       |    /*                                                                    */
  494|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  495|       |    /* [Prediction buffer itself in this case]                            */
  496|       |    /*--------------------------------------------------------------------*/
  497|  25.1k|    pi2_tmp_ptr = pi2_tmp;
  498|   226k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|   226k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (498:16): [True: 201k, False: 25.1k]
  ------------------
  499|   201k|    {
  500|   201k|        pu1_pred_ptr = pu1_pred;
  501|   201k|        pu1_out = pu1_out_ptr;
  502|       |        /*------------------------------------------------------------------*/
  503|       |        /* y0j = w0j + w4j                                                  */
  504|       |        /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
  505|       |        /* y2j = w0j -w4j                                                   */
  506|       |        /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
  507|       |        /* y4j = ( w2j >> 1 ) -w6j                                          */
  508|       |        /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
  509|       |        /* y6j = w2j + ( w6j >> 1 )                                         */
  510|       |        /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
  511|       |        /*------------------------------------------------------------------*/
  512|   201k|        i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
  513|       |
  514|   201k|        i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
  515|   201k|                        - (pi2_tmp_ptr[56] >> 1);
  516|       |
  517|   201k|        i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
  518|       |
  519|   201k|        i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
  520|   201k|                        - (pi2_tmp_ptr[24] >> 1);
  521|       |
  522|   201k|        i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
  523|       |
  524|   201k|        i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
  525|   201k|                        + (pi2_tmp_ptr[40] >> 1);
  526|       |
  527|   201k|        i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
  528|       |
  529|   201k|        i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
  530|   201k|                        + (pi2_tmp_ptr[8] >> 1);
  531|       |
  532|       |        /*------------------------------------------------------------------*/
  533|       |        /* z0j = y0j + y6j                                                  */
  534|       |        /* z1j = y1j + (y7j >> 2)                                           */
  535|       |        /* z2j = y2j + y4j                                                  */
  536|       |        /* z3j = y3j + (y5j >> 2)                                           */
  537|       |        /* z4j = y2j -y4j                                                   */
  538|       |        /* z5j = (y3j >> 2) -y5j                                            */
  539|       |        /* z6j = y0j -y6j                                                   */
  540|       |        /* z7j = y7j -(y1j >> 2)                                            */
  541|       |        /*------------------------------------------------------------------*/
  542|   201k|        i_z0 = i_y0 + i_y6;
  543|   201k|        i_z1 = i_y1 + (i_y7 >> 2);
  544|   201k|        i_z2 = i_y2 + i_y4;
  545|   201k|        i_z3 = i_y3 + (i_y5 >> 2);
  546|   201k|        i_z4 = i_y2 - i_y4;
  547|   201k|        i_z5 = (i_y3 >> 2) - i_y5;
  548|   201k|        i_z6 = i_y0 - i_y6;
  549|   201k|        i_z7 = i_y7 - (i_y1 >> 2);
  550|       |
  551|       |        /*------------------------------------------------------------------*/
  552|       |        /* x0j = z0j + z7j                                                  */
  553|       |        /* x1j = z2j + z5j                                                  */
  554|       |        /* x2j = z4j + z3j                                                  */
  555|       |        /* x3j = z6j + z1j                                                  */
  556|       |        /* x4j = z6j -z1j                                                   */
  557|       |        /* x5j = z4j -z3j                                                   */
  558|       |        /* x6j = z2j -z5j                                                   */
  559|       |        /* x7j = z0j -z7j                                                   */
  560|       |        /*------------------------------------------------------------------*/
  561|   201k|        i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
  562|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 32.2k, False: 169k]
  |  |  |  |  |  Branch (77:54): [True: 12.1k, False: 157k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  563|       |        /* Change uc_recBuffer to Point to next element in the same column*/
  564|   201k|        pu1_pred_ptr += pred_strd;
  565|   201k|        pu1_out += out_strd;
  566|       |
  567|   201k|        i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
  568|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 32.0k, False: 169k]
  |  |  |  |  |  Branch (77:54): [True: 11.3k, False: 158k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  569|   201k|        pu1_pred_ptr += pred_strd;
  570|   201k|        pu1_out += out_strd;
  571|       |
  572|   201k|        i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
  573|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 36.7k, False: 164k]
  |  |  |  |  |  Branch (77:54): [True: 10.7k, False: 154k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  574|   201k|        pu1_pred_ptr += pred_strd;
  575|   201k|        pu1_out += out_strd;
  576|       |
  577|   201k|        i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
  578|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 34.1k, False: 167k]
  |  |  |  |  |  Branch (77:54): [True: 9.61k, False: 157k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  579|   201k|        pu1_pred_ptr += pred_strd;
  580|   201k|        pu1_out += out_strd;
  581|       |
  582|   201k|        i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
  583|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 35.8k, False: 165k]
  |  |  |  |  |  Branch (77:54): [True: 9.79k, False: 155k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  584|   201k|        pu1_pred_ptr += pred_strd;
  585|   201k|        pu1_out += out_strd;
  586|       |
  587|   201k|        i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
  588|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 34.0k, False: 167k]
  |  |  |  |  |  Branch (77:54): [True: 9.50k, False: 158k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  589|   201k|        pu1_pred_ptr += pred_strd;
  590|   201k|        pu1_out += out_strd;
  591|       |
  592|   201k|        i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
  593|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 28.7k, False: 172k]
  |  |  |  |  |  Branch (77:54): [True: 9.56k, False: 163k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  594|   201k|        pu1_pred_ptr += pred_strd;
  595|   201k|        pu1_out += out_strd;
  596|       |
  597|   201k|        i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
  598|   201k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   201k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   201k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 32.2k, False: 169k]
  |  |  |  |  |  Branch (77:54): [True: 10.9k, False: 158k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  599|       |
  600|   201k|        pi2_tmp_ptr++;
  601|   201k|        pu1_out_ptr++;
  602|   201k|        pu1_pred++;
  603|   201k|    }
  604|  25.1k|}
ih264_iquant_itrans_recon_8x8_dc:
  668|  11.6k|{
  669|  11.6k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  670|  11.6k|    UWORD8 *pu1_out_ptr = pu1_out;
  671|  11.6k|    WORD16 x, i, i_macro;
  672|  11.6k|    WORD32 q;
  673|  11.6k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (673:23): [True: 4.52k, False: 7.16k]
  ------------------
  674|       |
  675|  11.6k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  11.6k|#define UNUSED(x) ((void)(x))
  ------------------
  676|  11.6k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  11.6k|#define UNUSED(x) ((void)(x))
  ------------------
  677|  11.6k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  11.6k|#define UNUSED(x) ((void)(x))
  ------------------
  678|       |    /*************************************************************/
  679|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  680|       |    /* operations on platform. Note : DC coeff is not scaled     */
  681|       |    /*************************************************************/
  682|  11.6k|    q = pi2_src[0];
  683|  11.6k|    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|  11.6k|                {\
  |  |  104|  11.6k|                    i4_value *= quant_scale;\
  |  |  105|  11.6k|                    i4_value *= weight_scale;\
  |  |  106|  11.6k|                    i4_value += rndfactor;\
  |  |  107|  11.6k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  11.6k|                    i4_value >>= qbits;\
  |  |  109|  11.6k|                }
  ------------------
  684|  11.6k|    i_macro = (q + 32) >> 6;
  685|       |    /* Perform Inverse transform */
  686|       |    /*--------------------------------------------------------------------*/
  687|       |    /* IDCT [ Horizontal transformation ]                                 */
  688|       |    /*--------------------------------------------------------------------*/
  689|       |    /*--------------------------------------------------------------------*/
  690|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  691|       |    /*                                                                    */
  692|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  693|       |    /* [Prediction buffer itself in this case]                            */
  694|       |    /*--------------------------------------------------------------------*/
  695|   105k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|   105k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (695:16): [True: 93.5k, False: 11.6k]
  ------------------
  696|  93.5k|    {
  697|  93.5k|        pu1_pred_ptr = pu1_pred;
  698|  93.5k|        pu1_out = pu1_out_ptr;
  699|       |
  700|  93.5k|        x = i_macro + *pu1_pred_ptr;
  701|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.95k, False: 89.5k]
  |  |  |  |  |  Branch (77:54): [True: 31.1k, False: 58.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  702|       |        /* Change uc_recBuffer to Point to next element in the same column*/
  703|  93.5k|        pu1_pred_ptr += pred_strd;
  704|  93.5k|        pu1_out += out_strd;
  705|       |
  706|  93.5k|        x = i_macro + *pu1_pred_ptr;
  707|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.94k, False: 89.5k]
  |  |  |  |  |  Branch (77:54): [True: 31.2k, False: 58.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  708|  93.5k|        pu1_pred_ptr += pred_strd;
  709|  93.5k|        pu1_out += out_strd;
  710|       |
  711|  93.5k|        x = i_macro + *pu1_pred_ptr;
  712|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.95k, False: 89.5k]
  |  |  |  |  |  Branch (77:54): [True: 31.2k, False: 58.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  713|  93.5k|        pu1_pred_ptr += pred_strd;
  714|  93.5k|        pu1_out += out_strd;
  715|       |
  716|  93.5k|        x = i_macro + *pu1_pred_ptr;
  717|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.95k, False: 89.5k]
  |  |  |  |  |  Branch (77:54): [True: 31.1k, False: 58.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  718|  93.5k|        pu1_pred_ptr += pred_strd;
  719|  93.5k|        pu1_out += out_strd;
  720|       |
  721|  93.5k|        x = i_macro + *pu1_pred_ptr;
  722|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.97k, False: 89.5k]
  |  |  |  |  |  Branch (77:54): [True: 31.1k, False: 58.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  723|  93.5k|        pu1_pred_ptr += pred_strd;
  724|  93.5k|        pu1_out += out_strd;
  725|       |
  726|  93.5k|        x = i_macro + *pu1_pred_ptr;
  727|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.99k, False: 89.5k]
  |  |  |  |  |  Branch (77:54): [True: 31.2k, False: 58.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  728|  93.5k|        pu1_pred_ptr += pred_strd;
  729|  93.5k|        pu1_out += out_strd;
  730|       |
  731|  93.5k|        x = i_macro + *pu1_pred_ptr;
  732|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.00k, False: 89.5k]
  |  |  |  |  |  Branch (77:54): [True: 31.1k, False: 58.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  733|  93.5k|        pu1_pred_ptr += pred_strd;
  734|  93.5k|        pu1_out += out_strd;
  735|       |
  736|  93.5k|        x = i_macro + *pu1_pred_ptr;
  737|  93.5k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  93.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  93.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.01k, False: 89.4k]
  |  |  |  |  |  Branch (77:54): [True: 31.2k, False: 58.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  738|       |
  739|  93.5k|        pu1_out_ptr++;
  740|  93.5k|        pu1_pred++;
  741|  93.5k|    }
  742|  11.6k|}
ih264_iquant_itrans_recon_chroma_4x4:
  801|  15.6k|{
  802|  15.6k|    WORD16 *pi2_src_ptr = pi2_src;
  803|  15.6k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
  804|  15.6k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  805|  15.6k|    UWORD8 *pu1_out_ptr = pu1_out;
  806|  15.6k|    WORD16 x0, x1, x2, x3, i;
  807|  15.6k|    WORD32 q0, q1, q2, q3;
  808|  15.6k|    WORD16 i_macro;
  809|  15.6k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (809:23): [True: 2.76k, False: 12.8k]
  ------------------
  810|       |
  811|       |    /* inverse quant */
  812|       |    /* horizontal inverse transform */
  813|  78.2k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  78.2k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (813:16): [True: 62.5k, False: 15.6k]
  ------------------
  814|  62.5k|    {
  815|  62.5k|        if(i == 0)
  ------------------
  |  Branch (815:12): [True: 15.6k, False: 46.9k]
  ------------------
  816|  15.6k|        {
  817|  15.6k|            q0 = pi2_dc_src[0];
  818|  15.6k|        }
  819|  46.9k|        else
  820|  46.9k|        {
  821|  46.9k|            q0 = pi2_src_ptr[0];
  822|  46.9k|            INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  46.9k|                {\
  |  |  104|  46.9k|                    i4_value *= quant_scale;\
  |  |  105|  46.9k|                    i4_value *= weight_scale;\
  |  |  106|  46.9k|                    i4_value += rndfactor;\
  |  |  107|  46.9k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  46.9k|                    i4_value >>= qbits;\
  |  |  109|  46.9k|                }
  ------------------
  823|  46.9k|        }
  824|       |
  825|  62.5k|        q2 = pi2_src_ptr[2];
  826|  62.5k|        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  62.5k|                {\
  |  |  104|  62.5k|                    i4_value *= quant_scale;\
  |  |  105|  62.5k|                    i4_value *= weight_scale;\
  |  |  106|  62.5k|                    i4_value += rndfactor;\
  |  |  107|  62.5k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  62.5k|                    i4_value >>= qbits;\
  |  |  109|  62.5k|                }
  ------------------
  827|       |
  828|  62.5k|        x0 = q0 + q2;
  829|  62.5k|        x1 = q0 - q2;
  830|       |
  831|  62.5k|        q1 = pi2_src_ptr[1];
  832|  62.5k|        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  62.5k|                {\
  |  |  104|  62.5k|                    i4_value *= quant_scale;\
  |  |  105|  62.5k|                    i4_value *= weight_scale;\
  |  |  106|  62.5k|                    i4_value += rndfactor;\
  |  |  107|  62.5k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  62.5k|                    i4_value >>= qbits;\
  |  |  109|  62.5k|                }
  ------------------
  833|       |
  834|  62.5k|        q3 = pi2_src_ptr[3];
  835|  62.5k|        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  62.5k|                {\
  |  |  104|  62.5k|                    i4_value *= quant_scale;\
  |  |  105|  62.5k|                    i4_value *= weight_scale;\
  |  |  106|  62.5k|                    i4_value += rndfactor;\
  |  |  107|  62.5k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  62.5k|                    i4_value >>= qbits;\
  |  |  109|  62.5k|                }
  ------------------
  836|       |
  837|  62.5k|        x2 = (q1 >> 1) - q3;
  838|  62.5k|        x3 = q1 + (q3 >> 1);
  839|       |
  840|  62.5k|        pi2_tmp_ptr[0] = x0 + x3;
  841|  62.5k|        pi2_tmp_ptr[1] = x1 + x2;
  842|  62.5k|        pi2_tmp_ptr[2] = x1 - x2;
  843|  62.5k|        pi2_tmp_ptr[3] = x0 - x3;
  844|       |
  845|  62.5k|        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  62.5k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  846|  62.5k|        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  62.5k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  847|  62.5k|        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  62.5k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  848|  62.5k|        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  62.5k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  849|  62.5k|    }
  850|       |
  851|       |    /* vertical inverse transform */
  852|  15.6k|    pi2_tmp_ptr = pi2_tmp;
  853|  78.2k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  78.2k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (853:16): [True: 62.5k, False: 15.6k]
  ------------------
  854|  62.5k|    {
  855|  62.5k|        pu1_pred_ptr = pu1_pred;
  856|  62.5k|        pu1_out = pu1_out_ptr;
  857|       |
  858|  62.5k|        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
  859|  62.5k|        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
  860|  62.5k|        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
  861|  62.5k|        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
  862|       |
  863|       |        /* inverse prediction */
  864|  62.5k|        i_macro = x0 + x3;
  865|  62.5k|        i_macro = ((i_macro + 32) >> 6);
  866|  62.5k|        i_macro += *pu1_pred_ptr;
  867|  62.5k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  62.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  62.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 13.7k, False: 48.8k]
  |  |  |  |  |  Branch (77:54): [True: 2.90k, False: 45.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  868|  62.5k|        pu1_pred_ptr += pred_strd;
  869|  62.5k|        pu1_out += out_strd;
  870|       |
  871|  62.5k|        i_macro = x1 + x2;
  872|  62.5k|        i_macro = ((i_macro + 32) >> 6);
  873|  62.5k|        i_macro += *pu1_pred_ptr;
  874|  62.5k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  62.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  62.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 14.2k, False: 48.2k]
  |  |  |  |  |  Branch (77:54): [True: 2.74k, False: 45.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  875|  62.5k|        pu1_pred_ptr += pred_strd;
  876|  62.5k|        pu1_out += out_strd;
  877|       |
  878|  62.5k|        i_macro = x1 - x2;
  879|  62.5k|        i_macro = ((i_macro + 32) >> 6);
  880|  62.5k|        i_macro += *pu1_pred_ptr;
  881|  62.5k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  62.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  62.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 13.0k, False: 49.5k]
  |  |  |  |  |  Branch (77:54): [True: 2.83k, False: 46.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  882|  62.5k|        pu1_pred_ptr += pred_strd;
  883|  62.5k|        pu1_out += out_strd;
  884|       |
  885|  62.5k|        i_macro = x0 - x3;
  886|  62.5k|        i_macro = ((i_macro + 32) >> 6);
  887|  62.5k|        i_macro += *pu1_pred_ptr;
  888|  62.5k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  62.5k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  62.5k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 14.7k, False: 47.8k]
  |  |  |  |  |  Branch (77:54): [True: 2.91k, False: 44.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  889|       |
  890|  62.5k|        pi2_tmp_ptr++;
  891|  62.5k|        pu1_out_ptr += 2; // Interleaved store for output
  892|  62.5k|        pu1_pred += 2; // Interleaved load for pred buffer
  893|  62.5k|    }
  894|  15.6k|}
ih264_iquant_itrans_recon_chroma_4x4_dc:
  950|   128k|{
  951|   128k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  952|   128k|    UWORD8 *pu1_out_ptr = pu1_out;
  953|   128k|    WORD32 q0;
  954|   128k|    WORD16 x, i_macro, i;
  955|       |
  956|   128k|    UNUSED(pi2_src);
  ------------------
  |  |   45|   128k|#define UNUSED(x) ((void)(x))
  ------------------
  957|   128k|    UNUSED(pu2_iscal_mat);
  ------------------
  |  |   45|   128k|#define UNUSED(x) ((void)(x))
  ------------------
  958|   128k|    UNUSED(pu2_weigh_mat);
  ------------------
  |  |   45|   128k|#define UNUSED(x) ((void)(x))
  ------------------
  959|   128k|    UNUSED(u4_qp_div_6);
  ------------------
  |  |   45|   128k|#define UNUSED(x) ((void)(x))
  ------------------
  960|   128k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|   128k|#define UNUSED(x) ((void)(x))
  ------------------
  961|       |
  962|   128k|    q0 = pi2_dc_src[0];    // Restoring dc value for intra case3
  963|   128k|    i_macro = ((q0 + 32) >> 6);
  964|       |
  965|   641k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   641k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (965:16): [True: 512k, False: 128k]
  ------------------
  966|   512k|    {
  967|   512k|        pu1_pred_ptr = pu1_pred;
  968|   512k|        pu1_out = pu1_out_ptr;
  969|       |
  970|       |        /* inverse prediction */
  971|   512k|        x = i_macro + *pu1_pred_ptr;
  972|   512k|        *pu1_out =  CLIP_U8(x);
  ------------------
  |  |   58|   512k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   512k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 134k, False: 378k]
  |  |  |  |  |  Branch (77:54): [True: 4.72k, False: 373k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  973|   512k|        pu1_pred_ptr += pred_strd;
  974|   512k|        pu1_out += out_strd;
  975|       |
  976|   512k|        x = i_macro + *pu1_pred_ptr;
  977|   512k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|   512k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   512k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 135k, False: 377k]
  |  |  |  |  |  Branch (77:54): [True: 4.75k, False: 373k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  978|   512k|        pu1_pred_ptr += pred_strd;
  979|   512k|        pu1_out += out_strd;
  980|       |
  981|   512k|        x = i_macro + *pu1_pred_ptr;
  982|   512k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|   512k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   512k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 135k, False: 377k]
  |  |  |  |  |  Branch (77:54): [True: 4.76k, False: 372k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  983|   512k|        pu1_pred_ptr += pred_strd;
  984|   512k|        pu1_out += out_strd;
  985|       |
  986|   512k|        x = i_macro + *pu1_pred_ptr;
  987|   512k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|   512k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   512k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 134k, False: 377k]
  |  |  |  |  |  Branch (77:54): [True: 4.74k, False: 373k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  988|       |
  989|   512k|        pu1_out_ptr+=2;
  990|   512k|        pu1_pred+=2;
  991|   512k|    }
  992|   128k|}

ih264_intra_pred_luma_4x4_mode_vert:
  139|  71.0k|{
  140|  71.0k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  141|       |
  142|  71.0k|    UNUSED(src_strd);
  ------------------
  |  |   45|  71.0k|#define UNUSED(x) ((void)(x))
  ------------------
  143|  71.0k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  71.0k|#define UNUSED(x) ((void)(x))
  ------------------
  144|  71.0k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  71.0k|#define BLK_SIZE            4
  ------------------
  145|  71.0k|    memcpy(pu1_dst, pu1_top, 4);
  146|  71.0k|    memcpy(pu1_dst + dst_strd, pu1_top, 4);
  147|  71.0k|    memcpy(pu1_dst + 2 * dst_strd, pu1_top, 4);
  148|  71.0k|    memcpy(pu1_dst + 3 * dst_strd, pu1_top, 4);
  149|  71.0k|}
ih264_intra_pred_luma_4x4_mode_horz:
  188|  15.3k|{
  189|  15.3k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  190|       |
  191|  15.3k|    UNUSED(src_strd);
  ------------------
  |  |   45|  15.3k|#define UNUSED(x) ((void)(x))
  ------------------
  192|  15.3k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  15.3k|#define UNUSED(x) ((void)(x))
  ------------------
  193|  15.3k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  15.3k|#define BLK_SIZE            4
  ------------------
  194|  15.3k|    memset(pu1_dst, *pu1_left, 4);
  195|  15.3k|    memset(pu1_dst + dst_strd, *(pu1_left - 1), 4);
  196|  15.3k|    memset(pu1_dst + 2 * dst_strd, *(pu1_left - 2), 4);
  197|  15.3k|    memset(pu1_dst + 3 * dst_strd, *(pu1_left - 3), 4);
  198|  15.3k|}
ih264_intra_pred_luma_4x4_mode_dc:
  236|  63.5k|{
  237|  63.5k|    UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
  238|  63.5k|    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
  239|  63.5k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  240|  63.5k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  241|  63.5k|    WORD32 val = 0;
  242|       |
  243|  63.5k|    UNUSED(src_strd);
  ------------------
  |  |   45|  63.5k|#define UNUSED(x) ((void)(x))
  ------------------
  244|  63.5k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  63.5k|#define UNUSED(x) ((void)(x))
  ------------------
  245|  63.5k|    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  63.5k|#define BOOLEAN(x) (!!(x))
  ------------------
  246|  63.5k|    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  63.5k|#define BOOLEAN(x) (!!(x))
  ------------------
  247|  63.5k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  63.5k|#define BLK_SIZE            4
  ------------------
  248|  63.5k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  63.5k|#define BLK_SIZE            4
  ------------------
  249|       |
  250|  63.5k|    if(u1_useleft)
  ------------------
  |  Branch (250:8): [True: 50.5k, False: 12.9k]
  ------------------
  251|  50.5k|    {
  252|  50.5k|        val += *pu1_left--;
  253|  50.5k|        val += *pu1_left--;
  254|  50.5k|        val += *pu1_left--;
  255|  50.5k|        val += *pu1_left + 2;
  256|  50.5k|    }
  257|  63.5k|    if(u1_usetop)
  ------------------
  |  Branch (257:8): [True: 57.3k, False: 6.24k]
  ------------------
  258|  57.3k|    {
  259|  57.3k|        val += *pu1_top + *(pu1_top + 1) + *(pu1_top + 2) + *(pu1_top + 3)
  260|  57.3k|                        + 2;
  261|  57.3k|    }
  262|       |    /* Since 2 is added if either left/top pred is there,
  263|       |     val still being zero implies both preds are not there */
  264|  63.5k|    val = (val) ? (val >> (1 + u1_useleft + u1_usetop)) : 128;
  ------------------
  |  Branch (264:11): [True: 62.8k, False: 743]
  ------------------
  265|       |    /* 4 bytes are copied from src to dst */
  266|  63.5k|    memset(pu1_dst, val, 4);
  267|  63.5k|    memset(pu1_dst + dst_strd, val, 4);
  268|  63.5k|    memset(pu1_dst + 2 * dst_strd, val, 4);
  269|  63.5k|    memset(pu1_dst + 3 * dst_strd, val, 4);
  270|  63.5k|}
ih264_intra_pred_luma_4x4_mode_diag_dl:
  309|  3.37k|{
  310|  3.37k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  311|  3.37k|    UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h;
  312|  3.37k|    UWORD8 predicted_pixels[7];
  313|       |
  314|  3.37k|    UNUSED(src_strd);
  ------------------
  |  |   45|  3.37k|#define UNUSED(x) ((void)(x))
  ------------------
  315|  3.37k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  3.37k|#define UNUSED(x) ((void)(x))
  ------------------
  316|  3.37k|    pu1_top = pu1_src +BLK_SIZE + 1;
  ------------------
  |  |  511|  3.37k|#define BLK_SIZE            4
  ------------------
  317|       |
  318|  3.37k|    ui4_a = *pu1_top++;
  319|  3.37k|    ui4_b = *pu1_top++;
  320|  3.37k|    ui4_c = *pu1_top++;
  321|  3.37k|    ui4_d = *pu1_top++;
  322|  3.37k|    ui4_e = *pu1_top++;
  323|  3.37k|    ui4_f = *pu1_top++;
  324|  3.37k|    ui4_g = *pu1_top++;
  325|  3.37k|    ui4_h = *pu1_top;
  326|       |
  327|  3.37k|    predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|  3.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  328|  3.37k|    predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|  3.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  329|  3.37k|    predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e);
  ------------------
  |  |   46|  3.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  330|  3.37k|    predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f);
  ------------------
  |  |   46|  3.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  331|  3.37k|    predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g);
  ------------------
  |  |   46|  3.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  332|  3.37k|    predicted_pixels[5] = FILT121(ui4_f, ui4_g, ui4_h);
  ------------------
  |  |   46|  3.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  333|  3.37k|    predicted_pixels[6] = FILT121(ui4_g, ui4_h, ui4_h);
  ------------------
  |  |   46|  3.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  334|       |
  335|  3.37k|    memcpy(pu1_dst, predicted_pixels, 4);
  336|  3.37k|    memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 4);
  337|  3.37k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 4);
  338|  3.37k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 3, 4);
  339|  3.37k|}
ih264_intra_pred_luma_4x4_mode_diag_dr:
  378|  1.44k|{
  379|  1.44k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  380|  1.44k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  381|  1.44k|    UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */
  382|  1.44k|    UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_i, ui4_j, ui4_k, ui4_l, ui4_m;
  383|  1.44k|    UWORD8 predicted_pixels[7];
  384|       |
  385|  1.44k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.44k|#define UNUSED(x) ((void)(x))
  ------------------
  386|  1.44k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.44k|#define UNUSED(x) ((void)(x))
  ------------------
  387|  1.44k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  1.44k|#define BLK_SIZE            4
  ------------------
  388|  1.44k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  1.44k|#define BLK_SIZE            4
  ------------------
  389|  1.44k|    pu1_topleft = pu1_src +BLK_SIZE;
  ------------------
  |  |  511|  1.44k|#define BLK_SIZE            4
  ------------------
  390|       |
  391|  1.44k|    ui4_a = *pu1_top++;
  392|  1.44k|    ui4_b = *pu1_top++;
  393|  1.44k|    ui4_c = *pu1_top++;
  394|  1.44k|    ui4_d = *pu1_top++;
  395|  1.44k|    ui4_i = *pu1_left--;
  396|  1.44k|    ui4_j = *pu1_left--;
  397|  1.44k|    ui4_k = *pu1_left--;
  398|  1.44k|    ui4_l = *pu1_left;
  399|  1.44k|    ui4_m = *pu1_topleft;
  400|       |
  401|  1.44k|    predicted_pixels[2] = FILT121(ui4_j, ui4_i, ui4_m);
  ------------------
  |  |   46|  1.44k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  402|  1.44k|    predicted_pixels[1] = FILT121(ui4_k, ui4_j, ui4_i);
  ------------------
  |  |   46|  1.44k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  403|  1.44k|    predicted_pixels[0] = FILT121(ui4_l, ui4_k, ui4_j);
  ------------------
  |  |   46|  1.44k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  404|  1.44k|    predicted_pixels[3] = FILT121(ui4_i, ui4_m, ui4_a);
  ------------------
  |  |   46|  1.44k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  405|  1.44k|    predicted_pixels[4] = FILT121(ui4_m, ui4_a, ui4_b);
  ------------------
  |  |   46|  1.44k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  406|  1.44k|    predicted_pixels[5] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|  1.44k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  407|  1.44k|    predicted_pixels[6] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|  1.44k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  408|       |
  409|  1.44k|    memcpy(pu1_dst, predicted_pixels + 3, 4);
  410|  1.44k|    memcpy(pu1_dst + dst_strd, predicted_pixels + 2, 4);
  411|  1.44k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 1, 4);
  412|  1.44k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4);
  413|  1.44k|}
ih264_intra_pred_luma_4x4_mode_vert_r:
  452|  1.24k|{
  453|       |
  454|  1.24k|    UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_i, ui4_j, ui4_k, ui4_m;
  455|  1.24k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  456|  1.24k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  457|  1.24k|    UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */
  458|  1.24k|    UWORD8 predicted_pixels[10];
  459|       |
  460|  1.24k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.24k|#define UNUSED(x) ((void)(x))
  ------------------
  461|  1.24k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.24k|#define UNUSED(x) ((void)(x))
  ------------------
  462|  1.24k|    pu1_top = pu1_src +BLK_SIZE + 1;
  ------------------
  |  |  511|  1.24k|#define BLK_SIZE            4
  ------------------
  463|  1.24k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  1.24k|#define BLK_SIZE            4
  ------------------
  464|  1.24k|    pu1_topleft = pu1_src + BLK_SIZE;
  ------------------
  |  |  511|  1.24k|#define BLK_SIZE            4
  ------------------
  465|       |
  466|  1.24k|    ui4_a = *pu1_top++;
  467|  1.24k|    ui4_b = *pu1_top++;
  468|  1.24k|    ui4_c = *pu1_top++;
  469|  1.24k|    ui4_d = *pu1_top++;
  470|  1.24k|    ui4_i = *pu1_left--;
  471|  1.24k|    ui4_j = *pu1_left--;
  472|  1.24k|    ui4_k = *pu1_left;
  473|  1.24k|    ui4_m = *pu1_topleft;
  474|       |
  475|  1.24k|    predicted_pixels[6] = FILT11(ui4_m, ui4_a);
  ------------------
  |  |   49|  1.24k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  476|  1.24k|    predicted_pixels[7] = FILT11(ui4_a, ui4_b);
  ------------------
  |  |   49|  1.24k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  477|  1.24k|    predicted_pixels[8] = FILT11(ui4_b, ui4_c);
  ------------------
  |  |   49|  1.24k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  478|  1.24k|    predicted_pixels[9] = FILT11(ui4_c, ui4_d);
  ------------------
  |  |   49|  1.24k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  479|  1.24k|    predicted_pixels[1] = FILT121(ui4_i, ui4_m, ui4_a);
  ------------------
  |  |   46|  1.24k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  480|  1.24k|    predicted_pixels[2] = FILT121(ui4_m, ui4_a, ui4_b);
  ------------------
  |  |   46|  1.24k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  481|  1.24k|    predicted_pixels[3] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|  1.24k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  482|  1.24k|    predicted_pixels[4] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|  1.24k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  483|  1.24k|    predicted_pixels[5] = FILT121(ui4_j, ui4_i, ui4_m);
  ------------------
  |  |   46|  1.24k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  484|  1.24k|    predicted_pixels[0] = FILT121(ui4_k, ui4_j, ui4_i);
  ------------------
  |  |   46|  1.24k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  485|       |
  486|  1.24k|    memcpy(pu1_dst, predicted_pixels + 6, 4);
  487|  1.24k|    memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 4);
  488|  1.24k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 5, 4);
  489|  1.24k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4);
  490|  1.24k|}
ih264_intra_pred_luma_4x4_mode_horz_d:
  529|  2.38k|{
  530|  2.38k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  531|  2.38k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  532|  2.38k|    UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */
  533|  2.38k|    UWORD32 ui4_a, ui4_b, ui4_c, ui4_i, ui4_j, ui4_k, ui4_l, ui4_m;
  534|  2.38k|    UWORD8 predicted_pixels[10];
  535|       |
  536|  2.38k|    UNUSED(src_strd);
  ------------------
  |  |   45|  2.38k|#define UNUSED(x) ((void)(x))
  ------------------
  537|  2.38k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  2.38k|#define UNUSED(x) ((void)(x))
  ------------------
  538|  2.38k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  2.38k|#define BLK_SIZE            4
  ------------------
  539|  2.38k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  2.38k|#define BLK_SIZE            4
  ------------------
  540|  2.38k|    pu1_topleft = pu1_src + BLK_SIZE;
  ------------------
  |  |  511|  2.38k|#define BLK_SIZE            4
  ------------------
  541|       |
  542|  2.38k|    ui4_a = *pu1_top++;
  543|  2.38k|    ui4_b = *pu1_top++;
  544|  2.38k|    ui4_c = *pu1_top++;
  545|  2.38k|    ui4_i = *pu1_left--;
  546|  2.38k|    ui4_j = *pu1_left--;
  547|  2.38k|    ui4_k = *pu1_left--;
  548|  2.38k|    ui4_l = *pu1_left--;
  549|  2.38k|    ui4_m = *pu1_topleft;
  550|       |
  551|  2.38k|    predicted_pixels[6] = FILT11(ui4_i, ui4_m);
  ------------------
  |  |   49|  2.38k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  552|  2.38k|    predicted_pixels[7] = FILT121(ui4_i, ui4_m, ui4_a);
  ------------------
  |  |   46|  2.38k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  553|  2.38k|    predicted_pixels[8] = FILT121(ui4_m, ui4_a, ui4_b);
  ------------------
  |  |   46|  2.38k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  554|  2.38k|    predicted_pixels[9] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|  2.38k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  555|  2.38k|    predicted_pixels[1] = FILT121(ui4_l, ui4_k, ui4_j);
  ------------------
  |  |   46|  2.38k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  556|  2.38k|    predicted_pixels[2] = FILT11(ui4_k, ui4_j);
  ------------------
  |  |   49|  2.38k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  557|  2.38k|    predicted_pixels[3] = FILT121(ui4_k, ui4_j, ui4_i);
  ------------------
  |  |   46|  2.38k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  558|  2.38k|    predicted_pixels[4] = FILT11(ui4_j, ui4_i);
  ------------------
  |  |   49|  2.38k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  559|  2.38k|    predicted_pixels[5] = FILT121(ui4_j, ui4_i, ui4_m);
  ------------------
  |  |   46|  2.38k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  560|  2.38k|    predicted_pixels[0] = FILT11(ui4_l, ui4_k);
  ------------------
  |  |   49|  2.38k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  561|       |
  562|  2.38k|    memcpy(pu1_dst, predicted_pixels + 6, 4);
  563|  2.38k|    memcpy(pu1_dst + dst_strd, predicted_pixels + 4, 4);
  564|  2.38k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 4);
  565|  2.38k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4);
  566|  2.38k|}
ih264_intra_pred_luma_4x4_mode_vert_l:
  605|  6.71k|{
  606|  6.71k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  607|  6.71k|    UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g;
  608|  6.71k|    UWORD8 predicted_pixels[10];
  609|       |
  610|  6.71k|    UNUSED(src_strd);
  ------------------
  |  |   45|  6.71k|#define UNUSED(x) ((void)(x))
  ------------------
  611|  6.71k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  6.71k|#define UNUSED(x) ((void)(x))
  ------------------
  612|  6.71k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  6.71k|#define BLK_SIZE            4
  ------------------
  613|       |
  614|  6.71k|    ui4_a = *pu1_top++;
  615|  6.71k|    ui4_b = *pu1_top++;
  616|  6.71k|    ui4_c = *pu1_top++;
  617|  6.71k|    ui4_d = *pu1_top++;
  618|  6.71k|    ui4_e = *pu1_top++;
  619|  6.71k|    ui4_f = *pu1_top++;
  620|  6.71k|    ui4_g = *pu1_top;
  621|       |
  622|  6.71k|    predicted_pixels[5] = FILT11(ui4_a, ui4_b);
  ------------------
  |  |   49|  6.71k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  623|  6.71k|    predicted_pixels[6] = FILT11(ui4_b, ui4_c);
  ------------------
  |  |   49|  6.71k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  624|  6.71k|    predicted_pixels[7] = FILT11(ui4_c, ui4_d);
  ------------------
  |  |   49|  6.71k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  625|  6.71k|    predicted_pixels[8] = FILT11(ui4_d, ui4_e);
  ------------------
  |  |   49|  6.71k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  626|  6.71k|    predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|  6.71k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  627|  6.71k|    predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|  6.71k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  628|  6.71k|    predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e);
  ------------------
  |  |   46|  6.71k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  629|  6.71k|    predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f);
  ------------------
  |  |   46|  6.71k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  630|  6.71k|    predicted_pixels[9] = FILT11(ui4_e, ui4_f);
  ------------------
  |  |   49|  6.71k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  631|  6.71k|    predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g);
  ------------------
  |  |   46|  6.71k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  632|       |
  633|  6.71k|    memcpy(pu1_dst, predicted_pixels + 5, 4);
  634|  6.71k|    memcpy(pu1_dst + dst_strd, predicted_pixels, 4);
  635|  6.71k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 6, 4);
  636|  6.71k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 1, 4);
  637|  6.71k|}
ih264_intra_pred_luma_4x4_mode_horz_u:
  676|  10.4k|{
  677|  10.4k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  678|  10.4k|    UWORD32 ui4_i, ui4_j, ui4_k, ui4_l;
  679|  10.4k|    UWORD8 predicted_pixels[10];
  680|       |
  681|  10.4k|    UNUSED(src_strd);
  ------------------
  |  |   45|  10.4k|#define UNUSED(x) ((void)(x))
  ------------------
  682|  10.4k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  10.4k|#define UNUSED(x) ((void)(x))
  ------------------
  683|  10.4k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  10.4k|#define BLK_SIZE            4
  ------------------
  684|       |
  685|  10.4k|    ui4_i = *pu1_left--;
  686|  10.4k|    ui4_j = *pu1_left--;
  687|  10.4k|    ui4_k = *pu1_left--;
  688|  10.4k|    ui4_l = *pu1_left--;
  689|       |
  690|  10.4k|    predicted_pixels[0] = FILT11(ui4_j, ui4_i);
  ------------------
  |  |   49|  10.4k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  691|  10.4k|    predicted_pixels[1] = FILT121(ui4_k, ui4_j, ui4_i);
  ------------------
  |  |   46|  10.4k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  692|  10.4k|    predicted_pixels[2] = FILT11(ui4_k, ui4_j);
  ------------------
  |  |   49|  10.4k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  693|  10.4k|    predicted_pixels[3] = FILT121(ui4_l, ui4_k, ui4_j);
  ------------------
  |  |   46|  10.4k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  694|  10.4k|    predicted_pixels[4] = FILT11(ui4_l, ui4_k);
  ------------------
  |  |   49|  10.4k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
  695|  10.4k|    predicted_pixels[5] = FILT121(ui4_l, ui4_l, ui4_k);
  ------------------
  |  |   46|  10.4k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  696|  10.4k|    predicted_pixels[6] = ui4_l;
  697|  10.4k|    predicted_pixels[7] = ui4_l;
  698|  10.4k|    predicted_pixels[8] = ui4_l;
  699|  10.4k|    predicted_pixels[9] = ui4_l;
  700|       |
  701|  10.4k|    memcpy(pu1_dst, predicted_pixels, 4);
  702|  10.4k|    memcpy(pu1_dst + dst_strd, predicted_pixels + 2, 4);
  703|  10.4k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 4, 4);
  704|  10.4k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 6, 4);
  705|  10.4k|}
ih264_intra_pred_luma_8x8_mode_ref_filtering:
  747|   128k|{
  748|   128k|    WORD32 top_avail, left_avail, top_left_avail, top_right_avail;
  749|       |
  750|   128k|    left_avail = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|   128k|#define BOOLEAN(x) (!!(x))
  ------------------
  751|   128k|    top_avail = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|   128k|#define BOOLEAN(x) (!!(x))
  ------------------
  752|   128k|    top_left_avail = BOOLEAN(ngbr_avail & TOP_LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|   128k|#define BOOLEAN(x) (!!(x))
  ------------------
  753|   128k|    top_right_avail = BOOLEAN(ngbr_avail & TOP_RIGHT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|   128k|#define BOOLEAN(x) (!!(x))
  ------------------
  754|       |
  755|   128k|    if(top_avail)
  ------------------
  |  Branch (755:8): [True: 103k, False: 25.1k]
  ------------------
  756|   103k|    {
  757|   103k|        WORD32 i;
  758|   103k|        UWORD32 u4_xm1;
  759|       |
  760|   103k|        if(!top_right_avail)
  ------------------
  |  Branch (760:12): [True: 37.2k, False: 66.0k]
  ------------------
  761|  37.2k|        {
  762|  37.2k|            memset(pu1_dst + 8 + 1 + 8, pu1_top[7], 8);
  763|  37.2k|            top_right_avail = 1;
  764|  37.2k|        }
  765|  66.0k|        else
  766|  66.0k|        {
  767|  66.0k|            memcpy(pu1_dst + 8 + 1 + 8, pu1_top + 8, 8);
  768|  66.0k|        }
  769|       |
  770|   103k|        if(top_left_avail)
  ------------------
  |  Branch (770:12): [True: 89.8k, False: 13.3k]
  ------------------
  771|  89.8k|        {
  772|  89.8k|            pu1_dst[8 + 1 + 0] = FILT121((*pu1_topleft), pu1_top[0],
  ------------------
  |  |   46|  89.8k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  773|  89.8k|                                         pu1_top[1]);
  774|  89.8k|        }
  775|  13.3k|        else
  776|  13.3k|        {
  777|  13.3k|            pu1_dst[8 + 1] = ((3 * pu1_top[0]) + pu1_top[1] + 2) >> 2;
  778|  13.3k|        }
  779|       |
  780|   722k|        for(i = 1; i <= 6; i++)
  ------------------
  |  Branch (780:20): [True: 619k, False: 103k]
  ------------------
  781|   619k|        {
  782|   619k|            pu1_dst[8 + 1 + i] = FILT121(pu1_top[i - 1], pu1_top[i],
  ------------------
  |  |   46|   619k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  783|   619k|                                         pu1_top[i + 1]);
  784|   619k|        }
  785|       |        /* First byte of Top Right input is in pu1_dst[8 + 1 + 8]*/
  786|   103k|        pu1_dst[8 + 1 + 7] = FILT121(pu1_top[6], pu1_top[7],
  ------------------
  |  |   46|   103k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  787|   103k|                                     pu1_dst[8 + 1 + 8]);
  788|       |
  789|       |        /* filtered output and source in same buf, to prevent output(x - 1)
  790|       |         being over written in process */
  791|   103k|        u4_xm1 = pu1_top[7];
  792|       |
  793|   825k|        for(i = 8; i <= 14; i++)
  ------------------
  |  Branch (793:20): [True: 722k, False: 103k]
  ------------------
  794|   722k|        {
  795|   722k|            UWORD32 u4_x;
  796|   722k|            u4_x = (u4_xm1 + (pu1_dst[8 + 1 + i] << 1) + pu1_dst[8 + 1 + i + 1]
  797|   722k|                            + 2) >> 2;
  798|       |            /* assigning u4_xm1 from the un-filtered values for the next iteration */
  799|   722k|            u4_xm1 = pu1_dst[8 + 1 + i];
  800|   722k|            pu1_dst[8 + 1 + i] = u4_x;
  801|   722k|        }
  802|       |
  803|   103k|        pu1_dst[8 + 1 + 15] = (u4_xm1 + (3 * pu1_dst[8 + 1 + 15]) + 2) >> 2;
  804|   103k|    }
  805|       |
  806|       |    /* pu1_topleft is overloaded. It is both: */
  807|       |    /* a. A pointer for the top left pixel */
  808|       |    /* b. An indicator of availability of top left. */
  809|       |    /*    If it is null then top left not available */
  810|   128k|    if(top_left_avail)
  ------------------
  |  Branch (810:8): [True: 90.9k, False: 37.4k]
  ------------------
  811|  90.9k|    {
  812|  90.9k|        if((!top_avail) || (!left_avail))
  ------------------
  |  Branch (812:12): [True: 1.08k, False: 89.8k]
  |  Branch (812:28): [True: 313, False: 89.5k]
  ------------------
  813|  1.40k|        {
  814|  1.40k|            if(top_avail)
  ------------------
  |  Branch (814:16): [True: 313, False: 1.08k]
  ------------------
  815|    313|                pu1_dst[8] = (3 * pu1_topleft[0] + pu1_top[0] + 2) >> 2;
  816|  1.08k|            else if(left_avail)
  ------------------
  |  Branch (816:21): [True: 344, False: 745]
  ------------------
  817|    344|                pu1_dst[8] = (3 * pu1_topleft[0] + pu1_left[0] + 2) >> 2;
  818|  1.40k|        }
  819|  89.5k|        else
  820|  89.5k|        {
  821|  89.5k|            pu1_dst[8] = FILT121(pu1_top[0], (*pu1_topleft), pu1_left[0]);
  ------------------
  |  |   46|  89.5k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  822|  89.5k|        }
  823|  90.9k|    }
  824|       |
  825|   128k|    if(left_avail)
  ------------------
  |  Branch (825:8): [True: 112k, False: 15.7k]
  ------------------
  826|   112k|    {
  827|   112k|        UWORD32 idx;
  828|   112k|        if(0 != pu1_topleft)
  ------------------
  |  Branch (828:12): [True: 89.8k, False: 22.7k]
  ------------------
  829|  89.8k|        {
  830|  89.8k|            pu1_dst[7] = FILT121((*pu1_topleft), pu1_left[0],
  ------------------
  |  |   46|  89.8k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  831|  89.8k|                                 pu1_left[src_strd]);
  832|  89.8k|        }
  833|  22.7k|        else
  834|  22.7k|        {
  835|  22.7k|            pu1_dst[7] = ((3 * pu1_left[0]) + pu1_left[src_strd] + 2) >> 2;
  836|  22.7k|        }
  837|       |
  838|   788k|        for(idx = 1; idx <= 6; idx++)
  ------------------
  |  Branch (838:22): [True: 675k, False: 112k]
  ------------------
  839|   675k|        {
  840|   675k|            pu1_dst[7 - idx] = FILT121(pu1_left[(idx - 1) * src_strd],
  ------------------
  |  |   46|   675k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
  841|   675k|                                       pu1_left[idx * src_strd],
  842|   675k|                                       pu1_left[(idx + 1) * src_strd]);
  843|       |
  844|   675k|        }
  845|   112k|        pu1_dst[0] = (pu1_left[6 * src_strd] + 3 * pu1_left[7 * src_strd] + 2)
  846|   112k|                        >> 2;
  847|   112k|    }
  848|   128k|}
ih264_intra_pred_luma_8x8_mode_vert:
  887|  14.6k|{
  888|  14.6k|    UWORD8 *pu1_top = NULL;
  889|       |
  890|  14.6k|    UNUSED(src_strd);
  ------------------
  |  |   45|  14.6k|#define UNUSED(x) ((void)(x))
  ------------------
  891|  14.6k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  14.6k|#define UNUSED(x) ((void)(x))
  ------------------
  892|  14.6k|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|  14.6k|#define BLK8x8SIZE          8
  ------------------
  893|       |
  894|  14.6k|    memcpy(pu1_dst, pu1_top, 8);
  895|  14.6k|    memcpy(pu1_dst + dst_strd, pu1_top, 8);
  896|  14.6k|    memcpy(pu1_dst + 2 * dst_strd, pu1_top, 8);
  897|  14.6k|    memcpy(pu1_dst + 3 * dst_strd, pu1_top, 8);
  898|  14.6k|    memcpy(pu1_dst + 4 * dst_strd, pu1_top, 8);
  899|  14.6k|    memcpy(pu1_dst + 5 * dst_strd, pu1_top, 8);
  900|  14.6k|    memcpy(pu1_dst + 6 * dst_strd, pu1_top, 8);
  901|  14.6k|    memcpy(pu1_dst + 7 * dst_strd, pu1_top, 8);
  902|  14.6k|}
ih264_intra_pred_luma_8x8_mode_horz:
  941|  1.99k|{
  942|  1.99k|    UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  1.99k|#define BLK8x8SIZE          8
  ------------------
  943|       |
  944|  1.99k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.99k|#define UNUSED(x) ((void)(x))
  ------------------
  945|  1.99k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.99k|#define UNUSED(x) ((void)(x))
  ------------------
  946|  1.99k|    memset(pu1_dst, *pu1_left, 8);
  947|  1.99k|    memset(pu1_dst + dst_strd, *(pu1_left - 1), 8);
  948|  1.99k|    memset(pu1_dst + 2 * dst_strd, *(pu1_left - 2), 8);
  949|  1.99k|    memset(pu1_dst + 3 * dst_strd, *(pu1_left - 3), 8);
  950|  1.99k|    memset(pu1_dst + 4 * dst_strd, *(pu1_left - 4), 8);
  951|  1.99k|    memset(pu1_dst + 5 * dst_strd, *(pu1_left - 5), 8);
  952|  1.99k|    memset(pu1_dst + 6 * dst_strd, *(pu1_left - 6), 8);
  953|  1.99k|    memset(pu1_dst + 7 * dst_strd, *(pu1_left - 7), 8);
  954|  1.99k|}
ih264_intra_pred_luma_8x8_mode_dc:
  992|  37.0k|{
  993|  37.0k|    UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
  994|  37.0k|    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
  995|  37.0k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  996|  37.0k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  997|  37.0k|    WORD32 row;
  998|  37.0k|    WORD32 val = 0;
  999|       |
 1000|  37.0k|    UNUSED(src_strd);
  ------------------
  |  |   45|  37.0k|#define UNUSED(x) ((void)(x))
  ------------------
 1001|  37.0k|    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  37.0k|#define BOOLEAN(x) (!!(x))
  ------------------
 1002|  37.0k|    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  37.0k|#define BOOLEAN(x) (!!(x))
  ------------------
 1003|  37.0k|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|  37.0k|#define BLK8x8SIZE          8
  ------------------
 1004|  37.0k|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  37.0k|#define BLK8x8SIZE          8
  ------------------
 1005|       |
 1006|  37.0k|    if(u1_useleft)
  ------------------
  |  Branch (1006:8): [True: 29.9k, False: 7.10k]
  ------------------
 1007|  29.9k|    {
 1008|   269k|        for(row = 0; row < BLK8x8SIZE; row++)
  ------------------
  |  |  510|   269k|#define BLK8x8SIZE          8
  ------------------
  |  Branch (1008:22): [True: 239k, False: 29.9k]
  ------------------
 1009|   239k|            val += *(pu1_left - row);
 1010|  29.9k|        val += 4;
 1011|  29.9k|    }
 1012|  37.0k|    if(u1_usetop)
  ------------------
  |  Branch (1012:8): [True: 25.4k, False: 11.6k]
  ------------------
 1013|  25.4k|    {
 1014|   228k|        for(row = 0; row < BLK8x8SIZE; row++)
  ------------------
  |  |  510|   228k|#define BLK8x8SIZE          8
  ------------------
  |  Branch (1014:22): [True: 203k, False: 25.4k]
  ------------------
 1015|   203k|            val += *(pu1_top + row);
 1016|  25.4k|        val += 4;
 1017|  25.4k|    }
 1018|       |
 1019|       |    /* Since 4 is added if either left/top pred is there,
 1020|       |     val still being zero implies both preds are not there */
 1021|  37.0k|    val = (val) ? (val >> (2 + u1_useleft + u1_usetop)) : 128;
  ------------------
  |  Branch (1021:11): [True: 35.9k, False: 1.06k]
  ------------------
 1022|       |
 1023|  37.0k|    memset(pu1_dst, val, 8);
 1024|  37.0k|    memset(pu1_dst + dst_strd, val, 8);
 1025|  37.0k|    memset(pu1_dst + 2 * dst_strd, val, 8);
 1026|  37.0k|    memset(pu1_dst + 3 * dst_strd, val, 8);
 1027|  37.0k|    memset(pu1_dst + 4 * dst_strd, val, 8);
 1028|  37.0k|    memset(pu1_dst + 5 * dst_strd, val, 8);
 1029|  37.0k|    memset(pu1_dst + 6 * dst_strd, val, 8);
 1030|  37.0k|    memset(pu1_dst + 7 * dst_strd, val, 8);
 1031|  37.0k|}
ih264_intra_pred_luma_8x8_mode_diag_dl:
 1070|    561|{
 1071|    561|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1072|    561|    UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h;
 1073|    561|    UWORD32 ui4_i, ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p;
 1074|    561|    UWORD8 predicted_pixels[15];
 1075|       |
 1076|    561|    UNUSED(src_strd);
  ------------------
  |  |   45|    561|#define UNUSED(x) ((void)(x))
  ------------------
 1077|    561|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|    561|#define UNUSED(x) ((void)(x))
  ------------------
 1078|    561|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|    561|#define BLK8x8SIZE          8
  ------------------
 1079|       |
 1080|    561|    ui4_a = *pu1_top++;
 1081|    561|    ui4_b = *pu1_top++;
 1082|    561|    ui4_c = *pu1_top++;
 1083|    561|    ui4_d = *pu1_top++;
 1084|    561|    ui4_e = *pu1_top++;
 1085|    561|    ui4_f = *pu1_top++;
 1086|    561|    ui4_g = *pu1_top++;
 1087|    561|    ui4_h = *pu1_top++;
 1088|    561|    ui4_i = *pu1_top++;
 1089|    561|    ui4_j = *pu1_top++;
 1090|    561|    ui4_k = *pu1_top++;
 1091|    561|    ui4_l = *pu1_top++;
 1092|    561|    ui4_m = *pu1_top++;
 1093|    561|    ui4_n = *pu1_top++;
 1094|    561|    ui4_o = *pu1_top++;
 1095|    561|    ui4_p = *pu1_top;
 1096|       |
 1097|    561|    predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1098|    561|    predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1099|    561|    predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1100|    561|    predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1101|    561|    predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1102|    561|    predicted_pixels[5] = FILT121(ui4_f, ui4_g, ui4_h);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1103|    561|    predicted_pixels[6] = FILT121(ui4_g, ui4_h, ui4_i);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1104|    561|    predicted_pixels[7] = FILT121(ui4_h, ui4_i, ui4_j);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1105|    561|    predicted_pixels[8] = FILT121(ui4_i, ui4_j, ui4_k);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1106|    561|    predicted_pixels[9] = FILT121(ui4_j, ui4_k, ui4_l);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1107|    561|    predicted_pixels[10] = FILT121(ui4_k, ui4_l, ui4_m);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1108|    561|    predicted_pixels[11] = FILT121(ui4_l, ui4_m, ui4_n);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1109|    561|    predicted_pixels[12] = FILT121(ui4_m, ui4_n, ui4_o);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1110|    561|    predicted_pixels[13] = FILT121(ui4_n, ui4_o, ui4_p);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1111|    561|    predicted_pixels[14] = FILT121(ui4_o, ui4_p, ui4_p);
  ------------------
  |  |   46|    561|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1112|       |
 1113|    561|    memcpy(pu1_dst, predicted_pixels, 8);
 1114|    561|    memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 8);
 1115|    561|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 8);
 1116|    561|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 3, 8);
 1117|    561|    memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 4, 8);
 1118|    561|    memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 5, 8);
 1119|    561|    memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 6, 8);
 1120|    561|    memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 7, 8);
 1121|    561|}
ih264_intra_pred_luma_8x8_mode_diag_dr:
 1160|    557|{
 1161|    557|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1162|    557|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1163|    557|    UWORD8 *pu1_topleft = NULL; /* Pointer to start of top left predictors */
 1164|    557|    UWORD32 ui4_a;
 1165|    557|    UWORD32 ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h, ui4_i;
 1166|    557|    UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p, ui4_q;
 1167|    557|    UWORD8 predicted_pixels[15];
 1168|       |
 1169|    557|    UNUSED(src_strd);
  ------------------
  |  |   45|    557|#define UNUSED(x) ((void)(x))
  ------------------
 1170|    557|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|    557|#define UNUSED(x) ((void)(x))
  ------------------
 1171|    557|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|    557|#define BLK8x8SIZE          8
  ------------------
 1172|    557|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|    557|#define BLK8x8SIZE          8
  ------------------
 1173|    557|    pu1_topleft = pu1_src + BLK8x8SIZE;
  ------------------
  |  |  510|    557|#define BLK8x8SIZE          8
  ------------------
 1174|       |
 1175|    557|    ui4_a = *pu1_topleft;
 1176|    557|    ui4_b = *pu1_top++;
 1177|    557|    ui4_c = *pu1_top++;
 1178|    557|    ui4_d = *pu1_top++;
 1179|    557|    ui4_e = *pu1_top++;
 1180|    557|    ui4_f = *pu1_top++;
 1181|    557|    ui4_g = *pu1_top++;
 1182|    557|    ui4_h = *pu1_top++;
 1183|    557|    ui4_i = *pu1_top;
 1184|    557|    ui4_j = *pu1_left--;
 1185|    557|    ui4_k = *pu1_left--;
 1186|    557|    ui4_l = *pu1_left--;
 1187|    557|    ui4_m = *pu1_left--;
 1188|    557|    ui4_n = *pu1_left--;
 1189|    557|    ui4_o = *pu1_left--;
 1190|    557|    ui4_p = *pu1_left--;
 1191|    557|    ui4_q = *pu1_left;
 1192|       |
 1193|    557|    predicted_pixels[6] = FILT121(ui4_a, ui4_j, ui4_k);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1194|    557|    predicted_pixels[5] = FILT121(ui4_j, ui4_k, ui4_l);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1195|    557|    predicted_pixels[4] = FILT121(ui4_k, ui4_l, ui4_m);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1196|    557|    predicted_pixels[3] = FILT121(ui4_l, ui4_m, ui4_n);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1197|    557|    predicted_pixels[2] = FILT121(ui4_m, ui4_n, ui4_o);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1198|    557|    predicted_pixels[1] = FILT121(ui4_n, ui4_o, ui4_p);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1199|    557|    predicted_pixels[0] = FILT121(ui4_o, ui4_p, ui4_q);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1200|    557|    predicted_pixels[7] = FILT121(ui4_b, ui4_a, ui4_j);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1201|    557|    predicted_pixels[8] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1202|    557|    predicted_pixels[9] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1203|    557|    predicted_pixels[10] = FILT121(ui4_c, ui4_d, ui4_e);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1204|    557|    predicted_pixels[11] = FILT121(ui4_d, ui4_e, ui4_f);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1205|    557|    predicted_pixels[12] = FILT121(ui4_e, ui4_f, ui4_g);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1206|    557|    predicted_pixels[13] = FILT121(ui4_f, ui4_g, ui4_h);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1207|    557|    predicted_pixels[14] = FILT121(ui4_g, ui4_h, ui4_i);
  ------------------
  |  |   46|    557|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1208|       |
 1209|    557|    memcpy(pu1_dst, predicted_pixels + 7, 8);
 1210|    557|    memcpy(pu1_dst + dst_strd, predicted_pixels + 6, 8);
 1211|    557|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 5, 8);
 1212|    557|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 4, 8);
 1213|    557|    memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 3, 8);
 1214|    557|    memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 2, 8);
 1215|    557|    memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 1, 8);
 1216|    557|    memcpy(pu1_dst + 7 * dst_strd, predicted_pixels, 8);
 1217|    557|}
ih264_intra_pred_luma_8x8_mode_vert_r:
 1256|    542|{
 1257|    542|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1258|    542|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1259|    542|    UWORD8 *pu1_topleft = NULL; /* Pointer to start of top left predictors */
 1260|    542|    UWORD32 ui4_a;
 1261|    542|    UWORD32 ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h, ui4_i;
 1262|    542|    UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p;
 1263|    542|    UWORD8 predicted_pixels[22];
 1264|       |
 1265|    542|    UNUSED(src_strd);
  ------------------
  |  |   45|    542|#define UNUSED(x) ((void)(x))
  ------------------
 1266|    542|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|    542|#define UNUSED(x) ((void)(x))
  ------------------
 1267|    542|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|    542|#define BLK8x8SIZE          8
  ------------------
 1268|    542|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|    542|#define BLK8x8SIZE          8
  ------------------
 1269|    542|    pu1_topleft = pu1_src + BLK8x8SIZE;
  ------------------
  |  |  510|    542|#define BLK8x8SIZE          8
  ------------------
 1270|       |
 1271|    542|    ui4_a = *pu1_topleft;
 1272|       |
 1273|    542|    ui4_b = *pu1_top++;
 1274|    542|    ui4_c = *pu1_top++;
 1275|    542|    ui4_d = *pu1_top++;
 1276|    542|    ui4_e = *pu1_top++;
 1277|    542|    ui4_f = *pu1_top++;
 1278|    542|    ui4_g = *pu1_top++;
 1279|    542|    ui4_h = *pu1_top++;
 1280|    542|    ui4_i = *pu1_top;
 1281|    542|    ui4_j = *pu1_left--;
 1282|    542|    ui4_k = *pu1_left--;
 1283|    542|    ui4_l = *pu1_left--;
 1284|    542|    ui4_m = *pu1_left--;
 1285|    542|    ui4_n = *pu1_left--;
 1286|    542|    ui4_o = *pu1_left--;
 1287|    542|    ui4_p = *pu1_left--;
 1288|       |
 1289|    542|    predicted_pixels[0] = FILT121(ui4_o, ui4_n, ui4_m);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1290|    542|    predicted_pixels[1] = FILT121(ui4_m, ui4_l, ui4_k);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1291|    542|    predicted_pixels[2] = FILT121(ui4_k, ui4_j, ui4_a);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1292|    542|    predicted_pixels[3] = FILT11(ui4_a, ui4_b);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1293|    542|    predicted_pixels[4] = FILT11(ui4_b, ui4_c);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1294|    542|    predicted_pixels[5] = FILT11(ui4_c, ui4_d);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1295|    542|    predicted_pixels[6] = FILT11(ui4_d, ui4_e);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1296|    542|    predicted_pixels[7] = FILT11(ui4_e, ui4_f);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1297|    542|    predicted_pixels[8] = FILT11(ui4_f, ui4_g);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1298|    542|    predicted_pixels[9] = FILT11(ui4_g, ui4_h);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1299|    542|    predicted_pixels[10] = FILT11(ui4_h, ui4_i);
  ------------------
  |  |   49|    542|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1300|    542|    predicted_pixels[11] = FILT121(ui4_p, ui4_o, ui4_n);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1301|    542|    predicted_pixels[12] = FILT121(ui4_n, ui4_m, ui4_l);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1302|    542|    predicted_pixels[13] = FILT121(ui4_l, ui4_k, ui4_j);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1303|    542|    predicted_pixels[14] = FILT121(ui4_b, ui4_a, ui4_j);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1304|    542|    predicted_pixels[15] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1305|    542|    predicted_pixels[16] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1306|    542|    predicted_pixels[17] = FILT121(ui4_c, ui4_d, ui4_e);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1307|    542|    predicted_pixels[18] = FILT121(ui4_d, ui4_e, ui4_f);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1308|    542|    predicted_pixels[19] = FILT121(ui4_e, ui4_f, ui4_g);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1309|    542|    predicted_pixels[20] = FILT121(ui4_f, ui4_g, ui4_h);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1310|    542|    predicted_pixels[21] = FILT121(ui4_g, ui4_h, ui4_i);
  ------------------
  |  |   46|    542|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1311|       |
 1312|    542|    memcpy(pu1_dst, predicted_pixels + 3, 8);
 1313|    542|    memcpy(pu1_dst + 1 * dst_strd, predicted_pixels + 14, 8);
 1314|    542|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 8);
 1315|    542|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 13, 8);
 1316|    542|    memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 1, 8);
 1317|    542|    memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 12, 8);
 1318|    542|    memcpy(pu1_dst + 6 * dst_strd, predicted_pixels, 8);
 1319|    542|    memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 11, 8);
 1320|    542|}
ih264_intra_pred_luma_8x8_mode_horz_d:
 1359|  1.37k|{
 1360|  1.37k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1361|  1.37k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1362|  1.37k|    UWORD8 *pu1_topleft = NULL; /* Pointer to start of top left predictors */
 1363|  1.37k|    UWORD32 ui4_a;
 1364|  1.37k|    UWORD32 ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h, ui4_i;
 1365|  1.37k|    UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p;
 1366|  1.37k|    UWORD8 predicted_pixels[22];
 1367|       |
 1368|  1.37k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.37k|#define UNUSED(x) ((void)(x))
  ------------------
 1369|  1.37k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.37k|#define UNUSED(x) ((void)(x))
  ------------------
 1370|  1.37k|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|  1.37k|#define BLK8x8SIZE          8
  ------------------
 1371|  1.37k|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  1.37k|#define BLK8x8SIZE          8
  ------------------
 1372|  1.37k|    pu1_topleft = pu1_src + BLK8x8SIZE;
  ------------------
  |  |  510|  1.37k|#define BLK8x8SIZE          8
  ------------------
 1373|       |
 1374|  1.37k|    ui4_a = *pu1_topleft;
 1375|  1.37k|    ui4_j = *pu1_top++;
 1376|  1.37k|    ui4_k = *pu1_top++;
 1377|  1.37k|    ui4_l = *pu1_top++;
 1378|  1.37k|    ui4_m = *pu1_top++;
 1379|  1.37k|    ui4_n = *pu1_top++;
 1380|  1.37k|    ui4_o = *pu1_top++;
 1381|  1.37k|    ui4_p = *pu1_top++;
 1382|  1.37k|    ui4_b = *pu1_left--;
 1383|  1.37k|    ui4_c = *pu1_left--;
 1384|  1.37k|    ui4_d = *pu1_left--;
 1385|  1.37k|    ui4_e = *pu1_left--;
 1386|  1.37k|    ui4_f = *pu1_left--;
 1387|  1.37k|    ui4_g = *pu1_left--;
 1388|  1.37k|    ui4_h = *pu1_left--;
 1389|  1.37k|    ui4_i = *pu1_left;
 1390|       |
 1391|  1.37k|    predicted_pixels[0] = FILT11(ui4_h, ui4_i);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1392|  1.37k|    predicted_pixels[1] = FILT121(ui4_g, ui4_h, ui4_i);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1393|  1.37k|    predicted_pixels[2] = FILT11(ui4_g, ui4_h);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1394|  1.37k|    predicted_pixels[3] = FILT121(ui4_f, ui4_g, ui4_h);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1395|  1.37k|    predicted_pixels[4] = FILT11(ui4_f, ui4_g);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1396|  1.37k|    predicted_pixels[5] = FILT121(ui4_e, ui4_f, ui4_g);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1397|  1.37k|    predicted_pixels[6] = FILT11(ui4_e, ui4_f);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1398|  1.37k|    predicted_pixels[7] = FILT121(ui4_d, ui4_e, ui4_f);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1399|  1.37k|    predicted_pixels[8] = FILT11(ui4_d, ui4_e);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1400|  1.37k|    predicted_pixels[9] = FILT121(ui4_c, ui4_d, ui4_e);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1401|  1.37k|    predicted_pixels[10] = FILT11(ui4_c, ui4_d);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1402|  1.37k|    predicted_pixels[11] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1403|  1.37k|    predicted_pixels[12] = FILT11(ui4_b, ui4_c);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1404|  1.37k|    predicted_pixels[13] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1405|  1.37k|    predicted_pixels[14] = FILT11(ui4_a, ui4_b);
  ------------------
  |  |   49|  1.37k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1406|  1.37k|    predicted_pixels[15] = FILT121(ui4_j, ui4_a, ui4_b);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1407|  1.37k|    predicted_pixels[16] = FILT121(ui4_k, ui4_j, ui4_a);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1408|  1.37k|    predicted_pixels[17] = FILT121(ui4_l, ui4_k, ui4_j);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1409|  1.37k|    predicted_pixels[18] = FILT121(ui4_m, ui4_l, ui4_k);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1410|  1.37k|    predicted_pixels[19] = FILT121(ui4_n, ui4_m, ui4_l);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1411|  1.37k|    predicted_pixels[20] = FILT121(ui4_o, ui4_n, ui4_m);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1412|  1.37k|    predicted_pixels[21] = FILT121(ui4_p, ui4_o, ui4_n);
  ------------------
  |  |   46|  1.37k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1413|       |
 1414|  1.37k|    memcpy(pu1_dst, predicted_pixels + 14, 8);
 1415|  1.37k|    memcpy(pu1_dst + dst_strd, predicted_pixels + 12, 8);
 1416|  1.37k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 10, 8);
 1417|  1.37k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 8, 8);
 1418|  1.37k|    memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 6, 8);
 1419|  1.37k|    memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 4, 8);
 1420|  1.37k|    memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 2, 8);
 1421|  1.37k|    memcpy(pu1_dst + 7 * dst_strd, predicted_pixels, 8);
 1422|  1.37k|}
ih264_intra_pred_luma_8x8_mode_vert_l:
 1462|  2.81k|{
 1463|  2.81k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1464|  2.81k|    UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h;
 1465|  2.81k|    UWORD32 ui4_i, ui4_j, ui4_k, ui4_l, ui4_m;
 1466|  2.81k|    UWORD8 predicted_pixels[22];
 1467|       |
 1468|  2.81k|    UNUSED(src_strd);
  ------------------
  |  |   45|  2.81k|#define UNUSED(x) ((void)(x))
  ------------------
 1469|  2.81k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  2.81k|#define UNUSED(x) ((void)(x))
  ------------------
 1470|  2.81k|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|  2.81k|#define BLK8x8SIZE          8
  ------------------
 1471|       |
 1472|  2.81k|    ui4_a = *pu1_top++;
 1473|  2.81k|    ui4_b = *pu1_top++;
 1474|  2.81k|    ui4_c = *pu1_top++;
 1475|  2.81k|    ui4_d = *pu1_top++;
 1476|  2.81k|    ui4_e = *pu1_top++;
 1477|  2.81k|    ui4_f = *pu1_top++;
 1478|  2.81k|    ui4_g = *pu1_top++;
 1479|  2.81k|    ui4_h = *pu1_top++;
 1480|  2.81k|    ui4_i = *pu1_top++;
 1481|  2.81k|    ui4_j = *pu1_top++;
 1482|  2.81k|    ui4_k = *pu1_top++;
 1483|  2.81k|    ui4_l = *pu1_top++;
 1484|  2.81k|    ui4_m = *pu1_top++;
 1485|       |
 1486|  2.81k|    predicted_pixels[0] = FILT11(ui4_a, ui4_b);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1487|  2.81k|    predicted_pixels[1] = FILT11(ui4_b, ui4_c);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1488|  2.81k|    predicted_pixels[2] = FILT11(ui4_c, ui4_d);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1489|  2.81k|    predicted_pixels[3] = FILT11(ui4_d, ui4_e);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1490|  2.81k|    predicted_pixels[4] = FILT11(ui4_e, ui4_f);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1491|  2.81k|    predicted_pixels[5] = FILT11(ui4_f, ui4_g);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1492|  2.81k|    predicted_pixels[6] = FILT11(ui4_g, ui4_h);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1493|  2.81k|    predicted_pixels[7] = FILT11(ui4_h, ui4_i);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1494|  2.81k|    predicted_pixels[8] = FILT11(ui4_i, ui4_j);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1495|  2.81k|    predicted_pixels[9] = FILT11(ui4_j, ui4_k);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1496|  2.81k|    predicted_pixels[10] = FILT11(ui4_k, ui4_l);
  ------------------
  |  |   49|  2.81k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1497|  2.81k|    predicted_pixels[11] = FILT121(ui4_a, ui4_b, ui4_c);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1498|  2.81k|    predicted_pixels[12] = FILT121(ui4_b, ui4_c, ui4_d);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1499|  2.81k|    predicted_pixels[13] = FILT121(ui4_c, ui4_d, ui4_e);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1500|  2.81k|    predicted_pixels[14] = FILT121(ui4_d, ui4_e, ui4_f);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1501|  2.81k|    predicted_pixels[15] = FILT121(ui4_e, ui4_f, ui4_g);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1502|  2.81k|    predicted_pixels[16] = FILT121(ui4_f, ui4_g, ui4_h);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1503|  2.81k|    predicted_pixels[17] = FILT121(ui4_g, ui4_h, ui4_i);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1504|  2.81k|    predicted_pixels[18] = FILT121(ui4_h, ui4_i, ui4_j);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1505|  2.81k|    predicted_pixels[19] = FILT121(ui4_i, ui4_j, ui4_k);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1506|  2.81k|    predicted_pixels[20] = FILT121(ui4_j, ui4_k, ui4_l);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1507|  2.81k|    predicted_pixels[21] = FILT121(ui4_k, ui4_l, ui4_m);
  ------------------
  |  |   46|  2.81k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1508|       |
 1509|  2.81k|    memcpy(pu1_dst, predicted_pixels, 8);
 1510|  2.81k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 1, 8);
 1511|  2.81k|    memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 2, 8);
 1512|  2.81k|    memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 3, 8);
 1513|  2.81k|    memcpy(pu1_dst + 1 * dst_strd, predicted_pixels + 11, 8);
 1514|  2.81k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 12, 8);
 1515|  2.81k|    memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 13, 8);
 1516|  2.81k|    memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 14, 8);
 1517|  2.81k|}
ih264_intra_pred_luma_8x8_mode_horz_u:
 1556|  6.18k|{
 1557|  6.18k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1558|  6.18k|    UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p, ui4_q;
 1559|  6.18k|    UWORD8 predicted_pixels[22];
 1560|       |
 1561|  6.18k|    UNUSED(src_strd);
  ------------------
  |  |   45|  6.18k|#define UNUSED(x) ((void)(x))
  ------------------
 1562|  6.18k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  6.18k|#define UNUSED(x) ((void)(x))
  ------------------
 1563|  6.18k|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  6.18k|#define BLK8x8SIZE          8
  ------------------
 1564|       |
 1565|  6.18k|    ui4_j = *pu1_left--;
 1566|  6.18k|    ui4_k = *pu1_left--;
 1567|  6.18k|    ui4_l = *pu1_left--;
 1568|  6.18k|    ui4_m = *pu1_left--;
 1569|  6.18k|    ui4_n = *pu1_left--;
 1570|  6.18k|    ui4_o = *pu1_left--;
 1571|  6.18k|    ui4_p = *pu1_left--;
 1572|  6.18k|    ui4_q = *pu1_left;
 1573|       |
 1574|  6.18k|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  6.18k|#define BLK8x8SIZE          8
  ------------------
 1575|       |
 1576|  6.18k|    predicted_pixels[0] = FILT11(ui4_j, ui4_k);
  ------------------
  |  |   49|  6.18k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1577|  6.18k|    predicted_pixels[1] = FILT121(ui4_j, ui4_k, ui4_l);
  ------------------
  |  |   46|  6.18k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1578|  6.18k|    predicted_pixels[2] = FILT11(ui4_k, ui4_l);
  ------------------
  |  |   49|  6.18k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1579|  6.18k|    predicted_pixels[3] = FILT121(ui4_k, ui4_l, ui4_m);
  ------------------
  |  |   46|  6.18k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1580|  6.18k|    predicted_pixels[4] = FILT11(ui4_l, ui4_m);
  ------------------
  |  |   49|  6.18k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1581|  6.18k|    predicted_pixels[5] = FILT121(ui4_l, ui4_m, ui4_n);
  ------------------
  |  |   46|  6.18k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1582|  6.18k|    predicted_pixels[6] = FILT11(ui4_m, ui4_n);
  ------------------
  |  |   49|  6.18k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1583|  6.18k|    predicted_pixels[7] = FILT121(ui4_m, ui4_n, ui4_o);
  ------------------
  |  |   46|  6.18k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1584|  6.18k|    predicted_pixels[8] = FILT11(ui4_n, ui4_o);
  ------------------
  |  |   49|  6.18k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1585|  6.18k|    predicted_pixels[9] = FILT121(ui4_n, ui4_o, ui4_p);
  ------------------
  |  |   46|  6.18k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1586|  6.18k|    predicted_pixels[10] = FILT11(ui4_o, ui4_p);
  ------------------
  |  |   49|  6.18k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1587|  6.18k|    predicted_pixels[11] = FILT121(ui4_o, ui4_p, ui4_q);
  ------------------
  |  |   46|  6.18k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1588|  6.18k|    predicted_pixels[12] = FILT11(ui4_p, ui4_q);
  ------------------
  |  |   49|  6.18k|#define FILT11(a,b) ((a + b + 1) >> 1)
  ------------------
 1589|  6.18k|    predicted_pixels[13] = FILT121(ui4_p, ui4_q, ui4_q);
  ------------------
  |  |   46|  6.18k|#define FILT121(a,b,c) ((a + (b << 1) + c + 2) >> 2)
  ------------------
 1590|  6.18k|    memset(predicted_pixels+14,ui4_q,8);
 1591|       |
 1592|  6.18k|    memcpy(pu1_dst, predicted_pixels, 8);
 1593|  6.18k|    memcpy(pu1_dst + 1 * dst_strd, predicted_pixels + 2, 8);
 1594|  6.18k|    memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 4, 8);
 1595|  6.18k|    memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 6, 8);
 1596|  6.18k|    memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 8, 8);
 1597|  6.18k|    memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 10, 8);
 1598|  6.18k|    memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 12, 8);
 1599|  6.18k|    memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 14, 8);
 1600|  6.18k|}
ih264_intra_pred_luma_16x16_mode_vert:
 1639|  22.9k|{
 1640|  22.9k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1641|  22.9k|    WORD32 rows; /* loop variables*/
 1642|       |
 1643|  22.9k|    UNUSED(src_strd);
  ------------------
  |  |   45|  22.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1644|  22.9k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  22.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1645|  22.9k|    pu1_top = pu1_src + MB_SIZE + 1;
  ------------------
  |  |  509|  22.9k|#define MB_SIZE             16
  ------------------
 1646|   114k|    for(rows = 0; rows < 16; rows += 4, pu1_dst += dst_strd)
  ------------------
  |  Branch (1646:19): [True: 91.8k, False: 22.9k]
  ------------------
 1647|  91.8k|    {
 1648|  91.8k|        memcpy(pu1_dst, pu1_top, 16);
 1649|  91.8k|        pu1_dst += dst_strd;
 1650|  91.8k|        memcpy(pu1_dst, pu1_top, 16);
 1651|  91.8k|        pu1_dst += dst_strd;
 1652|  91.8k|        memcpy(pu1_dst, pu1_top, 16);
 1653|  91.8k|        pu1_dst += dst_strd;
 1654|  91.8k|        memcpy(pu1_dst, pu1_top, 16);
 1655|  91.8k|    }
 1656|  22.9k|}
ih264_intra_pred_luma_16x16_mode_horz:
 1695|  10.0k|{
 1696|  10.0k|    UWORD8 *pu1_left = NULL; /* Pointer to start of top predictors */
 1697|  10.0k|    WORD32 rows;
 1698|       |
 1699|  10.0k|    UNUSED(src_strd);
  ------------------
  |  |   45|  10.0k|#define UNUSED(x) ((void)(x))
  ------------------
 1700|  10.0k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  10.0k|#define UNUSED(x) ((void)(x))
  ------------------
 1701|  10.0k|    pu1_left = pu1_src + MB_SIZE - 1;
  ------------------
  |  |  509|  10.0k|#define MB_SIZE             16
  ------------------
 1702|  50.4k|    for(rows = 0; rows < 16; rows += 4, pu1_dst += dst_strd, pu1_left --)
  ------------------
  |  Branch (1702:19): [True: 40.3k, False: 10.0k]
  ------------------
 1703|  40.3k|    {
 1704|  40.3k|        memset(pu1_dst, *pu1_left, 16); /* copy the left value to the entire row*/
 1705|  40.3k|        pu1_left --;
 1706|  40.3k|        pu1_dst += dst_strd;
 1707|  40.3k|        memset(pu1_dst, *pu1_left, 16);
 1708|  40.3k|        pu1_left --;
 1709|  40.3k|        pu1_dst += dst_strd;
 1710|  40.3k|        memset(pu1_dst, *pu1_left, 16);
 1711|  40.3k|        pu1_left --;
 1712|  40.3k|        pu1_dst += dst_strd;
 1713|  40.3k|        memset(pu1_dst, *pu1_left, 16);
 1714|  40.3k|    }
 1715|  10.0k|}
ih264_intra_pred_luma_16x16_mode_dc:
 1753|  17.5k|{
 1754|  17.5k|    WORD8 u1_useleft; /* availability of left predictors (only for DC) */
 1755|  17.5k|    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
 1756|  17.5k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1757|  17.5k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1758|  17.5k|    WORD32 rows; /* loop variables*/
 1759|  17.5k|    WORD32 val = 0;
 1760|       |
 1761|  17.5k|    UNUSED(src_strd);
  ------------------
  |  |   45|  17.5k|#define UNUSED(x) ((void)(x))
  ------------------
 1762|  17.5k|    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  17.5k|#define BOOLEAN(x) (!!(x))
  ------------------
 1763|  17.5k|    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  17.5k|#define BOOLEAN(x) (!!(x))
  ------------------
 1764|  17.5k|    pu1_top = pu1_src + MB_SIZE + 1;
  ------------------
  |  |  509|  17.5k|#define MB_SIZE             16
  ------------------
 1765|  17.5k|    pu1_left = pu1_src + MB_SIZE - 1;
  ------------------
  |  |  509|  17.5k|#define MB_SIZE             16
  ------------------
 1766|  17.5k|    if(u1_useleft)
  ------------------
  |  Branch (1766:8): [True: 12.6k, False: 4.93k]
  ------------------
 1767|  12.6k|    {
 1768|   214k|        for(rows = 0; rows < 16; rows++)
  ------------------
  |  Branch (1768:23): [True: 202k, False: 12.6k]
  ------------------
 1769|   202k|            val += *(pu1_left - rows);
 1770|  12.6k|        val += 8;
 1771|  12.6k|    }
 1772|  17.5k|    if(u1_usetop)
  ------------------
  |  Branch (1772:8): [True: 11.7k, False: 5.85k]
  ------------------
 1773|  11.7k|    {
 1774|   199k|        for(rows = 0; rows < 16; rows++)
  ------------------
  |  Branch (1774:23): [True: 187k, False: 11.7k]
  ------------------
 1775|   187k|            val += *(pu1_top + rows);
 1776|  11.7k|        val += 8;
 1777|  11.7k|    }
 1778|       |    /* Since 8 is added if either left/top pred is there,
 1779|       |     val still being zero implies both preds are not there */
 1780|  17.5k|    val = (val) ? (val >> (3 + u1_useleft + u1_usetop)) : 128;
  ------------------
  |  Branch (1780:11): [True: 16.0k, False: 1.48k]
  ------------------
 1781|       |
 1782|  87.9k|    for(rows = 0; rows < 16; rows += 4, pu1_dst += dst_strd)
  ------------------
  |  Branch (1782:19): [True: 70.3k, False: 17.5k]
  ------------------
 1783|  70.3k|    {
 1784|  70.3k|        memset(pu1_dst, val, 16);
 1785|  70.3k|        pu1_dst += dst_strd;
 1786|  70.3k|        memset(pu1_dst, val, 16);
 1787|  70.3k|        pu1_dst += dst_strd;
 1788|  70.3k|        memset(pu1_dst, val, 16);
 1789|  70.3k|        pu1_dst += dst_strd;
 1790|  70.3k|        memset(pu1_dst, val, 16);
 1791|  70.3k|    }
 1792|  17.5k|}
ih264_intra_pred_luma_16x16_mode_plane:
 1830|  3.59k|{
 1831|       |    /*! Written with no multiplications */
 1832|  3.59k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1833|  3.59k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1834|  3.59k|    UWORD8 *pu1_topleft = NULL;
 1835|  3.59k|    WORD32 a, b, c, tmp;
 1836|  3.59k|    UWORD8 *pu1_tmp1, *pu1_tmp2;
 1837|  3.59k|    WORD32 shift;
 1838|       |
 1839|  3.59k|    UNUSED(src_strd);
  ------------------
  |  |   45|  3.59k|#define UNUSED(x) ((void)(x))
  ------------------
 1840|  3.59k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  3.59k|#define UNUSED(x) ((void)(x))
  ------------------
 1841|  3.59k|    pu1_top = pu1_src + MB_SIZE + 1;
  ------------------
  |  |  509|  3.59k|#define MB_SIZE             16
  ------------------
 1842|  3.59k|    pu1_left = pu1_src + MB_SIZE - 1;
  ------------------
  |  |  509|  3.59k|#define MB_SIZE             16
  ------------------
 1843|  3.59k|    pu1_topleft = pu1_src + MB_SIZE;
  ------------------
  |  |  509|  3.59k|#define MB_SIZE             16
  ------------------
 1844|       |
 1845|  3.59k|    {
 1846|  3.59k|        a = (*(pu1_top + 15) + *(pu1_left - 15)) << 4;
 1847|       |
 1848|       |        /*! Implement Sum(x*(P((x+7),-1) - P((x-7),-1))) x=1...8 */
 1849|  3.59k|        pu1_tmp1 = pu1_top + 8;
 1850|  3.59k|        pu1_tmp2 = pu1_tmp1 - 2;
 1851|       |
 1852|       |        /* Pixel diffs are only 9 bits;
 1853|       |         so sign extension allows shifts to be used even for signed */
 1854|  3.59k|        b = ((*pu1_tmp1++) - (*pu1_tmp2--)); /* x=1 */
 1855|  3.59k|        b += ((*pu1_tmp1++) - (*pu1_tmp2--)) << 1; /* x=2 */
 1856|  3.59k|        tmp = ((*pu1_tmp1++) - (*pu1_tmp2--));
 1857|  3.59k|        b += (tmp << 1) + tmp; /* x=3 */
 1858|  3.59k|        b += ((*pu1_tmp1++) - (*pu1_tmp2--)) << 2; /* x=4 */
 1859|       |
 1860|  3.59k|        tmp = ((*pu1_tmp1++) - (*pu1_tmp2--));
 1861|  3.59k|        b += (tmp << 2) + tmp; /* x=5 */
 1862|  3.59k|        tmp = ((*pu1_tmp1++) - (*pu1_tmp2--));
 1863|  3.59k|        b += (tmp << 2) + (tmp << 1); /* x=6 */
 1864|  3.59k|        tmp = ((*pu1_tmp1++) - (*pu1_tmp2--));
 1865|  3.59k|        b += (tmp << 3) - tmp; /* x=7 */
 1866|  3.59k|        b += ((*pu1_tmp1) - (*pu1_topleft)) << 3; /* x=8 */
 1867|       |
 1868|  3.59k|        b = ((b << 2) + b + 32) >> 6; /*! (5*H + 32)>>6 */
 1869|       |
 1870|       |        /*! Implement Sum(y*(P(-1,(y+7)) - P(-1,(y-7)))) y=1...8 */
 1871|  3.59k|        pu1_tmp1 = pu1_left - 8;
 1872|  3.59k|        pu1_tmp2 = pu1_tmp1 + 2;
 1873|       |
 1874|  3.59k|        c = ((*pu1_tmp1) - (*pu1_tmp2)); /* y=1 */
 1875|  3.59k|        pu1_tmp1--;
 1876|  3.59k|        pu1_tmp2++;
 1877|  3.59k|        c += ((*pu1_tmp1) - (*pu1_tmp2)) << 1; /* y=2 */
 1878|  3.59k|        pu1_tmp1--;
 1879|  3.59k|        pu1_tmp2++;
 1880|  3.59k|        tmp = ((*pu1_tmp1) - (*pu1_tmp2));
 1881|  3.59k|        c += (tmp << 1) + tmp; /* y=3 */
 1882|  3.59k|        pu1_tmp1--;
 1883|  3.59k|        pu1_tmp2++;
 1884|  3.59k|        c += ((*pu1_tmp1) - (*pu1_tmp2)) << 2; /* y=4 */
 1885|  3.59k|        pu1_tmp1--;
 1886|  3.59k|        pu1_tmp2++;
 1887|       |
 1888|  3.59k|        tmp = ((*pu1_tmp1) - (*pu1_tmp2));
 1889|  3.59k|        c += (tmp << 2) + tmp; /* y=5 */
 1890|  3.59k|        pu1_tmp1--;
 1891|  3.59k|        pu1_tmp2++;
 1892|  3.59k|        tmp = ((*pu1_tmp1) - (*pu1_tmp2));
 1893|  3.59k|        c += (tmp << 2) + (tmp << 1); /* y=6 */
 1894|  3.59k|        pu1_tmp1--;
 1895|  3.59k|        pu1_tmp2++;
 1896|  3.59k|        tmp = ((*pu1_tmp1) - (*pu1_tmp2));
 1897|  3.59k|        c += (tmp << 3) - tmp; /* y=7 */
 1898|  3.59k|        pu1_tmp1--; //pu1_tmp2 ++;
 1899|       |        /* Modified to get (-1,-1) location as *(pu1_top - 1) instead of (pu1_left - ui4_stride) */
 1900|       |        //c += ((*pu1_tmp1) - (*(pu1_top - 1)))<<3;      /* y=8 */
 1901|  3.59k|        c += ((*pu1_tmp1) - (*pu1_topleft)) << 3; /* y=8 */
 1902|       |
 1903|  3.59k|        c = ((c << 2) + c + 32) >> 6; /*! (5*V + 32)>>32 */
 1904|  3.59k|        shift = 3;
 1905|  3.59k|    }
 1906|       |
 1907|       |    /*! Now from the plane parameters a, b, and c,
 1908|       |     compute the fitted plane values over the block */
 1909|  3.59k|    {
 1910|  3.59k|        WORD32 tmp1, tmpx, tmpx_init, j, i;
 1911|       |
 1912|  3.59k|        tmpx_init = -(b << shift); /* -8b */
 1913|  3.59k|        tmp = a - (c << shift) + 16; /* a-((4or8)*c)+16 */
 1914|  61.1k|        for(i = 0; i < 16; i++)
  ------------------
  |  Branch (1914:20): [True: 57.5k, False: 3.59k]
  ------------------
 1915|  57.5k|        {
 1916|  57.5k|            tmp += c; /*increment every time by c to get c*(y-7or3)*/
 1917|  57.5k|            tmpx = tmpx_init; /* Init to -8b */
 1918|   978k|            for(j = 0; j < 16; j++)
  ------------------
  |  Branch (1918:24): [True: 920k, False: 57.5k]
  ------------------
 1919|   920k|            {
 1920|   920k|                tmpx += b; /* increment every time by b to get b*(x-7or3) */
 1921|   920k|                tmp1 = (tmp + tmpx) >> 5;
 1922|   920k|                *pu1_dst++ = CLIP_U8(tmp1);
  ------------------
  |  |   58|   920k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   920k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 14.6k, False: 906k]
  |  |  |  |  |  Branch (77:54): [True: 8.74k, False: 897k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1923|   920k|            }
 1924|  57.5k|            pu1_dst += (dst_strd - 16);
 1925|  57.5k|        }
 1926|  3.59k|    }
 1927|  3.59k|}

ih264_pad_top:
   94|   219k|{
   95|   219k|    WORD32 row;
   96|       |
   97|  3.28M|    for(row = 1; row <= pad_size; row++)
  ------------------
  |  Branch (97:18): [True: 3.06M, False: 219k]
  ------------------
   98|  3.06M|    {
   99|  3.06M|        memcpy(pu1_src - row * src_strd, pu1_src, wd);
  100|  3.06M|    }
  101|   219k|}
ih264_pad_bottom:
  133|   219k|{
  134|   219k|    WORD32 row;
  135|       |
  136|  3.28M|    for(row = 1; row <= pad_size; row++)
  ------------------
  |  Branch (136:18): [True: 3.06M, False: 219k]
  ------------------
  137|  3.06M|    {
  138|  3.06M|        memcpy(pu1_src + (row - 1) * src_strd, pu1_src - 1 * src_strd, wd);
  139|  3.06M|    }
  140|   219k|}
ih264_pad_left_luma:
  172|  38.9k|{
  173|  38.9k|    WORD32 row;
  174|       |
  175|  9.74M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (175:18): [True: 9.70M, False: 38.9k]
  ------------------
  176|  9.70M|    {
  177|  9.70M|        memset(pu1_src - pad_size, *pu1_src, pad_size);
  178|  9.70M|        pu1_src += src_strd;
  179|  9.70M|    }
  180|  38.9k|}
ih264_pad_left_chroma:
  212|  38.9k|{
  213|  38.9k|    WORD32 row, col;
  214|  38.9k|    UWORD16 u2_uv_val;
  215|  38.9k|    UWORD16 *pu2_src = (UWORD16 *)pu1_src;
  216|       |
  217|  38.9k|    src_strd >>= 1;
  218|  38.9k|    pad_size >>= 1;
  219|       |
  220|  4.89M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (220:18): [True: 4.85M, False: 38.9k]
  ------------------
  221|  4.85M|    {
  222|  4.85M|        u2_uv_val = pu2_src[0];
  223|  82.5M|        for(col = -pad_size; col < 0; col++)
  ------------------
  |  Branch (223:30): [True: 77.6M, False: 4.85M]
  ------------------
  224|  77.6M|        {
  225|  77.6M|            pu2_src[col] = u2_uv_val;
  226|  77.6M|        }
  227|  4.85M|        pu2_src += src_strd;
  228|  4.85M|    }
  229|  38.9k|}
ih264_pad_right_luma:
  261|  38.9k|{
  262|  38.9k|    WORD32 row;
  263|       |
  264|  9.74M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (264:18): [True: 9.70M, False: 38.9k]
  ------------------
  265|  9.70M|    {
  266|  9.70M|        memset(pu1_src, *(pu1_src -1), pad_size);
  267|  9.70M|        pu1_src += src_strd;
  268|  9.70M|    }
  269|  38.9k|}
ih264_pad_right_chroma:
  301|  38.9k|{
  302|  38.9k|    WORD32 row, col;
  303|  38.9k|    UWORD16 u2_uv_val;
  304|  38.9k|    UWORD16 *pu2_src = (UWORD16 *)pu1_src;
  305|       |
  306|  38.9k|    src_strd >>= 1;
  307|  38.9k|    pad_size >>= 1;
  308|       |
  309|  4.89M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (309:18): [True: 4.85M, False: 38.9k]
  ------------------
  310|  4.85M|    {
  311|  4.85M|        u2_uv_val = pu2_src[-1];
  312|  82.5M|        for(col = 0; col < pad_size; col++)
  ------------------
  |  Branch (312:22): [True: 77.6M, False: 4.85M]
  ------------------
  313|  77.6M|        {
  314|  77.6M|            pu2_src[col] = u2_uv_val;
  315|  77.6M|        }
  316|  4.85M|        pu2_src += src_strd;
  317|  4.85M|    }
  318|  38.9k|}

ih264_default_weighted_pred_luma:
  110|  66.9k|{
  111|  66.9k|    WORD32 i, j;
  112|       |
  113|  66.9k|    src_strd1 -= wd;
  114|  66.9k|    src_strd2 -= wd;
  115|  66.9k|    dst_strd -= wd;
  116|       |
  117|  1.07M|    for(i = 0; i < ht; i++)
  ------------------
  |  Branch (117:16): [True: 1.01M, False: 66.9k]
  ------------------
  118|  1.01M|    {
  119|  16.7M|        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
  ------------------
  |  Branch (119:20): [True: 15.7M, False: 1.01M]
  ------------------
  120|  15.7M|            *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
  121|       |
  122|  1.01M|        pu1_src1 += src_strd1;
  123|  1.01M|        pu1_src2 += src_strd2;
  124|  1.01M|        pu1_dst += dst_strd;
  125|  1.01M|    }
  126|  66.9k|}
ih264_default_weighted_pred_chroma:
  178|  66.9k|{
  179|  66.9k|    WORD32 i, j;
  180|       |
  181|  66.9k|    wd = wd << 1;
  182|       |
  183|  66.9k|    src_strd1 -= wd;
  184|  66.9k|    src_strd2 -= wd;
  185|  66.9k|    dst_strd -= wd;
  186|       |
  187|   572k|    for(i = 0; i < ht; i++)
  ------------------
  |  Branch (187:16): [True: 505k, False: 66.9k]
  ------------------
  188|   505k|    {
  189|  8.38M|        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
  ------------------
  |  Branch (189:20): [True: 7.87M, False: 505k]
  ------------------
  190|  7.87M|            *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
  191|       |
  192|   505k|        pu1_src1 += src_strd1;
  193|   505k|        pu1_src2 += src_strd2;
  194|   505k|        pu1_dst += dst_strd;
  195|   505k|    }
  196|  66.9k|}
ih264_weighted_pred_luma:
  252|  1.28M|{
  253|  1.28M|    WORD32 i, j;
  254|       |
  255|  1.28M|    wt = (WORD16)(wt & 0xffff);
  256|  1.28M|    ofst = (WORD8)(ofst & 0xff);
  257|       |
  258|  1.28M|    src_strd -= wd;
  259|  1.28M|    dst_strd -= wd;
  260|       |
  261|  1.28M|    if(log_wd >= 1)
  ------------------
  |  Branch (261:8): [True: 459k, False: 822k]
  ------------------
  262|   459k|    {
  263|   459k|        WORD32 i_ofst = (1 << (log_wd - 1)) + (ofst << log_wd);
  264|  7.52M|        for(i = 0; i < ht; i++)
  ------------------
  |  Branch (264:20): [True: 7.06M, False: 459k]
  ------------------
  265|  7.06M|        {
  266|   117M|            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
  ------------------
  |  Branch (266:24): [True: 110M, False: 7.06M]
  ------------------
  267|   110M|                *pu1_dst = CLIP_U8((wt * (*pu1_src) + i_ofst) >> log_wd);
  ------------------
  |  |   58|   110M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   110M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 27.1M, False: 83.3M]
  |  |  |  |  |  Branch (77:54): [True: 98.0k, False: 83.2M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  268|       |
  269|  7.06M|            pu1_src += src_strd;
  270|  7.06M|            pu1_dst += dst_strd;
  271|  7.06M|        }
  272|   459k|    }
  273|   822k|    else
  274|   822k|    {
  275|  13.6M|        for(i = 0; i < ht; i++)
  ------------------
  |  Branch (275:20): [True: 12.8M, False: 822k]
  ------------------
  276|  12.8M|        {
  277|   216M|            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
  ------------------
  |  Branch (277:24): [True: 203M, False: 12.8M]
  ------------------
  278|   203M|                *pu1_dst = CLIP_U8(wt * (*pu1_src) + ofst);
  ------------------
  |  |   58|   203M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   203M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 9.38M, False: 193M]
  |  |  |  |  |  Branch (77:54): [True: 87.3k, False: 193M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  279|       |
  280|  12.8M|            pu1_src += src_strd;
  281|  12.8M|            pu1_dst += dst_strd;
  282|  12.8M|        }
  283|   822k|    }
  284|  1.28M|}
ih264_weighted_pred_chroma:
  340|  1.28M|{
  341|  1.28M|    WORD32 i, j;
  342|  1.28M|    WORD32 wt_u, wt_v;
  343|  1.28M|    WORD32 ofst_u, ofst_v;
  344|       |
  345|  1.28M|    wt_u = (WORD16)(wt & 0xffff);
  346|  1.28M|    wt_v = (WORD16)(wt >> 16);
  347|       |
  348|  1.28M|    ofst_u = (WORD8)(ofst & 0xff);
  349|  1.28M|    ofst_v = (WORD8)(ofst >> 8);
  350|       |
  351|  1.28M|    src_strd -= wd << 1;
  352|  1.28M|    dst_strd -= wd << 1;
  353|       |
  354|  1.28M|    if(log_wd >= 1)
  ------------------
  |  Branch (354:8): [True: 409k, False: 872k]
  ------------------
  355|   409k|    {
  356|   409k|        ofst_u = (1 << (log_wd - 1)) + (ofst_u << log_wd);
  357|   409k|        ofst_v = (1 << (log_wd - 1)) + (ofst_v << log_wd);
  358|       |
  359|  3.60M|        for(i = 0; i < ht; i++)
  ------------------
  |  Branch (359:20): [True: 3.19M, False: 409k]
  ------------------
  360|  3.19M|        {
  361|  28.3M|            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
  ------------------
  |  Branch (361:24): [True: 25.1M, False: 3.19M]
  ------------------
  362|  25.1M|            {
  363|  25.1M|                *pu1_dst = CLIP_U8((wt_u * (*pu1_src) + ofst_u) >> log_wd);
  ------------------
  |  |   58|  25.1M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  25.1M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.14M, False: 23.9M]
  |  |  |  |  |  Branch (77:54): [True: 701k, False: 23.2M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  364|  25.1M|                pu1_src++;
  365|  25.1M|                pu1_dst++;
  366|  25.1M|                *pu1_dst = CLIP_U8((wt_v * (*pu1_src) + ofst_v) >> log_wd);
  ------------------
  |  |   58|  25.1M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  25.1M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.39M, False: 23.7M]
  |  |  |  |  |  Branch (77:54): [True: 260k, False: 23.4M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  367|  25.1M|            }
  368|  3.19M|            pu1_src += src_strd;
  369|  3.19M|            pu1_dst += dst_strd;
  370|  3.19M|        }
  371|   409k|    }
  372|   872k|    else
  373|   872k|    {
  374|  7.62M|        for(i = 0; i < ht; i++)
  ------------------
  |  Branch (374:20): [True: 6.75M, False: 872k]
  ------------------
  375|  6.75M|        {
  376|  60.0M|            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
  ------------------
  |  Branch (376:24): [True: 53.2M, False: 6.75M]
  ------------------
  377|  53.2M|            {
  378|  53.2M|                *pu1_dst = CLIP_U8(wt_u * (*pu1_src) + ofst_u);
  ------------------
  |  |   58|  53.2M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  53.2M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 2.08M, False: 51.2M]
  |  |  |  |  |  Branch (77:54): [True: 170k, False: 51.0M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  379|  53.2M|                pu1_src++;
  380|  53.2M|                pu1_dst++;
  381|  53.2M|                *pu1_dst = CLIP_U8(wt_v * (*pu1_src) + ofst_v);
  ------------------
  |  |   58|  53.2M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  53.2M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 565k, False: 52.7M]
  |  |  |  |  |  Branch (77:54): [True: 458k, False: 52.2M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  382|  53.2M|            }
  383|  6.75M|            pu1_src += src_strd;
  384|  6.75M|            pu1_dst += dst_strd;
  385|  6.75M|        }
  386|   872k|    }
  387|  1.28M|}
ih264_weighted_bi_pred_luma:
  460|   312k|{
  461|   312k|    WORD32 i, j;
  462|   312k|    WORD32 shft, ofst;
  463|       |
  464|   312k|    ofst1 = (WORD8)(ofst1 & 0xff);
  465|   312k|    ofst2 = (WORD8)(ofst2 & 0xff);
  466|   312k|    wt1 = (WORD16)(wt1 & 0xffff);
  467|   312k|    wt2 = (WORD16)(wt2 & 0xffff);
  468|   312k|    ofst = (ofst1 + ofst2 + 1) >> 1;
  469|       |
  470|   312k|    shft = log_wd + 1;
  471|   312k|    ofst = (1 << log_wd) + (ofst << shft);
  472|       |
  473|   312k|    src_strd1 -= wd;
  474|   312k|    src_strd2 -= wd;
  475|   312k|    dst_strd -= wd;
  476|       |
  477|  5.22M|    for(i = 0; i < ht; i++)
  ------------------
  |  Branch (477:16): [True: 4.91M, False: 312k]
  ------------------
  478|  4.91M|    {
  479|  82.4M|        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
  ------------------
  |  Branch (479:20): [True: 77.5M, False: 4.91M]
  ------------------
  480|  77.5M|            *pu1_dst = CLIP_U8((wt1 * (*pu1_src1) + wt2 * (*pu1_src2) + ofst) >> shft);
  ------------------
  |  |   58|  77.5M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  77.5M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 12.5M, False: 65.0M]
  |  |  |  |  |  Branch (77:54): [True: 1.00M, False: 64.0M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  481|       |
  482|  4.91M|        pu1_src1 += src_strd1;
  483|  4.91M|        pu1_src2 += src_strd2;
  484|  4.91M|        pu1_dst += dst_strd;
  485|  4.91M|    }
  486|   312k|}
ih264_weighted_bi_pred_chroma:
  559|   312k|{
  560|   312k|    WORD32 i, j;
  561|   312k|    WORD32 wt1_u, wt1_v, wt2_u, wt2_v;
  562|   312k|    WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v;
  563|   312k|    WORD32 ofst_u, ofst_v;
  564|   312k|    WORD32 shft;
  565|       |
  566|   312k|    ofst1_u = (WORD8)(ofst1 & 0xff);
  567|   312k|    ofst1_v = (WORD8)(ofst1 >> 8);
  568|   312k|    ofst2_u = (WORD8)(ofst2 & 0xff);
  569|   312k|    ofst2_v = (WORD8)(ofst2 >> 8);
  570|   312k|    wt1_u = (WORD16)(wt1 & 0xffff);
  571|   312k|    wt1_v = (WORD16)(wt1 >> 16);
  572|   312k|    wt2_u = (WORD16)(wt2 & 0xffff);
  573|   312k|    wt2_v = (WORD16)(wt2 >> 16);
  574|   312k|    ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
  575|   312k|    ofst_v = (ofst1_v + ofst2_v + 1) >> 1;
  576|       |
  577|   312k|    src_strd1 -= wd << 1;
  578|   312k|    src_strd2 -= wd << 1;
  579|   312k|    dst_strd -= wd << 1;
  580|       |
  581|   312k|    shft = log_wd + 1;
  582|   312k|    ofst_u = (1 << log_wd) + (ofst_u << shft);
  583|   312k|    ofst_v = (1 << log_wd) + (ofst_v << shft);
  584|       |
  585|  2.76M|    for(i = 0; i < ht; i++)
  ------------------
  |  Branch (585:16): [True: 2.45M, False: 312k]
  ------------------
  586|  2.45M|    {
  587|  21.8M|        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
  ------------------
  |  Branch (587:20): [True: 19.3M, False: 2.45M]
  ------------------
  588|  19.3M|        {
  589|  19.3M|            *pu1_dst = CLIP_U8((wt1_u * (*pu1_src1) + wt2_u * (*pu1_src2) + ofst_u) >> shft);
  ------------------
  |  |   58|  19.3M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  19.3M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 799k, False: 18.5M]
  |  |  |  |  |  Branch (77:54): [True: 660k, False: 17.9M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  590|  19.3M|            pu1_src1++;
  591|  19.3M|            pu1_src2++;
  592|  19.3M|            pu1_dst++;
  593|  19.3M|            *pu1_dst = CLIP_U8((wt1_v * (*pu1_src1) + wt2_v * (*pu1_src2) + ofst_v) >> shft);
  ------------------
  |  |   58|  19.3M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  19.3M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 885k, False: 18.5M]
  |  |  |  |  |  Branch (77:54): [True: 170k, False: 18.3M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  594|  19.3M|        }
  595|  2.45M|        pu1_src1 += src_strd1;
  596|  2.45M|        pu1_src2 += src_strd2;
  597|  2.45M|        pu1_dst += dst_strd;
  598|  2.45M|    }
  599|   312k|}

ithread_get_handle_size:
   67|   135k|{
   68|   135k|    return sizeof(pthread_t);
   69|   135k|}
ithread_get_mutex_lock_size:
   72|   351k|{
   73|   351k|    return sizeof(pthread_mutex_t);
   74|   351k|}
ithread_create:
   78|  50.8k|{
   79|  50.8k|    UNUSED(attribute);
  ------------------
  |  |   47|  50.8k|#define UNUSED(x) ((void)(x))
  ------------------
   80|       |    return pthread_create((pthread_t *)thread_handle, NULL,(void *(*)(void *)) strt, argument);
   81|  50.8k|}
ithread_join:
   84|  50.8k|{
   85|  50.8k|    pthread_t *pthread_handle   = (pthread_t *)thread_handle;
   86|  50.8k|    UNUSED(val_ptr);
  ------------------
  |  |   47|  50.8k|#define UNUSED(x) ((void)(x))
  ------------------
   87|       |    return pthread_join(*pthread_handle, NULL);
   88|  50.8k|}
ithread_mutex_init:
   95|  48.8k|{
   96|       |    return pthread_mutex_init((pthread_mutex_t *) mutex, NULL);
   97|  48.8k|}
ithread_mutex_destroy:
  100|  48.2k|{
  101|  48.2k|    return pthread_mutex_destroy((pthread_mutex_t *) mutex);
  102|  48.2k|}
ithread_mutex_lock:
  105|  1.51M|{
  106|  1.51M|    return pthread_mutex_lock((pthread_mutex_t *)mutex);
  107|  1.51M|}
ithread_mutex_unlock:
  110|  1.51M|{
  111|  1.51M|    return pthread_mutex_unlock((pthread_mutex_t *)mutex);
  112|  1.51M|}
ithread_yield:
  115|   402k|{
  116|   402k|    sched_yield();
  117|   402k|}
ithread_set_name:
  163|   774k|{
  164|       |
  165|   774k|#ifndef WIN32
  166|   774k|#ifndef QNX
  167|   774k|#ifndef IOS
  168|   774k|    UNUSED(pc_thread_name);
  ------------------
  |  |   47|   774k|#define UNUSED(x) ((void)(x))
  ------------------
  169|       |//prctl(PR_SET_NAME, (unsigned long)pu1_thread_name, 0, 0, 0);
  170|   774k|#endif
  171|   774k|#endif
  172|   774k|#endif
  173|       |
  174|   774k|}

ih264_intra_pred_chroma_8x8_mode_horz_ssse3:
  101|  10.9k|{
  102|       |
  103|  10.9k|    UWORD8 *pu1_left; /* Pointer to start of top predictors */
  104|  10.9k|    WORD32 dst_strd2;
  105|       |
  106|  10.9k|    __m128i row1_16x8b, row2_16x8b;
  107|       |
  108|  10.9k|    UNUSED(src_strd);
  ------------------
  |  |   45|  10.9k|#define UNUSED(x) ((void)(x))
  ------------------
  109|  10.9k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  10.9k|#define UNUSED(x) ((void)(x))
  ------------------
  110|       |
  111|  10.9k|    pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
  ------------------
  |  |  510|  10.9k|#define BLK8x8SIZE          8
  ------------------
  112|       |
  113|       |
  114|  10.9k|    dst_strd2 = dst_strd << 1;
  115|  10.9k|    row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left)));
  116|  10.9k|    row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 2)));
  117|  10.9k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
  118|  10.9k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
  119|       |
  120|  10.9k|    pu1_dst += dst_strd2;
  121|  10.9k|    row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 4)));
  122|  10.9k|    row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 6)));
  123|  10.9k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
  124|  10.9k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
  125|       |
  126|  10.9k|    pu1_dst += dst_strd2;
  127|  10.9k|    row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 8)));
  128|  10.9k|    row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 10)));
  129|  10.9k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
  130|  10.9k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
  131|       |
  132|  10.9k|    pu1_dst += dst_strd2;
  133|  10.9k|    row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 12)));
  134|  10.9k|    row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 14)));
  135|  10.9k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
  136|  10.9k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
  137|  10.9k|}
ih264_intra_pred_chroma_8x8_mode_vert_ssse3:
  177|  7.26k|{
  178|  7.26k|    UWORD8 *pu1_top; /* Pointer to start of top predictors */
  179|  7.26k|    WORD32 dst_strd2;
  180|       |
  181|  7.26k|    __m128i top_16x8b;
  182|       |
  183|  7.26k|    UNUSED(src_strd);
  ------------------
  |  |   45|  7.26k|#define UNUSED(x) ((void)(x))
  ------------------
  184|  7.26k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  7.26k|#define UNUSED(x) ((void)(x))
  ------------------
  185|       |
  186|  7.26k|    pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
  ------------------
  |  |  510|  7.26k|#define BLK8x8SIZE          8
  ------------------
  187|       |
  188|  7.26k|    top_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
  189|       |
  190|  7.26k|    dst_strd2 = dst_strd << 1;
  191|  7.26k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
  192|  7.26k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
  193|       |
  194|  7.26k|    pu1_dst += dst_strd2;
  195|  7.26k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
  196|  7.26k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
  197|       |
  198|  7.26k|    pu1_dst += dst_strd2;
  199|  7.26k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
  200|  7.26k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
  201|       |
  202|  7.26k|    pu1_dst += dst_strd2;
  203|  7.26k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
  204|  7.26k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
  205|  7.26k|}
ih264_intra_pred_chroma_8x8_mode_plane_ssse3:
  245|  1.77k|{
  246|  1.77k|    UWORD8 *pu1_left, *pu1_top;
  247|  1.77k|    WORD32 a_u, a_v, b_u, b_v, c_u, c_v;
  248|       |
  249|  1.77k|    __m128i mul_8x16b, shuffle_8x16b;
  250|       |
  251|  1.77k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.77k|#define UNUSED(x) ((void)(x))
  ------------------
  252|  1.77k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.77k|#define UNUSED(x) ((void)(x))
  ------------------
  253|       |
  254|  1.77k|    pu1_top = pu1_src + MB_SIZE + 2;
  ------------------
  |  |  509|  1.77k|#define MB_SIZE             16
  ------------------
  255|  1.77k|    pu1_left = pu1_src + MB_SIZE - 2;
  ------------------
  |  |  509|  1.77k|#define MB_SIZE             16
  ------------------
  256|       |
  257|  1.77k|    mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 1, 2, 3, 4);
  258|  1.77k|    shuffle_8x16b = _mm_setr_epi16(0xff00, 0xff02, 0xff04, 0xff06,
  259|  1.77k|                                   0xff01, 0xff03, 0xff05, 0xff07);
  260|       |
  261|       |    //calculating a, b and c
  262|  1.77k|    {
  263|  1.77k|        WORD32 h_u, h_v, v_u, v_v;
  264|       |
  265|  1.77k|        __m128i h_val1_16x8b, h_val2_16x8b;
  266|  1.77k|        __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b;
  267|  1.77k|        __m128i v_val1_16x8b, v_val2_16x8b;
  268|  1.77k|        __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b;
  269|  1.77k|        __m128i hv_val_4x32b;
  270|       |
  271|  1.77k|        h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8));
  272|  1.77k|        h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 2));
  273|  1.77k|        v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 14));
  274|  1.77k|        v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 4));
  275|       |
  276|       |        // reversing the order
  277|  1.77k|        h_val2_16x8b = _mm_shufflelo_epi16(h_val2_16x8b, 0x1b);
  278|  1.77k|        v_val1_16x8b = _mm_shufflelo_epi16(v_val1_16x8b, 0x1b);
  279|       |
  280|       |        // separating u and v and 8-bit to 16-bit conversion
  281|  1.77k|        h_val1_8x16b = _mm_shuffle_epi8(h_val1_16x8b, shuffle_8x16b);
  282|  1.77k|        h_val2_8x16b = _mm_shuffle_epi8(h_val2_16x8b, shuffle_8x16b);
  283|  1.77k|        v_val1_8x16b = _mm_shuffle_epi8(v_val1_16x8b, shuffle_8x16b);
  284|  1.77k|        v_val2_8x16b = _mm_shuffle_epi8(v_val2_16x8b, shuffle_8x16b);
  285|       |
  286|  1.77k|        h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b);
  287|  1.77k|        v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b);
  288|       |
  289|  1.77k|        h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b);
  290|  1.77k|        v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b);
  291|       |
  292|  1.77k|        hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b);
  293|       |
  294|  1.77k|        a_u = (pu1_left[7 * (-2)] + pu1_top[14]) << 4;
  295|  1.77k|        a_v = (pu1_left[7 * (-2) + 1] + pu1_top[15]) << 4;
  296|       |
  297|  1.77k|        h_u = _mm_extract_epi16(hv_val_4x32b, 0);
  298|  1.77k|        h_v = _mm_extract_epi16(hv_val_4x32b, 2);
  299|  1.77k|        v_u = _mm_extract_epi16(hv_val_4x32b, 4);
  300|  1.77k|        v_v = _mm_extract_epi16(hv_val_4x32b, 6);
  301|       |
  302|  1.77k|        h_u = (h_u << 16) >> 15; // sign-extension and multiplication by 2
  303|  1.77k|        h_v = (h_v << 16) >> 15;
  304|  1.77k|        v_u = (v_u << 16) >> 15;
  305|  1.77k|        v_v = (v_v << 16) >> 15;
  306|       |
  307|  1.77k|        b_u = ((h_u << 4) + h_u + 32) >> 6;
  308|  1.77k|        b_v = ((h_v << 4) + h_v + 32) >> 6;
  309|  1.77k|        c_u = ((v_u << 4) + v_u + 32) >> 6;
  310|  1.77k|        c_v = ((v_v << 4) + v_v + 32) >> 6;
  311|  1.77k|    }
  312|       |    //using a, b and c to compute the fitted plane values
  313|  1.77k|    {
  314|  1.77k|        __m128i const_8x16b, c2_8x16b;
  315|  1.77k|        __m128i res1_l_8x16b, res1_h_8x16b;
  316|  1.77k|        __m128i res2_l_8x16b, res2_h_8x16b;
  317|  1.77k|        __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b;
  318|  1.77k|        __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b;
  319|       |
  320|  1.77k|        WORD32 b_u2, b_v2, b_u3, b_v3;
  321|  1.77k|        WORD32 const_u, const_v;
  322|  1.77k|        WORD32 dst_strd2;
  323|       |
  324|  1.77k|        const_u = a_u - (c_u << 1) - c_u + 16;
  325|  1.77k|        const_v = a_v - (c_v << 1) - c_v + 16;
  326|       |
  327|  1.77k|        b_u2 = b_u << 1;
  328|  1.77k|        b_v2 = b_v << 1;
  329|  1.77k|        b_u3 = b_u + b_u2;
  330|  1.77k|        b_v3 = b_v + b_v2;
  331|       |
  332|  1.77k|        const_8x16b = _mm_setr_epi16(const_u, const_v, const_u, const_v, const_u, const_v, const_u, const_v);
  333|  1.77k|        res1_l_8x16b = _mm_setr_epi16(-b_u3, -b_v3, -b_u2, -b_v2, -b_u, -b_v, 0, 0);
  334|       |        //contains {-b*3, -b*2, -b*1, b*0}
  335|  1.77k|        res1_h_8x16b = _mm_setr_epi16(b_u, b_v, b_u2, b_v2, b_u3, b_v3, b_u << 2, b_v << 2);
  336|       |        //contains {b*1, b*2, b*3, b*4}
  337|  1.77k|        c2_8x16b = _mm_setr_epi16(c_u, c_v, c_u, c_v, c_u, c_v, c_u, c_v);
  338|       |
  339|       |        // rows 1, 2
  340|  1.77k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b);
  341|  1.77k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b);
  342|  1.77k|        res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
  343|  1.77k|        res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
  344|       |
  345|  1.77k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
  346|  1.77k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
  347|  1.77k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
  348|  1.77k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
  349|       |
  350|  1.77k|        dst_strd2 = dst_strd << 1;
  351|  1.77k|        c2_8x16b = _mm_slli_epi16(c2_8x16b, 1);
  352|       |
  353|  1.77k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
  354|  1.77k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
  355|       |
  356|  1.77k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
  357|  1.77k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
  358|       |
  359|       |        // rows 3, 4
  360|  1.77k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
  361|  1.77k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
  362|  1.77k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
  363|  1.77k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
  364|       |
  365|  1.77k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
  366|  1.77k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
  367|  1.77k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
  368|  1.77k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
  369|       |
  370|  1.77k|        pu1_dst += dst_strd2;
  371|       |
  372|  1.77k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
  373|  1.77k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
  374|       |
  375|  1.77k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
  376|  1.77k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
  377|       |
  378|       |        // rows 5, 6
  379|  1.77k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
  380|  1.77k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
  381|  1.77k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
  382|  1.77k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
  383|       |
  384|  1.77k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
  385|  1.77k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
  386|  1.77k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
  387|  1.77k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
  388|       |
  389|  1.77k|        pu1_dst += dst_strd2;
  390|       |
  391|  1.77k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
  392|  1.77k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
  393|       |
  394|  1.77k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
  395|  1.77k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
  396|       |
  397|       |        // rows 7, 8
  398|  1.77k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
  399|  1.77k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
  400|  1.77k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
  401|  1.77k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
  402|       |
  403|  1.77k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
  404|  1.77k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
  405|  1.77k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
  406|  1.77k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
  407|       |
  408|  1.77k|        pu1_dst += dst_strd2;
  409|       |
  410|  1.77k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
  411|  1.77k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
  412|       |
  413|  1.77k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
  414|  1.77k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
  415|       |
  416|  1.77k|    }
  417|  1.77k|}

ih264_deblk_chroma_vert_bs4_ssse3:
  100|  31.5k|{
  101|  31.5k|    UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U*/
  102|  31.5k|    WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb;
  103|  31.5k|    WORD32 beta_cbcr = (beta_cr << 16) + beta_cb;
  104|  31.5k|    __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh;
  105|  31.5k|    __m128i temp1, temp2, temp3, temp4;
  106|       |
  107|  31.5k|    __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
  108|  31.5k|    __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
  109|  31.5k|    __m128i flag1, flag2;
  110|  31.5k|    __m128i diff, alpha_cbcr_16x8, beta_cbcr_16x8;
  111|  31.5k|    __m128i zero = _mm_setzero_si128();
  112|  31.5k|    __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
  113|       |
  114|       |    /* Load and transpose the pixel values */
  115|  31.5k|    linea = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4));
  116|  31.5k|    lineb = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + src_strd));
  117|  31.5k|    linec = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd));
  118|  31.5k|    lined = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd));
  119|  31.5k|    linee = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd));
  120|  31.5k|    linef = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd));
  121|  31.5k|    lineg = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd));
  122|  31.5k|    lineh = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd));
  123|       |
  124|  31.5k|    temp1 = _mm_unpacklo_epi16(linea, lineb);
  125|  31.5k|    temp2 = _mm_unpacklo_epi16(linec, lined);
  126|  31.5k|    temp3 = _mm_unpacklo_epi16(linee, linef);
  127|  31.5k|    temp4 = _mm_unpacklo_epi16(lineg, lineh);
  128|       |
  129|  31.5k|    p1_uv_8x16 = _mm_unpacklo_epi32(temp1, temp2);
  130|  31.5k|    p0_uv_8x16 = _mm_unpacklo_epi32(temp3, temp4);
  131|  31.5k|    q0_uv_8x16 = _mm_unpackhi_epi32(temp1, temp2);
  132|  31.5k|    q1_uv_8x16 = _mm_unpackhi_epi32(temp3, temp4);
  133|       |
  134|  31.5k|    p1_uv_16x8 = _mm_unpacklo_epi64(p1_uv_8x16, p0_uv_8x16);
  135|  31.5k|    p0_uv_16x8 = _mm_unpackhi_epi64(p1_uv_8x16, p0_uv_8x16);
  136|  31.5k|    q0_uv_16x8 = _mm_unpacklo_epi64(q0_uv_8x16, q1_uv_8x16);
  137|  31.5k|    q1_uv_16x8 = _mm_unpackhi_epi64(q0_uv_8x16, q1_uv_8x16);
  138|       |    /* End of transpose */
  139|       |
  140|  31.5k|    q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero);
  141|  31.5k|    q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
  142|  31.5k|    p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
  143|  31.5k|    p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
  144|       |
  145|  31.5k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condn 1
  146|  31.5k|    diff = _mm_abs_epi16(diff);
  147|  31.5k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  148|  31.5k|    flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  149|       |
  150|  31.5k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condtn 2
  151|  31.5k|    diff = _mm_abs_epi16(diff);
  152|  31.5k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  153|  31.5k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  154|       |
  155|  31.5k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condtn 3
  156|  31.5k|    diff = _mm_abs_epi16(diff);
  157|  31.5k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  158|       |
  159|  31.5k|    temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
  160|  31.5k|    temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
  161|  31.5k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  162|  31.5k|    temp1 = _mm_add_epi16(temp1, temp2);
  163|  31.5k|    p0_uv_8x16_1 = _mm_srai_epi16(temp1, 2);
  164|       |
  165|  31.5k|    temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
  166|  31.5k|    temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
  167|  31.5k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  168|  31.5k|    temp1 = _mm_add_epi16(temp1, temp2);
  169|  31.5k|    q0_uv_8x16_1 = _mm_srai_epi16(temp1, 2);
  170|       |
  171|  31.5k|    q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero);
  172|  31.5k|    q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
  173|  31.5k|    p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
  174|  31.5k|    p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
  175|       |
  176|  31.5k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condn 1
  177|  31.5k|    diff = _mm_abs_epi16(diff);
  178|  31.5k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  179|  31.5k|    flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  180|       |
  181|  31.5k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condtn 2
  182|  31.5k|    diff = _mm_abs_epi16(diff);
  183|  31.5k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  184|  31.5k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  185|       |
  186|  31.5k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condtn 3
  187|  31.5k|    diff = _mm_abs_epi16(diff);
  188|  31.5k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  189|       |
  190|  31.5k|    temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
  191|  31.5k|    temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
  192|  31.5k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  193|  31.5k|    temp1 = _mm_add_epi16(temp1, temp2);
  194|  31.5k|    p0_uv_8x16_2 = _mm_srai_epi16(temp1, 2);
  195|       |
  196|  31.5k|    temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
  197|  31.5k|    temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
  198|  31.5k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  199|  31.5k|    temp1 = _mm_add_epi16(temp1, temp2);
  200|  31.5k|    q0_uv_8x16_2 = _mm_srai_epi16(temp1, 2);
  201|       |
  202|  31.5k|    p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2);
  203|  31.5k|    q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
  204|       |
  205|  31.5k|    flag1 = _mm_packs_epi16(flag1, flag2);
  206|       |
  207|  31.5k|    p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
  208|  31.5k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  209|  31.5k|    p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1);
  210|  31.5k|    p0_uv_16x8 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2);
  211|       |
  212|  31.5k|    q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
  213|  31.5k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  214|  31.5k|    q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1);
  215|  31.5k|    q0_uv_16x8 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
  216|       |
  217|       |    /* Inverse-transpose and store back */
  218|  31.5k|    temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8);
  219|  31.5k|    temp2 = _mm_unpackhi_epi16(p1_uv_16x8, p0_uv_16x8);
  220|  31.5k|    temp3 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8);
  221|  31.5k|    temp4 = _mm_unpackhi_epi16(q0_uv_16x8, q1_uv_16x8);
  222|       |
  223|  31.5k|    linea = _mm_unpacklo_epi32(temp1, temp3);
  224|  31.5k|    lineb = _mm_srli_si128(linea, 8);
  225|  31.5k|    linec = _mm_unpackhi_epi32(temp1, temp3);
  226|  31.5k|    lined = _mm_srli_si128(linec, 8);
  227|  31.5k|    linee = _mm_unpacklo_epi32(temp2, temp4);
  228|  31.5k|    linef = _mm_srli_si128(linee, 8);
  229|  31.5k|    lineg = _mm_unpackhi_epi32(temp2, temp4);
  230|  31.5k|    lineh = _mm_srli_si128(lineg, 8);
  231|       |
  232|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4), linea);
  233|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + src_strd), lineb);
  234|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd), linec);
  235|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd), lined);
  236|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd), linee);
  237|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd), linef);
  238|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd), lineg);
  239|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd), lineh);
  240|       |
  241|  31.5k|}
ih264_deblk_chroma_horz_bs4_ssse3:
  283|  33.7k|{ /* Filters the p0 and q0 rows (16 bytes each) on either side of a horizontal edge; reads rows pu1_src - 2*src_strd through pu1_src + src_strd. */
  284|  33.7k|    UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U*/
  285|  33.7k|    WORD16 i16_posP1, i16_posP0, i16_posQ1; /* Byte offsets of the p1/p0/q1 rows. NOTE(review): WORD16 holds src_strd here - confirm the stride always fits */
  286|       |
  287|  33.7k|    UWORD8 *pu1_HorzPixelUV; /* Points at the p1 row, i.e. pu1_src_uv - 2 * src_strd (set below) */
  288|  33.7k|    WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* cr threshold shifted into the upper half, cb added below it; assumes alpha_cb fits in 16 bits - TODO confirm */
  289|  33.7k|    WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* Same packing for the beta thresholds */
  290|  33.7k|    __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
  291|  33.7k|    __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
  292|  33.7k|    __m128i flag1, flag2;
  293|  33.7k|    __m128i diff, alpha_cbcr_16x8, beta_cbcr_16x8;
  294|  33.7k|    __m128i zero = _mm_setzero_si128();
  295|  33.7k|    __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
  296|  33.7k|    __m128i temp1, temp2;
  297|       |    /* The p1 row lies two strides before the q0 row */
  298|  33.7k|    pu1_HorzPixelUV = pu1_src_uv - (src_strd << 1);
  299|       |    /* q1 offset is relative to pu1_src_uv; p0 and p1 offsets are relative to pu1_HorzPixelUV */
  300|  33.7k|    i16_posQ1 = src_strd;
  301|  33.7k|    i16_posP0 = src_strd;
  302|  33.7k|    i16_posP1 = 0;
  303|       |    /* Unaligned 16-byte loads of the four rows */
  304|  33.7k|    q0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv));
  305|  33.7k|    q1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv + i16_posQ1));
  306|  33.7k|    p1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP1));
  307|  33.7k|    p0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0));
  308|       |    /* Low 8 bytes of each row: zero-extend to 16-bit lanes */
  309|  33.7k|    q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero);
  310|  33.7k|    q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
  311|  33.7k|    p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
  312|  33.7k|    p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
  313|       |    /* flag1 = filter-enable mask for the low half; the 32-bit broadcast makes 16-bit lanes alternate cb, cr thresholds */
  314|  33.7k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); // Condition 1: |p0 - q0| < alpha
  315|  33.7k|    diff = _mm_abs_epi16(diff);
  316|  33.7k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  317|  33.7k|    flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  318|       |
  319|  33.7k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); // Condition 2: |q1 - q0| < beta
  320|  33.7k|    diff = _mm_abs_epi16(diff);
  321|  33.7k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  322|  33.7k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  323|       |
  324|  33.7k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); // Condition 3: |p1 - p0| < beta
  325|  33.7k|    diff = _mm_abs_epi16(diff);
  326|  33.7k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  327|       |    /* Filtered p0 = (2*p1 + p0 + q1 + 2) >> 2 */
  328|  33.7k|    temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
  329|  33.7k|    temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
  330|  33.7k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  331|  33.7k|    temp1 = _mm_add_epi16(temp1, temp2);
  332|  33.7k|    p0_uv_8x16_1 = _mm_srai_epi16(temp1, 2);
  333|       |    /* Filtered q0 = (2*q1 + p1 + q0 + 2) >> 2 */
  334|  33.7k|    temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
  335|  33.7k|    temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
  336|  33.7k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  337|  33.7k|    temp1 = _mm_add_epi16(temp1, temp2);
  338|  33.7k|    q0_uv_8x16_1 = _mm_srai_epi16(temp1, 2);
  339|       |    /* High 8 bytes of each row: same conditions and filter, producing flag2 and the _2 results */
  340|  33.7k|    q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero);
  341|  33.7k|    q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
  342|  33.7k|    p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
  343|  33.7k|    p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
  344|       |
  345|  33.7k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); // Condition 1: |p0 - q0| < alpha
  346|  33.7k|    diff = _mm_abs_epi16(diff);
  347|  33.7k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  348|  33.7k|    flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  349|       |
  350|  33.7k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); // Condition 2: |q1 - q0| < beta
  351|  33.7k|    diff = _mm_abs_epi16(diff);
  352|  33.7k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  353|  33.7k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  354|       |
  355|  33.7k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); // Condition 3: |p1 - p0| < beta
  356|  33.7k|    diff = _mm_abs_epi16(diff);
  357|  33.7k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  358|       |
  359|  33.7k|    temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
  360|  33.7k|    temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
  361|  33.7k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  362|  33.7k|    temp1 = _mm_add_epi16(temp1, temp2);
  363|  33.7k|    p0_uv_8x16_2 = _mm_srai_epi16(temp1, 2);
  364|       |
  365|  33.7k|    temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
  366|  33.7k|    temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
  367|  33.7k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
  368|  33.7k|    temp1 = _mm_add_epi16(temp1, temp2);
  369|  33.7k|    q0_uv_8x16_2 = _mm_srai_epi16(temp1, 2);
  370|       |    /* Narrow both filtered halves back to 16 unsigned bytes (saturating pack) */
  371|  33.7k|    p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2);
  372|  33.7k|    q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
  373|       |    /* Narrow the two 16-bit masks to one 0x00/0xFF byte per sample */
  374|  33.7k|    flag1 = _mm_packs_epi16(flag1, flag2);
  375|       |    /* Per byte: keep the original p0 where the mask is clear, take the filtered p0 where it is set, then store the p0 row */
  376|  33.7k|    p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
  377|  33.7k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  378|  33.7k|    p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1);
  379|  33.7k|    p0_uv_8x16_1 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2);
  380|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0), p0_uv_8x16_1);
  381|       |    /* Same select for q0, then store the q0 row */
  382|  33.7k|    q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
  383|  33.7k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  384|  33.7k|    q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1);
  385|  33.7k|    q0_uv_8x16_1 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
  386|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_src_uv), q0_uv_8x16_1);
  387|       |
  388|  33.7k|}
ih264_deblk_chroma_vert_bslt4_ssse3:
  436|   186k|{ /* Filters p0/q0 across a vertical edge for 8 rows, 8 bytes per row starting at pu1_src - 4; filtering is gated per sample by the bS bytes of u4_bs and limited by pu1_cliptab_cb / pu1_cliptab_cr. */
  437|   186k|    UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U*/
  438|   186k|    UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3;
  439|   186k|    WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* cr threshold shifted into the upper half, cb added below it; assumes alpha_cb fits in 16 bits - TODO confirm */
  440|   186k|    WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* Same packing for the beta thresholds */
  441|   186k|    __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh;
  442|   186k|    __m128i temp1, temp2, temp3, temp4;
  443|       |
  444|   186k|    __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
  445|   186k|    __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
  446|   186k|    __m128i flag_bs, flag1, flag2;
  447|   186k|    __m128i diff, diff1, alpha_cbcr_16x8, beta_cbcr_16x8, in_macro;
  448|   186k|    __m128i zero = _mm_setzero_si128();
  449|   186k|    __m128i C0_uv_8x16;
  450|   186k|    __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
  451|       |    /* Split u4_bs into four bytes, most significant byte first */
  452|   186k|    u1_Bs0 = (u4_bs >> 24) & 0xff;
  453|   186k|    u1_Bs1 = (u4_bs >> 16) & 0xff;
  454|   186k|    u1_Bs2 = (u4_bs >> 8) & 0xff;
  455|   186k|    u1_Bs3 = (u4_bs >> 0) & 0xff;
  456|       |    /* Byte mask: each bS value is replicated over 4 bytes (u1_Bs0 in the lowest four); result is 0xFF where bS is non-zero */
  457|   186k|    flag_bs = _mm_set_epi8(u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs2, u1_Bs2,
  458|   186k|                           u1_Bs2, u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs1, u1_Bs1,
  459|   186k|                           u1_Bs0, u1_Bs0, u1_Bs0, u1_Bs0);
  460|   186k|    flag_bs = _mm_cmpeq_epi8(flag_bs, zero); // 0xFF where bS == 0, 0x00 elsewhere
  461|   186k|    flag_bs = _mm_xor_si128(flag_bs, _mm_set1_epi8(0xFF)); // Invert: 0xFF where bS != 0
  462|       |
  463|       |    /* Load 8 rows of 8 bytes, starting 4 bytes before pu1_src_uv, and transpose them */
  464|   186k|    linea = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4));
  465|   186k|    lineb = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + src_strd));
  466|   186k|    linec = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd));
  467|   186k|    lined = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd));
  468|   186k|    linee = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd));
  469|   186k|    linef = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd));
  470|   186k|    lineg = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd));
  471|   186k|    lineh = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd));
  472|       |    /* Interleave in 16-bit units so each 2-byte pair of a row stays together */
  473|   186k|    temp1 = _mm_unpacklo_epi16(linea, lineb);
  474|   186k|    temp2 = _mm_unpacklo_epi16(linec, lined);
  475|   186k|    temp3 = _mm_unpacklo_epi16(linee, linef);
  476|   186k|    temp4 = _mm_unpacklo_epi16(lineg, lineh);
  477|       |    /* Intermediate step: the _8x16 names here temporarily hold half-transposed data, not widened samples */
  478|   186k|    p1_uv_8x16 = _mm_unpacklo_epi32(temp1, temp2);
  479|   186k|    p0_uv_8x16 = _mm_unpacklo_epi32(temp3, temp4);
  480|   186k|    q0_uv_8x16 = _mm_unpackhi_epi32(temp1, temp2);
  481|   186k|    q1_uv_8x16 = _mm_unpackhi_epi32(temp3, temp4);
  482|       |    /* Each result holds one column position (p1, p0, q0, q1) for all 8 rows, 2 bytes per row */
  483|   186k|    p1_uv_16x8 = _mm_unpacklo_epi64(p1_uv_8x16, p0_uv_8x16);
  484|   186k|    p0_uv_16x8 = _mm_unpackhi_epi64(p1_uv_8x16, p0_uv_8x16);
  485|   186k|    q0_uv_16x8 = _mm_unpacklo_epi64(q0_uv_8x16, q1_uv_8x16);
  486|   186k|    q1_uv_16x8 = _mm_unpackhi_epi64(q0_uv_8x16, q1_uv_8x16);
  487|       |    /* End of transpose */
  488|       |    /* Low 8 bytes (first four rows): zero-extend to 16-bit lanes */
  489|   186k|    q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero);
  490|   186k|    q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
  491|   186k|    p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
  492|   186k|    p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
  493|       |    /* flag1 = filter-enable mask for the low half; the 32-bit broadcast makes 16-bit lanes alternate cb, cr thresholds */
  494|   186k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); // Condition 1: |p0 - q0| < alpha
  495|   186k|    diff = _mm_abs_epi16(diff);
  496|   186k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  497|   186k|    flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  498|       |
  499|   186k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); // Condition 2: |q1 - q0| < beta
  500|   186k|    diff = _mm_abs_epi16(diff);
  501|   186k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  502|   186k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  503|       |
  504|   186k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); // Condition 3: |p1 - p0| < beta
  505|   186k|    diff = _mm_abs_epi16(diff);
  506|   186k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  507|       |    /* Unclipped delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3 */
  508|   186k|    diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
  509|   186k|    diff = _mm_slli_epi16(diff, 2);
  510|   186k|    diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
  511|   186k|    diff = _mm_add_epi16(diff, diff1);
  512|   186k|    diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
  513|   186k|    in_macro = _mm_srai_epi16(diff, 3);
  514|       |    /* Clip limits: cb and cr table entries alternate per lane; lanes 0-3 use u1_Bs0, lanes 4-7 use u1_Bs1 */
  515|   186k|    C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
  516|   186k|                               pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
  517|   186k|                               pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0],
  518|   186k|                               pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0]);
  519|       |    /* The limit actually applied is the table value plus one */
  520|   186k|    C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1));
  521|       |    /* Clamp delta to [-C0, C0]; C0_uv_8x16 is negated in between and holds -C0 afterwards */
  522|   186k|    in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3
  523|   186k|    C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16);
  524|   186k|    in_macro = _mm_max_epi16(C0_uv_8x16, in_macro);
  525|       |    /* Filtered p0 = p0 + delta, filtered q0 = q0 - delta (still 16-bit lanes) */
  526|   186k|    p0_uv_8x16_1 = _mm_add_epi16(p0_uv_8x16, in_macro);
  527|   186k|    q0_uv_8x16_1 = _mm_sub_epi16(q0_uv_8x16, in_macro);
  528|       |    /* High 8 bytes (last four rows): same steps, using u1_Bs2 and u1_Bs3 for the clip limits */
  529|   186k|    q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero);
  530|   186k|    q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
  531|   186k|    p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
  532|   186k|    p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
  533|       |
  534|   186k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); // Condition 1: |p0 - q0| < alpha
  535|   186k|    diff = _mm_abs_epi16(diff);
  536|   186k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  537|   186k|    flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  538|       |
  539|   186k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); // Condition 2: |q1 - q0| < beta
  540|   186k|    diff = _mm_abs_epi16(diff);
  541|   186k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  542|   186k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  543|       |
  544|   186k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); // Condition 3: |p1 - p0| < beta
  545|   186k|    diff = _mm_abs_epi16(diff);
  546|   186k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  547|       |
  548|   186k|    diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
  549|   186k|    diff = _mm_slli_epi16(diff, 2);
  550|   186k|    diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
  551|   186k|    diff = _mm_add_epi16(diff, diff1);
  552|   186k|    diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
  553|   186k|    in_macro = _mm_srai_epi16(diff, 3);
  554|       |
  555|   186k|    C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
  556|   186k|                               pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
  557|   186k|                               pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2],
  558|   186k|                               pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2]);
  559|       |
  560|   186k|    C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1));
  561|       |
  562|   186k|    in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3
  563|   186k|    C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16);
  564|   186k|    in_macro = _mm_max_epi16(C0_uv_8x16, in_macro);
  565|       |
  566|   186k|    p0_uv_8x16_2 = _mm_add_epi16(p0_uv_8x16, in_macro);
  567|   186k|    q0_uv_8x16_2 = _mm_sub_epi16(q0_uv_8x16, in_macro);
  568|       |    /* Narrow both filtered halves back to 16 unsigned bytes (saturating pack) */
  569|   186k|    p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2);
  570|   186k|    q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
  571|       |    /* Narrow the two 16-bit masks to one 0x00/0xFF byte per sample, then require bS != 0 as well */
  572|   186k|    flag1 = _mm_packs_epi16(flag1, flag2);
  573|   186k|    flag1 = _mm_and_si128(flag1, flag_bs); //Final flag (BS condition + other 3 conditions)
  574|       |    /* Per byte: keep the original p0 where the mask is clear, take the filtered p0 where it is set */
  575|   186k|    p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
  576|   186k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  577|   186k|    p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1);
  578|   186k|    p0_uv_16x8 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2);
  579|       |    /* Same select for q0 */
  580|   186k|    q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
  581|   186k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  582|   186k|    q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1);
  583|   186k|    q0_uv_16x8 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
  584|       |
  585|       |    /* Inverse-transpose and store back */
  586|   186k|    temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8);
  587|   186k|    temp2 = _mm_unpackhi_epi16(p1_uv_16x8, p0_uv_16x8);
  588|   186k|    temp3 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8);
  589|   186k|    temp4 = _mm_unpackhi_epi16(q0_uv_16x8, q1_uv_16x8);
  590|       |    /* Each 32-bit unpack yields two rows; the second row is moved down with an 8-byte shift */
  591|   186k|    linea = _mm_unpacklo_epi32(temp1, temp3);
  592|   186k|    lineb = _mm_srli_si128(linea, 8);
  593|   186k|    linec = _mm_unpackhi_epi32(temp1, temp3);
  594|   186k|    lined = _mm_srli_si128(linec, 8);
  595|   186k|    linee = _mm_unpacklo_epi32(temp2, temp4);
  596|   186k|    linef = _mm_srli_si128(linee, 8);
  597|   186k|    lineg = _mm_unpackhi_epi32(temp2, temp4);
  598|   186k|    lineh = _mm_srli_si128(lineg, 8);
  599|       |    /* Write back 8 bytes per row; the p1 and q1 bytes are rewritten with their loaded values */
  600|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4), linea);
  601|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + src_strd), lineb);
  602|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd), linec);
  603|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd), lined);
  604|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd), linee);
  605|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd), linef);
  606|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd), lineg);
  607|   186k|    _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd), lineh);
  608|       |
  609|   186k|}
ih264_deblk_chroma_horz_bslt4_ssse3:
  657|   220k|{ /* Filters the p0 and q0 rows (16 bytes each) on either side of a horizontal edge; filtering is gated per sample by the bS bytes of u4_bs and limited by pu1_cliptab_cb / pu1_cliptab_cr. */
  658|   220k|    UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U*/
  659|   220k|    WORD16 i16_posP1, i16_posP0, i16_posQ1; /* Byte offsets of the p1/p0/q1 rows. NOTE(review): WORD16 holds src_strd here - confirm the stride always fits */
  660|   220k|    UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3;
  661|       |
  662|   220k|    UWORD8 *pu1_HorzPixelUV; /* Points at the p1 row, i.e. pu1_src_uv - 2 * src_strd (set below) */
  663|   220k|    WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* cr threshold shifted into the upper half, cb added below it; assumes alpha_cb fits in 16 bits - TODO confirm */
  664|   220k|    WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* Same packing for the beta thresholds */
  665|   220k|    __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
  666|   220k|    __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
  667|   220k|    __m128i flag_bs, flag1, flag2;
  668|   220k|    __m128i diff, diff1, alpha_cbcr_16x8, beta_cbcr_16x8, in_macro;
  669|   220k|    __m128i zero = _mm_setzero_si128();
  670|   220k|    __m128i C0_uv_8x16;
  671|   220k|    __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
  672|       |    /* The p1 row lies two strides before the q0 row */
  673|   220k|    pu1_HorzPixelUV = pu1_src_uv - (src_strd << 1);
  674|       |    /* q1 offset is relative to pu1_src_uv; p0 and p1 offsets are relative to pu1_HorzPixelUV */
  675|   220k|    i16_posQ1 = src_strd;
  676|   220k|    i16_posP0 = src_strd;
  677|   220k|    i16_posP1 = 0;
  678|       |    /* Split u4_bs into four bytes, most significant byte first */
  679|   220k|    u1_Bs0 = (u4_bs >> 24) & 0xff;
  680|   220k|    u1_Bs1 = (u4_bs >> 16) & 0xff;
  681|   220k|    u1_Bs2 = (u4_bs >> 8) & 0xff;
  682|   220k|    u1_Bs3 = (u4_bs >> 0) & 0xff;
  683|       |    /* Byte mask: each bS value is replicated over 4 bytes (u1_Bs0 in the lowest four); result is 0xFF where bS is non-zero */
  684|   220k|    flag_bs = _mm_set_epi8(u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs2, u1_Bs2,
  685|   220k|                           u1_Bs2, u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs1, u1_Bs1,
  686|   220k|                           u1_Bs0, u1_Bs0, u1_Bs0, u1_Bs0);
  687|   220k|    flag_bs = _mm_cmpeq_epi8(flag_bs, zero); // 0xFF where bS == 0, 0x00 elsewhere
  688|   220k|    flag_bs = _mm_xor_si128(flag_bs, _mm_set1_epi8(0xFF)); // Invert: 0xFF where bS != 0
  689|       |    /* Unaligned 16-byte loads of the four rows */
  690|   220k|    q0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv));
  691|   220k|    q1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv + i16_posQ1));
  692|   220k|    p1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP1));
  693|   220k|    p0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0));
  694|       |    /* Low 8 bytes of each row: zero-extend to 16-bit lanes */
  695|   220k|    q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero);
  696|   220k|    q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
  697|   220k|    p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
  698|   220k|    p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
  699|       |    /* flag1 = filter-enable mask for the low half; the 32-bit broadcast makes 16-bit lanes alternate cb, cr thresholds */
  700|   220k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); // Condition 1: |p0 - q0| < alpha
  701|   220k|    diff = _mm_abs_epi16(diff);
  702|   220k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  703|   220k|    flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  704|       |
  705|   220k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); // Condition 2: |q1 - q0| < beta
  706|   220k|    diff = _mm_abs_epi16(diff);
  707|   220k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  708|   220k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  709|       |
  710|   220k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); // Condition 3: |p1 - p0| < beta
  711|   220k|    diff = _mm_abs_epi16(diff);
  712|   220k|    flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  713|       |    /* Unclipped delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3 */
  714|   220k|    diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
  715|   220k|    diff = _mm_slli_epi16(diff, 2);
  716|   220k|    diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
  717|   220k|    diff = _mm_add_epi16(diff, diff1);
  718|   220k|    diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
  719|   220k|    in_macro = _mm_srai_epi16(diff, 3);
  720|       |    /* Clip limits: cb and cr table entries alternate per lane; lanes 0-3 use u1_Bs0, lanes 4-7 use u1_Bs1 */
  721|   220k|    C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
  722|   220k|                               pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
  723|   220k|                               pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0],
  724|   220k|                               pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0]);
  725|       |    /* The limit actually applied is the table value plus one */
  726|   220k|    C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1));
  727|       |    /* Clamp delta to [-C0, C0]; C0_uv_8x16 is negated in between and holds -C0 afterwards */
  728|   220k|    in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3
  729|   220k|    C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16);
  730|   220k|    in_macro = _mm_max_epi16(C0_uv_8x16, in_macro);
  731|       |    /* Filtered p0 = p0 + delta, filtered q0 = q0 - delta (still 16-bit lanes) */
  732|   220k|    p0_uv_8x16_1 = _mm_add_epi16(p0_uv_8x16, in_macro);
  733|   220k|    q0_uv_8x16_1 = _mm_sub_epi16(q0_uv_8x16, in_macro);
  734|       |    /* High 8 bytes of each row: same steps, using u1_Bs2 and u1_Bs3 for the clip limits */
  735|   220k|    q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero);
  736|   220k|    q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
  737|   220k|    p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
  738|   220k|    p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
  739|       |
  740|   220k|    diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); // Condition 1: |p0 - q0| < alpha
  741|   220k|    diff = _mm_abs_epi16(diff);
  742|   220k|    alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
  743|   220k|    flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
  744|       |
  745|   220k|    diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); // Condition 2: |q1 - q0| < beta
  746|   220k|    diff = _mm_abs_epi16(diff);
  747|   220k|    beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
  748|   220k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  749|       |
  750|   220k|    diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); // Condition 3: |p1 - p0| < beta
  751|   220k|    diff = _mm_abs_epi16(diff);
  752|   220k|    flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
  753|       |
  754|   220k|    diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
  755|   220k|    diff = _mm_slli_epi16(diff, 2);
  756|   220k|    diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
  757|   220k|    diff = _mm_add_epi16(diff, diff1);
  758|   220k|    diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
  759|   220k|    in_macro = _mm_srai_epi16(diff, 3);
  760|       |
  761|   220k|    C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
  762|   220k|                               pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
  763|   220k|                               pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2],
  764|   220k|                               pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2]);
  765|       |
  766|   220k|    C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1));
  767|       |
  768|   220k|    in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3
  769|   220k|    C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16);
  770|   220k|    in_macro = _mm_max_epi16(C0_uv_8x16, in_macro);
  771|       |
  772|   220k|    p0_uv_8x16_2 = _mm_add_epi16(p0_uv_8x16, in_macro);
  773|   220k|    q0_uv_8x16_2 = _mm_sub_epi16(q0_uv_8x16, in_macro);
  774|       |    /* Narrow both filtered halves back to 16 unsigned bytes (saturating pack) */
  775|   220k|    p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2);
  776|   220k|    q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
  777|       |    /* Narrow the two 16-bit masks to one 0x00/0xFF byte per sample, then require bS != 0 as well */
  778|   220k|    flag1 = _mm_packs_epi16(flag1, flag2);
  779|   220k|    flag1 = _mm_and_si128(flag1, flag_bs); //Final flag (BS condition + other 3 conditions)
  780|       |    /* Per byte: keep the original p0 where the mask is clear, take the filtered p0 where it is set, then store the p0 row */
  781|   220k|    p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
  782|   220k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  783|   220k|    p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1);
  784|   220k|    p0_uv_8x16_1 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2);
  785|   220k|    _mm_storeu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0), p0_uv_8x16_1);
  786|       |    /* Same select for q0, then store the q0 row */
  787|   220k|    q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
  788|   220k|                                 _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
  789|   220k|    q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1);
  790|   220k|    q0_uv_8x16_1 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
  791|   220k|    _mm_storeu_si128((__m128i *)(pu1_src_uv), q0_uv_8x16_1);
  792|       |
  793|   220k|}

ih264_deblk_luma_vert_bs4_ssse3:
   94|  31.5k|{
   95|  31.5k|    __m128i zero = _mm_setzero_si128();
   96|  31.5k|    __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8;
   97|  31.5k|    __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8;
   98|  31.5k|    __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16;
   99|  31.5k|    __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16;
  100|  31.5k|    __m128i q0_16x8_1;
  101|  31.5k|    __m128i p0_16x8_1;
  102|  31.5k|    __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2;
  103|  31.5k|    __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2;
  104|  31.5k|    __m128i temp1, temp2, temp3, temp4, temp5, temp6;
  105|  31.5k|    __m128i Alpha_8x16, Beta_8x16;
  106|  31.5k|    __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8;
  107|  31.5k|    __m128i const_val2_16x8 = _mm_set1_epi16(2);
  108|  31.5k|    __m128i line1, line2, line3, line4, line5, line6, line7, line8;
  109|       |
  110|  31.5k|    Alpha_8x16 = _mm_set1_epi16(alpha);
  111|  31.5k|    Beta_8x16 = _mm_set1_epi16(beta);
  112|       |
  113|  31.5k|    line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd));
  114|  31.5k|    line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd));
  115|  31.5k|    line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd));
  116|  31.5k|    line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd));
  117|  31.5k|    line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd));
  118|  31.5k|    line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd));
  119|  31.5k|    line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd));
  120|  31.5k|    line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd));
  121|       |
  122|  31.5k|    temp1 = _mm_unpacklo_epi8(line1, line2);
  123|  31.5k|    temp2 = _mm_unpacklo_epi8(line3, line4);
  124|  31.5k|    temp3 = _mm_unpacklo_epi8(line5, line6);
  125|  31.5k|    temp4 = _mm_unpacklo_epi8(line7, line8);
  126|       |
  127|  31.5k|    line1 = _mm_unpacklo_epi16(temp1, temp2);
  128|  31.5k|    line2 = _mm_unpackhi_epi16(temp1, temp2);
  129|  31.5k|    line3 = _mm_unpacklo_epi16(temp3, temp4);
  130|  31.5k|    line4 = _mm_unpackhi_epi16(temp3, temp4);
  131|       |
  132|  31.5k|    p1_8x16 = _mm_unpacklo_epi32(line1, line3);
  133|  31.5k|    p0_8x16 = _mm_unpackhi_epi32(line1, line3);
  134|  31.5k|    q0_8x16 = _mm_unpacklo_epi32(line2, line4);
  135|  31.5k|    q1_8x16 = _mm_unpackhi_epi32(line2, line4);
  136|       |
  137|  31.5k|    line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 8 * src_strd));
  138|  31.5k|    line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 9 * src_strd));
  139|  31.5k|    line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 10 * src_strd));
  140|  31.5k|    line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 11 * src_strd));
  141|  31.5k|    line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 12 * src_strd));
  142|  31.5k|    line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 13 * src_strd));
  143|  31.5k|    line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 14 * src_strd));
  144|  31.5k|    line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 15 * src_strd));
  145|       |
  146|  31.5k|    temp1 = _mm_unpacklo_epi8(line1, line2);
  147|  31.5k|    temp2 = _mm_unpacklo_epi8(line3, line4);
  148|  31.5k|    temp3 = _mm_unpacklo_epi8(line5, line6);
  149|  31.5k|    temp4 = _mm_unpacklo_epi8(line7, line8);
  150|       |
  151|  31.5k|    line1 = _mm_unpacklo_epi16(temp1, temp2);
  152|  31.5k|    line2 = _mm_unpackhi_epi16(temp1, temp2);
  153|  31.5k|    line3 = _mm_unpacklo_epi16(temp3, temp4);
  154|  31.5k|    line4 = _mm_unpackhi_epi16(temp3, temp4);
  155|       |
  156|  31.5k|    temp1 = _mm_unpacklo_epi32(line1, line3);
  157|  31.5k|    temp2 = _mm_unpackhi_epi32(line1, line3);
  158|  31.5k|    temp3 = _mm_unpacklo_epi32(line2, line4);
  159|  31.5k|    temp4 = _mm_unpackhi_epi32(line2, line4);
  160|       |
  161|  31.5k|    p3_16x8 = _mm_unpacklo_epi64(p1_8x16, temp1);
  162|  31.5k|    p2_16x8 = _mm_unpackhi_epi64(p1_8x16, temp1);
  163|  31.5k|    q2_16x8 = _mm_unpacklo_epi64(q1_8x16, temp4);
  164|  31.5k|    q3_16x8 = _mm_unpackhi_epi64(q1_8x16, temp4);
  165|  31.5k|    p1_16x8 = _mm_unpacklo_epi64(p0_8x16, temp2);
  166|  31.5k|    p0_16x8 = _mm_unpackhi_epi64(p0_8x16, temp2);
  167|  31.5k|    q0_16x8 = _mm_unpacklo_epi64(q0_8x16, temp3);
  168|  31.5k|    q1_16x8 = _mm_unpackhi_epi64(q0_8x16, temp3);
  169|       |
  170|       |    //Cond1 (ABS(p0 - q0) < alpha)
  171|  31.5k|    temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
  172|  31.5k|    temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
  173|  31.5k|    temp1 = _mm_add_epi8(temp1, temp2);
  174|       |
  175|  31.5k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  176|  31.5k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  177|       |
  178|  31.5k|    temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
  179|  31.5k|    temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
  180|       |
  181|  31.5k|    flag1_16x8 = _mm_packs_epi16(temp2, temp1);
  182|       |
  183|       |    //Cond2 (ABS(q1 - q0) < beta)
  184|  31.5k|    temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
  185|  31.5k|    temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
  186|  31.5k|    temp1 = _mm_add_epi8(temp1, temp2);
  187|       |
  188|  31.5k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  189|  31.5k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  190|       |
  191|  31.5k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  192|  31.5k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  193|       |
  194|  31.5k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
  195|       |
  196|  31.5k|    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
  197|       |
  198|       |    //Cond3 (ABS(p1 - p0) < beta)
  199|  31.5k|    temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
  200|  31.5k|    temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
  201|  31.5k|    temp1 = _mm_add_epi8(temp1, temp2);
  202|       |
  203|  31.5k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  204|  31.5k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  205|       |
  206|  31.5k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  207|  31.5k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  208|       |
  209|  31.5k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
  210|       |
  211|       |    // (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta)
  212|  31.5k|    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
  213|       |
  214|       |    // (ABS(p0 - q0) < ((alpha >> 2) + 2))
  215|  31.5k|    temp1 = _mm_subs_epu8(p0_16x8, q0_16x8);
  216|  31.5k|    temp2 = _mm_subs_epu8(q0_16x8, p0_16x8);
  217|  31.5k|    temp1 = _mm_add_epi8(temp1, temp2);
  218|  31.5k|    Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2);
  219|  31.5k|    Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8);
  220|       |
  221|  31.5k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  222|  31.5k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  223|  31.5k|    temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
  224|  31.5k|    temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
  225|       |
  226|  31.5k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
  227|  31.5k|    flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
  228|       |
  229|       |    // (ABS(p2 - p0) < beta)
  230|  31.5k|    temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
  231|  31.5k|    temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
  232|  31.5k|    temp1 = _mm_add_epi8(temp1, temp2);
  233|       |
  234|  31.5k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  235|  31.5k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  236|  31.5k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  237|  31.5k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  238|       |
  239|  31.5k|    flag3_16x8 = _mm_packs_epi16(temp2, temp1);
  240|  31.5k|    flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8);
  241|       |
  242|       |    // (ABS(q2 - q0) < beta)
  243|  31.5k|    temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
  244|  31.5k|    temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
  245|  31.5k|    temp1 = _mm_add_epi8(temp1, temp2);
  246|       |
  247|  31.5k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  248|  31.5k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  249|  31.5k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  250|  31.5k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  251|       |
  252|  31.5k|    flag4_16x8 = _mm_packs_epi16(temp2, temp1);
  253|  31.5k|    flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8);
  254|       |
  255|       |    // First 8 pixels
  256|  31.5k|    p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero);
  257|  31.5k|    p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero);
  258|  31.5k|    p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero);
  259|  31.5k|    p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero);
  260|  31.5k|    q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero);
  261|  31.5k|    q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero);
  262|  31.5k|    q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero);
  263|  31.5k|    q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero);
  264|       |
  265|       |    // p0_1 and q0_1
  266|  31.5k|    temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
  267|  31.5k|    temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
  268|  31.5k|    temp5 = _mm_add_epi16(temp1, const_val2_16x8);
  269|  31.5k|    temp6 = _mm_add_epi16(temp2, const_val2_16x8);
  270|  31.5k|    temp3 = _mm_slli_epi16(p1_8x16, 1);
  271|  31.5k|    temp4 = _mm_slli_epi16(q1_8x16, 1);
  272|  31.5k|    temp1 = _mm_add_epi16(temp5, temp3);
  273|  31.5k|    temp2 = _mm_add_epi16(temp6, temp4);
  274|  31.5k|    p0_16x8_1 = _mm_srai_epi16(temp1, 2);
  275|  31.5k|    q0_16x8_1 = _mm_srai_epi16(temp2, 2);
  276|       |
  277|       |    // p1_2 and q1_2
  278|  31.5k|    temp6 = _mm_add_epi16(temp6, p0_8x16);
  279|  31.5k|    temp5 = _mm_add_epi16(temp5, q0_8x16);
  280|  31.5k|    temp1 = _mm_add_epi16(temp6, p2_8x16);
  281|  31.5k|    temp2 = _mm_add_epi16(temp5, q2_8x16);
  282|  31.5k|    p1_16x8_2 = _mm_srai_epi16(temp1, 2);
  283|  31.5k|    q1_16x8_2 = _mm_srai_epi16(temp2, 2);
  284|       |
  285|       |    // p0_2 and q0_2
  286|  31.5k|    temp1 = _mm_add_epi16(temp3, p2_8x16);
  287|  31.5k|    temp2 = _mm_add_epi16(temp4, q2_8x16);
  288|  31.5k|    temp1 = _mm_add_epi16(temp1, q1_8x16);
  289|  31.5k|    temp2 = _mm_add_epi16(temp2, p1_8x16);
  290|  31.5k|    temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
  291|  31.5k|    temp3 = _mm_slli_epi16(temp3, 1);
  292|  31.5k|    temp1 = _mm_add_epi16(temp1, temp3);
  293|  31.5k|    temp2 = _mm_add_epi16(temp2, temp3);
  294|  31.5k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
  295|  31.5k|    temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
  296|  31.5k|    p0_16x8_2 = _mm_srai_epi16(temp1, 3);
  297|  31.5k|    q0_16x8_2 = _mm_srai_epi16(temp2, 3);
  298|       |
  299|       |    // p2_2 and q2_2
  300|  31.5k|    temp1 = _mm_add_epi16(temp6, const_val2_16x8);
  301|  31.5k|    temp2 = _mm_add_epi16(temp5, const_val2_16x8);
  302|  31.5k|    temp3 = _mm_slli_epi16(p2_8x16, 1);
  303|  31.5k|    temp4 = _mm_slli_epi16(q2_8x16, 1);
  304|  31.5k|    temp3 = _mm_add_epi16(p2_8x16, temp3);
  305|  31.5k|    temp4 = _mm_add_epi16(q2_8x16, temp4);
  306|  31.5k|    temp5 = _mm_slli_epi16(p3_8x16, 1);
  307|  31.5k|    temp6 = _mm_slli_epi16(q3_8x16, 1);
  308|  31.5k|    temp1 = _mm_add_epi16(temp1, temp3);
  309|  31.5k|    temp2 = _mm_add_epi16(temp2, temp4);
  310|  31.5k|    temp1 = _mm_add_epi16(temp1, temp5);
  311|  31.5k|    temp2 = _mm_add_epi16(temp2, temp6);
  312|  31.5k|    p2_16x8_2 = _mm_srai_epi16(temp1, 3);
  313|  31.5k|    q2_16x8_2 = _mm_srai_epi16(temp2, 3);
  314|       |
  315|       |    // Second 8 pixels and packing with first 8 pixels
  316|  31.5k|    p3_8x16 = _mm_unpackhi_epi8(p3_16x8, zero);
  317|  31.5k|    p2_8x16 = _mm_unpackhi_epi8(p2_16x8, zero);
  318|  31.5k|    p1_8x16 = _mm_unpackhi_epi8(p1_16x8, zero);
  319|  31.5k|    p0_8x16 = _mm_unpackhi_epi8(p0_16x8, zero);
  320|  31.5k|    q0_8x16 = _mm_unpackhi_epi8(q0_16x8, zero);
  321|  31.5k|    q1_8x16 = _mm_unpackhi_epi8(q1_16x8, zero);
  322|  31.5k|    q2_8x16 = _mm_unpackhi_epi8(q2_16x8, zero);
  323|  31.5k|    q3_8x16 = _mm_unpackhi_epi8(q3_16x8, zero);
  324|       |
  325|       |    // p0_1 and q0_1
  326|  31.5k|    temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
  327|  31.5k|    temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
  328|  31.5k|    temp5 = _mm_add_epi16(temp1, const_val2_16x8);
  329|  31.5k|    temp6 = _mm_add_epi16(temp2, const_val2_16x8);
  330|  31.5k|    temp3 = _mm_slli_epi16(p1_8x16, 1);
  331|  31.5k|    temp4 = _mm_slli_epi16(q1_8x16, 1);
  332|  31.5k|    temp1 = _mm_add_epi16(temp5, temp3);
  333|  31.5k|    temp2 = _mm_add_epi16(temp6, temp4);
  334|  31.5k|    temp1 = _mm_srai_epi16(temp1, 2);
  335|  31.5k|    temp2 = _mm_srai_epi16(temp2, 2);
  336|  31.5k|    p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, temp1);
  337|  31.5k|    q0_16x8_1 = _mm_packus_epi16(q0_16x8_1, temp2);
  338|       |
  339|       |    // p1_2 and q1_2
  340|  31.5k|    temp6 = _mm_add_epi16(temp6, p0_8x16);
  341|  31.5k|    temp5 = _mm_add_epi16(temp5, q0_8x16);
  342|  31.5k|    temp1 = _mm_add_epi16(temp6, p2_8x16);
  343|  31.5k|    temp2 = _mm_add_epi16(temp5, q2_8x16);
  344|  31.5k|    temp1 = _mm_srai_epi16(temp1, 2);
  345|  31.5k|    temp2 = _mm_srai_epi16(temp2, 2);
  346|  31.5k|    p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, temp1);
  347|  31.5k|    q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, temp2);
  348|       |
  349|       |    // p0_2 and q0_2
  350|  31.5k|    temp1 = _mm_add_epi16(temp3, p2_8x16);
  351|  31.5k|    temp2 = _mm_add_epi16(temp4, q2_8x16);
  352|  31.5k|    temp1 = _mm_add_epi16(temp1, q1_8x16);
  353|  31.5k|    temp2 = _mm_add_epi16(temp2, p1_8x16);
  354|  31.5k|    temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
  355|  31.5k|    temp3 = _mm_slli_epi16(temp3, 1);
  356|  31.5k|    temp1 = _mm_add_epi16(temp1, temp3);
  357|  31.5k|    temp2 = _mm_add_epi16(temp2, temp3);
  358|  31.5k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
  359|  31.5k|    temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
  360|  31.5k|    temp1 = _mm_srai_epi16(temp1, 3);
  361|  31.5k|    temp2 = _mm_srai_epi16(temp2, 3);
  362|  31.5k|    p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, temp1);
  363|  31.5k|    q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, temp2);
  364|       |
  365|       |    // p2_2 and q2_2
  366|  31.5k|    temp1 = _mm_add_epi16(temp6, const_val2_16x8);
  367|  31.5k|    temp2 = _mm_add_epi16(temp5, const_val2_16x8);
  368|  31.5k|    temp3 = _mm_slli_epi16(p2_8x16, 1);
  369|  31.5k|    temp4 = _mm_slli_epi16(q2_8x16, 1);
  370|  31.5k|    temp3 = _mm_add_epi16(p2_8x16, temp3);
  371|  31.5k|    temp4 = _mm_add_epi16(q2_8x16, temp4);
  372|  31.5k|    temp5 = _mm_slli_epi16(p3_8x16, 1);
  373|  31.5k|    temp6 = _mm_slli_epi16(q3_8x16, 1);
  374|  31.5k|    temp1 = _mm_add_epi16(temp1, temp3);
  375|  31.5k|    temp2 = _mm_add_epi16(temp2, temp4);
  376|  31.5k|    temp1 = _mm_add_epi16(temp1, temp5);
  377|  31.5k|    temp2 = _mm_add_epi16(temp2, temp6);
  378|  31.5k|    temp1 = _mm_srai_epi16(temp1, 3);
  379|  31.5k|    temp2 = _mm_srai_epi16(temp2, 3);
  380|  31.5k|    p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, temp1);
  381|  31.5k|    q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, temp2);
  382|       |
  383|       |    // p0 and q0: take the p0_1 / q0_1 result where flag1 is set
  384|  31.5k|    p0_16x8 = _mm_and_si128(p0_16x8,
  385|  31.5k|                            _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
  386|  31.5k|    p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8);
  387|  31.5k|    p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1);
  388|  31.5k|    q0_16x8 = _mm_and_si128(q0_16x8,
  389|  31.5k|                            _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
  390|  31.5k|    q0_16x8_1 = _mm_and_si128(q0_16x8_1, flag1_16x8);
  391|  31.5k|    q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1);
  392|       |
  393|       |    // p0 and q0: take the p0_2 / q0_2 result where flag3 / flag4 is set
  394|  31.5k|    p0_16x8 = _mm_and_si128(p0_16x8,
  395|  31.5k|                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
  396|  31.5k|    p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8);
  397|  31.5k|    p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2);
  398|  31.5k|    q0_16x8 = _mm_and_si128(q0_16x8,
  399|  31.5k|                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
  400|  31.5k|    q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8);
  401|  31.5k|    q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2);
  402|       |
  403|       |    // p1 and q1
  404|  31.5k|    p1_16x8 = _mm_and_si128(p1_16x8,
  405|  31.5k|                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
  406|  31.5k|    p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8);
  407|  31.5k|    p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2);
  408|  31.5k|    q1_16x8 = _mm_and_si128(q1_16x8,
  409|  31.5k|                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
  410|  31.5k|    q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8);
  411|  31.5k|    q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2);
  412|       |
  413|       |    // p2 and q2
  414|  31.5k|    p2_16x8 = _mm_and_si128(p2_16x8,
  415|  31.5k|                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
  416|  31.5k|    p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8);
  417|  31.5k|    p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2);
  418|  31.5k|    q2_16x8 = _mm_and_si128(q2_16x8,
  419|  31.5k|                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
  420|  31.5k|    q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8);
  421|  31.5k|    q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2);
  422|       |
  423|  31.5k|    temp1 = _mm_unpacklo_epi8(p3_16x8, p2_16x8);
  424|  31.5k|    temp2 = _mm_unpacklo_epi8(p1_16x8, p0_16x8);
  425|  31.5k|    temp3 = _mm_unpacklo_epi8(q0_16x8, q1_16x8);
  426|  31.5k|    temp4 = _mm_unpacklo_epi8(q2_16x8, q3_16x8);
  427|       |
  428|  31.5k|    p3_8x16 = _mm_unpacklo_epi16(temp1, temp2);
  429|  31.5k|    p2_8x16 = _mm_unpackhi_epi16(temp1, temp2);
  430|  31.5k|    q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);
  431|  31.5k|    q3_8x16 = _mm_unpackhi_epi16(temp3, temp4);
  432|       |
  433|  31.5k|    line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16);
  434|  31.5k|    line2 = _mm_srli_si128(line1, 8);
  435|  31.5k|    line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16);
  436|  31.5k|    line4 = _mm_srli_si128(line3, 8);
  437|  31.5k|    line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16);
  438|  31.5k|    line6 = _mm_srli_si128(line5, 8);
  439|  31.5k|    line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16);
  440|  31.5k|    line8 = _mm_srli_si128(line7, 8);
  441|       |
  442|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd), line1);
  443|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd), line2);
  444|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd), line3);
  445|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd), line4);
  446|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd), line5);
  447|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd), line6);
  448|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd), line7);
  449|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd), line8);
  450|       |
  451|  31.5k|    temp1 = _mm_unpackhi_epi8(p3_16x8, p2_16x8);
  452|  31.5k|    temp2 = _mm_unpackhi_epi8(p1_16x8, p0_16x8);
  453|  31.5k|    temp3 = _mm_unpackhi_epi8(q0_16x8, q1_16x8);
  454|  31.5k|    temp4 = _mm_unpackhi_epi8(q2_16x8, q3_16x8);
  455|       |
  456|  31.5k|    p3_8x16 = _mm_unpacklo_epi16(temp1, temp2);
  457|  31.5k|    p2_8x16 = _mm_unpackhi_epi16(temp1, temp2);
  458|  31.5k|    q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);
  459|  31.5k|    q3_8x16 = _mm_unpackhi_epi16(temp3, temp4);
  460|       |
  461|  31.5k|    line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16);
  462|  31.5k|    line2 = _mm_srli_si128(line1, 8);
  463|  31.5k|    line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16);
  464|  31.5k|    line4 = _mm_srli_si128(line3, 8);
  465|  31.5k|    line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16);
  466|  31.5k|    line6 = _mm_srli_si128(line5, 8);
  467|  31.5k|    line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16);
  468|  31.5k|    line8 = _mm_srli_si128(line7, 8);
  469|       |
  470|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 8 * src_strd), line1);
  471|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 9 * src_strd), line2);
  472|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 10 * src_strd), line3);
  473|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 11 * src_strd), line4);
  474|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 12 * src_strd), line5);
  475|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 13 * src_strd), line6);
  476|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 14 * src_strd), line7);
  477|  31.5k|    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 15 * src_strd), line8);
  478|       |
  479|  31.5k|}
ih264_deblk_luma_horz_bs4_ssse3:
  515|  33.7k|{
  516|  33.7k|    WORD16 i16_posP3, i16_posP2, i16_posP1, i16_posP0;
  517|  33.7k|    WORD16 i16_posQ1, i16_posQ2, i16_posQ3;
  518|  33.7k|    UWORD8 *pu1_HorzPixel;
  519|  33.7k|    __m128i zero = _mm_setzero_si128();
  520|  33.7k|    __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8;
  521|  33.7k|    __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8;
  522|  33.7k|    __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16;
  523|  33.7k|    __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16;
  524|  33.7k|    __m128i q0_16x8_1;
  525|  33.7k|    __m128i p0_16x8_1;
  526|  33.7k|    __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2;
  527|  33.7k|    __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2;
  528|  33.7k|    __m128i temp1, temp2, temp3, temp4, temp5, temp6;
  529|  33.7k|    __m128i Alpha_8x16, Beta_8x16;
  530|  33.7k|    __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8;
  531|  33.7k|    __m128i const_val2_16x8 = _mm_set1_epi16(2);
  532|       |
  533|  33.7k|    pu1_HorzPixel = pu1_src - (src_strd << 2);
  534|       |
  535|  33.7k|    i16_posQ1 = src_strd;
  536|  33.7k|    i16_posQ2 = X2(src_strd);
  ------------------
  |  |   91|  33.7k|#define X2(a)   ((a) << 1)
  ------------------
  537|  33.7k|    i16_posQ3 = X3(src_strd);
  ------------------
  |  |   92|  33.7k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  538|  33.7k|    i16_posP0 = X3(src_strd);
  ------------------
  |  |   92|  33.7k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  539|  33.7k|    i16_posP1 = X2(src_strd);
  ------------------
  |  |   91|  33.7k|#define X2(a)   ((a) << 1)
  ------------------
  540|  33.7k|    i16_posP2 = src_strd;
  541|  33.7k|    i16_posP3 = 0;
  542|       |
  543|  33.7k|    Alpha_8x16 = _mm_set1_epi16(alpha);
  544|  33.7k|    Beta_8x16 = _mm_set1_epi16(beta);
  545|       |
  546|  33.7k|    p3_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP3));
  547|  33.7k|    p2_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP2));
  548|  33.7k|    p1_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP1));
  549|  33.7k|    p0_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP0));
  550|  33.7k|    q0_16x8 = _mm_loadu_si128((__m128i *)(pu1_src));
  551|  33.7k|    q1_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ1));
  552|  33.7k|    q2_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ2));
  553|  33.7k|    q3_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ3));
  554|       |
  555|       |    //Cond1 (ABS(p0 - q0) < alpha)
  556|  33.7k|    temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
  557|  33.7k|    temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
  558|  33.7k|    temp1 = _mm_add_epi8(temp1, temp2);
  559|       |
  560|  33.7k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  561|  33.7k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  562|       |
  563|  33.7k|    temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
  564|  33.7k|    temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
  565|       |
  566|  33.7k|    flag1_16x8 = _mm_packs_epi16(temp2, temp1);
  567|       |
  568|       |    //Cond2 (ABS(q1 - q0) < beta)
  569|  33.7k|    temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
  570|  33.7k|    temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
  571|  33.7k|    temp1 = _mm_add_epi8(temp1, temp2);
  572|       |
  573|  33.7k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  574|  33.7k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  575|       |
  576|  33.7k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  577|  33.7k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  578|       |
  579|  33.7k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
  580|       |
  581|  33.7k|    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
  582|       |
  583|       |    //Cond3 (ABS(p1 - p0) < beta)
  584|  33.7k|    temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
  585|  33.7k|    temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
  586|  33.7k|    temp1 = _mm_add_epi8(temp1, temp2);
  587|       |
  588|  33.7k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  589|  33.7k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  590|       |
  591|  33.7k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  592|  33.7k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  593|       |
  594|  33.7k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
  595|       |
  596|       |    // (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta)
  597|  33.7k|    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
  598|       |
  599|       |    // (ABS(p0 - q0) < ((alpha >> 2) + 2))
  600|  33.7k|    temp1 = _mm_subs_epu8(p0_16x8, q0_16x8);
  601|  33.7k|    temp2 = _mm_subs_epu8(q0_16x8, p0_16x8);
  602|  33.7k|    temp1 = _mm_add_epi8(temp1, temp2);
  603|  33.7k|    Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2);
  604|  33.7k|    Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8);
  605|       |
  606|  33.7k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  607|  33.7k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  608|  33.7k|    temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
  609|  33.7k|    temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
  610|       |
  611|  33.7k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
  612|  33.7k|    flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
  613|       |
  614|       |    // (ABS(p2 - p0) < beta)
  615|  33.7k|    temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
  616|  33.7k|    temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
  617|  33.7k|    temp1 = _mm_add_epi8(temp1, temp2);
  618|       |
  619|  33.7k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  620|  33.7k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  621|  33.7k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  622|  33.7k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  623|       |
  624|  33.7k|    flag3_16x8 = _mm_packs_epi16(temp2, temp1);
  625|  33.7k|    flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8);
  626|       |
  627|       |    // (ABS(q2 - q0) < beta)
  628|  33.7k|    temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
  629|  33.7k|    temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
  630|  33.7k|    temp1 = _mm_add_epi8(temp1, temp2);
  631|       |
  632|  33.7k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
  633|  33.7k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
  634|  33.7k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
  635|  33.7k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
  636|       |
  637|  33.7k|    flag4_16x8 = _mm_packs_epi16(temp2, temp1);
  638|  33.7k|    flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8);
  639|       |
  640|       |    // First 8 pixels
  641|  33.7k|    p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero);
  642|  33.7k|    p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero);
  643|  33.7k|    p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero);
  644|  33.7k|    p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero);
  645|  33.7k|    q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero);
  646|  33.7k|    q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero);
  647|  33.7k|    q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero);
  648|  33.7k|    q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero);
  649|       |
  650|       |    // p0_1 and q0_1
  651|  33.7k|    temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
  652|  33.7k|    temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
  653|  33.7k|    temp5 = _mm_add_epi16(temp1, const_val2_16x8);
  654|  33.7k|    temp6 = _mm_add_epi16(temp2, const_val2_16x8);
  655|  33.7k|    temp3 = _mm_slli_epi16(p1_8x16, 1);
  656|  33.7k|    temp4 = _mm_slli_epi16(q1_8x16, 1);
  657|  33.7k|    temp1 = _mm_add_epi16(temp5, temp3);
  658|  33.7k|    temp2 = _mm_add_epi16(temp6, temp4);
  659|  33.7k|    p0_16x8_1 = _mm_srai_epi16(temp1, 2);
  660|  33.7k|    q0_16x8_1 = _mm_srai_epi16(temp2, 2);
  661|       |
  662|       |    // p1_2 and q1_2
  663|  33.7k|    temp6 = _mm_add_epi16(temp6, p0_8x16);
  664|  33.7k|    temp5 = _mm_add_epi16(temp5, q0_8x16);
  665|  33.7k|    temp1 = _mm_add_epi16(temp6, p2_8x16);
  666|  33.7k|    temp2 = _mm_add_epi16(temp5, q2_8x16);
  667|  33.7k|    p1_16x8_2 = _mm_srai_epi16(temp1, 2);
  668|  33.7k|    q1_16x8_2 = _mm_srai_epi16(temp2, 2);
  669|       |
  670|       |    // p0_2 and q0_2
  671|  33.7k|    temp1 = _mm_add_epi16(temp3, p2_8x16);
  672|  33.7k|    temp2 = _mm_add_epi16(temp4, q2_8x16);
  673|  33.7k|    temp1 = _mm_add_epi16(temp1, q1_8x16);
  674|  33.7k|    temp2 = _mm_add_epi16(temp2, p1_8x16);
  675|  33.7k|    temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
  676|  33.7k|    temp3 = _mm_slli_epi16(temp3, 1);
  677|  33.7k|    temp1 = _mm_add_epi16(temp1, temp3);
  678|  33.7k|    temp2 = _mm_add_epi16(temp2, temp3);
  679|  33.7k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
  680|  33.7k|    temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
  681|  33.7k|    p0_16x8_2 = _mm_srai_epi16(temp1, 3);
  682|  33.7k|    q0_16x8_2 = _mm_srai_epi16(temp2, 3);
  683|       |
  684|       |    // p2_2 and q2_2
  685|  33.7k|    temp1 = _mm_add_epi16(temp6, const_val2_16x8);
  686|  33.7k|    temp2 = _mm_add_epi16(temp5, const_val2_16x8);
  687|  33.7k|    temp3 = _mm_slli_epi16(p2_8x16, 1);
  688|  33.7k|    temp4 = _mm_slli_epi16(q2_8x16, 1);
  689|  33.7k|    temp3 = _mm_add_epi16(p2_8x16, temp3);
  690|  33.7k|    temp4 = _mm_add_epi16(q2_8x16, temp4);
  691|  33.7k|    temp5 = _mm_slli_epi16(p3_8x16, 1);
  692|  33.7k|    temp6 = _mm_slli_epi16(q3_8x16, 1);
  693|  33.7k|    temp1 = _mm_add_epi16(temp1, temp3);
  694|  33.7k|    temp2 = _mm_add_epi16(temp2, temp4);
  695|  33.7k|    temp1 = _mm_add_epi16(temp1, temp5);
  696|  33.7k|    temp2 = _mm_add_epi16(temp2, temp6);
  697|  33.7k|    p2_16x8_2 = _mm_srai_epi16(temp1, 3);
  698|  33.7k|    q2_16x8_2 = _mm_srai_epi16(temp2, 3);
  699|       |
  700|       |    // Second 8 pixels and packing with first 8 pixels
  701|  33.7k|    p3_8x16 = _mm_unpackhi_epi8(p3_16x8, zero);
  702|  33.7k|    p2_8x16 = _mm_unpackhi_epi8(p2_16x8, zero);
  703|  33.7k|    p1_8x16 = _mm_unpackhi_epi8(p1_16x8, zero);
  704|  33.7k|    p0_8x16 = _mm_unpackhi_epi8(p0_16x8, zero);
  705|  33.7k|    q0_8x16 = _mm_unpackhi_epi8(q0_16x8, zero);
  706|  33.7k|    q1_8x16 = _mm_unpackhi_epi8(q1_16x8, zero);
  707|  33.7k|    q2_8x16 = _mm_unpackhi_epi8(q2_16x8, zero);
  708|  33.7k|    q3_8x16 = _mm_unpackhi_epi8(q3_16x8, zero);
  709|       |
  710|       |    // p0_1 and q0_1
  711|  33.7k|    temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
  712|  33.7k|    temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
  713|  33.7k|    temp5 = _mm_add_epi16(temp1, const_val2_16x8);
  714|  33.7k|    temp6 = _mm_add_epi16(temp2, const_val2_16x8);
  715|  33.7k|    temp3 = _mm_slli_epi16(p1_8x16, 1);
  716|  33.7k|    temp4 = _mm_slli_epi16(q1_8x16, 1);
  717|  33.7k|    temp1 = _mm_add_epi16(temp5, temp3);
  718|  33.7k|    temp2 = _mm_add_epi16(temp6, temp4);
  719|  33.7k|    temp1 = _mm_srai_epi16(temp1, 2);
  720|  33.7k|    temp2 = _mm_srai_epi16(temp2, 2);
  721|  33.7k|    p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, temp1);
  722|  33.7k|    q0_16x8_1 = _mm_packus_epi16(q0_16x8_1, temp2);
  723|       |
  724|       |    // p1_2 and q1_2
  725|  33.7k|    temp6 = _mm_add_epi16(temp6, p0_8x16);
  726|  33.7k|    temp5 = _mm_add_epi16(temp5, q0_8x16);
  727|  33.7k|    temp1 = _mm_add_epi16(temp6, p2_8x16);
  728|  33.7k|    temp2 = _mm_add_epi16(temp5, q2_8x16);
  729|  33.7k|    temp1 = _mm_srai_epi16(temp1, 2);
  730|  33.7k|    temp2 = _mm_srai_epi16(temp2, 2);
  731|  33.7k|    p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, temp1);
  732|  33.7k|    q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, temp2);
  733|       |
  734|       |    // p0_2 and q0_2
  735|  33.7k|    temp1 = _mm_add_epi16(temp3, p2_8x16);
  736|  33.7k|    temp2 = _mm_add_epi16(temp4, q2_8x16);
  737|  33.7k|    temp1 = _mm_add_epi16(temp1, q1_8x16);
  738|  33.7k|    temp2 = _mm_add_epi16(temp2, p1_8x16);
  739|  33.7k|    temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
  740|  33.7k|    temp3 = _mm_slli_epi16(temp3, 1);
  741|  33.7k|    temp1 = _mm_add_epi16(temp1, temp3);
  742|  33.7k|    temp2 = _mm_add_epi16(temp2, temp3);
  743|  33.7k|    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
  744|  33.7k|    temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
  745|  33.7k|    temp1 = _mm_srai_epi16(temp1, 3);
  746|  33.7k|    temp2 = _mm_srai_epi16(temp2, 3);
  747|  33.7k|    p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, temp1);
  748|  33.7k|    q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, temp2);
  749|       |
  750|       |    // p2_2 and q2_2
  751|  33.7k|    temp1 = _mm_add_epi16(temp6, const_val2_16x8);
  752|  33.7k|    temp2 = _mm_add_epi16(temp5, const_val2_16x8);
  753|  33.7k|    temp3 = _mm_slli_epi16(p2_8x16, 1);
  754|  33.7k|    temp4 = _mm_slli_epi16(q2_8x16, 1);
  755|  33.7k|    temp3 = _mm_add_epi16(p2_8x16, temp3);
  756|  33.7k|    temp4 = _mm_add_epi16(q2_8x16, temp4);
  757|  33.7k|    temp5 = _mm_slli_epi16(p3_8x16, 1);
  758|  33.7k|    temp6 = _mm_slli_epi16(q3_8x16, 1);
  759|  33.7k|    temp1 = _mm_add_epi16(temp1, temp3);
  760|  33.7k|    temp2 = _mm_add_epi16(temp2, temp4);
  761|  33.7k|    temp1 = _mm_add_epi16(temp1, temp5);
  762|  33.7k|    temp2 = _mm_add_epi16(temp2, temp6);
  763|  33.7k|    temp1 = _mm_srai_epi16(temp1, 3);
  764|  33.7k|    temp2 = _mm_srai_epi16(temp2, 3);
  765|  33.7k|    p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, temp1);
  766|  33.7k|    q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, temp2);
  767|       |
  768|       |    // p0 and q0: take the p0_1 / q0_1 result where flag1 is set
  769|  33.7k|    p0_16x8 = _mm_and_si128(p0_16x8,
  770|  33.7k|                            _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
  771|  33.7k|    p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8);
  772|  33.7k|    p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1);
  773|  33.7k|    q0_16x8 = _mm_and_si128(q0_16x8,
  774|  33.7k|                            _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
  775|  33.7k|    q0_16x8_1 = _mm_and_si128(q0_16x8_1, flag1_16x8);
  776|  33.7k|    q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1);
  777|       |
  778|       |    // p0 and q0: take the p0_2 / q0_2 result where flag3 / flag4 is set
  779|  33.7k|    p0_16x8 = _mm_and_si128(p0_16x8,
  780|  33.7k|                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
  781|  33.7k|    p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8);
  782|  33.7k|    p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2);
  783|  33.7k|    q0_16x8 = _mm_and_si128(q0_16x8,
  784|  33.7k|                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
  785|  33.7k|    q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8);
  786|  33.7k|    q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2);
  787|       |
  788|       |    // p1 and q1
  789|  33.7k|    p1_16x8 = _mm_and_si128(p1_16x8,
  790|  33.7k|                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
  791|  33.7k|    p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8);
  792|  33.7k|    p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2);
  793|  33.7k|    q1_16x8 = _mm_and_si128(q1_16x8,
  794|  33.7k|                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
  795|  33.7k|    q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8);
  796|  33.7k|    q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2);
  797|       |
  798|       |    // p2 and q2
  799|  33.7k|    p2_16x8 = _mm_and_si128(p2_16x8,
  800|  33.7k|                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
  801|  33.7k|    p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8);
  802|  33.7k|    p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2);
  803|  33.7k|    q2_16x8 = _mm_and_si128(q2_16x8,
  804|  33.7k|                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
  805|  33.7k|    q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8);
  806|  33.7k|    q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2);
  807|       |
  808|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP2), p2_16x8);
  809|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP1), p1_16x8);
  810|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP0), p0_16x8);
  811|       |
  812|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_src), q0_16x8);
  813|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_src + i16_posQ1), q1_16x8);
  814|  33.7k|    _mm_storeu_si128((__m128i *)(pu1_src + i16_posQ2), q2_16x8);
  815|       |
  816|  33.7k|}
ih264_deblk_luma_vert_bslt4_ssse3:
  856|   284k|{
  857|   284k|    UWORD8 u1_Bs, u1_Bs1;
  858|       |
  859|   284k|    WORD32 j = 0;
  860|       |
  861|   284k|    __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh;
  862|   284k|    __m128i int1, int2, int3, int4, high1, high2;
  863|   284k|    __m128i flag, flag1, i_C, i_C0;
  864|   284k|    __m128i i_Ap, i_Aq, diff, const1, const2, in_macro, in_macrotemp, temp,
  865|   284k|                    temp1;
  866|   284k|    __m128i zero = _mm_setzero_si128();
  867|       |
  868|   852k|    for(j = 0; j <= 8 * src_strd; j += 8 * src_strd)
  ------------------
  |  Branch (868:16): [True: 568k, False: 284k]
  ------------------
  869|   568k|    {
  870|       |        //Transpose
  871|   568k|        linea = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + j));
  872|   568k|        lineb = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + src_strd + j));
  873|   568k|        linec = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 2 * src_strd + j));
  874|   568k|        lined = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 3 * src_strd + j));
  875|       |
  876|   568k|        linea = _mm_unpacklo_epi8(linea, zero);
  877|   568k|        lineb = _mm_unpacklo_epi8(lineb, zero);
  878|   568k|        linec = _mm_unpacklo_epi8(linec, zero);
  879|   568k|        lined = _mm_unpacklo_epi8(lined, zero);
  880|       |
  881|   568k|        int1 = _mm_unpacklo_epi16(linea, lineb);
  882|   568k|        lineb = _mm_unpackhi_epi16(linea, lineb);
  883|       |
  884|   568k|        int2 = _mm_unpacklo_epi16(linec, lined);
  885|   568k|        lined = _mm_unpackhi_epi16(linec, lined);
  886|       |
  887|   568k|        linea = _mm_unpacklo_epi16(int1, int2);
  888|   568k|        int1 = _mm_unpackhi_epi16(int1, int2);
  889|       |
  890|   568k|        linec = _mm_unpacklo_epi16(lineb, lined);
  891|   568k|        high1 = _mm_unpackhi_epi16(lineb, lined);
  892|       |
  893|   568k|        linee = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 4 * src_strd + j));
  894|   568k|        linef = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 5 * src_strd + j));
  895|   568k|        lineg = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 6 * src_strd + j));
  896|   568k|        lineh = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 7 * src_strd + j));
  897|       |
  898|   568k|        linee = _mm_unpacklo_epi8(linee, zero);
  899|   568k|        linef = _mm_unpacklo_epi8(linef, zero);
  900|   568k|        lineg = _mm_unpacklo_epi8(lineg, zero);
  901|   568k|        lineh = _mm_unpacklo_epi8(lineh, zero);
  902|       |
  903|   568k|        int2 = _mm_unpacklo_epi16(linee, linef);
  904|   568k|        linef = _mm_unpackhi_epi16(linee, linef);
  905|       |
  906|   568k|        int3 = _mm_unpacklo_epi16(lineg, lineh);
  907|   568k|        lineh = _mm_unpackhi_epi16(lineg, lineh);
  908|       |
  909|   568k|        linee = _mm_unpacklo_epi16(int2, int3);
  910|   568k|        int2 = _mm_unpackhi_epi16(int2, int3);
  911|       |
  912|   568k|        lineg = _mm_unpacklo_epi16(linef, lineh);
  913|   568k|        high2 = _mm_unpackhi_epi16(linef, lineh);
  914|       |
  915|   568k|        int4 = _mm_unpacklo_epi16(linea, linee);
  916|   568k|        lineb = _mm_unpackhi_epi16(linea, linee);
  917|       |
  918|   568k|        int3 = _mm_unpacklo_epi16(int1, int2);
  919|   568k|        lined = _mm_unpackhi_epi16(int1, int2);
  920|       |
  921|   568k|        int2 = _mm_unpacklo_epi16(linec, lineg);
  922|   568k|        linef = _mm_unpackhi_epi16(linec, lineg);
  923|       |
  924|   568k|        linea = int4;
  925|   568k|        linec = int3;
  926|   568k|        linee = int2;
  927|       |
  928|   568k|        lineg = _mm_unpacklo_epi16(high1, high2);
  929|   568k|        lineh = _mm_unpackhi_epi16(high1, high2);
  930|       |
  931|       |        //end of transpose
  932|       |
  933|   568k|        u1_Bs = (u4_bs >> 24) & 0xff;
  934|   568k|        u1_Bs1 = (u4_bs >> 16) & 0xff;
  935|   568k|        u4_bs <<= 16;
  936|       |
  937|   568k|        flag1 = _mm_set_epi16(u1_Bs1, u1_Bs, u1_Bs1, u1_Bs, u1_Bs1, u1_Bs,
  938|   568k|                              u1_Bs1, u1_Bs);
  939|   568k|        flag1 = _mm_cmpeq_epi16(flag1, zero); //Set flag to 1s and 0s
  940|   568k|        flag1 = _mm_xor_si128(flag1, _mm_set1_epi16(0xFFFF)); //Invert for required mask
  941|       |
  942|   568k|        i_C0 = _mm_set_epi16(pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs],
  943|   568k|                             pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs],
  944|   568k|                             pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs],
  945|   568k|                             pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs]);
  946|       |
  947|   568k|        diff = _mm_subs_epi16(linec, lined); //Condn 1
  948|   568k|        diff = _mm_abs_epi16(diff);
  949|   568k|        const1 = _mm_set1_epi16(alpha);
  950|   568k|        flag = _mm_cmpgt_epi16(const1, diff);
  951|       |
  952|   568k|        diff = _mm_subs_epi16(linee, lined); //Condtn 2
  953|   568k|        diff = _mm_abs_epi16(diff);
  954|   568k|        const1 = _mm_set1_epi16(beta);
  955|   568k|        flag = _mm_and_si128(flag, _mm_cmpgt_epi16(const1, diff));
  956|       |
  957|   568k|        diff = _mm_subs_epi16(lineb, linec); //Condtn 3
  958|   568k|        diff = _mm_abs_epi16(diff);
  959|   568k|        flag = _mm_and_si128(flag, _mm_cmpgt_epi16(const1, diff)); //Const 1= Beta from now on
  960|       |
  961|   568k|        flag = _mm_and_si128(flag, flag1); //Final flag (ui_B condition + other 3 conditions)
  962|       |
  963|       |        //Adding Ap<Beta and Aq<Beta
  964|   568k|        i_Ap = _mm_subs_epi16(linea, linec);
  965|   568k|        i_Ap = _mm_abs_epi16(i_Ap);
  966|   568k|        const2 = _mm_cmpgt_epi16(const1, i_Ap);
  967|   568k|        const2 = _mm_subs_epi16(zero, const2); //Make FFFF=1 and 0000=0
  968|   568k|        i_C = _mm_add_epi16(i_C0, const2);
  969|       |
  970|   568k|        i_Aq = _mm_subs_epi16(linef, lined);
  971|   568k|        i_Aq = _mm_abs_epi16(i_Aq);
  972|   568k|        const2 = _mm_cmpgt_epi16(const1, i_Aq);
  973|   568k|        const2 = _mm_subs_epi16(zero, const2);
  974|   568k|        i_C = _mm_add_epi16(i_C, const2);
  975|       |
  976|       |        //Calculate in_macro
  977|   568k|        diff = _mm_subs_epi16(lined, linec);
  978|   568k|        diff = _mm_slli_epi16(diff, 2);
  979|   568k|        const2 = _mm_subs_epi16(lineb, linee);
  980|   568k|        diff = _mm_add_epi16(diff, const2);
  981|   568k|        const2 = _mm_set1_epi16(4);
  982|   568k|        diff = _mm_add_epi16(diff, const2);
  983|   568k|        in_macro = _mm_srai_epi16(diff, 3);
  984|       |
  985|   568k|        in_macro = _mm_min_epi16(i_C, in_macro); //CLIP3
  986|   568k|        i_C = _mm_subs_epi16(zero, i_C);
  987|   568k|        in_macro = _mm_max_epi16(i_C, in_macro);
  988|       |
  989|       |        //Compute and store
  990|   568k|        in_macrotemp = _mm_add_epi16(linec, in_macro);
  991|   568k|        in_macrotemp = _mm_and_si128(in_macrotemp, flag);
  992|   568k|        temp = _mm_and_si128(linec,
  993|   568k|                             _mm_xor_si128(flag, _mm_set1_epi16(0xFFFF)));
  994|   568k|        temp = _mm_add_epi16(temp, in_macrotemp);
  995|       |        //temp= _mm_packus_epi16 (temp, zero);
  996|       |        //_mm_storel_epi64(uc_HorzPixel+i16_posP0+i, in_macrotemp);
  997|       |
  998|   568k|        in_macrotemp = _mm_subs_epi16(lined, in_macro);
  999|   568k|        in_macrotemp = _mm_and_si128(in_macrotemp, flag);
 1000|   568k|        temp1 = _mm_and_si128(lined,
 1001|   568k|                              _mm_xor_si128(flag, _mm_set1_epi16(0xFFFF)));
 1002|   568k|        temp1 = _mm_add_epi16(temp1, in_macrotemp);
 1003|       |        //temp1= _mm_packus_epi16 (temp1, zero);
 1004|       |        //_mm_storel_epi64(pu1_src+i, in_macrotemp);
 1005|       |
 1006|       |        //If Ap<Beta
 1007|   568k|        flag1 = _mm_cmpgt_epi16(const1, i_Ap);
 1008|   568k|        flag1 = _mm_and_si128(flag, flag1);
 1009|   568k|        in_macrotemp = _mm_add_epi16(linec, lined);
 1010|   568k|        in_macrotemp = _mm_add_epi16(in_macrotemp, _mm_set1_epi16(1));
 1011|   568k|        in_macrotemp = _mm_srai_epi16(in_macrotemp, 1);
 1012|   568k|        in_macro = _mm_add_epi16(in_macrotemp, linea);
 1013|   568k|        in_macro = _mm_subs_epi16(in_macro, _mm_slli_epi16(lineb, 1));
 1014|   568k|        in_macro = _mm_srai_epi16(in_macro, 1);
 1015|       |
 1016|   568k|        in_macro = _mm_min_epi16(i_C0, in_macro); //CLIP3
 1017|   568k|        i_C0 = _mm_subs_epi16(zero, i_C0);
 1018|   568k|        in_macro = _mm_max_epi16(i_C0, in_macro);
 1019|       |
 1020|   568k|        in_macro = _mm_and_si128(in_macro, flag1);
 1021|   568k|        lineb = _mm_add_epi16(lineb, in_macro);
 1022|       |        //in_macro= _mm_packus_epi16 (i_p1, zero);
 1023|       |        //_mm_storel_epi64(uc_HorzPixel+i16_posP1+i, in_macro);
 1024|       |
 1025|   568k|        flag1 = _mm_cmpgt_epi16(const1, i_Aq);
 1026|   568k|        flag1 = _mm_and_si128(flag, flag1);
 1027|   568k|        in_macro = _mm_add_epi16(in_macrotemp, linef);
 1028|   568k|        in_macro = _mm_subs_epi16(in_macro, _mm_slli_epi16(linee, 1));
 1029|   568k|        in_macro = _mm_srai_epi16(in_macro, 1);
 1030|       |
 1031|   568k|        i_C0 = _mm_abs_epi16(i_C0);
 1032|   568k|        in_macro = _mm_min_epi16(i_C0, in_macro); //CLIP3
 1033|   568k|        i_C0 = _mm_subs_epi16(zero, i_C0);
 1034|   568k|        in_macro = _mm_max_epi16(i_C0, in_macro);
 1035|       |
 1036|   568k|        in_macro = _mm_and_si128(in_macro, flag1);
 1037|   568k|        linee = _mm_add_epi16(linee, in_macro);
 1038|       |        //in_macro= _mm_packus_epi16 (i_q1, zero);
 1039|       |        //_mm_storel_epi64(pu1_src+i16_posQ1+i, in_macro);
 1040|   568k|        linec = temp;
 1041|   568k|        lined = temp1;
 1042|       |        //End of filtering
 1043|       |
 1044|   568k|        int1 = _mm_unpacklo_epi16(linea, linee);
 1045|   568k|        linee = _mm_unpackhi_epi16(linea, linee);
 1046|       |
 1047|   568k|        int2 = _mm_unpacklo_epi16(linec, lineg);
 1048|   568k|        lineg = _mm_unpackhi_epi16(linec, lineg);
 1049|       |
 1050|   568k|        linea = _mm_unpacklo_epi16(int1, int2);
 1051|   568k|        int3 = _mm_unpackhi_epi16(int1, int2);
 1052|       |
 1053|   568k|        linec = _mm_unpacklo_epi16(linee, lineg);
 1054|   568k|        lineg = _mm_unpackhi_epi16(linee, lineg);
 1055|       |
 1056|   568k|        int1 = _mm_unpacklo_epi16(lineb, linef);
 1057|   568k|        linef = _mm_unpackhi_epi16(lineb, linef);
 1058|       |
 1059|   568k|        int2 = _mm_unpacklo_epi16(lined, lineh);
 1060|   568k|        lineh = _mm_unpackhi_epi16(lined, lineh);
 1061|       |
 1062|   568k|        lineb = _mm_unpacklo_epi16(int1, int2);
 1063|   568k|        int4 = _mm_unpackhi_epi16(int1, int2);
 1064|       |
 1065|   568k|        lined = _mm_unpacklo_epi16(linef, lineh);
 1066|   568k|        lineh = _mm_unpackhi_epi16(linef, lineh);
 1067|       |
 1068|   568k|        int1 = _mm_unpackhi_epi16(linea, lineb);
 1069|   568k|        linea = _mm_unpacklo_epi16(linea, lineb);
 1070|       |
 1071|   568k|        int2 = _mm_unpacklo_epi16(int3, int4);
 1072|   568k|        high1 = _mm_unpackhi_epi16(int3, int4);
 1073|       |
 1074|   568k|        lineb = _mm_unpacklo_epi16(linec, lined);
 1075|   568k|        linef = _mm_unpackhi_epi16(linec, lined);
 1076|       |
 1077|   568k|        lined = _mm_unpacklo_epi16(lineg, lineh);
 1078|   568k|        lineh = _mm_unpackhi_epi16(lineg, lineh);
 1079|       |
 1080|   568k|        linee = int1;
 1081|   568k|        lineg = high1;
 1082|   568k|        linec = int2;
 1083|       |        //End of inverse transpose
 1084|       |
 1085|       |        //Packs and stores
 1086|   568k|        linea = _mm_packus_epi16(linea, zero);
 1087|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + j), linea);
 1088|       |
 1089|   568k|        lineb = _mm_packus_epi16(lineb, zero);
 1090|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + src_strd + j), lineb);
 1091|       |
 1092|   568k|        linec = _mm_packus_epi16(linec, zero);
 1093|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + 2 * src_strd + j), linec);
 1094|       |
 1095|   568k|        lined = _mm_packus_epi16(lined, zero);
 1096|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + 3 * src_strd + j), lined);
 1097|       |
 1098|   568k|        linee = _mm_packus_epi16(linee, zero);
 1099|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + 4 * src_strd + j), linee);
 1100|       |
 1101|   568k|        linef = _mm_packus_epi16(linef, zero);
 1102|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + 5 * src_strd + j), linef);
 1103|       |
 1104|   568k|        lineg = _mm_packus_epi16(lineg, zero);
 1105|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + 6 * src_strd + j), lineg);
 1106|       |
 1107|   568k|        lineh = _mm_packus_epi16(lineh, zero);
 1108|   568k|        _mm_storel_epi64((__m128i *)(pu1_src - 3 + 7 * src_strd + j), lineh);
 1109|       |
 1110|   568k|    }
 1111|   284k|}
ih264_deblk_luma_horz_bslt4_ssse3:
 1151|   315k|{
 1152|   315k|    WORD16 i16_posP2, i16_posP1, i16_posP0, i16_posQ1, i16_posQ2;
 1153|   315k|    UWORD8 *pu1_HorzPixel;
 1154|   315k|    __m128i zero = _mm_setzero_si128();
 1155|   315k|    __m128i bs_flag_16x8b, C0_16x8, C0_8x16, C0_hi_8x16, C_8x16, C_hi_8x16;
 1156|   315k|    __m128i q0_16x8, q1_16x8, q2_16x8, p0_16x8, p1_16x8, p2_16x8;
 1157|   315k|    __m128i temp1, temp2;
 1158|   315k|    __m128i Alpha_8x16, Beta_8x16, flag1_16x8, flag2_16x8, flag3_16x8;
 1159|   315k|    __m128i in_macro_16x8, in_macro_hi_16x8;
 1160|   315k|    __m128i const_val4_8x16;
 1161|   315k|    UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3;
 1162|   315k|    UWORD8 clip0, clip1, clip2, clip3;
 1163|       |
 1164|   315k|    pu1_HorzPixel = pu1_src - (src_strd << 2);
 1165|       |
 1166|   315k|    i16_posQ1 = src_strd;
 1167|   315k|    i16_posQ2 = X2(src_strd);
  ------------------
  |  |   91|   315k|#define X2(a)   ((a) << 1)
  ------------------
 1168|   315k|    i16_posP0 = X3(src_strd);
  ------------------
  |  |   92|   315k|#define X3(a)   (((a) << 1) + (a))
  ------------------
 1169|   315k|    i16_posP1 = X2(src_strd);
  ------------------
  |  |   91|   315k|#define X2(a)   ((a) << 1)
  ------------------
 1170|   315k|    i16_posP2 = src_strd;
 1171|       |
 1172|   315k|    q0_16x8 = _mm_loadu_si128((__m128i *)(pu1_src));
 1173|   315k|    q1_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ1));
 1174|       |
 1175|   315k|    u1_Bs0 = (u4_bs >> 24) & 0xff;
 1176|   315k|    u1_Bs1 = (u4_bs >> 16) & 0xff;
 1177|   315k|    u1_Bs2 = (u4_bs >> 8) & 0xff;
 1178|   315k|    u1_Bs3 = (u4_bs >> 0) & 0xff;
 1179|   315k|    clip0 = pu1_cliptab[u1_Bs0];
 1180|   315k|    clip1 = pu1_cliptab[u1_Bs1];
 1181|   315k|    clip2 = pu1_cliptab[u1_Bs2];
 1182|   315k|    clip3 = pu1_cliptab[u1_Bs3];
 1183|       |
 1184|   315k|    Alpha_8x16 = _mm_set1_epi16(alpha);
 1185|   315k|    Beta_8x16 = _mm_set1_epi16(beta);
 1186|       |
 1187|   315k|    bs_flag_16x8b = _mm_set_epi8(u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs2, u1_Bs2,
 1188|   315k|                                 u1_Bs2, u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs1, u1_Bs1,
 1189|   315k|                                 u1_Bs0, u1_Bs0, u1_Bs0, u1_Bs0);
 1190|       |
 1191|   315k|    C0_16x8 = _mm_set_epi8(clip3, clip3, clip3, clip3, clip2, clip2, clip2,
 1192|   315k|                           clip2, clip1, clip1, clip1, clip1, clip0, clip0,
 1193|   315k|                           clip0, clip0);
 1194|       |
 1195|   315k|    bs_flag_16x8b = _mm_cmpeq_epi8(bs_flag_16x8b, zero);
 1196|   315k|    bs_flag_16x8b = _mm_xor_si128(bs_flag_16x8b, _mm_set1_epi8(0xFF)); //Invert for required mask
 1197|   315k|    C0_8x16 = _mm_unpacklo_epi8(C0_16x8, zero);
 1198|   315k|    C0_hi_8x16 = _mm_unpackhi_epi8(C0_16x8, zero);
 1199|       |
 1200|   315k|    p1_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP1));
 1201|   315k|    p0_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP0));
 1202|   315k|    p2_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP2));
 1203|   315k|    q2_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ2));
 1204|       |
 1205|       |    //Cond1 (ABS(p0 - q0) < alpha)
 1206|   315k|    temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
 1207|   315k|    temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
 1208|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1209|       |
 1210|   315k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
 1211|   315k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
 1212|       |
 1213|   315k|    temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
 1214|   315k|    temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
 1215|       |
 1216|   315k|    flag1_16x8 = _mm_packs_epi16(temp2, temp1);
 1217|   315k|    flag1_16x8 = _mm_and_si128(flag1_16x8, bs_flag_16x8b);
 1218|       |
 1219|       |    //Cond2 (ABS(q1 - q0) < beta)
 1220|   315k|    temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
 1221|   315k|    temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
 1222|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1223|       |
 1224|   315k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
 1225|   315k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
 1226|       |
 1227|   315k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
 1228|   315k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
 1229|       |
 1230|   315k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
 1231|       |
 1232|   315k|    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
 1233|       |
 1234|       |    //Cond3 (ABS(p1 - p0) < beta)
 1235|   315k|    temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
 1236|   315k|    temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
 1237|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1238|       |
 1239|   315k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
 1240|   315k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
 1241|       |
 1242|   315k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
 1243|   315k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
 1244|       |
 1245|   315k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
 1246|       |
 1247|       |    // !((ABS(p0 - q0) < alpha) || (ABS(q1 - q0) < beta) || (ABS(p1 - p0) < beta))
 1248|   315k|    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
 1249|       |
 1250|       |    // (ABS(p2 - p0) < beta)
 1251|   315k|    temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
 1252|   315k|    temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
 1253|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1254|       |
 1255|   315k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
 1256|   315k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
 1257|   315k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
 1258|   315k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
 1259|       |
 1260|   315k|    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
 1261|   315k|    flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
 1262|       |
 1263|   315k|    temp2 = _mm_subs_epi16(zero, temp2);
 1264|   315k|    temp1 = _mm_subs_epi16(zero, temp1);
 1265|       |
 1266|   315k|    C_8x16 = _mm_add_epi16(C0_8x16, temp2);
 1267|   315k|    C_hi_8x16 = _mm_add_epi16(C0_hi_8x16, temp1);
 1268|       |
 1269|       |    // (ABS(q2 - q0) < beta)
 1270|   315k|    temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
 1271|   315k|    temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
 1272|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1273|       |
 1274|   315k|    temp2 = _mm_unpacklo_epi8(temp1, zero);
 1275|   315k|    temp1 = _mm_unpackhi_epi8(temp1, zero);
 1276|   315k|    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
 1277|   315k|    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
 1278|       |
 1279|   315k|    flag3_16x8 = _mm_packs_epi16(temp2, temp1);
 1280|   315k|    flag3_16x8 = _mm_and_si128(flag1_16x8, flag3_16x8);
 1281|       |
 1282|   315k|    temp2 = _mm_subs_epi16(zero, temp2);
 1283|   315k|    temp1 = _mm_subs_epi16(zero, temp1);
 1284|       |
 1285|   315k|    C_8x16 = _mm_add_epi16(C_8x16, temp2);
 1286|   315k|    C_hi_8x16 = _mm_add_epi16(C_hi_8x16, temp1);
 1287|       |
 1288|   315k|    const_val4_8x16 = _mm_set1_epi16(4);
 1289|   315k|    temp1 = _mm_subs_epi16(_mm_unpacklo_epi8(q0_16x8, zero),
 1290|   315k|                           _mm_unpacklo_epi8(p0_16x8, zero));
 1291|   315k|    temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p1_16x8, zero),
 1292|   315k|                           _mm_unpacklo_epi8(q1_16x8, zero));
 1293|   315k|    temp1 = _mm_slli_epi16(temp1, 2);
 1294|   315k|    temp1 = _mm_add_epi16(temp1, temp2);
 1295|   315k|    temp1 = _mm_add_epi16(temp1, const_val4_8x16);
 1296|   315k|    in_macro_16x8 = _mm_srai_epi16(temp1, 3);
 1297|       |
 1298|   315k|    temp1 = _mm_subs_epi16(_mm_unpackhi_epi8(q0_16x8, zero),
 1299|   315k|                           _mm_unpackhi_epi8(p0_16x8, zero));
 1300|   315k|    temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(p1_16x8, zero),
 1301|   315k|                           _mm_unpackhi_epi8(q1_16x8, zero));
 1302|   315k|    temp1 = _mm_slli_epi16(temp1, 2);
 1303|   315k|    temp1 = _mm_add_epi16(temp1, temp2);
 1304|   315k|    temp1 = _mm_add_epi16(temp1, const_val4_8x16);
 1305|   315k|    in_macro_hi_16x8 = _mm_srai_epi16(temp1, 3);
 1306|       |
 1307|   315k|    in_macro_16x8 = _mm_min_epi16(C_8x16, in_macro_16x8); //CLIP3
 1308|   315k|    in_macro_hi_16x8 = _mm_min_epi16(C_hi_8x16, in_macro_hi_16x8); //CLIP3
 1309|   315k|    C_8x16 = _mm_subs_epi16(zero, C_8x16);
 1310|   315k|    C_hi_8x16 = _mm_subs_epi16(zero, C_hi_8x16);
 1311|   315k|    in_macro_16x8 = _mm_max_epi16(C_8x16, in_macro_16x8); //CLIP3
 1312|   315k|    in_macro_hi_16x8 = _mm_max_epi16(C_hi_8x16, in_macro_hi_16x8); //CLIP3
 1313|       |
 1314|   315k|    temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p0_16x8, zero), in_macro_16x8);
 1315|   315k|    temp2 = _mm_add_epi16(_mm_unpackhi_epi8(p0_16x8, zero), in_macro_hi_16x8);
 1316|       |
 1317|   315k|    temp1 = _mm_packus_epi16(temp1, temp2);
 1318|       |
 1319|   315k|    temp1 = _mm_and_si128(temp1, flag1_16x8);
 1320|   315k|    temp2 = _mm_and_si128(p0_16x8,
 1321|   315k|                          _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF)));
 1322|       |
 1323|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1324|       |
 1325|   315k|    _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP0), temp1);
 1326|       |
 1327|   315k|    temp1 = _mm_sub_epi16(_mm_unpacklo_epi8(q0_16x8, zero), in_macro_16x8);
 1328|   315k|    temp2 = _mm_sub_epi16(_mm_unpackhi_epi8(q0_16x8, zero), in_macro_hi_16x8);
 1329|       |
 1330|   315k|    temp1 = _mm_packus_epi16(temp1, temp2);
 1331|       |
 1332|   315k|    temp1 = _mm_and_si128(temp1, flag1_16x8);
 1333|   315k|    temp2 = _mm_and_si128(q0_16x8,
 1334|   315k|                          _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF)));
 1335|       |
 1336|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1337|   315k|    _mm_storeu_si128((__m128i *)(pu1_src), temp1);
 1338|       |
 1339|       |    //if(Ap < Beta)
 1340|   315k|    temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero),
 1341|   315k|                          _mm_unpacklo_epi8(p0_16x8, zero));
 1342|   315k|    temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(p1_16x8, zero), 1);
 1343|       |    //temp2 = _mm_subs_epi16(zero,temp2);
 1344|   315k|    temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p2_16x8, zero), temp2);
 1345|   315k|    temp2 = _mm_add_epi16(temp1, temp2);
 1346|   315k|    in_macro_16x8 = _mm_srai_epi16(temp2, 1);
 1347|       |
 1348|   315k|    temp1 = _mm_avg_epu16(_mm_unpackhi_epi8(q0_16x8, zero),
 1349|   315k|                          _mm_unpackhi_epi8(p0_16x8, zero));
 1350|   315k|    temp2 = _mm_slli_epi16(_mm_unpackhi_epi8(p1_16x8, zero), 1);
 1351|       |    //temp2 = _mm_subs_epi16(zero,temp2);
 1352|   315k|    temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(p2_16x8, zero), temp2);
 1353|   315k|    temp2 = _mm_add_epi16(temp1, temp2);
 1354|   315k|    in_macro_hi_16x8 = _mm_srai_epi16(temp2, 1);
 1355|       |
 1356|   315k|    in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3
 1357|   315k|    in_macro_hi_16x8 = _mm_min_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
 1358|   315k|    C0_8x16 = _mm_subs_epi16(zero, C0_8x16);
 1359|   315k|    C0_hi_8x16 = _mm_subs_epi16(zero, C0_hi_8x16);
 1360|   315k|    in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3
 1361|   315k|    in_macro_hi_16x8 = _mm_max_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
 1362|       |
 1363|   315k|    temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p1_16x8, zero), in_macro_16x8);
 1364|   315k|    temp2 = _mm_add_epi16(_mm_unpackhi_epi8(p1_16x8, zero), in_macro_hi_16x8);
 1365|       |
 1366|   315k|    temp1 = _mm_packus_epi16(temp1, temp2);
 1367|       |
 1368|   315k|    temp1 = _mm_and_si128(temp1, flag2_16x8);
 1369|   315k|    temp2 = _mm_and_si128(p1_16x8,
 1370|   315k|                          _mm_xor_si128(flag2_16x8, _mm_set1_epi16(0xFFFF)));
 1371|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1372|   315k|    _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP1), temp1);
 1373|       |
 1374|       |    //if(Aq < Beta)
 1375|   315k|    temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero),
 1376|   315k|                          _mm_unpacklo_epi8(p0_16x8, zero));
 1377|   315k|    temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(q1_16x8, zero), 1);
 1378|       |    //temp2 = _mm_slli_epi16 (temp2, 1);
 1379|   315k|    temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(q2_16x8, zero), temp2);
 1380|   315k|    temp2 = _mm_add_epi16(temp1, temp2);
 1381|   315k|    in_macro_16x8 = _mm_srai_epi16(temp2, 1);
 1382|       |
 1383|   315k|    temp1 = _mm_avg_epu16(_mm_unpackhi_epi8(q0_16x8, zero),
 1384|   315k|                          _mm_unpackhi_epi8(p0_16x8, zero));
 1385|   315k|    temp2 = _mm_slli_epi16(_mm_unpackhi_epi8(q1_16x8, zero), 1);
 1386|       |    //temp2 = _mm_slli_epi16 (temp2, 1);
 1387|   315k|    temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(q2_16x8, zero), temp2);
 1388|   315k|    temp2 = _mm_add_epi16(temp1, temp2);
 1389|   315k|    in_macro_hi_16x8 = _mm_srai_epi16(temp2, 1);
 1390|       |
 1391|   315k|    in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3
 1392|   315k|    in_macro_hi_16x8 = _mm_max_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
 1393|   315k|    C0_8x16 = _mm_subs_epi16(zero, C0_8x16);
 1394|   315k|    C0_hi_8x16 = _mm_subs_epi16(zero, C0_hi_8x16);
 1395|   315k|    in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3
 1396|   315k|    in_macro_hi_16x8 = _mm_min_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
 1397|       |
 1398|   315k|    temp1 = _mm_add_epi16(_mm_unpacklo_epi8(q1_16x8, zero), in_macro_16x8);
 1399|   315k|    temp2 = _mm_add_epi16(_mm_unpackhi_epi8(q1_16x8, zero), in_macro_hi_16x8);
 1400|       |
 1401|   315k|    temp1 = _mm_packus_epi16(temp1, temp2);
 1402|       |
 1403|   315k|    temp1 = _mm_and_si128(temp1, flag3_16x8);
 1404|   315k|    temp2 = _mm_and_si128(q1_16x8,
 1405|   315k|                          _mm_xor_si128(flag3_16x8, _mm_set1_epi16(0xFFFF)));
 1406|   315k|    temp1 = _mm_add_epi8(temp1, temp2);
 1407|       |
 1408|   315k|    _mm_storeu_si128((__m128i *)(pu1_src + i16_posQ1), temp1);
 1409|       |
 1410|   315k|}

ih264_ihadamard_scaling_4x4_sse42:
   95|  12.0k|{
   96|  12.0k|    __m128i src_r0_r1, src_r2_r3;
   97|  12.0k|    __m128i src_r0, src_r1, src_r2, src_r3;
   98|  12.0k|    __m128i temp0, temp1, temp2, temp3;
   99|  12.0k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (99:41): [True: 7.81k, False: 4.21k]
  ------------------
  100|  12.0k|    __m128i mult_val = _mm_set1_epi32(pu2_iscal_mat[0] * pu2_weigh_mat[0]);
  101|  12.0k|    UNUSED (pi4_tmp);
  ------------------
  |  |   45|  12.0k|#define UNUSED(x) ((void)(x))
  ------------------
  102|       |
  103|  12.0k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
  104|  12.0k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
  105|       |    //sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1);
  106|  12.0k|    src_r0 = _mm_cvtepi16_epi32(src_r0_r1);
  107|  12.0k|    src_r0_r1 = _mm_srli_si128(src_r0_r1, 8);
  108|  12.0k|    src_r1 = _mm_cvtepi16_epi32(src_r0_r1);
  109|       |
  110|  12.0k|    src_r2 = _mm_cvtepi16_epi32(src_r2_r3);
  111|  12.0k|    src_r2_r3 = _mm_srli_si128(src_r2_r3, 8);
  112|  12.0k|    src_r3 = _mm_cvtepi16_epi32(src_r2_r3);
  113|       |
  114|       |    /* Perform Inverse transform */
  115|       |    /*-------------------------------------------------------------*/
  116|       |    /* IDCT [ Horizontal transformation ]                          */
  117|       |    /*-------------------------------------------------------------*/
  118|       |    // Matrix transpose
  119|       |    /*
  120|       |     *  a0 a1 a2 a3
  121|       |     *  b0 b1 b2 b3
  122|       |     *  c0 c1 c2 c3
  123|       |     *  d0 d1 d2 d3
  124|       |     */
  125|  12.0k|    temp0 = _mm_unpacklo_epi32(src_r0, src_r1);                  //a0 b0 a1 b1
  126|  12.0k|    temp2 = _mm_unpacklo_epi32(src_r2, src_r3);                  //c0 d0 c1 d1
  127|  12.0k|    temp1 = _mm_unpackhi_epi32(src_r0, src_r1);                  //a2 b2 a3 b3
  128|  12.0k|    temp3 = _mm_unpackhi_epi32(src_r2, src_r3);                  //c2 d2 c3 d3
  129|  12.0k|    src_r0 = _mm_unpacklo_epi64(temp0, temp2);                    //a0 b0 c0 d0
  130|  12.0k|    src_r1 = _mm_unpackhi_epi64(temp0, temp2);                    //a1 b1 c1 d1
  131|  12.0k|    src_r2 = _mm_unpacklo_epi64(temp1, temp3);                    //a2 b2 c2 d2
  132|  12.0k|    src_r3 = _mm_unpackhi_epi64(temp1, temp3);                    //a3 b3 c3 d3
  133|       |
  134|  12.0k|    temp0 = _mm_add_epi32(src_r0, src_r3);
  135|  12.0k|    temp1 = _mm_add_epi32(src_r1, src_r2);
  136|  12.0k|    temp2 = _mm_sub_epi32(src_r1, src_r2);
  137|  12.0k|    temp3 = _mm_sub_epi32(src_r0, src_r3);
  138|       |
  139|  12.0k|    src_r0 = _mm_add_epi32(temp0, temp1);
  140|  12.0k|    src_r1 = _mm_add_epi32(temp2, temp3);
  141|  12.0k|    src_r2 = _mm_sub_epi32(temp0, temp1);
  142|  12.0k|    src_r3 = _mm_sub_epi32(temp3, temp2);
  143|       |
  144|       |    /*-------------------------------------------------------------*/
  145|       |    /* IDCT [ Vertical transformation ]                          */
  146|       |    /*-------------------------------------------------------------*/
  147|       |    // Matrix transpose
  148|       |    /*
  149|       |     *  a0 b0 c0 d0
  150|       |     *  a1 b1 c1 d1
  151|       |     *  a2 b2 c2 d2
  152|       |     *  a3 b3 c3 d3
  153|       |     */
  154|  12.0k|    temp0 = _mm_unpacklo_epi32(src_r0, src_r1);                  //a0 a1 b0 b1
  155|  12.0k|    temp2 = _mm_unpacklo_epi32(src_r2, src_r3);                  //a2 a3 b2 b3
  156|  12.0k|    temp1 = _mm_unpackhi_epi32(src_r0, src_r1);                  //c0 c1 d0 d1
  157|  12.0k|    temp3 = _mm_unpackhi_epi32(src_r2, src_r3);                  //c2 c3 d2 d3
  158|  12.0k|    src_r0 = _mm_unpacklo_epi64(temp0, temp2);                   //a0 a1 a2 a3
  159|  12.0k|    src_r1 = _mm_unpackhi_epi64(temp0, temp2);                   //b0 b1 b2 b3
  160|  12.0k|    src_r2 = _mm_unpacklo_epi64(temp1, temp3);                   //c0 c1 c2 c3
  161|  12.0k|    src_r3 = _mm_unpackhi_epi64(temp1, temp3);                   //d0 d1 d2 d3
  162|       |
  163|  12.0k|    temp0 = _mm_add_epi32(src_r0, src_r3);
  164|  12.0k|    temp1 = _mm_add_epi32(src_r1, src_r2);
  165|  12.0k|    temp2 = _mm_sub_epi32(src_r1, src_r2);
  166|  12.0k|    temp3 = _mm_sub_epi32(src_r0, src_r3);
  167|       |
  168|  12.0k|    src_r0 = _mm_add_epi32(temp0, temp1);
  169|  12.0k|    src_r1 = _mm_add_epi32(temp2, temp3);
  170|  12.0k|    src_r2 = _mm_sub_epi32(temp0, temp1);
  171|  12.0k|    src_r3 = _mm_sub_epi32(temp3, temp2);
  172|       |
  173|  12.0k|    src_r0 = _mm_mullo_epi32(src_r0, mult_val);
  174|  12.0k|    src_r1 = _mm_mullo_epi32(src_r1, mult_val);
  175|  12.0k|    src_r2 = _mm_mullo_epi32(src_r2, mult_val);
  176|  12.0k|    src_r3 = _mm_mullo_epi32(src_r3, mult_val);
  177|       |
  178|       |    //Scaling
  179|  12.0k|    if(u4_qp_div_6 >= 6)
  ------------------
  |  Branch (179:8): [True: 4.21k, False: 7.81k]
  ------------------
  180|  4.21k|    {
  181|  4.21k|        src_r0 = _mm_slli_epi32(src_r0, u4_qp_div_6 - 6);
  182|  4.21k|        src_r1 = _mm_slli_epi32(src_r1, u4_qp_div_6 - 6);
  183|  4.21k|        src_r2 = _mm_slli_epi32(src_r2, u4_qp_div_6 - 6);
  184|  4.21k|        src_r3 = _mm_slli_epi32(src_r3, u4_qp_div_6 - 6);
  185|  4.21k|    }
  186|  7.81k|    else
  187|  7.81k|    {
  188|  7.81k|        temp0 = _mm_add_epi32(src_r0, add_rshift);
  189|  7.81k|        temp1 = _mm_add_epi32(src_r1, add_rshift);
  190|  7.81k|        temp2 = _mm_add_epi32(src_r2, add_rshift);
  191|  7.81k|        temp3 = _mm_add_epi32(src_r3, add_rshift);
  192|  7.81k|        src_r0 = _mm_srai_epi32(temp0, 6 - u4_qp_div_6);
  193|  7.81k|        src_r1 = _mm_srai_epi32(temp1, 6 - u4_qp_div_6);
  194|  7.81k|        src_r2 = _mm_srai_epi32(temp2, 6 - u4_qp_div_6);
  195|  7.81k|        src_r3 = _mm_srai_epi32(temp3, 6 - u4_qp_div_6);
  196|  7.81k|    }
  197|  12.0k|    src_r0_r1 = _mm_packs_epi32(src_r0, src_r1);
  198|  12.0k|    src_r2_r3 = _mm_packs_epi32(src_r2, src_r3);
  199|       |
  200|  12.0k|    _mm_storeu_si128((__m128i *) (&pi2_out[0]), src_r0_r1);
  201|  12.0k|    _mm_storeu_si128((__m128i *) (&pi2_out[8]), src_r2_r3);
  202|  12.0k|}

ih264_inter_pred_luma_copy_ssse3:
   98|  9.83M|{
   99|  9.83M|    __m128i y_0_16x8b, y_1_16x8b, y_2_16x8b, y_3_16x8b;
  100|       |
  101|  9.83M|    WORD32 src_strd2, src_strd3, src_strd4, dst_strd2, dst_strd3, dst_strd4;
  102|  9.83M|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  9.83M|#define UNUSED(x) ((void)(x))
  ------------------
  103|  9.83M|    UNUSED(dydx);
  ------------------
  |  |   45|  9.83M|#define UNUSED(x) ((void)(x))
  ------------------
  104|       |
  105|  9.83M|    src_strd2 = src_strd << 1;
  106|  9.83M|    dst_strd2 = dst_strd << 1;
  107|  9.83M|    src_strd4 = src_strd << 2;
  108|  9.83M|    dst_strd4 = dst_strd << 2;
  109|  9.83M|    src_strd3 = src_strd2 + src_strd;
  110|  9.83M|    dst_strd3 = dst_strd2 + dst_strd;
  111|       |
  112|  9.83M|    if(wd == 4)
  ------------------
  |  Branch (112:8): [True: 22.4k, False: 9.80M]
  ------------------
  113|  22.4k|    {
  114|  22.4k|        do
  115|  38.9k|        {
  116|  38.9k|            *((WORD32 *)(pu1_dst)) =  *((WORD32 *)(pu1_src));
  117|  38.9k|            *((WORD32 *)(pu1_dst + dst_strd)) = *((WORD32 *)(pu1_src + src_strd));
  118|  38.9k|            *((WORD32 *)(pu1_dst + dst_strd2)) = *((WORD32 *)(pu1_src + src_strd2));
  119|  38.9k|            *((WORD32 *)(pu1_dst + dst_strd3)) = *((WORD32 *)(pu1_src + src_strd3));
  120|       |
  121|  38.9k|            ht -= 4;
  122|  38.9k|            pu1_src += src_strd4;
  123|  38.9k|            pu1_dst += dst_strd4;
  124|  38.9k|        }
  125|  38.9k|        while(ht > 0);
  ------------------
  |  Branch (125:15): [True: 16.4k, False: 22.4k]
  ------------------
  126|  22.4k|    }
  127|  9.80M|    else if(wd == 8)
  ------------------
  |  Branch (127:13): [True: 127k, False: 9.68M]
  ------------------
  128|   127k|    {
  129|   127k|        do
  130|   301k|        {
  131|   301k|            y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  132|   301k|            y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
  133|   301k|            y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd2));
  134|   301k|            y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd3));
  135|       |
  136|   301k|            _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b);
  137|   301k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
  138|   301k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd2), y_2_16x8b);
  139|   301k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd3), y_3_16x8b);
  140|       |
  141|   301k|            ht -= 4;
  142|   301k|            pu1_src += src_strd4;
  143|   301k|            pu1_dst += dst_strd4;
  144|   301k|        }
  145|   301k|        while(ht > 0);
  ------------------
  |  Branch (145:15): [True: 174k, False: 127k]
  ------------------
  146|   127k|    }
  147|  9.68M|    else // wd == 16
  148|  9.68M|    {
  149|  9.68M|        WORD32 src_strd5, src_strd6, src_strd7, src_strd8;
  150|  9.68M|        WORD32 dst_strd5, dst_strd6, dst_strd7, dst_strd8;
  151|       |
  152|  9.68M|        __m128i y_4_16x8b, y_5_16x8b, y_6_16x8b, y_7_16x8b;
  153|       |
  154|  9.68M|        src_strd5 = src_strd2 + src_strd3;
  155|  9.68M|        dst_strd5 = dst_strd2 + dst_strd3;
  156|  9.68M|        src_strd6 = src_strd3 << 1;
  157|  9.68M|        dst_strd6 = dst_strd3 << 1;
  158|  9.68M|        src_strd7 = src_strd3 + src_strd4;
  159|  9.68M|        dst_strd7 = dst_strd3 + dst_strd4;
  160|  9.68M|        src_strd8 = src_strd << 3;
  161|  9.68M|        dst_strd8 = dst_strd << 3;
  162|       |
  163|  9.68M|        do
  164|  19.3M|        {
  165|  19.3M|            y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  166|  19.3M|            y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
  167|  19.3M|            y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd2));
  168|  19.3M|            y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd3));
  169|  19.3M|            y_4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd4));
  170|  19.3M|            y_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd5));
  171|  19.3M|            y_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd6));
  172|  19.3M|            y_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd7));
  173|       |
  174|  19.3M|            _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b);
  175|  19.3M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
  176|  19.3M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), y_2_16x8b);
  177|  19.3M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), y_3_16x8b);
  178|  19.3M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd4), y_4_16x8b);
  179|  19.3M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd5), y_5_16x8b);
  180|  19.3M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd6), y_6_16x8b);
  181|  19.3M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd7), y_7_16x8b);
  182|       |
  183|  19.3M|            ht -= 8;
  184|  19.3M|            pu1_src += src_strd8;
  185|  19.3M|            pu1_dst += dst_strd8;
  186|  19.3M|        }
  187|  19.3M|        while(ht > 0);
  ------------------
  |  Branch (187:15): [True: 9.65M, False: 9.68M]
  ------------------
  188|  9.68M|    }
  189|  9.83M|}
ih264_inter_pred_luma_horz_ssse3:
  224|  42.2k|{
  225|  42.2k|    __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
  226|  42.2k|    __m128i const_val16_8x16b;
  227|       |
  228|  42.2k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  42.2k|#define UNUSED(x) ((void)(x))
  ------------------
  229|  42.2k|    UNUSED(dydx);
  ------------------
  |  |   45|  42.2k|#define UNUSED(x) ((void)(x))
  ------------------
  230|       |
  231|  42.2k|    pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
  232|       |
  233|  42.2k|    coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
  234|  42.2k|    coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
  235|  42.2k|    coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
  236|       |                                                 //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
  237|  42.2k|    const_val16_8x16b = _mm_set1_epi16(16);
  238|       |
  239|  42.2k|    if(wd == 4)
  ------------------
  |  Branch (239:8): [True: 4.33k, False: 37.9k]
  ------------------
  240|  4.33k|    {
  241|  4.33k|        __m128i src_r0_16x8b, src_r1_16x8b, src_r0r1_16x8b;
  242|  4.33k|        __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
  243|       |
  244|  4.33k|        __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
  245|  4.33k|        __m128i res_r0r1_16x8b;
  246|       |
  247|       |        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
  248|       |        //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
  249|       |
  250|  4.33k|        do
  251|  12.8k|        {
  252|  12.8k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                     //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
  253|  12.8k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));        //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
  254|       |
  255|  12.8k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                     //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
  256|  12.8k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                     //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
  257|       |
  258|  12.8k|            src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);       //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
  259|  12.8k|            src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);       //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
  260|       |
  261|  12.8k|            src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);        //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
  262|  12.8k|            res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);  //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
  263|       |                                                                                    //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
  264|       |
  265|  12.8k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                         //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0
  266|  12.8k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                         //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0
  267|       |
  268|  12.8k|            src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);        //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
  269|  12.8k|            res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff2_3_16x8b);  //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
  270|       |                                                                                    //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
  271|       |
  272|  12.8k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                         //a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0  0  0  0  0
  273|  12.8k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                         //b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0  0  0  0  0
  274|       |
  275|  12.8k|            src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);        //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
  276|  12.8k|            res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff4_5_16x8b);  //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
  277|       |                                                                                    //b4*c4+b5*c5 b5*c4+b6*c5 b4*c6+b7*c5 b7*c4+b8*c5
  278|       |
  279|  12.8k|            res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
  280|  12.8k|            res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b);
  281|  12.8k|            res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); //a0*c0+a1*c1+a2*c2+a3*c3+a4*a4+a5*c5 + 16;
  282|       |                                                                                     //a1*c0+a2*c1+a2*c2+a3*c3+a5*a4+a6*c5 + 16;
  283|       |                                                                                     //a2*c0+a3*c1+a4*c2+a5*c3+a6*a4+a7*c5 + 16;
  284|       |                                                                                     //a3*c0+a4*c1+a5*c2+a6*c3+a6*a4+a8*c5 + 16;
  285|       |                                                                                     //b0*c0+b1*c1+b2*c2+b3*c3+b4*b4+b5*c5 + 16;
  286|       |                                                                                     //b1*c0+b2*c1+b2*c2+b3*c3+b5*b4+b6*c5 + 16;
  287|       |                                                                                     //b2*c0+b3*c1+b4*c2+b5*c3+b6*b4+b7*c5 + 16;
  288|       |                                                                                     //b3*c0+b4*c1+b5*c2+b6*c3+b6*b4+b8*c5 + 16;
  289|       |
  290|  12.8k|            res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5);                //shifting right by 5 bits.
  291|       |
  292|  12.8k|            res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b, res_r0r1_t1_8x16b);
  293|       |
  294|  12.8k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
  295|  12.8k|            res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);
  296|  12.8k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
  297|       |
  298|  12.8k|            ht -= 2;
  299|  12.8k|            pu1_src += src_strd << 1;
  300|  12.8k|            pu1_dst += dst_strd << 1;
  301|  12.8k|        }
  302|  12.8k|        while(ht > 0);
  ------------------
  |  Branch (302:15): [True: 8.53k, False: 4.33k]
  ------------------
  303|  4.33k|    }
  304|  37.9k|    else if(wd == 8)
  ------------------
  |  Branch (304:13): [True: 19.7k, False: 18.1k]
  ------------------
  305|  19.7k|    {
  306|  19.7k|        __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
  307|  19.7k|        __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
  308|       |
  309|  19.7k|        __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
  310|  19.7k|        __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
  311|       |
  312|       |        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
  313|       |        //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
  314|       |
  315|  19.7k|        do
  316|  88.7k|        {
  317|  88.7k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                   //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
  318|  88.7k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));      //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
  319|       |
  320|  88.7k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                   //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
  321|  88.7k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                   //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
  322|       |
  323|  88.7k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);  //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
  324|  88.7k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);  //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
  325|       |
  326|  88.7k|            res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
  327|       |                                                                                  //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
  328|  88.7k|            res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
  329|       |                                                                                  //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
  330|       |
  331|  88.7k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                       //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
  332|  88.7k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                       //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
  333|       |
  334|  88.7k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);               //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
  335|  88.7k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);               //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
  336|       |
  337|  88.7k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);  //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
  338|  88.7k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);  //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
  339|       |
  340|  88.7k|            res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
  341|       |                                                                                  //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
  342|  88.7k|            res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
  343|       |                                                                                  //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
  344|       |
  345|  88.7k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                       //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
  346|  88.7k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                       //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
  347|       |
  348|  88.7k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);               //a5 a6 a7 a8 a9....a15 0  0  0  0  0
  349|  88.7k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);               //b5 b6 b7 b8 b9....b15 0  0  0  0  0
  350|       |
  351|  88.7k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);  //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
  352|  88.7k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);  //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
  353|       |
  354|  88.7k|            res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
  355|       |                                                                                  //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
  356|  88.7k|            res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
  357|       |                                                                                  //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
  358|  88.7k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
  359|  88.7k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
  360|  88.7k|            res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b);
  361|  88.7k|            res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b);
  362|  88.7k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
  363|  88.7k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
  364|       |
  365|  88.7k|            res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);                 //shifting right by 5 bits.
  366|  88.7k|            res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);
  367|       |
  368|  88.7k|            src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b);
  369|  88.7k|            src_r1_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b);
  370|       |
  371|  88.7k|            _mm_storel_epi64((__m128i *)pu1_dst, src_r0_16x8b);
  372|  88.7k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), src_r1_16x8b);
  373|       |
  374|  88.7k|            ht -= 2;
  375|  88.7k|            pu1_src += src_strd << 1;
  376|  88.7k|            pu1_dst += dst_strd << 1;
  377|  88.7k|        }
  378|  88.7k|        while(ht > 0);
  ------------------
  |  Branch (378:15): [True: 69.0k, False: 19.7k]
  ------------------
  379|  19.7k|    }
  380|  18.1k|    else // wd == 16
  381|  18.1k|    {
  382|  18.1k|        __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
  383|  18.1k|        __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
  384|       |
  385|  18.1k|        __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
  386|  18.1k|        __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
  387|       |
  388|       |        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
  389|       |        //Row0 :                         b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
  390|       |        //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
  391|       |
  392|  18.1k|        do
  393|   240k|        {
  394|   240k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                  //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
  395|   240k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));            //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
  396|       |
  397|   240k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                   //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
  398|   240k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                   //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
  399|       |
  400|   240k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);  //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
  401|   240k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);  //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
  402|       |
  403|   240k|            res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
  404|       |                                                                                  //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
  405|   240k|            res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
  406|       |                                                                                  //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
  407|       |
  408|   240k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                       //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
  409|   240k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                       //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
  410|       |
  411|   240k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);               //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
  412|   240k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);               //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
  413|       |
  414|   240k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);  //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
  415|   240k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);  //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
  416|       |
  417|   240k|            res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
  418|       |                                                                                  //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
  419|   240k|            res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
  420|       |                                                                                  //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
  421|       |
  422|   240k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                       //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
  423|   240k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                       //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
  424|       |
  425|   240k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);               //a5 a6 a7 a8 a9....a15 0  0  0  0  0
  426|   240k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);               //b5 b6 b7 b8 b9....b15 0  0  0  0  0
  427|       |
  428|   240k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);  //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
  429|   240k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);  //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
  430|       |
  431|   240k|            res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
  432|       |                                                                                  //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
  433|   240k|            res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
  434|       |                                                                                  //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
  435|   240k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
  436|   240k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
  437|   240k|            res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b);
  438|   240k|            res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b);
  439|   240k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
  440|   240k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
  441|       |
  442|   240k|            res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);                 //shifting right by 5 bits.
  443|   240k|            res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);
  444|       |
  445|   240k|            src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b);
  446|   240k|            _mm_storeu_si128((__m128i *)pu1_dst, src_r0_16x8b);
  447|       |
  448|   240k|            ht--;
  449|   240k|            pu1_src += src_strd;
  450|   240k|            pu1_dst += dst_strd;
  451|   240k|        }
  452|   240k|        while(ht > 0);
  ------------------
  |  Branch (452:15): [True: 222k, False: 18.1k]
  ------------------
  453|  18.1k|    }
  454|  42.2k|}
ih264_inter_pred_luma_vert_ssse3:
  489|  61.7k|{
  490|  61.7k|    __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
  491|  61.7k|    __m128i src_r5_16x8b, src_r6_16x8b;
  492|  61.7k|    __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
  493|       |
  494|  61.7k|    __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
  495|       |
  496|  61.7k|    __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
  497|  61.7k|    __m128i const_val16_8x16b;
  498|       |
  499|  61.7k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|  61.7k|#define UNUSED(x) ((void)(x))
  ------------------
  500|  61.7k|    UNUSED(dydx);
  ------------------
  |  |   45|  61.7k|#define UNUSED(x) ((void)(x))
  ------------------
  501|       |
  502|  61.7k|    coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
  503|  61.7k|    coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
  504|  61.7k|    coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
  505|       |                                                 //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
  506|  61.7k|    const_val16_8x16b = _mm_set1_epi16(16);
  507|       |
  508|  61.7k|    pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3])
  509|       |
  510|  61.7k|    if(wd == 4)
  ------------------
  |  Branch (510:8): [True: 2.94k, False: 58.8k]
  ------------------
  511|  2.94k|    {
  512|       |        //Epilogue: Load all the pred rows except sixth and seventh row
  513|       |        //          for the first and second row processing.
  514|  2.94k|        src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  515|  2.94k|        pu1_src += src_strd;
  516|  2.94k|        src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  517|  2.94k|        pu1_src += src_strd;
  518|  2.94k|        src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  519|  2.94k|        pu1_src += src_strd;
  520|  2.94k|        src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  521|  2.94k|        pu1_src += src_strd;
  522|  2.94k|        src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  523|  2.94k|        pu1_src += src_strd;
  524|       |
  525|  2.94k|        src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b);
  526|  2.94k|        src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
  527|  2.94k|        src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
  528|  2.94k|        src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b);
  529|       |
  530|  2.94k|        do
  531|  9.85k|        {
  532|  9.85k|            src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  533|  9.85k|            src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
  534|       |
  535|  9.85k|            src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b);
  536|  9.85k|            src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b);
  537|       |
  538|  9.85k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
  539|  9.85k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
  540|  9.85k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
  541|       |
  542|  9.85k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
  543|  9.85k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
  544|  9.85k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
  545|       |
  546|  9.85k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
  547|  9.85k|            res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
  548|  9.85k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
  549|       |
  550|  9.85k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
  551|  9.85k|            res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
  552|       |
  553|  9.85k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
  554|  9.85k|            res_16x8b = _mm_srli_si128(res_16x8b, 4);
  555|  9.85k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
  556|       |
  557|  9.85k|            src_r0_16x8b = src_r2_16x8b;
  558|  9.85k|            src_r1_16x8b = src_r3_16x8b;
  559|  9.85k|            src_r2_16x8b = src_r4_16x8b;
  560|  9.85k|            src_r3_16x8b = src_r5_16x8b;
  561|  9.85k|            src_r4_16x8b = src_r6_16x8b;
  562|       |
  563|  9.85k|            ht -= 2;
  564|  9.85k|            pu1_src += src_strd << 1;
  565|  9.85k|            pu1_dst += dst_strd << 1;
  566|  9.85k|        }
  567|  9.85k|        while(ht > 0);
  ------------------
  |  Branch (567:15): [True: 6.91k, False: 2.94k]
  ------------------
  568|  2.94k|    }
  569|       |
  570|  58.8k|    else if(wd == 8)
  ------------------
  |  Branch (570:13): [True: 27.4k, False: 31.4k]
  ------------------
  571|  27.4k|    {
  572|       |        //Prologue: Load all the pred rows except sixth and seventh row
  573|       |        //          for the first and second row processing.
  574|  27.4k|        src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  575|  27.4k|        pu1_src += src_strd;
  576|  27.4k|        src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  577|  27.4k|        pu1_src += src_strd;
  578|  27.4k|        src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  579|  27.4k|        pu1_src += src_strd;
  580|  27.4k|        src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  581|  27.4k|        pu1_src += src_strd;
  582|  27.4k|        src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  583|  27.4k|        pu1_src += src_strd;
  584|       |
  585|  27.4k|        src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);
  586|  27.4k|        src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b);
  587|  27.4k|        src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b);
  588|  27.4k|        src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b);
  589|       |
  590|  27.4k|        do
  591|   126k|        {
  592|   126k|            src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  593|   126k|            src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
  594|       |
  595|   126k|            src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b);
  596|   126k|            src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b);
  597|       |
  598|   126k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
  599|   126k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
  600|   126k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
  601|       |
  602|   126k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
  603|   126k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
  604|   126k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
  605|       |
  606|   126k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
  607|   126k|            res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
  608|   126k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
  609|       |
  610|   126k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
  611|   126k|            res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
  612|       |
  613|   126k|            _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
  614|       |
  615|   126k|            src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
  616|   126k|            src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
  617|   126k|            src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
  618|       |
  619|   126k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
  620|   126k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
  621|   126k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
  622|       |
  623|   126k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
  624|   126k|            res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
  625|   126k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
  626|       |
  627|   126k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
  628|   126k|            res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
  629|       |
  630|   126k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
  631|       |
  632|   126k|            src_r0_16x8b = src_r2_16x8b;
  633|   126k|            src_r1_16x8b = src_r3_16x8b;
  634|   126k|            src_r2_16x8b = src_r4_16x8b;
  635|   126k|            src_r3_16x8b = src_r5_16x8b;
  636|   126k|            src_r4_16x8b = src_r6_16x8b;
  637|       |
  638|   126k|            ht -= 2;
  639|   126k|            pu1_src += src_strd << 1;
  640|   126k|            pu1_dst += dst_strd << 1;
  641|   126k|        }
  642|   126k|        while(ht > 0);
  ------------------
  |  Branch (642:15): [True: 99.3k, False: 27.4k]
  ------------------
  643|  27.4k|    }
  644|  31.4k|    else // wd == 16
  645|  31.4k|    {
  646|  31.4k|        __m128i res_t0_8x16b;
  647|       |
  648|       |        //Prologue: Load all the pred rows except sixth and seventh row
  649|       |        //          for the first and second row processing.
  650|  31.4k|        src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  651|  31.4k|        pu1_src += src_strd;
  652|  31.4k|        src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  653|  31.4k|        pu1_src += src_strd;
  654|  31.4k|        src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  655|  31.4k|        pu1_src += src_strd;
  656|  31.4k|        src_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  657|  31.4k|        pu1_src += src_strd;
  658|  31.4k|        src_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  659|  31.4k|        pu1_src += src_strd;
  660|       |
  661|  31.4k|        do
  662|   233k|        {
  663|   233k|            src_r5_16x8b  = _mm_loadu_si128((__m128i *)pu1_src);
  664|   233k|            src_r6_16x8b  = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
  665|       |
  666|   233k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
  667|   233k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
  668|   233k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
  669|       |
  670|   233k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
  671|   233k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
  672|   233k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
  673|       |
  674|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
  675|   233k|            res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
  676|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
  677|   233k|            res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
  678|       |
  679|   233k|            src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
  680|   233k|            src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
  681|   233k|            src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
  682|       |
  683|   233k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
  684|   233k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
  685|   233k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
  686|       |
  687|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
  688|   233k|            res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
  689|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
  690|   233k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
  691|       |
  692|   233k|            res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
  693|       |
  694|   233k|            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
  695|       |
  696|   233k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
  697|   233k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
  698|   233k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
  699|       |
  700|   233k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
  701|   233k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
  702|   233k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
  703|       |
  704|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
  705|   233k|            res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
  706|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
  707|   233k|            res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
  708|       |
  709|   233k|            src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
  710|   233k|            src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
  711|   233k|            src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
  712|       |
  713|   233k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
  714|   233k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
  715|   233k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
  716|       |
  717|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
  718|   233k|            res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
  719|   233k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
  720|   233k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
  721|       |
  722|   233k|            res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
  723|       |
  724|   233k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res_16x8b);
  725|       |
  726|   233k|            src_r0_16x8b = src_r2_16x8b;
  727|   233k|            src_r1_16x8b = src_r3_16x8b;
  728|   233k|            src_r2_16x8b = src_r4_16x8b;
  729|   233k|            src_r3_16x8b = src_r5_16x8b;
  730|   233k|            src_r4_16x8b = src_r6_16x8b;
  731|       |
  732|   233k|            ht -= 2;
  733|   233k|            pu1_src += src_strd << 1;
  734|   233k|            pu1_dst += dst_strd << 1;
  735|   233k|        }
  736|   233k|        while(ht > 0);
  ------------------
  |  Branch (736:15): [True: 201k, False: 31.4k]
  ------------------
  737|  31.4k|    }
  738|  61.7k|}
ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3:
  775|  33.3k|{
  776|  33.3k|    UNUSED(dydx);
  ------------------
  |  |   45|  33.3k|#define UNUSED(x) ((void)(x))
  ------------------
  777|       |
  778|  33.3k|    if(wd == 4)
  ------------------
  |  Branch (778:8): [True: 10.7k, False: 22.6k]
  ------------------
  779|  10.7k|    {
  780|  10.7k|        WORD16 *pi2_temp;
  781|       |
  782|  10.7k|        pu1_tmp += 4;
  783|  10.7k|        pu1_src -= src_strd << 1;
  784|  10.7k|        pi2_temp = (WORD16 *)pu1_tmp;
  785|  10.7k|        pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
  786|       |
  787|       |        // Horizontal 6-tap filtering
  788|  10.7k|        {
  789|  10.7k|            WORD32 ht_tmp = ht + 4;
  790|       |
  791|  10.7k|            __m128i src_r0_16x8b, src_r1_16x8b;
  792|  10.7k|            __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
  793|  10.7k|            __m128i src_r0r1_t1_16x8b;
  794|  10.7k|            __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
  795|  10.7k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
  796|       |
  797|  10.7k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
  798|  10.7k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
  799|  10.7k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
  800|       |                                                          //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
  801|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
  802|       |            //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
  803|       |
  804|  10.7k|            do
  805|  63.4k|            {
  806|  63.4k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                       //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
  807|  63.4k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));          //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
  808|       |
  809|  63.4k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                       //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
  810|  63.4k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                       //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
  811|       |
  812|  63.4k|                src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);         //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
  813|  63.4k|                src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);         //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
  814|       |
  815|  63.4k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);       //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
  816|  63.4k|                res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
  817|       |                                                                                          //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
  818|       |
  819|  63.4k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                           //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0
  820|  63.4k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                           //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0
  821|       |
  822|  63.4k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);       //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
  823|  63.4k|                res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
  824|       |                                                                                          //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
  825|       |
  826|  63.4k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                           //a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0  0  0  0  0
  827|  63.4k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                           //b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0  0  0  0  0
  828|       |
  829|  63.4k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);       //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
  830|  63.4k|                res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
  831|       |                                                                                          //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
  832|  63.4k|                res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
  833|  63.4k|                res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t3_8x16b, res_r0r1_t1_8x16b);
  834|       |
  835|  63.4k|                _mm_storeu_si128((__m128i *)pi2_temp, res_r0r1_t1_8x16b);
  836|       |
  837|  63.4k|                ht_tmp -= 2;
  838|  63.4k|                pu1_src += src_strd << 1;
  839|  63.4k|                pi2_temp += 8;
  840|  63.4k|            }
  841|  63.4k|            while(ht_tmp > 0);
  ------------------
  |  Branch (841:19): [True: 52.6k, False: 10.7k]
  ------------------
  842|       |
  843|  10.7k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                           //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
  844|  10.7k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                           //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
  845|       |
  846|  10.7k|            src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);             //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
  847|  10.7k|            res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff0_1_16x8b);          //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
  848|       |
  849|  10.7k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b,4);                                //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0
  850|  10.7k|            res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff2_3_16x8b);          //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
  851|       |
  852|  10.7k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b,4);                                //a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0  0  0  0  0
  853|  10.7k|            res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff4_5_16x8b);          //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
  854|       |
  855|  10.7k|            res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
  856|  10.7k|            res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t3_8x16b, res_r0r1_t1_8x16b);
  857|       |
  858|  10.7k|            _mm_storel_epi64((__m128i *)pi2_temp, res_r0r1_t1_8x16b);
  859|  10.7k|        }
  860|       |
  861|  10.7k|        pi2_temp = (WORD16 *)pu1_tmp;
  862|       |
  863|       |        // Vertical 6-tap filtering
  864|  10.7k|        {
  865|  10.7k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b,
  866|  10.7k|                            src_r4_8x16b;
  867|  10.7k|            __m128i src_r5_8x16b, src_r6_8x16b;
  868|  10.7k|            __m128i src_t1_8x16b, src_t2_8x16b;
  869|       |
  870|  10.7k|            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
  871|  10.7k|            __m128i res_8x16b, res_16x8b;
  872|       |
  873|  10.7k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
  874|  10.7k|            __m128i const_val512_4x32b;
  875|       |
  876|  10.7k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
  877|  10.7k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
  878|  10.7k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
  879|       |
  880|  10.7k|            const_val512_4x32b = _mm_set1_epi32(512);
  881|       |
  882|  10.7k|            src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp));
  883|  10.7k|            src_r1_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 4));
  884|  10.7k|            src_r2_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 8));
  885|  10.7k|            src_r3_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 12));
  886|  10.7k|            src_r4_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 16));
  887|  10.7k|            pi2_temp += 20;
  888|       |
  889|  10.7k|            do
  890|  41.8k|            {
  891|  41.8k|                src_r5_8x16b = _mm_loadl_epi64((__m128i *)pi2_temp);
  892|  41.8k|                src_r6_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 4));
  893|       |
  894|  41.8k|                src_r0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
  895|  41.8k|                src_t1_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
  896|  41.8k|                src_t2_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
  897|       |
  898|  41.8k|                res_t1_4x32b = _mm_madd_epi16(src_r0_8x16b, coeff0_1_8x16b);
  899|  41.8k|                res_t2_4x32b = _mm_madd_epi16(src_t1_8x16b, coeff2_3_8x16b);
  900|  41.8k|                res_t3_4x32b = _mm_madd_epi16(src_t2_8x16b, coeff4_5_8x16b);
  901|       |
  902|  41.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
  903|  41.8k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
  904|  41.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
  905|       |
  906|  41.8k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
  907|       |
  908|  41.8k|                src_r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
  909|  41.8k|                src_t1_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
  910|  41.8k|                src_t2_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
  911|       |
  912|  41.8k|                res_t1_4x32b = _mm_madd_epi16(src_r1_8x16b, coeff0_1_8x16b);
  913|  41.8k|                res_t2_4x32b = _mm_madd_epi16(src_t1_8x16b, coeff2_3_8x16b);
  914|  41.8k|                res_t3_4x32b = _mm_madd_epi16(src_t2_8x16b, coeff4_5_8x16b);
  915|       |
  916|  41.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
  917|  41.8k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
  918|  41.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
  919|       |
  920|  41.8k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
  921|       |
  922|  41.8k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
  923|  41.8k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
  924|       |
  925|  41.8k|                *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
  926|  41.8k|                res_16x8b = _mm_srli_si128(res_16x8b, 4);
  927|  41.8k|                *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
  928|       |
  929|  41.8k|                src_r0_8x16b = src_r2_8x16b;
  930|  41.8k|                src_r1_8x16b = src_r3_8x16b;
  931|  41.8k|                src_r2_8x16b = src_r4_8x16b;
  932|  41.8k|                src_r3_8x16b = src_r5_8x16b;
  933|  41.8k|                src_r4_8x16b = src_r6_8x16b;
  934|       |
  935|  41.8k|                ht -= 2;
  936|  41.8k|                pi2_temp += 8;
  937|  41.8k|                pu1_dst += dst_strd << 1;
  938|  41.8k|            }
  939|  41.8k|            while(ht > 0);
  ------------------
  |  Branch (939:19): [True: 31.0k, False: 10.7k]
  ------------------
  940|  10.7k|        }
  941|  10.7k|    }
  942|  22.6k|    else if(wd == 8)
  ------------------
  |  Branch (942:13): [True: 12.0k, False: 10.5k]
  ------------------
  943|  12.0k|    {
  944|  12.0k|        WORD16 *pi2_temp;
  945|       |
  946|  12.0k|        pu1_tmp += 4;
  947|  12.0k|        pu1_src -= src_strd << 1;
  948|  12.0k|        pi2_temp = (WORD16 *)pu1_tmp;
  949|  12.0k|        pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
  950|       |
  951|       |        // Horizontal 6-tap filtering
  952|  12.0k|        {
  953|  12.0k|            WORD32 ht_tmp = ht + 4;
  954|       |
  955|  12.0k|            __m128i src_r0_16x8b, src_r1_16x8b;
  956|  12.0k|            __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
  957|  12.0k|            __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
  958|  12.0k|            __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
  959|  12.0k|            __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
  960|  12.0k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
  961|       |
  962|  12.0k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
  963|  12.0k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
  964|  12.0k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
  965|       |                                                          //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
  966|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
  967|       |            //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
  968|       |
  969|  12.0k|            do
  970|  79.9k|            {
  971|  79.9k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                      //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15
  972|  79.9k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));         //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15
  973|       |
  974|  79.9k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                      //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
  975|  79.9k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                      //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
  976|       |
  977|  79.9k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
  978|  79.9k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
  979|       |
  980|  79.9k|                res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);    //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
  981|       |                                                                                         //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
  982|  79.9k|                res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);    //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
  983|       |                                                                                         //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
  984|       |
  985|  79.9k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
  986|  79.9k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
  987|       |
  988|  79.9k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
  989|  79.9k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
  990|       |
  991|  79.9k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
  992|  79.9k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
  993|       |
  994|  79.9k|                res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);    //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
  995|       |                                                                                         //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
  996|  79.9k|                res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);    //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
  997|       |                                                                                         //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
  998|       |
  999|  79.9k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 1000|  79.9k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 1001|       |
 1002|  79.9k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 1003|  79.9k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 1004|       |
 1005|  79.9k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 1006|  79.9k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 1007|       |
 1008|  79.9k|                res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);    //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 1009|       |                                                                                         //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 1010|  79.9k|                res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);    //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 1011|       |                                                                                         //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 1012|  79.9k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 1013|  79.9k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 1014|       |
 1015|  79.9k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 1016|  79.9k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 1017|       |
 1018|  79.9k|                _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b);
 1019|  79.9k|                _mm_storeu_si128((__m128i *)(pi2_temp + 8), res_r1_t1_8x16b);
 1020|       |
 1021|  79.9k|                ht_tmp -= 2;
 1022|  79.9k|                pu1_src += src_strd << 1;
 1023|  79.9k|                pi2_temp += 16;
 1024|  79.9k|            }
 1025|  79.9k|            while(ht_tmp > 0);
  ------------------
  |  Branch (1025:19): [True: 67.9k, False: 12.0k]
  ------------------
 1026|       |
 1027|  12.0k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                          //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15
 1028|  12.0k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                          //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 1029|       |
 1030|  12.0k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b,src_r0_sht_16x8b);          //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 1031|  12.0k|            res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b,coeff0_1_16x8b);         //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 1032|       |                                                                                         //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 1033|       |
 1034|  12.0k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                              //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 1035|  12.0k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                      //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 1036|       |
 1037|  12.0k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);         //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 1038|  12.0k|            res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);        //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 1039|       |                                                                                         //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 1040|       |
 1041|  12.0k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                              //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 1042|  12.0k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                      //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 1043|       |
 1044|  12.0k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);         //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 1045|  12.0k|            res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);        //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 1046|       |                                                                                         //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 1047|  12.0k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 1048|  12.0k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 1049|       |
 1050|  12.0k|            _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b);
 1051|  12.0k|        }
 1052|       |
 1053|  12.0k|        pi2_temp = (WORD16 *)pu1_tmp;
 1054|       |
 1055|       |        // Vertical 6-tap filtering
 1056|  12.0k|        {
 1057|  12.0k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b,
 1058|  12.0k|                            src_r4_8x16b;
 1059|  12.0k|            __m128i src_r5_8x16b, src_r6_8x16b;
 1060|  12.0k|            __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
 1061|       |
 1062|  12.0k|            __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 1063|  12.0k|            __m128i res_c0_4x32b, res_c1_4x32b;
 1064|  12.0k|            __m128i res_8x16b, res_16x8b;
 1065|       |
 1066|  12.0k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 1067|  12.0k|            __m128i const_val512_4x32b;
 1068|       |
 1069|  12.0k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 1070|  12.0k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 1071|  12.0k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 1072|       |
 1073|  12.0k|            const_val512_4x32b = _mm_set1_epi32(512);
 1074|       |
 1075|  12.0k|            src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
 1076|  12.0k|            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 8));
 1077|  12.0k|            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16));
 1078|  12.0k|            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 24));
 1079|  12.0k|            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 32));
 1080|  12.0k|            pi2_temp += 40;
 1081|       |
 1082|  12.0k|            do
 1083|  55.8k|            {
 1084|  55.8k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
 1085|  55.8k|                src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 8));
 1086|       |
 1087|  55.8k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 1088|  55.8k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 1089|  55.8k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 1090|       |
 1091|  55.8k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1092|  55.8k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1093|  55.8k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1094|       |
 1095|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1096|  55.8k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1097|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1098|  55.8k|                res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1099|       |
 1100|  55.8k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 1101|  55.8k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 1102|  55.8k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 1103|       |
 1104|  55.8k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1105|  55.8k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1106|  55.8k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1107|       |
 1108|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1109|  55.8k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1110|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1111|  55.8k|                res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1112|       |
 1113|  55.8k|                res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
 1114|  55.8k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 1115|       |
 1116|  55.8k|                _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
 1117|       |
 1118|  55.8k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
 1119|  55.8k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
 1120|  55.8k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
 1121|       |
 1122|  55.8k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1123|  55.8k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1124|  55.8k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1125|       |
 1126|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1127|  55.8k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1128|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1129|  55.8k|                res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1130|       |
 1131|  55.8k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
 1132|  55.8k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
 1133|  55.8k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
 1134|       |
 1135|  55.8k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1136|  55.8k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1137|  55.8k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1138|       |
 1139|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1140|  55.8k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1141|  55.8k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1142|  55.8k|                res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1143|       |
 1144|  55.8k|                res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
 1145|  55.8k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 1146|       |
 1147|  55.8k|                _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
 1148|       |
 1149|  55.8k|                src_r0_8x16b = src_r2_8x16b;
 1150|  55.8k|                src_r1_8x16b = src_r3_8x16b;
 1151|  55.8k|                src_r2_8x16b = src_r4_8x16b;
 1152|  55.8k|                src_r3_8x16b = src_r5_8x16b;
 1153|  55.8k|                src_r4_8x16b = src_r6_8x16b;
 1154|       |
 1155|  55.8k|                ht -= 2;
 1156|  55.8k|                pi2_temp += 16;
 1157|  55.8k|                pu1_dst += dst_strd << 1;
 1158|  55.8k|            }
 1159|  55.8k|            while(ht > 0);
  ------------------
  |  Branch (1159:19): [True: 43.7k, False: 12.0k]
  ------------------
 1160|  12.0k|        }
 1161|  12.0k|    }
 1162|  10.5k|    else // wd == 16
 1163|  10.5k|    {
 1164|  10.5k|        WORD16 *pi2_temp;
 1165|  10.5k|        WORD32 ht_tmp;
 1166|       |
 1167|  10.5k|        pu1_tmp += 4;
 1168|  10.5k|        pu1_src -= src_strd << 1;
 1169|  10.5k|        pi2_temp = (WORD16 *)pu1_tmp;
 1170|  10.5k|        pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
 1171|       |
 1172|       |        // Horizontal 6-tap filtering
 1173|  10.5k|        {
 1174|  10.5k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
 1175|  10.5k|            __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
 1176|       |
 1177|  10.5k|            __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
 1178|  10.5k|            __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
 1179|       |
 1180|  10.5k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 1181|       |
 1182|  10.5k|            ht_tmp = ht + 5;
 1183|       |
 1184|  10.5k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 1185|  10.5k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 1186|  10.5k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 1187|       |                                                          //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
 1188|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 1189|       |            //Row0 :                         b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 1190|       |            //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
 1191|       |
 1192|  10.5k|            do
 1193|   187k|            {
 1194|   187k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                      //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 1195|   187k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));                //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 1196|       |
 1197|   187k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                      //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 1198|   187k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                      //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 1199|       |
 1200|   187k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 1201|   187k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 1202|       |
 1203|   187k|                res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);    //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 1204|       |                                                                                         //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 1205|   187k|                res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);    //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 1206|       |                                                                                         //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
 1207|       |
 1208|   187k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 1209|   187k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
 1210|       |
 1211|   187k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 1212|   187k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
 1213|       |
 1214|   187k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 1215|   187k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
 1216|       |
 1217|   187k|                res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);    //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 1218|       |                                                                                         //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 1219|   187k|                res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);    //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
 1220|       |                                                                                         //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
 1221|       |
 1222|   187k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 1223|   187k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 1224|       |
 1225|   187k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 1226|   187k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 1227|       |
 1228|   187k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 1229|   187k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 1230|       |
 1231|   187k|                res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);    //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 1232|       |                                                                                         //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 1233|   187k|                res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);    //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 1234|       |                                                                                         //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 1235|   187k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 1236|   187k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 1237|       |
 1238|   187k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 1239|   187k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 1240|       |
 1241|   187k|                _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b);
 1242|   187k|                _mm_storeu_si128((__m128i *)(pi2_temp + 8), res_r1_t1_8x16b);
 1243|       |
 1244|   187k|                ht_tmp--;
 1245|   187k|                pu1_src += src_strd;
 1246|   187k|                pi2_temp += 16;
 1247|   187k|            }
 1248|   187k|            while(ht_tmp > 0);
  ------------------
  |  Branch (1248:19): [True: 176k, False: 10.5k]
  ------------------
 1249|  10.5k|        }
 1250|       |
 1251|  10.5k|        pi2_temp = (WORD16 *)pu1_tmp;
 1252|       |
 1253|       |        // Vertical 6-tap filtering
 1254|  10.5k|        {
 1255|  10.5k|            WORD16 *pi2_temp2;
 1256|  10.5k|            UWORD8 *pu1_dst2;
 1257|  10.5k|            WORD32 ht_tmp;
 1258|       |
 1259|  10.5k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, src_r4_8x16b;
 1260|  10.5k|            __m128i src_r5_8x16b, src_r6_8x16b;
 1261|  10.5k|            __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
 1262|       |
 1263|  10.5k|            __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 1264|  10.5k|            __m128i res_c0_4x32b, res_c1_4x32b;
 1265|  10.5k|            __m128i res_8x16b, res_16x8b;
 1266|       |
 1267|  10.5k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 1268|  10.5k|            __m128i const_val512_4x32b;
 1269|       |
 1270|  10.5k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 1271|  10.5k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 1272|  10.5k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 1273|       |
 1274|  10.5k|            const_val512_4x32b = _mm_set1_epi32(512);
 1275|       |
 1276|  10.5k|            pi2_temp2 = pi2_temp + 8;
 1277|  10.5k|            pu1_dst2 = pu1_dst + 8;
 1278|  10.5k|            ht_tmp = ht;
 1279|       |
 1280|       |            /**********************************************************/
 1281|       |            /*     Do first height x 8 block                          */
 1282|       |            /**********************************************************/
 1283|  10.5k|            src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
 1284|  10.5k|            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16));
 1285|  10.5k|            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 32));
 1286|  10.5k|            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 48));
 1287|  10.5k|            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 64));
 1288|  10.5k|            pi2_temp += 80;
 1289|       |
 1290|  10.5k|            do
 1291|  67.2k|            {
 1292|  67.2k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
 1293|  67.2k|                src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16));
 1294|       |
 1295|  67.2k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 1296|  67.2k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 1297|  67.2k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 1298|       |
 1299|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1300|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1301|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1302|       |
 1303|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1304|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1305|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1306|  67.2k|                res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1307|       |
 1308|  67.2k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 1309|  67.2k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 1310|  67.2k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 1311|       |
 1312|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1313|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1314|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1315|       |
 1316|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1317|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1318|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1319|  67.2k|                res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1320|       |
 1321|  67.2k|                res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
 1322|  67.2k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 1323|       |
 1324|  67.2k|                _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
 1325|       |
 1326|  67.2k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
 1327|  67.2k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
 1328|  67.2k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
 1329|       |
 1330|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1331|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1332|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1333|       |
 1334|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1335|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1336|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1337|  67.2k|                res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1338|       |
 1339|  67.2k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
 1340|  67.2k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
 1341|  67.2k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
 1342|       |
 1343|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1344|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1345|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1346|       |
 1347|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1348|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1349|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1350|  67.2k|                res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1351|       |
 1352|  67.2k|                res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
 1353|  67.2k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 1354|       |
 1355|  67.2k|                _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
 1356|       |
 1357|  67.2k|                src_r0_8x16b = src_r2_8x16b;
 1358|  67.2k|                src_r1_8x16b = src_r3_8x16b;
 1359|  67.2k|                src_r2_8x16b = src_r4_8x16b;
 1360|  67.2k|                src_r3_8x16b = src_r5_8x16b;
 1361|  67.2k|                src_r4_8x16b = src_r6_8x16b;
 1362|       |
 1363|  67.2k|                ht_tmp -= 2;
 1364|  67.2k|                pi2_temp += 32;
 1365|  67.2k|                pu1_dst += dst_strd << 1;
 1366|  67.2k|            }
 1367|  67.2k|            while(ht_tmp > 0);
  ------------------
  |  Branch (1367:19): [True: 56.7k, False: 10.5k]
  ------------------
 1368|       |
 1369|       |            /**********************************************************/
 1370|       |            /*     Do second ht x 8 block                          */
 1371|       |            /*     Do second height x 8 block                         */
 1372|  10.5k|            src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp2);
 1373|  10.5k|            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
 1374|  10.5k|            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32));
 1375|  10.5k|            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 48));
 1376|  10.5k|            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 64));
 1377|  10.5k|            pi2_temp2 += 80;
 1378|       |
 1379|  10.5k|            do
 1380|  67.2k|            {
 1381|  67.2k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp2);
 1382|  67.2k|                src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
 1383|       |
 1384|  67.2k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 1385|  67.2k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 1386|  67.2k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 1387|       |
 1388|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1389|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1390|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1391|       |
 1392|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1393|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1394|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1395|  67.2k|                res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1396|       |
 1397|  67.2k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 1398|  67.2k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 1399|  67.2k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 1400|       |
 1401|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1402|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1403|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1404|       |
 1405|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1406|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1407|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1408|  67.2k|                res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1409|       |
 1410|  67.2k|                res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
 1411|  67.2k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 1412|       |
 1413|  67.2k|                _mm_storel_epi64((__m128i *)pu1_dst2, res_16x8b);
 1414|       |
 1415|  67.2k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
 1416|  67.2k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
 1417|  67.2k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
 1418|       |
 1419|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1420|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1421|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1422|       |
 1423|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1424|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1425|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1426|  67.2k|                res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1427|       |
 1428|  67.2k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
 1429|  67.2k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
 1430|  67.2k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
 1431|       |
 1432|  67.2k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 1433|  67.2k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 1434|  67.2k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 1435|       |
 1436|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 1437|  67.2k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 1438|  67.2k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 1439|  67.2k|                res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 1440|       |
 1441|  67.2k|                res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
 1442|  67.2k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 1443|       |
 1444|  67.2k|                _mm_storel_epi64((__m128i *)(pu1_dst2 + dst_strd), res_16x8b);
 1445|       |
 1446|  67.2k|                src_r0_8x16b = src_r2_8x16b;
 1447|  67.2k|                src_r1_8x16b = src_r3_8x16b;
 1448|  67.2k|                src_r2_8x16b = src_r4_8x16b;
 1449|  67.2k|                src_r3_8x16b = src_r5_8x16b;
 1450|  67.2k|                src_r4_8x16b = src_r6_8x16b;
 1451|       |
 1452|  67.2k|                ht -= 2;
 1453|  67.2k|                pi2_temp2 += 32;
 1454|  67.2k|                pu1_dst2 += dst_strd << 1;
 1455|  67.2k|            }
 1456|  67.2k|            while(ht > 0);
  ------------------
  |  Branch (1456:19): [True: 56.7k, False: 10.5k]
  ------------------
 1457|  10.5k|        }
 1458|  10.5k|    }
 1459|  33.3k|}
ih264_inter_pred_luma_horz_qpel_ssse3:
 1499|   105k|{
 1500|   105k|    WORD32 x_offset;
 1501|   105k|    UWORD8 *pu1_pred1;
 1502|       |
 1503|   105k|    __m128i src_r0_16x8b, src_r1_16x8b;
 1504|   105k|    __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
 1505|   105k|    __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 1506|   105k|    __m128i const_val16_8x16b;
 1507|       |
 1508|   105k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|   105k|#define UNUSED(x) ((void)(x))
  ------------------
 1509|       |
 1510|   105k|    x_offset = dydx & 3;
 1511|       |
 1512|   105k|    coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 1513|   105k|    coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 1514|   105k|    coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 1515|       |                                                 //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
 1516|   105k|    pu1_pred1 = pu1_src + (x_offset >> 1);
 1517|       |
 1518|   105k|    const_val16_8x16b = _mm_set1_epi16(16);
 1519|       |
 1520|   105k|    pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
 1521|       |
 1522|   105k|    if(wd == 4)
  ------------------
  |  Branch (1522:8): [True: 14.6k, False: 90.8k]
  ------------------
 1523|  14.6k|    {
 1524|  14.6k|        __m128i src_r0r1_16x8b;
 1525|       |
 1526|  14.6k|        __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
 1527|  14.6k|        __m128i res_r0r1_16x8b;
 1528|       |
 1529|       |        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 1530|       |        //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 1531|       |
 1532|  14.6k|        do
 1533|  47.7k|        {
 1534|  47.7k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                         //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 1535|  47.7k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));            //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 1536|       |
 1537|  47.7k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                         //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 1538|  47.7k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                         //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 1539|       |
 1540|  47.7k|            src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);           //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 1541|  47.7k|            src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);           //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 1542|       |
 1543|  47.7k|            src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);            //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
 1544|  47.7k|            res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);      //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 1545|       |                                                                                        //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 1546|       |
 1547|  47.7k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                             //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0
 1548|  47.7k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                             //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0
 1549|       |
 1550|  47.7k|            src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);            //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
 1551|  47.7k|            res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff2_3_16x8b);      //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 1552|       |                                                                                        //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
 1553|       |
 1554|  47.7k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                             //a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0  0  0  0  0
 1555|  47.7k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                             //b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0  0  0  0  0
 1556|       |
 1557|  47.7k|            src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);            //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
 1558|  47.7k|            res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff4_5_16x8b);      //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 1559|       |                                                                                        //b4*c4+b5*c5 b5*c4+b6*c5 b4*c6+b7*c5 b7*c4+b8*c5
 1560|  47.7k|            src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1);
 1561|  47.7k|            src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd));
 1562|       |
 1563|  47.7k|            res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
 1564|  47.7k|            res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b);
 1565|  47.7k|            res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b);    //a0*c0+a1*c1+a2*c2+a3*c3+a4*a4+a5*c5 + 16;
 1566|       |                                                                                        //a1*c0+a2*c1+a2*c2+a3*c3+a5*a4+a6*c5 + 16;
 1567|       |                                                                                        //a2*c0+a3*c1+a4*c2+a5*c3+a6*a4+a7*c5 + 16;
 1568|       |                                                                                        //a3*c0+a4*c1+a5*c2+a6*c3+a6*a4+a8*c5 + 16;
 1569|       |                                                                                        //b0*c0+b1*c1+b2*c2+b3*c3+b4*b4+b5*c5 + 16;
 1570|       |                                                                                        //b1*c0+b2*c1+b2*c2+b3*c3+b5*b4+b6*c5 + 16;
 1571|       |                                                                                        //b2*c0+b3*c1+b4*c2+b5*c3+b6*b4+b7*c5 + 16;
 1572|       |                                                                                        //b3*c0+b4*c1+b5*c2+b6*c3+b6*b4+b8*c5 + 16;
 1573|  47.7k|            src_r0r1_16x8b = _mm_unpacklo_epi32(src_r0_16x8b,src_r1_16x8b);
 1574|       |
 1575|  47.7k|            res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5);                   //shifting right by 5 bits.
 1576|       |
 1577|  47.7k|            res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b, res_r0r1_t1_8x16b);
 1578|  47.7k|            res_r0r1_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_r0r1_16x8b);              //computing q-pel
 1579|       |
 1580|  47.7k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
 1581|  47.7k|            res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);
 1582|  47.7k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
 1583|       |
 1584|  47.7k|            ht -= 2;
 1585|  47.7k|            pu1_src += src_strd << 1;
 1586|  47.7k|            pu1_pred1 += src_strd << 1;
 1587|  47.7k|            pu1_dst += dst_strd << 1;
 1588|  47.7k|        }
 1589|  47.7k|        while(ht > 0);
  ------------------
  |  Branch (1589:15): [True: 33.0k, False: 14.6k]
  ------------------
 1590|  14.6k|    }
 1591|  90.8k|    else if(wd == 8)
  ------------------
  |  Branch (1591:13): [True: 41.4k, False: 49.4k]
  ------------------
 1592|  41.4k|    {
 1593|  41.4k|        __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
 1594|       |
 1595|  41.4k|        __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
 1596|  41.4k|        __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
 1597|  41.4k|        __m128i res_r0_16x8b, res_r1_16x8b;
 1598|       |
 1599|       |        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 1600|       |        //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 1601|       |
 1602|  41.4k|        do
 1603|   191k|        {
 1604|   191k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                      //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 1605|   191k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));         //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 1606|       |
 1607|   191k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                      //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 1608|   191k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                      //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 1609|       |
 1610|   191k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 1611|   191k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 1612|       |
 1613|   191k|            res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);    //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 1614|       |                                                                                     //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 1615|   191k|            res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);    //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 1616|       |                                                                                     //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
 1617|       |
 1618|   191k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 1619|   191k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
 1620|       |
 1621|   191k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 1622|   191k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
 1623|       |
 1624|   191k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 1625|   191k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
 1626|       |
 1627|   191k|            res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);    //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 1628|       |                                                                                     //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 1629|   191k|            res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);    //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
 1630|       |                                                                                     //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
 1631|       |
 1632|   191k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 1633|   191k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 1634|       |
 1635|   191k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 1636|   191k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 1637|       |
 1638|   191k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 1639|   191k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 1640|       |
 1641|   191k|            res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);    //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 1642|       |                                                                                     //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 1643|   191k|            res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);    //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 1644|       |                                                                                     //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 1645|   191k|            src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1);
 1646|   191k|            src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd));
 1647|       |
 1648|   191k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 1649|   191k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 1650|   191k|            res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
 1651|   191k|            res_r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r1_t3_8x16b);
 1652|   191k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 1653|   191k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 1654|       |
 1655|   191k|            res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);
 1656|   191k|            res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);                    //shifting right by 5 bits.
 1657|       |
 1658|   191k|            res_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b);
 1659|   191k|            res_r1_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b);
 1660|       |
 1661|   191k|            res_r0_16x8b = _mm_avg_epu8(src_r0_16x8b, res_r0_16x8b);
 1662|   191k|            res_r1_16x8b = _mm_avg_epu8(src_r1_16x8b, res_r1_16x8b);                 //computing q-pel
 1663|       |
 1664|   191k|            _mm_storel_epi64((__m128i *)pu1_dst, res_r0_16x8b);
 1665|   191k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_r1_16x8b);
 1666|       |
 1667|   191k|            ht -= 2;
 1668|   191k|            pu1_src += src_strd << 1;
 1669|   191k|            pu1_pred1 += src_strd << 1;
 1670|   191k|            pu1_dst += dst_strd << 1;
 1671|   191k|        }
 1672|   191k|        while(ht > 0);
  ------------------
  |  Branch (1672:15): [True: 149k, False: 41.4k]
  ------------------
 1673|  41.4k|    }
 1674|  49.4k|    else // wd == 16
 1675|  49.4k|    {
 1676|  49.4k|        __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
 1677|       |
 1678|  49.4k|        __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
 1679|  49.4k|        __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
 1680|  49.4k|        __m128i res_16x8b;
 1681|       |
 1682|       |        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 1683|       |        //Row0 :                         b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 1684|       |        //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
 1685|       |
 1686|  49.4k|        do
 1687|   647k|        {
 1688|   647k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                      //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 1689|   647k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));                //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 1690|       |
 1691|   647k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                      //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 1692|   647k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                      //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 1693|       |
 1694|   647k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 1695|   647k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 1696|       |
 1697|   647k|            res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);    //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 1698|       |                                                                                     //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 1699|   647k|            res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);    //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 1700|       |                                                                                     //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
 1701|       |
 1702|   647k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 1703|   647k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
 1704|       |
 1705|   647k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 1706|   647k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
 1707|       |
 1708|   647k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 1709|   647k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
 1710|       |
 1711|   647k|            res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);    //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 1712|       |                                                                                     //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 1713|   647k|            res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);    //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
 1714|       |                                                                                     //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
 1715|       |
 1716|   647k|            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 1717|   647k|            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 1718|       |
 1719|   647k|            src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 1720|   647k|            src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 1721|       |
 1722|   647k|            src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 1723|   647k|            src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 1724|       |
 1725|   647k|            res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);    //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 1726|       |                                                                                     //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 1727|   647k|            res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);    //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 1728|       |                                                                                     //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 1729|   647k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_pred1);
 1730|       |
 1731|   647k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 1732|   647k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 1733|   647k|            res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
 1734|   647k|            res_r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r1_t3_8x16b);
 1735|   647k|            res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 1736|   647k|            res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 1737|       |
 1738|   647k|            res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);
 1739|   647k|            res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);                    //shifting right by 5 bits
 1740|       |
 1741|   647k|            res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b);
 1742|   647k|            res_16x8b = _mm_avg_epu8(src_r0_16x8b, res_16x8b);                       //computing q-pel
 1743|       |
 1744|   647k|            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
 1745|       |
 1746|   647k|            ht--;
 1747|   647k|            pu1_src += src_strd;
 1748|   647k|            pu1_pred1 += src_strd;
 1749|   647k|            pu1_dst += dst_strd;
 1750|   647k|        }
 1751|   647k|        while(ht > 0);
  ------------------
  |  Branch (1751:15): [True: 597k, False: 49.4k]
  ------------------
 1752|  49.4k|    }
 1753|   105k|}
ih264_inter_pred_luma_vert_qpel_ssse3:
 1793|   105k|{
 1794|   105k|    WORD32 y_offset;
 1795|   105k|    UWORD8 *pu1_pred1;
 1796|       |
 1797|       |
 1798|   105k|    __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
 1799|   105k|    __m128i src_r5_16x8b, src_r6_16x8b;
 1800|   105k|    __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
 1801|   105k|    __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
 1802|       |
 1803|   105k|    __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 1804|   105k|    __m128i const_val16_8x16b;
 1805|       |
 1806|   105k|    UNUSED(pu1_tmp);
  ------------------
  |  |   45|   105k|#define UNUSED(x) ((void)(x))
  ------------------
 1807|   105k|    y_offset = dydx & 0xf;
 1808|       |
 1809|   105k|    coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 1810|   105k|    coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 1811|   105k|    coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 1812|       |                                                 //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
 1813|       |
 1814|   105k|    pu1_pred1 = pu1_src + (y_offset >> 3) * src_strd;
 1815|       |
 1816|   105k|    const_val16_8x16b = _mm_set1_epi16(16);
 1817|       |
 1818|   105k|    pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3])
 1819|       |
 1820|   105k|    if(wd == 4)
  ------------------
  |  Branch (1820:8): [True: 7.61k, False: 97.4k]
  ------------------
 1821|  7.61k|    {
 1822|       |        //Epilogue: Load all the pred rows except sixth and seventh row
 1823|       |        //          for the first and second row processing.
 1824|  7.61k|        src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1825|  7.61k|        pu1_src += src_strd;
 1826|  7.61k|        src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1827|  7.61k|        pu1_src += src_strd;
 1828|  7.61k|        src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1829|  7.61k|        pu1_src += src_strd;
 1830|  7.61k|        src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1831|  7.61k|        pu1_src += src_strd;
 1832|  7.61k|        src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1833|  7.61k|        pu1_src += src_strd;
 1834|       |
 1835|  7.61k|        src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b);
 1836|  7.61k|        src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
 1837|  7.61k|        src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
 1838|  7.61k|        src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b);
 1839|       |
 1840|  7.61k|        do
 1841|  26.3k|        {
 1842|  26.3k|            src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1843|  26.3k|            src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
 1844|       |
 1845|  26.3k|            src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b);
 1846|  26.3k|            src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b);
 1847|       |
 1848|  26.3k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 1849|  26.3k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 1850|  26.3k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 1851|       |
 1852|  26.3k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 1853|  26.3k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 1854|  26.3k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 1855|       |
 1856|  26.3k|            src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1);
 1857|  26.3k|            src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd));
 1858|       |
 1859|  26.3k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 1860|  26.3k|            res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 1861|  26.3k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 1862|       |
 1863|  26.3k|            src_r0r1_16x8b = _mm_unpacklo_epi32(src_r0_16x8b,src_r1_16x8b);
 1864|       |
 1865|  26.3k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 1866|       |
 1867|  26.3k|            res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
 1868|       |
 1869|  26.3k|            res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
 1870|       |
 1871|  26.3k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
 1872|  26.3k|            res_16x8b = _mm_srli_si128(res_16x8b, 4);
 1873|  26.3k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
 1874|       |
 1875|  26.3k|            src_r0_16x8b = src_r2_16x8b;
 1876|  26.3k|            src_r1_16x8b = src_r3_16x8b;
 1877|  26.3k|            src_r2_16x8b = src_r4_16x8b;
 1878|  26.3k|            src_r3_16x8b = src_r5_16x8b;
 1879|  26.3k|            src_r4_16x8b = src_r6_16x8b;
 1880|       |
 1881|  26.3k|            ht -= 2;
 1882|  26.3k|            pu1_src += src_strd << 1;
 1883|  26.3k|            pu1_pred1 += src_strd << 1;
 1884|  26.3k|            pu1_dst += dst_strd << 1;
 1885|  26.3k|        }
 1886|  26.3k|        while(ht > 0);
  ------------------
  |  Branch (1886:15): [True: 18.7k, False: 7.61k]
  ------------------
 1887|  7.61k|    }
 1888|       |
 1889|  97.4k|    else if(wd == 8)
  ------------------
  |  Branch (1889:13): [True: 52.6k, False: 44.8k]
  ------------------
 1890|  52.6k|    {
 1891|       |        //Epilogue: Load all the pred rows except sixth and seventh row
 1892|       |        //          for the first and second row processing.
 1893|  52.6k|        src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1894|  52.6k|        pu1_src += src_strd;
 1895|  52.6k|        src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1896|  52.6k|        pu1_src += src_strd;
 1897|  52.6k|        src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1898|  52.6k|        pu1_src += src_strd;
 1899|  52.6k|        src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1900|  52.6k|        pu1_src += src_strd;
 1901|  52.6k|        src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1902|  52.6k|        pu1_src += src_strd;
 1903|       |
 1904|  52.6k|        src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);
 1905|  52.6k|        src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b);
 1906|  52.6k|        src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b);
 1907|  52.6k|        src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b);
 1908|       |
 1909|  52.6k|        do
 1910|   255k|        {
 1911|   255k|            src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
 1912|   255k|            src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
 1913|       |
 1914|   255k|            src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b);
 1915|   255k|            src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b);
 1916|       |
 1917|   255k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 1918|   255k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 1919|   255k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 1920|       |
 1921|   255k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 1922|   255k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 1923|   255k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 1924|       |
 1925|   255k|            src_r0r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1);
 1926|       |
 1927|   255k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 1928|   255k|            res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 1929|   255k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 1930|       |
 1931|   255k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 1932|       |
 1933|   255k|            res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
 1934|   255k|            res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
 1935|       |
 1936|   255k|            _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
 1937|       |
 1938|   255k|            src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
 1939|   255k|            src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
 1940|   255k|            src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
 1941|       |
 1942|   255k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 1943|   255k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 1944|   255k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 1945|       |
 1946|   255k|            src_r0r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd));
 1947|       |
 1948|   255k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 1949|   255k|            res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 1950|   255k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 1951|       |
 1952|   255k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 1953|       |
 1954|   255k|            res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
 1955|   255k|            res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
 1956|       |
 1957|   255k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
 1958|       |
 1959|   255k|            src_r0_16x8b = src_r2_16x8b;
 1960|   255k|            src_r1_16x8b = src_r3_16x8b;
 1961|   255k|            src_r2_16x8b = src_r4_16x8b;
 1962|   255k|            src_r3_16x8b = src_r5_16x8b;
 1963|   255k|            src_r4_16x8b = src_r6_16x8b;
 1964|       |
 1965|   255k|            ht -= 2;
 1966|   255k|            pu1_src += src_strd << 1;
 1967|   255k|            pu1_pred1 += src_strd << 1;
 1968|   255k|            pu1_dst += dst_strd << 1;
 1969|   255k|        }
 1970|   255k|        while(ht > 0);
  ------------------
  |  Branch (1970:15): [True: 202k, False: 52.6k]
  ------------------
 1971|  52.6k|    }
 1972|  44.8k|    else // wd == 16
 1973|  44.8k|    {
 1974|  44.8k|        __m128i res_t0_8x16b;
 1975|       |
 1976|       |        //Epilogue: Load all the pred rows except sixth and seventh row
 1977|       |        //          for the first and second row processing.
 1978|  44.8k|        src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 1979|  44.8k|        pu1_src += src_strd;
 1980|  44.8k|        src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 1981|  44.8k|        pu1_src += src_strd;
 1982|  44.8k|        src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 1983|  44.8k|        pu1_src += src_strd;
 1984|  44.8k|        src_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 1985|  44.8k|        pu1_src += src_strd;
 1986|  44.8k|        src_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 1987|  44.8k|        pu1_src += src_strd;
 1988|       |
 1989|  44.8k|        do
 1990|   320k|        {
 1991|   320k|            src_r5_16x8b  = _mm_loadu_si128((__m128i *)pu1_src);
 1992|   320k|            src_r6_16x8b  = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
 1993|       |
 1994|   320k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 1995|   320k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 1996|   320k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 1997|       |
 1998|   320k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 1999|   320k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2000|   320k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2001|       |
 2002|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2003|   320k|            res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2004|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2005|       |
 2006|   320k|            res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2007|       |
 2008|   320k|            src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
 2009|   320k|            src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
 2010|   320k|            src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
 2011|       |
 2012|   320k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2013|   320k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2014|   320k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2015|       |
 2016|   320k|            src_r0r1_16x8b = _mm_loadu_si128((__m128i *)pu1_pred1);
 2017|       |
 2018|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2019|   320k|            res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2020|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2021|       |
 2022|   320k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2023|       |
 2024|   320k|            res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
 2025|   320k|            res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
 2026|       |
 2027|   320k|            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
 2028|       |
 2029|   320k|            src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
 2030|   320k|            src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
 2031|   320k|            src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
 2032|       |
 2033|   320k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2034|   320k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2035|   320k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2036|       |
 2037|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2038|   320k|            res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2039|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2040|       |
 2041|   320k|            res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2042|       |
 2043|   320k|            src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
 2044|   320k|            src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
 2045|   320k|            src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
 2046|       |
 2047|   320k|            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2048|   320k|            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2049|   320k|            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2050|       |
 2051|   320k|            src_r0r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred1 + src_strd));
 2052|       |
 2053|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2054|   320k|            res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2055|   320k|            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2056|       |
 2057|   320k|            res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2058|       |
 2059|   320k|            res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
 2060|   320k|            res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
 2061|       |
 2062|   320k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res_16x8b);
 2063|       |
 2064|   320k|            src_r0_16x8b = src_r2_16x8b;
 2065|   320k|            src_r1_16x8b = src_r3_16x8b;
 2066|   320k|            src_r2_16x8b = src_r4_16x8b;
 2067|   320k|            src_r3_16x8b = src_r5_16x8b;
 2068|   320k|            src_r4_16x8b = src_r6_16x8b;
 2069|       |
 2070|   320k|            ht -= 2;
 2071|   320k|            pu1_src += src_strd << 1;
 2072|   320k|            pu1_pred1 += src_strd << 1;
 2073|   320k|            pu1_dst += dst_strd << 1;
 2074|   320k|        }
 2075|   320k|        while(ht > 0);
  ------------------
  |  Branch (2075:15): [True: 275k, False: 44.8k]
  ------------------
 2076|  44.8k|    }
 2077|   105k|}
ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3:
 2118|  92.5k|{
 2119|  92.5k|    WORD32 ht_temp;
 2120|  92.5k|    UWORD8 *pu1_pred_vert,*pu1_pred_horiz;
 2121|  92.5k|    UWORD8 *pu1_tmp1, *pu1_tmp2;
 2122|  92.5k|    WORD32 x_offset, y_offset;
 2123|       |
 2124|  92.5k|    __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 2125|  92.5k|    __m128i const_val16_8x16b;
 2126|       |
 2127|  92.5k|    pu1_tmp1 = pu1_tmp;
 2128|       |
 2129|  92.5k|    dydx &= 0xf;
 2130|  92.5k|    ht_temp = ht;
 2131|  92.5k|    x_offset = dydx & 0x3;
 2132|  92.5k|    y_offset = dydx >> 2;
 2133|  92.5k|    pu1_tmp2 = pu1_tmp1;
 2134|       |
 2135|  92.5k|    pu1_pred_vert  = pu1_src + (x_offset >> 1) - 2*src_strd;
 2136|  92.5k|    pu1_pred_horiz = pu1_src + (y_offset >> 1) * src_strd - 2;
 2137|       |    //the filter input starts from x[-2] (till x[3])
 2138|       |
 2139|  92.5k|    coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 2140|  92.5k|    coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 2141|  92.5k|    coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 2142|       |                                                  //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
 2143|  92.5k|    const_val16_8x16b = _mm_set1_epi16(16);
 2144|       |
 2145|  92.5k|    if(wd == 4)
  ------------------
  |  Branch (2145:8): [True: 7.59k, False: 84.9k]
  ------------------
 2146|  7.59k|    {
 2147|       |        //vertical q-pel filter
 2148|  7.59k|        {
 2149|  7.59k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
 2150|  7.59k|            __m128i src_r5_16x8b, src_r6_16x8b;
 2151|  7.59k|            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
 2152|       |
 2153|  7.59k|            __m128i res_r0r1_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
 2154|       |
 2155|       |            //prologue: Load all the pred rows except the sixth and seventh rows for the
 2156|       |            //first and second row processing.
 2157|  7.59k|            src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2158|  7.59k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2159|       |
 2160|  7.59k|            src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2161|  7.59k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2162|  7.59k|            src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b);
 2163|       |
 2164|  7.59k|            src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2165|  7.59k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2166|  7.59k|            src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
 2167|       |
 2168|  7.59k|            src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2169|  7.59k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2170|  7.59k|            src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
 2171|       |
 2172|  7.59k|            src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2173|  7.59k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2174|  7.59k|            src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b);
 2175|       |
 2176|       |            //Core Loop: Process all the rows.
 2177|  7.59k|            do
 2178|  25.1k|            {
 2179|  25.1k|                src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2180|  25.1k|                src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b);
 2181|       |
 2182|  25.1k|                src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert + src_strd));
 2183|  25.1k|                src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b);
 2184|       |
 2185|  25.1k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 2186|  25.1k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 2187|  25.1k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 2188|       |
 2189|  25.1k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2190|  25.1k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2191|  25.1k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2192|       |
 2193|  25.1k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2194|  25.1k|                res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2195|  25.1k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2196|       |
 2197|  25.1k|                res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2198|  25.1k|                res_r0r1_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
 2199|       |
 2200|  25.1k|                _mm_storel_epi64((__m128i *)pu1_tmp1, res_r0r1_16x8b);
 2201|       |
 2202|  25.1k|                src_r0_16x8b = src_r2_16x8b;
 2203|  25.1k|                src_r1_16x8b = src_r3_16x8b;
 2204|  25.1k|                src_r2_16x8b = src_r4_16x8b;
 2205|  25.1k|                src_r3_16x8b = src_r5_16x8b;
 2206|  25.1k|                src_r4_16x8b = src_r6_16x8b;
 2207|       |
 2208|  25.1k|                ht_temp -= 2;
 2209|  25.1k|                pu1_pred_vert += src_strd << 1;
 2210|  25.1k|                pu1_tmp1 += 8;
 2211|  25.1k|            }
 2212|  25.1k|            while(ht_temp > 0);
  ------------------
  |  Branch (2212:19): [True: 17.5k, False: 7.59k]
  ------------------
 2213|  7.59k|        }
 2214|       |
 2215|       |        //horizontal q-pel filter
 2216|  7.59k|        {
 2217|  7.59k|            __m128i src_r0_16x8b, src_r1_16x8b;
 2218|  7.59k|            __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
 2219|  7.59k|            __m128i src_r0r1_vpel_16x8b, src_r0r1_t1_16x8b;
 2220|       |
 2221|  7.59k|            __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
 2222|  7.59k|            __m128i res_r0r1_16x8b;
 2223|       |
 2224|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 2225|       |            //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 2226|       |
 2227|  7.59k|            do
 2228|  25.1k|            {
 2229|  25.1k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_pred_horiz);                  //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 2230|  25.1k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + src_strd));     //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 2231|       |
 2232|  25.1k|                src_r0r1_vpel_16x8b = _mm_loadl_epi64((__m128i *)pu1_tmp2);
 2233|       |
 2234|  25.1k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                          //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 2235|  25.1k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                          //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 2236|       |
 2237|  25.1k|                src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);            //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 2238|  25.1k|                src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);            //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 2239|       |
 2240|  25.1k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);          //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
 2241|  25.1k|                res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b);    //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 2242|       |                                                                                             //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 2243|       |
 2244|  25.1k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                              //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0
 2245|  25.1k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                              //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0
 2246|       |
 2247|  25.1k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);          //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
 2248|  25.1k|                res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b);    //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 2249|       |                                                                                             //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
 2250|       |
 2251|  25.1k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                              //a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0  0  0  0  0
 2252|  25.1k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                              //b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0  0  0  0  0
 2253|       |
 2254|  25.1k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);          //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
 2255|  25.1k|                res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b);    //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 2256|       |                                                                                             //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 2257|       |
 2258|  25.1k|                res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
 2259|  25.1k|                res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b);
 2260|  25.1k|                res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b);     //a0*c0+a1*c1+a2*c2+a3*c3+a4*c4+a5*c5 + 16;
 2261|       |                                                                                             //a1*c0+a2*c1+a3*c2+a4*c3+a5*c4+a6*c5 + 16;
 2262|       |                                                                                             //a2*c0+a3*c1+a4*c2+a5*c3+a6*c4+a7*c5 + 16;
 2263|       |                                                                                             //a3*c0+a4*c1+a5*c2+a6*c3+a7*c4+a8*c5 + 16;
 2264|       |                                                                                             //b0*c0+b1*c1+b2*c2+b3*c3+b4*c4+b5*c5 + 16;
 2265|       |                                                                                             //b1*c0+b2*c1+b3*c2+b4*c3+b5*c4+b6*c5 + 16;
 2266|       |                                                                                             //b2*c0+b3*c1+b4*c2+b5*c3+b6*c4+b7*c5 + 16;
 2267|       |                                                                                             //b3*c0+b4*c1+b5*c2+b6*c3+b7*c4+b8*c5 + 16;
 2268|       |
 2269|  25.1k|                res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5);                    //shifting right by 5 bits.
 2270|       |
 2271|  25.1k|                res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b,res_r0r1_t1_8x16b);
 2272|       |
 2273|  25.1k|                res_r0r1_16x8b = _mm_avg_epu8(res_r0r1_16x8b,src_r0r1_vpel_16x8b);
 2274|       |
 2275|  25.1k|                *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
 2276|  25.1k|                res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);
 2277|  25.1k|                *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
 2278|       |
 2279|  25.1k|                ht -= 2;
 2280|  25.1k|                pu1_pred_horiz += src_strd << 1;
 2281|  25.1k|                pu1_tmp2 += 8;
 2282|  25.1k|                pu1_dst += dst_strd << 1;
 2283|  25.1k|            }
 2284|  25.1k|            while(ht > 0);
  ------------------
  |  Branch (2284:19): [True: 17.5k, False: 7.59k]
  ------------------
 2285|  7.59k|        }
 2286|  7.59k|    }
 2287|  84.9k|    else if(wd == 8)
  ------------------
  |  Branch (2287:13): [True: 45.3k, False: 39.5k]
  ------------------
 2288|  45.3k|    {
 2289|       |        //vertical q-pel filter
 2290|  45.3k|        {
 2291|  45.3k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
 2292|  45.3k|            __m128i src_r4_16x8b, src_r5_16x8b, src_r6_16x8b;
 2293|  45.3k|            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
 2294|       |
 2295|  45.3k|            __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
 2296|       |
 2297|       |            //prologue: Load all the pred rows except the sixth and seventh rows for the
 2298|       |            //first and second row processing.
 2299|  45.3k|            src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2300|  45.3k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2301|       |
 2302|  45.3k|            src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2303|  45.3k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2304|  45.3k|            src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);
 2305|       |
 2306|  45.3k|            src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2307|  45.3k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2308|  45.3k|            src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b);
 2309|       |
 2310|  45.3k|            src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2311|  45.3k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2312|  45.3k|            src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b);
 2313|       |
 2314|  45.3k|            src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2315|  45.3k|            pu1_pred_vert = pu1_pred_vert + src_strd;
 2316|  45.3k|            src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b);
 2317|       |
 2318|       |            //Core Loop: Process all the rows.
 2319|  45.3k|            do
 2320|   236k|            {
 2321|   236k|                src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
 2322|   236k|                src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b);
 2323|       |
 2324|   236k|                src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert + src_strd));
 2325|   236k|                src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b);
 2326|       |
 2327|   236k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 2328|   236k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 2329|   236k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 2330|       |
 2331|   236k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2332|   236k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2333|   236k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2334|       |
 2335|   236k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2336|   236k|                res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2337|   236k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2338|       |
 2339|   236k|                res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2340|   236k|                res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
 2341|       |
 2342|   236k|                _mm_storel_epi64((__m128i *)(pu1_tmp1), res_16x8b);
 2343|       |
 2344|   236k|                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
 2345|   236k|                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
 2346|   236k|                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
 2347|       |
 2348|   236k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2349|   236k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2350|   236k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2351|       |
 2352|   236k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2353|   236k|                res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2354|   236k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2355|       |
 2356|   236k|                res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2357|   236k|                res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
 2358|       |
 2359|   236k|                _mm_storel_epi64((__m128i *)(pu1_tmp1 + 8), res_16x8b);
 2360|       |
 2361|   236k|                src_r0_16x8b = src_r2_16x8b;
 2362|   236k|                src_r1_16x8b = src_r3_16x8b;
 2363|   236k|                src_r2_16x8b = src_r4_16x8b;
 2364|   236k|                src_r3_16x8b = src_r5_16x8b;
 2365|   236k|                src_r4_16x8b = src_r6_16x8b;
 2366|       |
 2367|   236k|                ht_temp -= 2;
 2368|   236k|                pu1_pred_vert += src_strd << 1;
 2369|   236k|                pu1_tmp1 += 16;
 2370|   236k|            }
 2371|   236k|            while(ht_temp > 0);
  ------------------
  |  Branch (2371:19): [True: 191k, False: 45.3k]
  ------------------
 2372|  45.3k|        }
 2373|       |
 2374|       |        //horizontal q-pel filter
 2375|  45.3k|        {
 2376|  45.3k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
 2377|  45.3k|            __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
 2378|  45.3k|            __m128i src_r0_vpel_16x8b, src_r1_vpel_16x8b;
 2379|       |
 2380|  45.3k|            __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
 2381|  45.3k|            __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b, res_16x8b;
 2382|       |
 2383|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 2384|       |            //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 2385|       |
 2386|  45.3k|            do
 2387|   236k|            {
 2388|   236k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz));               //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 2389|   236k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + src_strd));    //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 2390|       |
 2391|   236k|                src_r0_vpel_16x8b = _mm_loadl_epi64((__m128i *)(pu1_tmp2));                //a2 a3 a4 a5 a6 a7 a8....a15 0 or
 2392|       |                                                                                           //a3 a4 a5 a6 a7 a8 a9....a15 0
 2393|   236k|                src_r1_vpel_16x8b = _mm_loadl_epi64((__m128i *)(pu1_tmp2 + 8));
 2394|       |                                                                                           //b2 b3 b4 b5 b6 b7 b8....b15 0 or
 2395|       |                                                                                           //b3 b4 b5 b6 b7 b8 b9....b15 0
 2396|       |
 2397|   236k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                        //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 2398|   236k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                        //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 2399|       |
 2400|   236k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);       //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 2401|   236k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);       //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 2402|       |
 2403|   236k|                res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);      //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 2404|       |                                                                                           //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 2405|   236k|                res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);      //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 2406|       |                                                                                           //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
 2407|       |
 2408|   236k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                            //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 2409|   236k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                            //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
 2410|       |
 2411|   236k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                    //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 2412|   236k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                    //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
 2413|       |
 2414|   236k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);       //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 2415|   236k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);       //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
 2416|       |
 2417|   236k|                res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);      //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 2418|       |                                                                                           //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 2419|   236k|                res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);      //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
 2420|       |                                                                                           //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
 2421|       |
 2422|   236k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                            //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 2423|   236k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                            //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 2424|       |
 2425|   236k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                    //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 2426|   236k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                    //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 2427|       |
 2428|   236k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);       //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 2429|   236k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);       //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 2430|       |
 2431|   236k|                res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);      //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 2432|       |                                                                                           //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 2433|   236k|                res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);      //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 2434|       |                                                                                           //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 2435|   236k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 2436|   236k|                res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
 2437|   236k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 2438|   236k|                res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);                      //shifting right by 5 bits.
 2439|       |
 2440|   236k|                res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b);
 2441|   236k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_r0_vpel_16x8b);
 2442|       |
 2443|   236k|                _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
 2444|       |
 2445|   236k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 2446|   236k|                res_r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r1_t3_8x16b);
 2447|   236k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 2448|   236k|                res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);                      //shifting right by 5 bits.
 2449|       |
 2450|   236k|                res_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b);
 2451|   236k|                res_16x8b = _mm_avg_epu8(res_16x8b,src_r1_vpel_16x8b);
 2452|       |
 2453|   236k|                _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
 2454|       |
 2455|   236k|                ht -= 2;
 2456|   236k|                pu1_pred_horiz += src_strd << 1;
 2457|   236k|                pu1_dst += dst_strd << 1;
 2458|   236k|                pu1_tmp2 += 16;
 2459|   236k|            }
 2460|   236k|            while(ht > 0);
  ------------------
  |  Branch (2460:19): [True: 191k, False: 45.3k]
  ------------------
 2461|  45.3k|        }
 2462|  45.3k|    }
 2463|  39.5k|    else // wd == 16
 2464|  39.5k|    {
 2465|       |        //vertical q-pel filter
 2466|  39.5k|        {
 2467|  39.5k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
 2468|  39.5k|            __m128i src_r4_16x8b, src_r5_16x8b, src_r6_16x8b;
 2469|  39.5k|            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
 2470|       |
 2471|  39.5k|            __m128i res_t0_8x16b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
 2472|  39.5k|            __m128i res_16x8b;
 2473|       |
 2474|       |            //prologue: Load all the pred rows except the sixth and seventh rows for the
 2475|       |            //first and second row processing.
 2476|  39.5k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
 2477|  39.5k|            pu1_pred_vert =  pu1_pred_vert + src_strd;
 2478|  39.5k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
 2479|  39.5k|            pu1_pred_vert =  pu1_pred_vert + src_strd;
 2480|  39.5k|            src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
 2481|  39.5k|            pu1_pred_vert =  pu1_pred_vert + src_strd;
 2482|  39.5k|            src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
 2483|  39.5k|            pu1_pred_vert =  pu1_pred_vert + src_strd;
 2484|  39.5k|            src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
 2485|  39.5k|            pu1_pred_vert =  pu1_pred_vert + src_strd;
 2486|       |
 2487|       |            //Core Loop: Process all the rows.
 2488|  39.5k|            do
 2489|   239k|            {
 2490|   239k|                src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
 2491|   239k|                src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert + src_strd));
 2492|       |
 2493|   239k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 2494|   239k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 2495|   239k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 2496|       |
 2497|   239k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2498|   239k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2499|   239k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2500|       |
 2501|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2502|   239k|                res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2503|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2504|   239k|                res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2505|       |
 2506|   239k|                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
 2507|   239k|                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
 2508|   239k|                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
 2509|       |
 2510|   239k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2511|   239k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2512|   239k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2513|       |
 2514|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2515|   239k|                res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2516|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2517|   239k|                res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2518|       |
 2519|   239k|                res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
 2520|       |
 2521|   239k|                _mm_storeu_si128((__m128i *)(pu1_tmp1), res_16x8b);
 2522|       |
 2523|   239k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
 2524|   239k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
 2525|   239k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
 2526|       |
 2527|   239k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2528|   239k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2529|   239k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2530|       |
 2531|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2532|   239k|                res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2533|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2534|   239k|                res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2535|       |
 2536|   239k|                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
 2537|   239k|                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
 2538|   239k|                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
 2539|       |
 2540|   239k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2541|   239k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2542|   239k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2543|       |
 2544|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2545|   239k|                res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
 2546|   239k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
 2547|   239k|                res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
 2548|       |
 2549|   239k|                res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
 2550|       |
 2551|   239k|                _mm_storeu_si128((__m128i *)(pu1_tmp1 + 16), res_16x8b);
 2552|       |
 2553|   239k|                src_r0_16x8b = src_r2_16x8b;
 2554|   239k|                src_r1_16x8b = src_r3_16x8b;
 2555|   239k|                src_r2_16x8b = src_r4_16x8b;
 2556|   239k|                src_r3_16x8b = src_r5_16x8b;
 2557|   239k|                src_r4_16x8b = src_r6_16x8b;
 2558|       |
 2559|   239k|                ht_temp -= 2;
 2560|   239k|                pu1_pred_vert += src_strd << 1;
 2561|   239k|                pu1_tmp1 += 32;
 2562|   239k|            }
 2563|   239k|            while(ht_temp > 0);
  ------------------
  |  Branch (2563:19): [True: 200k, False: 39.5k]
  ------------------
 2564|  39.5k|        }
 2565|       |        //horizontal q-pel filter
 2566|  39.5k|        {
 2567|  39.5k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
 2568|  39.5k|            __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
 2569|  39.5k|            __m128i src_vpel_16x8b;
 2570|       |
 2571|  39.5k|            __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
 2572|  39.5k|            __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
 2573|  39.5k|            __m128i res_16x8b;
 2574|       |
 2575|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 2576|       |            //Row0 :                         b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 2577|       |            //b0 is the same as a8. Similarly, the other bn pixels are the same as the a(n+8) pixels.
 2578|       |
 2579|  39.5k|            do
 2580|   479k|            {
 2581|   479k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz));             //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 2582|   479k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + 8));         //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 2583|   479k|                src_vpel_16x8b = _mm_loadu_si128((__m128i *)(pu1_tmp2));
 2584|       |
 2585|   479k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                      //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 2586|   479k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                      //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 2587|       |
 2588|   479k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 2589|   479k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 2590|       |
 2591|   479k|                res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);    //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 2592|       |                                                                                         //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 2593|   479k|                res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);    //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 2594|       |                                                                                         //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
 2595|       |
 2596|   479k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 2597|   479k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
 2598|       |
 2599|   479k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 2600|   479k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
 2601|       |
 2602|   479k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 2603|   479k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
 2604|       |
 2605|   479k|                res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);    //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 2606|       |                                                                                         //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 2607|   479k|                res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);    //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
 2608|       |                                                                                         //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
 2609|       |
 2610|   479k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 2611|   479k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 2612|       |
 2613|   479k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 2614|   479k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 2615|       |
 2616|   479k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 2617|   479k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 2618|       |
 2619|   479k|                res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);    //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 2620|       |                                                                                         //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 2621|   479k|                res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);    //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 2622|       |                                                                                         //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 2623|   479k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 2624|   479k|                res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
 2625|   479k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 2626|   479k|                res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);                    //shifting right by 5 bits.
 2627|       |
 2628|   479k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 2629|   479k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 2630|   479k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, const_val16_8x16b);
 2631|   479k|                res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);                    //shifting right by 5 bits.
 2632|       |
 2633|   479k|                res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b);
 2634|       |
 2635|   479k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_vpel_16x8b);
 2636|   479k|                _mm_storeu_si128((__m128i *)(pu1_dst), res_16x8b);
 2637|       |
 2638|   479k|                ht --;
 2639|   479k|                pu1_pred_horiz  += src_strd;
 2640|   479k|                pu1_dst += dst_strd;
 2641|   479k|                pu1_tmp2 += 16;
 2642|   479k|            }
 2643|   479k|            while(ht > 0);
  ------------------
  |  Branch (2643:19): [True: 440k, False: 39.5k]
  ------------------
 2644|  39.5k|        }
 2645|  39.5k|    }
 2646|  92.5k|}
ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3:
 2686|  33.9k|{
 2687|  33.9k|    WORD32 ht_temp;
 2688|  33.9k|    WORD32 x_offset;
 2689|  33.9k|    WORD32 off0,off1, off2, off3, off4, off5;
 2690|  33.9k|    WORD16 *pi2_temp1,*pi2_temp2,*pi2_temp3;
 2691|       |
 2692|  33.9k|    ht_temp = ht;
 2693|  33.9k|    x_offset = dydx & 0x3;
 2694|  33.9k|    pi2_temp1 = (WORD16 *)pu1_tmp;
 2695|  33.9k|    pi2_temp2 = pi2_temp1;
 2696|  33.9k|    pi2_temp3 = pi2_temp1 + (x_offset >> 1);
 2697|       |
 2698|  33.9k|    pu1_src -= 2 * src_strd;
 2699|  33.9k|    pu1_src -= 2;
 2700|  33.9k|    pi2_temp3 += 2;
 2701|       |    //the filter input starts from x[-2] (till x[3])
 2702|       |
 2703|  33.9k|    if(wd == 4)
  ------------------
  |  Branch (2703:8): [True: 5.81k, False: 28.0k]
  ------------------
 2704|  5.81k|    {
 2705|       |        //vertical half-pel
 2706|  5.81k|        {
 2707|  5.81k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
 2708|  5.81k|            __m128i src_r5_16x8b, src_r6_16x8b;
 2709|  5.81k|            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
 2710|       |
 2711|  5.81k|            __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
 2712|       |
 2713|  5.81k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 2714|       |
 2715|  5.81k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 2716|  5.81k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 2717|  5.81k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 2718|       |                                                          //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
 2719|  5.81k|            off0 = -((src_strd << 2) + src_strd) + 8;
 2720|  5.81k|            off1 = -(src_strd << 2) + 8;
 2721|  5.81k|            off2 = -((src_strd << 1) + src_strd) + 8;
 2722|  5.81k|            off3 = -(src_strd << 1) + 8;
 2723|  5.81k|            off4 = -src_strd + 8;
 2724|  5.81k|            off5 = 8;
 2725|       |
 2726|       |            //prologue: Load all the pred rows except the sixth and seventh rows, which
 2727|       |            //the core loop loads while processing the first and second rows.
 2728|  5.81k|            src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
 2729|  5.81k|            pu1_src =  pu1_src + src_strd;
 2730|       |
 2731|  5.81k|            src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
 2732|  5.81k|            pu1_src =  pu1_src + src_strd;
 2733|       |
 2734|  5.81k|            src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
 2735|  5.81k|            pu1_src =  pu1_src + src_strd;
 2736|       |
 2737|  5.81k|            src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
 2738|  5.81k|            pu1_src =  pu1_src + src_strd;
 2739|       |
 2740|  5.81k|            src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
 2741|  5.81k|            pu1_src =  pu1_src + src_strd;
 2742|       |
 2743|       |            //Core Loop: Process all the rows.
 2744|  5.81k|            do
 2745|  18.7k|            {
 2746|  18.7k|                src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
 2747|       |
 2748|  18.7k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 2749|  18.7k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 2750|  18.7k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 2751|       |
 2752|  18.7k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2753|  18.7k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2754|  18.7k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2755|       |
 2756|  18.7k|                res_t1_8x16b = _mm_add_epi16(res_t2_8x16b, res_t1_8x16b);
 2757|  18.7k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2758|       |
 2759|  18.7k|                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
 2760|       |
 2761|  18.7k|                pi2_temp1[8] = pu1_src[off0] + pu1_src[off5]
 2762|  18.7k|                                   - (pu1_src[off1] + pu1_src[off4])
 2763|  18.7k|                                   + ((pu1_src[off2] + pu1_src[off3] - pu1_src[off1] - pu1_src[off4]) << 2)
 2764|  18.7k|                                   + ((pu1_src[off2] + pu1_src[off3]) << 4);
 2765|       |
 2766|  18.7k|                pu1_src = pu1_src + src_strd;
 2767|  18.7k|                pi2_temp1 = pi2_temp1 + 9;
 2768|       |
 2769|  18.7k|                src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
 2770|       |
 2771|  18.7k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
 2772|  18.7k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
 2773|  18.7k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
 2774|       |
 2775|  18.7k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2776|  18.7k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2777|  18.7k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2778|       |
 2779|  18.7k|                res_t1_8x16b = _mm_add_epi16(res_t2_8x16b, res_t1_8x16b);
 2780|  18.7k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2781|       |
 2782|  18.7k|                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
 2783|       |
 2784|  18.7k|                pi2_temp1[8] = pu1_src[off0] + pu1_src[off5]
 2785|  18.7k|                                   - (pu1_src[off1] + pu1_src[off4])
 2786|  18.7k|                                   + ((pu1_src[off2] + pu1_src[off3] - pu1_src[off1] - pu1_src[off4]) << 2)
 2787|  18.7k|                                   + ((pu1_src[off2] + pu1_src[off3]) << 4);
 2788|       |
 2789|  18.7k|                ht_temp -= 2;
 2790|  18.7k|                pu1_src = pu1_src + src_strd;
 2791|  18.7k|                pi2_temp1 = pi2_temp1 + 9;
 2792|       |
 2793|  18.7k|                src_r0_16x8b = src_r2_16x8b;
 2794|  18.7k|                src_r1_16x8b = src_r3_16x8b;
 2795|  18.7k|                src_r2_16x8b = src_r4_16x8b;
 2796|  18.7k|                src_r3_16x8b = src_r5_16x8b;
 2797|  18.7k|                src_r4_16x8b = src_r6_16x8b;
 2798|  18.7k|            }
 2799|  18.7k|            while(ht_temp > 0);
  ------------------
  |  Branch (2799:19): [True: 12.8k, False: 5.81k]
  ------------------
 2800|  5.81k|        }
 2801|       |
 2802|       |        //horizontal q-pel
 2803|  5.81k|        {
 2804|  5.81k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b;
 2805|  5.81k|            __m128i src_r3_8x16b, src_r4_8x16b, src_r5_8x16b;
 2806|  5.81k|            __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
 2807|  5.81k|            __m128i src_hpel_16x8b, src_hpel_8x16b;
 2808|       |
 2809|  5.81k|            __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 2810|  5.81k|            __m128i res_8x16b, res_16x8b;
 2811|       |
 2812|  5.81k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 2813|  5.81k|            __m128i const_val512_4x32b, const_val16_8x16b;
 2814|       |
 2815|  5.81k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 2816|  5.81k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 2817|  5.81k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 2818|       |
 2819|  5.81k|            const_val512_4x32b = _mm_set1_epi32(512);
 2820|  5.81k|            const_val16_8x16b = _mm_set1_epi16(16);
 2821|       |
 2822|  5.81k|            do
 2823|  37.4k|            {
 2824|  37.4k|                src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2));
 2825|  37.4k|                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
 2826|  37.4k|                src_r2_8x16b = _mm_srli_si128(src_r1_8x16b, 2);
 2827|  37.4k|                src_r3_8x16b = _mm_srli_si128(src_r1_8x16b, 4);
 2828|  37.4k|                src_r4_8x16b = _mm_srli_si128(src_r1_8x16b, 6);
 2829|  37.4k|                src_r5_8x16b = _mm_srli_si128(src_r1_8x16b, 8);
 2830|       |
 2831|  37.4k|                src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 2832|  37.4k|                src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 2833|  37.4k|                src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 2834|       |
 2835|  37.4k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
 2836|  37.4k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
 2837|  37.4k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
 2838|       |
 2839|  37.4k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 2840|  37.4k|                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
 2841|  37.4k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 2842|  37.4k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 2843|       |
 2844|  37.4k|                res_8x16b = _mm_packs_epi32(res_t1_4x32b, res_t1_4x32b);
 2845|  37.4k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 2846|       |
 2847|  37.4k|                src_hpel_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp3));
 2848|  37.4k|                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
 2849|  37.4k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 2850|  37.4k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 2851|       |
 2852|  37.4k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 2853|       |
 2854|  37.4k|                *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
 2855|       |
 2856|  37.4k|                ht--;
 2857|  37.4k|                pi2_temp2 = pi2_temp2 + 4 + 5;
 2858|  37.4k|                pi2_temp3 = pi2_temp3 + 4 + 5;
 2859|  37.4k|                pu1_dst = pu1_dst + dst_strd;
 2860|  37.4k|            }
 2861|  37.4k|            while(ht > 0);
  ------------------
  |  Branch (2861:19): [True: 31.5k, False: 5.81k]
  ------------------
 2862|  5.81k|        }
 2863|  5.81k|    }
 2864|  28.0k|    else if(wd == 8)
  ------------------
  |  Branch (2864:13): [True: 16.6k, False: 11.4k]
  ------------------
 2865|  16.6k|    {
 2866|       |        // vertical half-pel
 2867|  16.6k|        {
 2868|  16.6k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
 2869|  16.6k|            __m128i src_r5_16x8b, src_r6_16x8b;
 2870|  16.6k|            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
 2871|       |
 2872|  16.6k|            __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
 2873|       |
 2874|  16.6k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 2875|       |
 2876|  16.6k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 2877|  16.6k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 2878|  16.6k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 2879|       |
 2880|       |            //prologue: Load all the pred rows except the sixth and seventh rows, which
 2881|       |            //the core loop loads while processing the first and second rows.
 2882|  16.6k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 2883|  16.6k|            pu1_src =  pu1_src + src_strd;
 2884|       |
 2885|  16.6k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 2886|  16.6k|            pu1_src =  pu1_src + src_strd;
 2887|       |
 2888|  16.6k|            src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 2889|  16.6k|            pu1_src =  pu1_src + src_strd;
 2890|       |
 2891|  16.6k|            src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 2892|  16.6k|            pu1_src =  pu1_src + src_strd;
 2893|       |
 2894|  16.6k|            src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 2895|  16.6k|            pu1_src =  pu1_src + src_strd;
 2896|       |
 2897|       |            //Core Loop: Process all the rows.
 2898|  16.6k|            do
 2899|  79.3k|            {
 2900|  79.3k|                src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 2901|  79.3k|                src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
 2902|       |
 2903|  79.3k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 2904|  79.3k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 2905|  79.3k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 2906|       |
 2907|  79.3k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2908|  79.3k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2909|  79.3k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2910|       |
 2911|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2912|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2913|       |
 2914|  79.3k|                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
 2915|       |
 2916|  79.3k|                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
 2917|  79.3k|                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
 2918|  79.3k|                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
 2919|       |
 2920|  79.3k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2921|  79.3k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2922|  79.3k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2923|       |
 2924|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2925|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2926|       |
 2927|  79.3k|                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_t1_8x16b);
 2928|       |
 2929|  79.3k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
 2930|  79.3k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
 2931|  79.3k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
 2932|       |
 2933|  79.3k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2934|  79.3k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2935|  79.3k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2936|       |
 2937|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2938|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2939|       |
 2940|  79.3k|                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8 + 5), res_t1_8x16b);
 2941|       |
 2942|  79.3k|                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
 2943|  79.3k|                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
 2944|  79.3k|                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
 2945|       |
 2946|  79.3k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 2947|  79.3k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 2948|  79.3k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 2949|       |
 2950|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 2951|  79.3k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 2952|       |
 2953|  79.3k|                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8 + 5 + 8), res_t1_8x16b);
 2954|       |
 2955|  79.3k|                src_r0_16x8b = src_r2_16x8b;
 2956|  79.3k|                src_r1_16x8b = src_r3_16x8b;
 2957|  79.3k|                src_r2_16x8b = src_r4_16x8b;
 2958|  79.3k|                src_r3_16x8b = src_r5_16x8b;
 2959|  79.3k|                src_r4_16x8b = src_r6_16x8b;
 2960|       |
 2961|  79.3k|                ht_temp -= 2;
 2962|  79.3k|                pu1_src =  pu1_src + (src_strd << 1);
 2963|  79.3k|                pi2_temp1 = pi2_temp1 + (13 << 1);
 2964|  79.3k|            }
 2965|  79.3k|            while(ht_temp > 0);
  ------------------
  |  Branch (2965:19): [True: 62.7k, False: 16.6k]
  ------------------
 2966|  16.6k|        }
 2967|       |        // horizontal q-pel
 2968|  16.6k|        {
 2969|  16.6k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
 2970|  16.6k|            __m128i src_r4_8x16b, src_r5_8x16b;
 2971|  16.6k|            __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
 2972|  16.6k|            __m128i src_r0r1_c1_8x16b, src_r2r3_c1_8x16b, src_r4r5_c1_8x16b;
 2973|  16.6k|            __m128i src_hpel_8x16b, src_hpel_16x8b;
 2974|       |
 2975|  16.6k|            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 2976|  16.6k|            __m128i res_8x16b, res_16x8b;
 2977|       |
 2978|  16.6k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 2979|  16.6k|            __m128i const_val512_4x32b, const_val16_8x16b;
 2980|       |
 2981|  16.6k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 2982|  16.6k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 2983|  16.6k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 2984|       |
 2985|  16.6k|            const_val512_4x32b = _mm_set1_epi32(512);
 2986|  16.6k|            const_val16_8x16b = _mm_set1_epi16(16);
 2987|       |
 2988|  16.6k|            do
 2989|   158k|            {
 2990|   158k|                src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
 2991|   158k|                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
 2992|   158k|                src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 2));
 2993|   158k|                src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 3));
 2994|   158k|                src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 4));
 2995|   158k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 5));
 2996|       |
 2997|   158k|                src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 2998|   158k|                src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 2999|   158k|                src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 3000|       |
 3001|   158k|                src_r0r1_c1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 3002|   158k|                src_r2r3_c1_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 3003|   158k|                src_r4r5_c1_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 3004|       |
 3005|   158k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
 3006|   158k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
 3007|   158k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
 3008|       |
 3009|   158k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3010|   158k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3011|   158k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3012|       |
 3013|   158k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3014|       |
 3015|   158k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c1_8x16b, coeff0_1_8x16b);
 3016|   158k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c1_8x16b, coeff2_3_8x16b);
 3017|   158k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c1_8x16b, coeff4_5_8x16b);
 3018|       |
 3019|   158k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3020|   158k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3021|   158k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3022|       |
 3023|   158k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3024|       |
 3025|   158k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3026|   158k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3027|       |
 3028|   158k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
 3029|   158k|                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
 3030|   158k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3031|   158k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3032|       |
 3033|   158k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3034|       |
 3035|   158k|                _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
 3036|       |
 3037|   158k|                ht--;
 3038|   158k|                pi2_temp2 = pi2_temp2 + 8 + 5;
 3039|   158k|                pi2_temp3 = pi2_temp3 + 8 + 5;
 3040|   158k|                pu1_dst = pu1_dst + dst_strd;
 3041|   158k|            }
 3042|   158k|            while(ht > 0);
  ------------------
  |  Branch (3042:19): [True: 142k, False: 16.6k]
  ------------------
 3043|  16.6k|        }
 3044|  16.6k|    }
 3045|  11.4k|    else // wd == 16
 3046|  11.4k|    {
 3047|       |        // vertical half-pel
 3048|  11.4k|        {
 3049|  11.4k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
 3050|  11.4k|            __m128i src_r4_16x8b, src_r5_16x8b;
 3051|  11.4k|            __m128i src_r0_c2_16x8b, src_r1_c2_16x8b, src_r2_c2_16x8b, src_r3_c2_16x8b;
 3052|  11.4k|            __m128i src_r4_c2_16x8b, src_r5_c2_16x8b;
 3053|  11.4k|            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
 3054|       |
 3055|  11.4k|            __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
 3056|       |
 3057|  11.4k|            __m128i coeff0_1_16x8b,coeff2_3_16x8b,coeff4_5_16x8b;
 3058|       |
 3059|  11.4k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 3060|  11.4k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 3061|  11.4k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 3062|       |
 3063|  11.4k|            src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 3064|  11.4k|            src_r0_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
 3065|  11.4k|            pu1_src =  pu1_src + src_strd;
 3066|  11.4k|            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 3067|  11.4k|            src_r1_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
 3068|  11.4k|            pu1_src =  pu1_src + src_strd;
 3069|  11.4k|            src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 3070|  11.4k|            src_r2_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
 3071|  11.4k|            pu1_src =  pu1_src + src_strd;
 3072|  11.4k|            src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 3073|  11.4k|            src_r3_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
 3074|  11.4k|            pu1_src =  pu1_src + src_strd;
 3075|  11.4k|            src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
 3076|  11.4k|            src_r4_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
 3077|  11.4k|            pu1_src =  pu1_src + src_strd;
 3078|       |
 3079|       |            //Core Loop: Process all the rows.
 3080|  11.4k|            do
 3081|   143k|            {
 3082|   143k|                src_r5_16x8b  = _mm_loadu_si128((__m128i *)(pu1_src));
 3083|   143k|                src_r5_c2_16x8b  = _mm_loadu_si128((__m128i *)(pu1_src + 16));
 3084|       |
 3085|   143k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
 3086|   143k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
 3087|   143k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
 3088|       |
 3089|   143k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 3090|   143k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 3091|   143k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 3092|       |
 3093|   143k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 3094|   143k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 3095|       |
 3096|   143k|                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
 3097|       |
 3098|   143k|                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
 3099|   143k|                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
 3100|   143k|                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
 3101|       |
 3102|   143k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 3103|   143k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 3104|   143k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 3105|       |
 3106|   143k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 3107|   143k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 3108|       |
 3109|   143k|                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_t1_8x16b);
 3110|       |
 3111|   143k|                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_c2_16x8b, src_r1_c2_16x8b);
 3112|   143k|                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_c2_16x8b, src_r3_c2_16x8b);
 3113|   143k|                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_c2_16x8b, src_r5_c2_16x8b);
 3114|       |
 3115|   143k|                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
 3116|   143k|                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
 3117|   143k|                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
 3118|       |
 3119|   143k|                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
 3120|   143k|                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
 3121|       |
 3122|   143k|                _mm_storeu_si128((__m128i *)(pi2_temp1 + 16), res_t1_8x16b);
 3123|       |
 3124|   143k|                src_r0_16x8b = src_r1_16x8b;
 3125|   143k|                src_r1_16x8b = src_r2_16x8b;
 3126|   143k|                src_r2_16x8b = src_r3_16x8b;
 3127|   143k|                src_r3_16x8b = src_r4_16x8b;
 3128|   143k|                src_r4_16x8b = src_r5_16x8b;
 3129|       |
 3130|   143k|                src_r0_c2_16x8b = src_r1_c2_16x8b;
 3131|   143k|                src_r1_c2_16x8b = src_r2_c2_16x8b;
 3132|   143k|                src_r2_c2_16x8b = src_r3_c2_16x8b;
 3133|   143k|                src_r3_c2_16x8b = src_r4_c2_16x8b;
 3134|   143k|                src_r4_c2_16x8b = src_r5_c2_16x8b;
 3135|       |
 3136|   143k|                ht_temp--;
 3137|   143k|                pu1_src =  pu1_src + src_strd;
 3138|   143k|                pi2_temp1 =  pi2_temp1 + 16 + 5;
 3139|   143k|            }
 3140|   143k|            while(ht_temp > 0);
  ------------------
  |  Branch (3140:19): [True: 132k, False: 11.4k]
  ------------------
 3141|  11.4k|        }
 3142|       |        // horizontal q-pel
 3143|  11.4k|        {
 3144|  11.4k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
 3145|  11.4k|            __m128i src_r4_8x16b, src_r5_8x16b;
 3146|  11.4k|            __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
 3147|  11.4k|            __m128i src_hpel1_8x16b, src_hpel2_8x16b, src_hpel_16x8b;
 3148|       |
 3149|  11.4k|            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 3150|  11.4k|            __m128i res_c0_8x16b, res_c1_8x16b, res_16x8b;
 3151|       |
 3152|  11.4k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 3153|  11.4k|            __m128i const_val512_4x32b, const_val16_8x16b;
 3154|       |
 3155|  11.4k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 3156|  11.4k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 3157|  11.4k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 3158|       |
 3159|  11.4k|            const_val512_4x32b = _mm_set1_epi32(512);
 3160|  11.4k|            const_val16_8x16b = _mm_set1_epi16(16);
 3161|       |
 3162|  11.4k|            do
 3163|   143k|            {
 3164|   143k|                src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
 3165|   143k|                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
 3166|   143k|                src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 2));
 3167|   143k|                src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 3));
 3168|   143k|                src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 4));
 3169|   143k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 5));
 3170|       |
 3171|   143k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 3172|   143k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 3173|   143k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 3174|       |
 3175|   143k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3176|   143k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3177|   143k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3178|       |
 3179|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3180|   143k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3181|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3182|   143k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3183|       |
 3184|   143k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 3185|   143k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 3186|   143k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 3187|       |
 3188|   143k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3189|   143k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3190|   143k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3191|       |
 3192|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3193|   143k|                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
 3194|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3195|   143k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3196|       |
 3197|   143k|                res_c0_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3198|       |
 3199|   143k|                src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8));
 3200|   143k|                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 1));
 3201|   143k|                src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 2));
 3202|   143k|                src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 3));
 3203|   143k|                src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 4));
 3204|   143k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 5));
 3205|       |
 3206|   143k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 3207|   143k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 3208|   143k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 3209|       |
 3210|   143k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3211|   143k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3212|   143k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3213|       |
 3214|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3215|   143k|                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
 3216|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3217|   143k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b ,10);
 3218|       |
 3219|   143k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 3220|   143k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 3221|   143k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 3222|       |
 3223|   143k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3224|   143k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3225|   143k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3226|       |
 3227|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3228|   143k|                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
 3229|   143k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3230|   143k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3231|       |
 3232|   143k|                res_c1_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3233|   143k|                res_16x8b = _mm_packus_epi16(res_c0_8x16b, res_c1_8x16b);
 3234|       |
 3235|   143k|                src_hpel1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
 3236|   143k|                src_hpel1_8x16b = _mm_add_epi16(src_hpel1_8x16b, const_val16_8x16b);
 3237|   143k|                src_hpel1_8x16b = _mm_srai_epi16(src_hpel1_8x16b, 5); //shifting right by 5 bits.
 3238|       |
 3239|   143k|                src_hpel2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 8));
 3240|   143k|                src_hpel2_8x16b = _mm_add_epi16(src_hpel2_8x16b, const_val16_8x16b);
 3241|   143k|                src_hpel2_8x16b = _mm_srai_epi16(src_hpel2_8x16b, 5); //shifting right by 5 bits.
 3242|       |
 3243|   143k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel1_8x16b, src_hpel2_8x16b);
 3244|   143k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3245|       |
 3246|   143k|                _mm_storeu_si128((__m128i *)(pu1_dst), res_16x8b);
 3247|       |
 3248|   143k|                ht--;
 3249|   143k|                pi2_temp2 = pi2_temp2 + 16 + 5;
 3250|   143k|                pi2_temp3 = pi2_temp3 + 16 + 5;
 3251|   143k|                pu1_dst = pu1_dst + dst_strd;
 3252|   143k|            }
 3253|   143k|            while(ht > 0);
  ------------------
  |  Branch (3253:19): [True: 132k, False: 11.4k]
  ------------------
 3254|  11.4k|        }
 3255|  11.4k|    }
 3256|  33.9k|}
ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3:
 3296|  41.3k|{
 3297|  41.3k|    WORD32 ht_temp;
 3298|  41.3k|    WORD32 y_offset;
 3299|  41.3k|    WORD16 *pi2_temp1,*pi2_temp2,*pi2_temp3;
 3300|       |
 3301|  41.3k|    y_offset = (dydx & 0xf) >> 2;
 3302|  41.3k|    pi2_temp1 = (WORD16 *)pu1_tmp;
 3303|  41.3k|    pi2_temp2 = pi2_temp1;
 3304|  41.3k|    pi2_temp3 = pi2_temp1 + (y_offset >> 1) * wd;
 3305|       |
 3306|  41.3k|    ht_temp = ht + 5;
 3307|  41.3k|    pu1_src -= src_strd << 1;
 3308|  41.3k|    pu1_src -= 2;
 3309|  41.3k|    pi2_temp3 += wd << 1;
 3310|       |    //the filter input starts from x[-2] (till x[3])
 3311|       |
 3312|  41.3k|    if(wd == 4)
  ------------------
  |  Branch (3312:8): [True: 7.86k, False: 33.5k]
  ------------------
 3313|  7.86k|    {
 3314|       |        // horizontal half-pel
 3315|  7.86k|        {
 3316|  7.86k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r0r1_t1_16x8b;
 3317|  7.86k|            __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
 3318|  7.86k|            __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
 3319|  7.86k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 3320|       |
 3321|  7.86k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 3322|  7.86k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 3323|  7.86k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 3324|       |
 3325|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 3326|       |            //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 3327|       |
 3328|  7.86k|            do
 3329|  53.1k|            {
 3330|  53.1k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                         //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 3331|  53.1k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));            //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 3332|       |
 3333|  53.1k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                         //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 3334|  53.1k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                         //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 3335|       |
 3336|  53.1k|                src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);           //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 3337|  53.1k|                src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);           //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 3338|       |
 3339|  53.1k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);         //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
 3340|  53.1k|                res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b);   //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 3341|       |                                                                                            //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 3342|       |
 3343|  53.1k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                             //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0
 3344|  53.1k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                             //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0
 3345|       |
 3346|  53.1k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);         //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
 3347|  53.1k|                res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b);   //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 3348|       |                                                                                            //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
 3349|       |
 3350|  53.1k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                             //a4 a5 a5 a6 a6 a7 a7 a8  0  0  0  0  0  0  0  0
 3351|  53.1k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                             //b4 b5 b5 b6 b6 b7 b7 b8  0  0  0  0  0  0  0  0
 3352|       |
 3353|  53.1k|                src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);         //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
 3354|  53.1k|                res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b);   //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 3355|       |                                                                                            //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 3356|       |
 3357|  53.1k|                res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
 3358|  53.1k|                res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b);
 3359|       |
 3360|       |
 3361|  53.1k|                _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0r1_t1_8x16b);
 3362|       |
 3363|  53.1k|                ht_temp -= 2;
 3364|  53.1k|                pu1_src =  pu1_src + (src_strd << 1);
 3365|  53.1k|                pi2_temp1 =  pi2_temp1 + (4 << 1);
 3366|  53.1k|            }
 3367|  53.1k|            while(ht_temp > 0);
  ------------------
  |  Branch (3367:19): [True: 45.2k, False: 7.86k]
  ------------------
 3368|  7.86k|        }
 3369|       |        // vertical q-pel
 3370|  7.86k|        {
 3371|  7.86k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
 3372|  7.86k|            __m128i src_r4_8x16b, src_r5_8x16b, src_r6_8x16b;
 3373|  7.86k|            __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
 3374|  7.86k|            __m128i src_hpel_16x8b, src_hpel_8x16b;
 3375|       |
 3376|  7.86k|            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 3377|  7.86k|            __m128i res_8x16b, res_16x8b;
 3378|       |
 3379|  7.86k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 3380|  7.86k|            __m128i const_val512_4x32b, const_val16_8x16b;
 3381|       |
 3382|  7.86k|            const_val512_4x32b = _mm_set1_epi32(512);
 3383|  7.86k|            const_val16_8x16b = _mm_set1_epi16(16);
 3384|       |
 3385|  7.86k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 3386|  7.86k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 3387|  7.86k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 3388|       |
 3389|  7.86k|            src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2));
 3390|  7.86k|            src_r1_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 4));
 3391|  7.86k|            src_r2_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 8));
 3392|  7.86k|            src_r3_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 12));
 3393|  7.86k|            src_r4_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 16));
 3394|  7.86k|            pi2_temp2 += 20;
 3395|       |
 3396|  7.86k|            do
 3397|  29.5k|            {
 3398|  29.5k|                src_r5_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2));
 3399|  29.5k|                src_r6_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 4));
 3400|       |
 3401|  29.5k|                src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 3402|  29.5k|                src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 3403|  29.5k|                src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 3404|       |
 3405|  29.5k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
 3406|  29.5k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
 3407|  29.5k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
 3408|       |
 3409|  29.5k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3410|  29.5k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3411|  29.5k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3412|  29.5k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3413|       |
 3414|  29.5k|                src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
 3415|  29.5k|                src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
 3416|  29.5k|                src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
 3417|       |
 3418|  29.5k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
 3419|  29.5k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
 3420|  29.5k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
 3421|       |
 3422|  29.5k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3423|  29.5k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3424|  29.5k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3425|  29.5k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3426|       |
 3427|  29.5k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3428|  29.5k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3429|       |
 3430|  29.5k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)pi2_temp3);
 3431|  29.5k|                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
 3432|  29.5k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3433|  29.5k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3434|       |
 3435|  29.5k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3436|       |
 3437|  29.5k|                *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
 3438|  29.5k|                res_16x8b = _mm_srli_si128(res_16x8b, 4);
 3439|  29.5k|                *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
 3440|       |
 3441|  29.5k|                src_r0_8x16b = src_r2_8x16b;
 3442|  29.5k|                src_r1_8x16b = src_r3_8x16b;
 3443|  29.5k|                src_r2_8x16b = src_r4_8x16b;
 3444|  29.5k|                src_r3_8x16b = src_r5_8x16b;
 3445|  29.5k|                src_r4_8x16b = src_r6_8x16b;
 3446|       |
 3447|  29.5k|                ht -= 2;
 3448|  29.5k|                pi2_temp2 =  pi2_temp2 + (4 << 1);
 3449|  29.5k|                pi2_temp3 =  pi2_temp3 + (4 << 1);
 3450|  29.5k|                pu1_dst = pu1_dst + (dst_strd << 1);
 3451|  29.5k|            }
 3452|  29.5k|            while(ht > 0);
  ------------------
  |  Branch (3452:19): [True: 21.6k, False: 7.86k]
  ------------------
 3453|  7.86k|        }
 3454|  7.86k|    }
 3455|  33.5k|    else if(wd == 8)
  ------------------
  |  Branch (3455:13): [True: 17.1k, False: 16.4k]
  ------------------
 3456|  17.1k|    {
 3457|       |        // horizontal half-pel
 3458|  17.1k|        {
 3459|  17.1k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
 3460|  17.1k|            __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
 3461|       |
 3462|  17.1k|            __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
 3463|  17.1k|            __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
 3464|       |
 3465|  17.1k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 3466|       |
 3467|  17.1k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 3468|  17.1k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 3469|  17.1k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
 3470|       |
 3471|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 3472|       |            //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 3473|       |
 3474|  17.1k|            do
 3475|   129k|            {
 3476|   129k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));                   //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 3477|   129k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));        //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 3478|       |
 3479|   129k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                     //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 3480|   129k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                     //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 3481|       |
 3482|   129k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);    //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 3483|   129k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);    //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 3484|       |
 3485|   129k|                res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);   //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 3486|       |                                                                                        //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 3487|   129k|                res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);   //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 3488|       |                                                                                        //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
 3489|       |
 3490|   129k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                         //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 3491|   129k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                         //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
 3492|       |
 3493|   129k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                 //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 3494|   129k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                 //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
 3495|       |
 3496|   129k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);    //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 3497|   129k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);    //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
 3498|       |
 3499|   129k|                res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);   //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 3500|       |                                                                                        //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 3501|   129k|                res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);   //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
 3502|       |                                                                                        //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
 3503|       |
 3504|   129k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                         //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 3505|   129k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                         //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 3506|       |
 3507|   129k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                 //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 3508|   129k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                 //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 3509|       |
 3510|   129k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);    //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 3511|   129k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);    //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 3512|       |
 3513|   129k|                res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);   //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 3514|       |                                                                                        //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 3515|   129k|                res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);   //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 3516|       |                                                                                        //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 3517|   129k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 3518|   129k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 3519|       |
 3520|   129k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 3521|   129k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 3522|       |
 3523|   129k|                _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0_t1_8x16b);
 3524|   129k|                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_r1_t1_8x16b);
 3525|       |
 3526|   129k|                ht_temp -= 2;
 3527|   129k|                pu1_src =  pu1_src + (src_strd << 1);
 3528|   129k|                pi2_temp1 =  pi2_temp1 + (8 << 1);
 3529|   129k|            }
 3530|   129k|            while(ht_temp > 0);
  ------------------
  |  Branch (3530:19): [True: 112k, False: 17.1k]
  ------------------
 3531|  17.1k|        }
 3532|       |        // vertical q-pel
 3533|  17.1k|        {
 3534|  17.1k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
 3535|  17.1k|            __m128i src_r4_8x16b, src_r5_8x16b, src_r6_8x16b;
 3536|  17.1k|            __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
 3537|  17.1k|            __m128i src_hpel_8x16b, src_hpel_16x8b;
 3538|       |
 3539|  17.1k|            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 3540|  17.1k|            __m128i res_8x16b, res_16x8b;
 3541|       |
 3542|  17.1k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 3543|  17.1k|            __m128i const_val512_4x32b, const_val16_8x16b;
 3544|       |
 3545|  17.1k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 3546|  17.1k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 3547|  17.1k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 3548|       |
 3549|  17.1k|            const_val512_4x32b = _mm_set1_epi32(512);
 3550|  17.1k|            const_val16_8x16b = _mm_set1_epi16(16);
 3551|       |
 3552|  17.1k|            src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
 3553|  17.1k|            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8));
 3554|  17.1k|            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
 3555|  17.1k|            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 24));
 3556|  17.1k|            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32));
 3557|  17.1k|            pi2_temp2 += 40;
 3558|       |
 3559|  17.1k|            do
 3560|  78.6k|            {
 3561|  78.6k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
 3562|  78.6k|                src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8));
 3563|       |
 3564|  78.6k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 3565|  78.6k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 3566|  78.6k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 3567|       |
 3568|  78.6k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3569|  78.6k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3570|  78.6k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3571|       |
 3572|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3573|  78.6k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3574|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3575|  78.6k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3576|       |
 3577|  78.6k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 3578|  78.6k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 3579|  78.6k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 3580|       |
 3581|  78.6k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3582|  78.6k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3583|  78.6k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3584|       |
 3585|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3586|  78.6k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3587|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3588|  78.6k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3589|       |
 3590|  78.6k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3591|  78.6k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3592|       |
 3593|  78.6k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)pi2_temp3);
 3594|  78.6k|                src_hpel_8x16b = _mm_add_epi16(const_val16_8x16b, src_hpel_8x16b);
 3595|  78.6k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3596|  78.6k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3597|       |
 3598|  78.6k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3599|       |
 3600|  78.6k|                _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
 3601|       |
 3602|  78.6k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
 3603|  78.6k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
 3604|  78.6k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
 3605|       |
 3606|  78.6k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3607|  78.6k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3608|  78.6k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3609|       |
 3610|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3611|  78.6k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3612|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3613|  78.6k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3614|       |
 3615|  78.6k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
 3616|  78.6k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
 3617|  78.6k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
 3618|       |
 3619|  78.6k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3620|  78.6k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3621|  78.6k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3622|       |
 3623|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3624|  78.6k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3625|  78.6k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3626|  78.6k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3627|       |
 3628|  78.6k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3629|  78.6k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3630|       |
 3631|  78.6k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 8));
 3632|  78.6k|                src_hpel_8x16b = _mm_add_epi16(const_val16_8x16b, src_hpel_8x16b);
 3633|  78.6k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3634|  78.6k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3635|       |
 3636|  78.6k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3637|       |
 3638|  78.6k|                _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
 3639|       |
 3640|  78.6k|                src_r0_8x16b = src_r2_8x16b;
 3641|  78.6k|                src_r1_8x16b = src_r3_8x16b;
 3642|  78.6k|                src_r2_8x16b = src_r4_8x16b;
 3643|  78.6k|                src_r3_8x16b = src_r5_8x16b;
 3644|  78.6k|                src_r4_8x16b = src_r6_8x16b;
 3645|       |
 3646|  78.6k|                ht -= 2;
 3647|  78.6k|                pi2_temp2 = pi2_temp2 + (8 << 1);
 3648|  78.6k|                pi2_temp3 = pi2_temp3 + (8 << 1);
 3649|  78.6k|                pu1_dst = pu1_dst + (dst_strd << 1);
 3650|  78.6k|            }
 3651|  78.6k|            while(ht > 0);
  ------------------
  |  Branch (3651:19): [True: 61.5k, False: 17.1k]
  ------------------
 3652|  17.1k|        }
 3653|  17.1k|    }
 3654|  16.4k|    else // wd == 16
 3655|  16.4k|    {
 3656|  16.4k|        UWORD8 *pu1_dst1;
 3657|  16.4k|        WORD16 *pi2_temp4,*pi2_temp5;
 3658|       |
 3659|  16.4k|        pu1_dst1 = pu1_dst + 8;
 3660|  16.4k|        pi2_temp4 = pi2_temp2 + 8;
 3661|  16.4k|        pi2_temp5 = pi2_temp3 + 8;
 3662|       |
 3663|       |        // horizontal half-pel
 3664|  16.4k|        {
 3665|  16.4k|            __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
 3666|  16.4k|            __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
 3667|       |
 3668|  16.4k|            __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
 3669|  16.4k|            __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
 3670|       |
 3671|  16.4k|            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
 3672|       |
 3673|  16.4k|            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
 3674|  16.4k|            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
 3675|  16.4k|            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
 3676|       |
 3677|       |            //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
 3678|       |            //Row0 :                         b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
 3679|       |            //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
 3680|       |
 3681|  16.4k|            do
 3682|   309k|            {
 3683|   309k|                src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));                  //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
 3684|   309k|                src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));              //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
 3685|       |
 3686|   309k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                    //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
 3687|   309k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                    //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
 3688|       |
 3689|   309k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);   //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
 3690|   309k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);   //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
 3691|       |
 3692|   309k|                res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);   //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
 3693|       |                                                                                        //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
 3694|   309k|                res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);   //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
 3695|       |                                                                                        //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
 3696|       |
 3697|   309k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                         //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
 3698|   309k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                         //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
 3699|       |
 3700|   309k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                 //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
 3701|   309k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                 //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
 3702|       |
 3703|   309k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);    //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
 3704|   309k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);    //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
 3705|       |
 3706|   309k|                res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);   //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
 3707|       |                                                                                        //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
 3708|   309k|                res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);   //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
 3709|       |                                                                                        //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
 3710|       |
 3711|   309k|                src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                         //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
 3712|   309k|                src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                         //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
 3713|       |
 3714|   309k|                src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                 //a5 a6 a7 a8 a9....a15 0  0  0  0  0
 3715|   309k|                src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                 //b5 b6 b7 b8 b9....b15 0  0  0  0  0
 3716|       |
 3717|   309k|                src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);    //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
 3718|   309k|                src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);    //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
 3719|       |
 3720|   309k|                res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);   //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
 3721|       |                                                                                        //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
 3722|   309k|                res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);   //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
 3723|       |                                                                                        //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
 3724|   309k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
 3725|   309k|                res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
 3726|       |
 3727|   309k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
 3728|   309k|                res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
 3729|       |
 3730|   309k|                _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0_t1_8x16b);
 3731|   309k|                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_r1_t1_8x16b);
 3732|       |
 3733|   309k|                ht_temp--;
 3734|   309k|                pu1_src =  pu1_src + src_strd;
 3735|   309k|                pi2_temp1 =  pi2_temp1 + 16;
 3736|   309k|            }
 3737|   309k|            while(ht_temp > 0);
  ------------------
  |  Branch (3737:19): [True: 292k, False: 16.4k]
  ------------------
 3738|  16.4k|        }
 3739|       |        // vertical q-pel
 3740|  16.4k|        {
 3741|  16.4k|            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, src_r4_8x16b;
 3742|  16.4k|            __m128i src_r5_8x16b, src_r6_8x16b;
 3743|  16.4k|            __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
 3744|  16.4k|            __m128i src_hpel_8x16b, src_hpel_16x8b;
 3745|       |
 3746|  16.4k|            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
 3747|  16.4k|            __m128i res_8x16b, res_16x8b;
 3748|       |
 3749|  16.4k|            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
 3750|  16.4k|            __m128i const_val512_4x32b, const_val16_8x16b;
 3751|       |
 3752|  16.4k|            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
 3753|  16.4k|            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
 3754|  16.4k|            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
 3755|       |
 3756|  16.4k|            const_val512_4x32b = _mm_set1_epi32(512);
 3757|  16.4k|            const_val16_8x16b = _mm_set1_epi16(16);
 3758|       |
 3759|       |            /**********************************************************/
 3760|       |            /*     Do first height x 8 block                          */
 3761|       |            /**********************************************************/
 3762|  16.4k|            src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
 3763|  16.4k|            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
 3764|  16.4k|            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32));
 3765|  16.4k|            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 48));
 3766|  16.4k|            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 64));
 3767|  16.4k|            pi2_temp2 += 80;
 3768|       |
 3769|  16.4k|            ht_temp = ht;
 3770|  16.4k|            do
 3771|   113k|            {
 3772|   113k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
 3773|   113k|                src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
 3774|       |
 3775|   113k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 3776|   113k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 3777|   113k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 3778|       |
 3779|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3780|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3781|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3782|       |
 3783|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3784|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3785|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3786|   113k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3787|       |
 3788|   113k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 3789|   113k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 3790|   113k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 3791|       |
 3792|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3793|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3794|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3795|       |
 3796|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3797|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3798|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3799|   113k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3800|       |
 3801|   113k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3802|   113k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3803|       |
 3804|   113k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
 3805|   113k|                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
 3806|   113k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3807|   113k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3808|       |
 3809|   113k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3810|   113k|                _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
 3811|       |
 3812|   113k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
 3813|   113k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
 3814|   113k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
 3815|       |
 3816|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3817|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3818|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3819|       |
 3820|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3821|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3822|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3823|   113k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3824|       |
 3825|   113k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
 3826|   113k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
 3827|   113k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
 3828|       |
 3829|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3830|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3831|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3832|       |
 3833|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3834|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3835|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3836|   113k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3837|       |
 3838|   113k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3839|   113k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3840|       |
 3841|   113k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 16));
 3842|   113k|                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
 3843|   113k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3844|   113k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3845|       |
 3846|   113k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3847|   113k|                _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
 3848|       |
 3849|   113k|                src_r0_8x16b = src_r2_8x16b;
 3850|   113k|                src_r1_8x16b = src_r3_8x16b;
 3851|   113k|                src_r2_8x16b = src_r4_8x16b;
 3852|   113k|                src_r3_8x16b = src_r5_8x16b;
 3853|   113k|                src_r4_8x16b = src_r6_8x16b;
 3854|       |
 3855|   113k|                ht_temp -= 2;
 3856|   113k|                pi2_temp3 = pi2_temp3 + (16 << 1);
 3857|   113k|                pi2_temp2 = pi2_temp2 + (16 << 1);
 3858|   113k|                pu1_dst = pu1_dst + (dst_strd << 1);
 3859|   113k|            }
 3860|   113k|            while(ht_temp > 0);
  ------------------
  |  Branch (3860:19): [True: 97.2k, False: 16.4k]
  ------------------
 3861|       |
 3862|       |            /**********************************************************/
 3863|       |            /*     Do second height * 8 block                         */
 3864|       |            /**********************************************************/
 3865|  16.4k|            src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4));
 3866|  16.4k|            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 16));
 3867|  16.4k|            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 32));
 3868|  16.4k|            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 48));
 3869|  16.4k|            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 64));
 3870|  16.4k|            pi2_temp4 += 80;
 3871|       |
 3872|  16.4k|            do
 3873|   113k|            {
 3874|   113k|                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4));
 3875|   113k|                src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 16));
 3876|       |
 3877|   113k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
 3878|   113k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
 3879|   113k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
 3880|       |
 3881|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3882|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3883|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3884|       |
 3885|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3886|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3887|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3888|   113k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3889|       |
 3890|   113k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
 3891|   113k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
 3892|   113k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
 3893|       |
 3894|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3895|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3896|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3897|       |
 3898|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3899|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3900|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3901|   113k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3902|       |
 3903|   113k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3904|   113k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3905|       |
 3906|   113k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp5));
 3907|   113k|                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
 3908|   113k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3909|   113k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3910|       |
 3911|   113k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3912|   113k|                _mm_storel_epi64((__m128i *)(pu1_dst1), res_16x8b);
 3913|       |
 3914|   113k|                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
 3915|   113k|                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
 3916|   113k|                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
 3917|       |
 3918|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3919|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3920|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3921|       |
 3922|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3923|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3924|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3925|   113k|                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3926|       |
 3927|   113k|                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
 3928|   113k|                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
 3929|   113k|                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
 3930|       |
 3931|   113k|                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
 3932|   113k|                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
 3933|   113k|                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
 3934|       |
 3935|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
 3936|   113k|                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
 3937|   113k|                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
 3938|   113k|                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
 3939|       |
 3940|   113k|                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
 3941|   113k|                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
 3942|       |
 3943|   113k|                src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp5 + 16));
 3944|   113k|                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
 3945|   113k|                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
 3946|   113k|                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
 3947|       |
 3948|   113k|                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
 3949|   113k|                _mm_storel_epi64((__m128i *)(pu1_dst1 + dst_strd), res_16x8b);
 3950|       |
 3951|   113k|                src_r0_8x16b = src_r2_8x16b;
 3952|   113k|                src_r1_8x16b = src_r3_8x16b;
 3953|   113k|                src_r2_8x16b = src_r4_8x16b;
 3954|   113k|                src_r3_8x16b = src_r5_8x16b;
 3955|   113k|                src_r4_8x16b = src_r6_8x16b;
 3956|       |
 3957|   113k|                ht -= 2;
 3958|   113k|                pi2_temp5 = pi2_temp5 + (16 << 1);
 3959|   113k|                pi2_temp4 = pi2_temp4 + (16 << 1);
 3960|   113k|                pu1_dst1 = pu1_dst1 + (dst_strd << 1);
 3961|   113k|            }
 3962|   113k|            while(ht > 0);
  ------------------
  |  Branch (3962:19): [True: 97.2k, False: 16.4k]
  ------------------
 3963|  16.4k|        }
 3964|  16.4k|    }
 3965|  41.3k|}
ih264_inter_pred_chroma_ssse3:
 4002|  10.3M|{
 4003|  10.3M|    WORD32 i, j, A, B, C, D;
 4004|       |
 4005|  10.3M|    i = 8 - dx;
 4006|  10.3M|    j = 8 - dy;
 4007|       |
 4008|  10.3M|    A = i * j;
 4009|  10.3M|    B = dx * j;
 4010|  10.3M|    C = i * dy;
 4011|  10.3M|    D = dx * dy;
 4012|       |
 4013|  10.3M|    if(wd == 2)
  ------------------
  |  Branch (4013:8): [True: 84.0k, False: 10.2M]
  ------------------
 4014|  84.0k|    {
 4015|  84.0k|        WORD32 tmp1, tmp2, tmp3, tmp4;
 4016|       |
 4017|  84.0k|        do
 4018|   144k|        {
 4019|       |            //U
 4020|   144k|            tmp1 = A * pu1_src[0] + B * pu1_src[2] + C * pu1_src[src_strd] + D * pu1_src[src_strd + 2];
 4021|   144k|            tmp2 = A * pu1_src[2] + B * pu1_src[4] + C * pu1_src[src_strd + 2] + D * pu1_src[src_strd + 4];
 4022|       |            //V
 4023|   144k|            tmp3 = A * pu1_src[1] + B * pu1_src[3] + C * pu1_src[src_strd + 1] + D * pu1_src[src_strd + 3];
 4024|   144k|            tmp4 = A * pu1_src[3] + B * pu1_src[5] + C * pu1_src[src_strd + 3] + D * pu1_src[src_strd + 5];
 4025|       |
 4026|   144k|            tmp1 = (tmp1 + 32) >> 6;
 4027|   144k|            tmp2 = (tmp2 + 32) >> 6;
 4028|   144k|            tmp3 = (tmp3 + 32) >> 6;
 4029|   144k|            tmp4 = (tmp4 + 32) >> 6;
 4030|       |
 4031|   144k|            pu1_dst[0] = CLIP_U8(tmp1);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4032|   144k|            pu1_dst[2] = CLIP_U8(tmp2);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4033|   144k|            pu1_dst[1] = CLIP_U8(tmp3);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4034|   144k|            pu1_dst[3] = CLIP_U8(tmp4);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4035|       |
 4036|   144k|            pu1_src += src_strd;
 4037|   144k|            pu1_dst += dst_strd;
 4038|       |
 4039|   144k|            tmp1 = A * pu1_src[0] + B * pu1_src[2] + C * pu1_src[src_strd] + D * pu1_src[src_strd + 2];
 4040|   144k|            tmp2 = A * pu1_src[2] + B * pu1_src[4] + C * pu1_src[src_strd + 2] + D * pu1_src[src_strd + 4];
 4041|   144k|            tmp3 = A * pu1_src[1] + B * pu1_src[3] + C * pu1_src[src_strd + 1] + D * pu1_src[src_strd + 3];
 4042|   144k|            tmp4 = A * pu1_src[3] + B * pu1_src[5] + C * pu1_src[src_strd + 3] + D * pu1_src[src_strd + 5];
 4043|       |
 4044|   144k|            tmp1 = (tmp1 + 32) >> 6;
 4045|   144k|            tmp2 = (tmp2 + 32) >> 6;
 4046|   144k|            tmp3 = (tmp3 + 32) >> 6;
 4047|   144k|            tmp4 = (tmp4 + 32) >> 6;
 4048|       |
 4049|   144k|            pu1_dst[0] = CLIP_U8(tmp1);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4050|   144k|            pu1_dst[2] = CLIP_U8(tmp2);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4051|   144k|            pu1_dst[1] = CLIP_U8(tmp3);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4052|   144k|            pu1_dst[3] = CLIP_U8(tmp4);
  ------------------
  |  |   58|   144k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   144k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 144k]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 144k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 4053|       |
 4054|   144k|            ht -= 2;
 4055|   144k|            pu1_src += src_strd;
 4056|   144k|            pu1_dst += dst_strd;
 4057|   144k|        }
 4058|   144k|        while(ht > 0);
  ------------------
  |  Branch (4058:15): [True: 60.8k, False: 84.0k]
  ------------------
 4059|       |
 4060|  84.0k|    }
 4061|  10.2M|    else if(wd == 4)
  ------------------
  |  Branch (4061:13): [True: 360k, False: 9.90M]
  ------------------
 4062|   360k|    {
 4063|   360k|        WORD32 AB, CD;
 4064|       |
 4065|   360k|        __m128i src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
 4066|   360k|        __m128i res1_AB_8x16b, res1_CD_8x16b, res1_8x16b, res1_16x8b;
 4067|   360k|        __m128i res2_AB_8x16b, res2_CD_8x16b, res2_8x16b, res2_16x8b;
 4068|       |
 4069|   360k|        __m128i coeffAB_16x8b, coeffCD_16x8b, round_add32_8x16b;
 4070|   360k|        __m128i const_shuff_16x8b;
 4071|       |
 4072|   360k|        AB = (B << 8) + A;
 4073|   360k|        CD = (D << 8) + C;
 4074|       |
 4075|   360k|        coeffAB_16x8b = _mm_set1_epi16(AB);
 4076|   360k|        coeffCD_16x8b = _mm_set1_epi16(CD);
 4077|       |
 4078|   360k|        round_add32_8x16b = _mm_set1_epi16(32);
 4079|       |
 4080|   360k|        const_shuff_16x8b = _mm_setr_epi32(0x03010200, 0x05030402, 0x07050604, 0x09070806);
 4081|       |
 4082|   360k|        src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 4083|   360k|        src_r1_16x8b = _mm_shuffle_epi8(src_r1_16x8b, const_shuff_16x8b);
 4084|   360k|        pu1_src += src_strd;
 4085|       |
 4086|   360k|        do
 4087|   858k|        {
 4088|   858k|            src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 4089|   858k|            src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
 4090|       |
 4091|   858k|            src_r2_16x8b = _mm_shuffle_epi8(src_r2_16x8b, const_shuff_16x8b);
 4092|   858k|            src_r3_16x8b = _mm_shuffle_epi8(src_r3_16x8b, const_shuff_16x8b);
 4093|       |
 4094|   858k|            res1_AB_8x16b = _mm_maddubs_epi16(src_r1_16x8b, coeffAB_16x8b);
 4095|   858k|            res1_CD_8x16b = _mm_maddubs_epi16(src_r2_16x8b, coeffCD_16x8b);
 4096|   858k|            res2_AB_8x16b = _mm_maddubs_epi16(src_r2_16x8b, coeffAB_16x8b);
 4097|   858k|            res2_CD_8x16b = _mm_maddubs_epi16(src_r3_16x8b, coeffCD_16x8b);
 4098|       |
 4099|   858k|            res1_8x16b = _mm_add_epi16(res1_AB_8x16b, res1_CD_8x16b);
 4100|   858k|            res2_8x16b = _mm_add_epi16(res2_AB_8x16b, res2_CD_8x16b);
 4101|   858k|            res1_8x16b = _mm_add_epi16(res1_8x16b, round_add32_8x16b);
 4102|   858k|            res2_8x16b = _mm_add_epi16(res2_8x16b, round_add32_8x16b);
 4103|       |
 4104|   858k|            res1_8x16b = _mm_srai_epi16(res1_8x16b, 6);
 4105|   858k|            res2_8x16b = _mm_srai_epi16(res2_8x16b, 6);
 4106|       |
 4107|   858k|            res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
 4108|   858k|            res2_16x8b = _mm_packus_epi16(res2_8x16b, res2_8x16b);
 4109|       |
 4110|   858k|            _mm_storel_epi64((__m128i *)pu1_dst, res1_16x8b);
 4111|   858k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 4112|       |
 4113|   858k|            src_r1_16x8b = src_r3_16x8b;
 4114|       |
 4115|   858k|            ht -= 2;
 4116|   858k|            pu1_src += src_strd << 1;
 4117|   858k|            pu1_dst += dst_strd << 1;
 4118|   858k|        }
 4119|   858k|        while(ht > 0);
  ------------------
  |  Branch (4119:15): [True: 498k, False: 360k]
  ------------------
 4120|   360k|    }
 4121|  9.90M|    else // wd == 8
 4122|  9.90M|    {
 4123|  9.90M|        WORD32 AB, CD;
 4124|       |
 4125|  9.90M|        __m128i src_r1l_16x8b, src_r2l_16x8b;
 4126|  9.90M|        __m128i src_r1h_16x8b, src_r2h_16x8b;
 4127|       |
 4128|  9.90M|        __m128i res_l_AB_8x16b, res_l_CD_8x16b;
 4129|  9.90M|        __m128i res_h_AB_8x16b, res_h_CD_8x16b;
 4130|  9.90M|        __m128i res_l_8x16b, res_h_8x16b, res_16x8b;
 4131|       |
 4132|  9.90M|        __m128i coeffAB_16x8b, coeffCD_16x8b, round_add32_8x16b;
 4133|  9.90M|        __m128i const_shuff_16x8b;
 4134|       |
 4135|  9.90M|        AB = (B << 8) + A;
 4136|  9.90M|        CD = (D << 8) + C;
 4137|       |
 4138|  9.90M|        coeffAB_16x8b = _mm_set1_epi16(AB);
 4139|  9.90M|        coeffCD_16x8b = _mm_set1_epi16(CD);
 4140|       |
 4141|  9.90M|        round_add32_8x16b = _mm_set1_epi16(32);
 4142|       |
 4143|  9.90M|        const_shuff_16x8b = _mm_setr_epi32(0x03010200, 0x05030402, 0x07050604, 0x09070806);
 4144|       |
 4145|  9.90M|        src_r1l_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 4146|  9.90M|        src_r1h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));
 4147|       |
 4148|  9.90M|        src_r1l_16x8b = _mm_shuffle_epi8(src_r1l_16x8b, const_shuff_16x8b);
 4149|  9.90M|        src_r1h_16x8b = _mm_shuffle_epi8(src_r1h_16x8b, const_shuff_16x8b);
 4150|       |
 4151|  9.90M|        pu1_src += src_strd;
 4152|       |
 4153|  9.90M|        do
 4154|  19.7M|        {
 4155|       |            //row 1
 4156|  19.7M|            src_r2l_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 4157|  19.7M|            src_r2h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));
 4158|       |
 4159|  19.7M|            src_r2l_16x8b = _mm_shuffle_epi8(src_r2l_16x8b, const_shuff_16x8b);
 4160|  19.7M|            src_r2h_16x8b = _mm_shuffle_epi8(src_r2h_16x8b, const_shuff_16x8b);
 4161|       |
 4162|  19.7M|            res_l_AB_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffAB_16x8b);
 4163|  19.7M|            res_h_AB_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffAB_16x8b);
 4164|  19.7M|            res_l_CD_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffCD_16x8b);
 4165|  19.7M|            res_h_CD_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffCD_16x8b);
 4166|       |
 4167|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b);
 4168|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b);
 4169|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b);
 4170|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b);
 4171|       |
 4172|  19.7M|            res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6);
 4173|  19.7M|            res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6);
 4174|       |
 4175|  19.7M|            res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b);
 4176|       |
 4177|  19.7M|            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
 4178|       |
 4179|  19.7M|            pu1_src += src_strd;
 4180|  19.7M|            pu1_dst += dst_strd;
 4181|       |
 4182|       |            //row 2
 4183|  19.7M|            src_r1l_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 4184|  19.7M|            src_r1h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));
 4185|       |
 4186|  19.7M|            src_r1l_16x8b = _mm_shuffle_epi8(src_r1l_16x8b, const_shuff_16x8b);
 4187|  19.7M|            src_r1h_16x8b = _mm_shuffle_epi8(src_r1h_16x8b, const_shuff_16x8b);
 4188|       |
 4189|  19.7M|            res_l_AB_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffAB_16x8b);
 4190|  19.7M|            res_h_AB_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffAB_16x8b);
 4191|  19.7M|            res_l_CD_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffCD_16x8b);
 4192|  19.7M|            res_h_CD_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffCD_16x8b);
 4193|       |
 4194|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b);
 4195|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b);
 4196|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b);
 4197|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b);
 4198|       |
 4199|  19.7M|            res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6);
 4200|  19.7M|            res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6);
 4201|       |
 4202|  19.7M|            res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b);
 4203|       |
 4204|  19.7M|            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
 4205|       |
 4206|  19.7M|            pu1_src += src_strd;
 4207|  19.7M|            pu1_dst += dst_strd;
 4208|       |
 4209|       |            //row 3
 4210|  19.7M|            src_r2l_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 4211|  19.7M|            src_r2h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));
 4212|       |
 4213|  19.7M|            src_r2l_16x8b = _mm_shuffle_epi8(src_r2l_16x8b, const_shuff_16x8b);
 4214|  19.7M|            src_r2h_16x8b = _mm_shuffle_epi8(src_r2h_16x8b, const_shuff_16x8b);
 4215|       |
 4216|  19.7M|            res_l_AB_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffAB_16x8b);
 4217|  19.7M|            res_h_AB_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffAB_16x8b);
 4218|  19.7M|            res_l_CD_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffCD_16x8b);
 4219|  19.7M|            res_h_CD_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffCD_16x8b);
 4220|       |
 4221|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b);
 4222|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b);
 4223|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b);
 4224|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b);
 4225|       |
 4226|  19.7M|            res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6);
 4227|  19.7M|            res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6);
 4228|       |
 4229|  19.7M|            res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b);
 4230|       |
 4231|  19.7M|            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
 4232|       |
 4233|  19.7M|            pu1_src += src_strd;
 4234|  19.7M|            pu1_dst += dst_strd;
 4235|       |
 4236|       |            //row 1
 4237|  19.7M|            src_r1l_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
 4238|  19.7M|            src_r1h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));
 4239|       |
 4240|  19.7M|            src_r1l_16x8b = _mm_shuffle_epi8(src_r1l_16x8b, const_shuff_16x8b);
 4241|  19.7M|            src_r1h_16x8b = _mm_shuffle_epi8(src_r1h_16x8b, const_shuff_16x8b);
 4242|       |
 4243|  19.7M|            res_l_AB_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffAB_16x8b);
 4244|  19.7M|            res_h_AB_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffAB_16x8b);
 4245|  19.7M|            res_l_CD_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffCD_16x8b);
 4246|  19.7M|            res_h_CD_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffCD_16x8b);
 4247|       |
 4248|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b);
 4249|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b);
 4250|  19.7M|            res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b);
 4251|  19.7M|            res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b);
 4252|       |
 4253|  19.7M|            res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6);
 4254|  19.7M|            res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6);
 4255|       |
 4256|  19.7M|            res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b);
 4257|       |
 4258|  19.7M|            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
 4259|       |
 4260|  19.7M|            ht -= 4;
 4261|  19.7M|            pu1_src += src_strd;
 4262|  19.7M|            pu1_dst += dst_strd;
 4263|  19.7M|        }
 4264|  19.7M|        while(ht > 0);
  ------------------
  |  Branch (4264:15): [True: 9.80M, False: 9.90M]
  ------------------
 4265|  9.90M|    }
 4266|  10.3M|}

ih264_iquant_itrans_recon_4x4_dc_ssse3:
  112|   210k|{
  113|   210k|    UWORD32 *pu4_out = (UWORD32 *)pu1_out;
  114|   210k|    WORD32 q0 = pi2_src[0];
  115|   210k|    WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (115:32): [True: 68.7k, False: 141k]
  ------------------
  116|       |
  117|   210k|    __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3;
  118|   210k|    __m128i sign_reg;
  119|   210k|    __m128i zero_8x16b = _mm_setzero_si128();          // all bits reset to zero
  120|   210k|    __m128i temp4, temp5, temp6, temp7;
  121|   210k|    __m128i value_add;
  122|       |
  123|   210k|    UNUSED (pi2_tmp);
  ------------------
  |  |   45|   210k|#define UNUSED(x) ((void)(x))
  ------------------
  124|       |
  125|   210k|    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   210k|                {\
  |  |  104|   210k|                    i4_value *= quant_scale;\
  |  |  105|   210k|                    i4_value *= weight_scale;\
  |  |  106|   210k|                    i4_value += rndfactor;\
  |  |  107|   210k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   210k|                    i4_value >>= qbits;\
  |  |  109|   210k|                }
  ------------------
  126|       |
  127|   210k|    if (iq_start_idx != 0 )
  ------------------
  |  Branch (127:9): [True: 182k, False: 28.1k]
  ------------------
  128|   182k|        q0 = pi2_dc_ld_addr[0];     // Restoring dc value for intra case
  129|       |
  130|   210k|    i_macro = ((q0 + 32) >> 6);
  131|       |
  132|   210k|    value_add = _mm_set1_epi16(i_macro);
  133|       |
  134|   210k|    zero_8x16b = _mm_setzero_si128();                  // all bits reset to zero
  135|       |    //Load pred buffer
  136|   210k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
  137|   210k|    pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p00 p01 p02 p03 0 0 0 0 -- all 16 bits
  138|   210k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
  139|   210k|    pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p10 p11 p12 p13 0 0 0 0 -- all 16 bits
  140|   210k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2*pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
  141|   210k|    pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p20 p21 p22 p23 0 0 0 0 -- all 16 bits
  142|   210k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3*pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
  143|   210k|    pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p30 p31 p32 p33 0 0 0 0 -- all 16 bits
  144|       |
  145|   210k|    pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); //p00 p01 p02 p03 p10 p11 p12 p13
  146|   210k|    pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); //p20 p21 p22p p23 p30 p31 p32 p33
  147|       |
  148|   210k|    temp4 = _mm_add_epi16(value_add, pred_r0);
  149|   210k|    temp5 = _mm_add_epi16(value_add, pred_r2);
  150|       |    /*------------------------------------------------------------------*/
  151|       |    //Clipping the results to 8 bits
  152|   210k|    sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b);                 // sign check
  153|   210k|    temp4 = _mm_and_si128(temp4, sign_reg);
  154|   210k|    sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b);                 // sign check
  155|   210k|    temp5 = _mm_and_si128(temp5, sign_reg);
  156|       |
  157|   210k|    temp4 = _mm_packus_epi16(temp4,temp5);
  158|   210k|    temp5 = _mm_srli_si128(temp4,4);
  159|   210k|    temp6 = _mm_srli_si128(temp5,4);
  160|   210k|    temp7 = _mm_srli_si128(temp6,4);
  161|       |
  162|   210k|    *pu4_out = _mm_cvtsi128_si32(temp4);
  163|   210k|    pu1_out += out_strd;
  164|   210k|    pu4_out = (UWORD32 *)(pu1_out);
  165|   210k|    *(pu4_out) = _mm_cvtsi128_si32(temp5);
  166|   210k|    pu1_out += out_strd;
  167|   210k|    pu4_out = (UWORD32 *)(pu1_out);
  168|   210k|    *(pu4_out) = _mm_cvtsi128_si32(temp6);
  169|   210k|    pu1_out += out_strd;
  170|   210k|    pu4_out = (UWORD32 *)(pu1_out);
  171|   210k|    *(pu4_out) = _mm_cvtsi128_si32(temp7);
  172|   210k|}
ih264_iquant_itrans_recon_8x8_dc_ssse3:
  238|  8.27k|{
  239|  8.27k|    WORD32 q0 = pi2_src[0];
  240|  8.27k|    WORD16 i_macro, rnd_fact = (qp_div < 6) ? 1 << (5 - qp_div) : 0;
  ------------------
  |  Branch (240:32): [True: 4.03k, False: 4.24k]
  ------------------
  241|       |
  242|  8.27k|    __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3,pred_r4,pred_r5,pred_r6,pred_r7;
  243|  8.27k|    __m128i sign_reg;
  244|  8.27k|    __m128i zero_8x16b = _mm_setzero_si128();          // all bits reset to zero
  245|  8.27k|    __m128i temp1,temp2,temp3,temp4, temp5, temp6, temp7,temp8;
  246|  8.27k|    __m128i value_add;
  247|       |
  248|  8.27k|    UNUSED (pi2_tmp);
  ------------------
  |  |   45|  8.27k|#define UNUSED(x) ((void)(x))
  ------------------
  249|  8.27k|    UNUSED (iq_start_idx);
  ------------------
  |  |   45|  8.27k|#define UNUSED(x) ((void)(x))
  ------------------
  250|  8.27k|    UNUSED (pi2_dc_ld_addr);
  ------------------
  |  |   45|  8.27k|#define UNUSED(x) ((void)(x))
  ------------------
  251|       |
  252|  8.27k|    INV_QUANT(q0, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|  8.27k|                {\
  |  |  104|  8.27k|                    i4_value *= quant_scale;\
  |  |  105|  8.27k|                    i4_value *= weight_scale;\
  |  |  106|  8.27k|                    i4_value += rndfactor;\
  |  |  107|  8.27k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  8.27k|                    i4_value >>= qbits;\
  |  |  109|  8.27k|                }
  ------------------
  253|  8.27k|    i_macro = ((q0 + 32) >> 6);
  254|       |
  255|  8.27k|    value_add = _mm_set1_epi16(i_macro);
  256|       |
  257|       |    //Load pred buffer row 0
  258|  8.27k|    predload_r = _mm_loadl_epi64((__m128i *)(&pu1_pred[0])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  259|  8.27k|    pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  260|       |    //Load pred buffer row 1
  261|  8.27k|    predload_r = _mm_loadl_epi64((__m128i *)(&pu1_pred[pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  262|  8.27k|    pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  263|       |    //Load pred buffer row 2
  264|  8.27k|    predload_r = _mm_loadl_epi64(
  265|  8.27k|                    (__m128i *)(&pu1_pred[2 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  266|  8.27k|    pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  267|       |    //Load pred buffer row 3
  268|  8.27k|    predload_r = _mm_loadl_epi64(
  269|  8.27k|                    (__m128i *)(&pu1_pred[3 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  270|  8.27k|    pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  271|       |    //Load pred buffer row 4
  272|  8.27k|    predload_r = _mm_loadl_epi64(
  273|  8.27k|                    (__m128i *)(&pu1_pred[4 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  274|  8.27k|    pred_r4 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  275|       |    //Load pred buffer row 5
  276|  8.27k|    predload_r = _mm_loadl_epi64(
  277|  8.27k|                    (__m128i *)(&pu1_pred[5 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bit
  278|  8.27k|    pred_r5 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  279|       |    //Load pred buffer row 6
  280|  8.27k|    predload_r = _mm_loadl_epi64(
  281|  8.27k|                    (__m128i *)(&pu1_pred[6 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  282|  8.27k|    pred_r6 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  283|       |    //Load pred buffer row 7
  284|  8.27k|    predload_r = _mm_loadl_epi64(
  285|  8.27k|                    (__m128i *)(&pu1_pred[7 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  286|  8.27k|    pred_r7 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  287|       |
  288|  8.27k|    temp1 = _mm_add_epi16(value_add, pred_r0);
  289|       |
  290|  8.27k|    temp2 = _mm_add_epi16(value_add, pred_r1);
  291|       |
  292|  8.27k|    temp3 = _mm_add_epi16(value_add, pred_r2);
  293|       |
  294|  8.27k|    temp4 = _mm_add_epi16(value_add, pred_r3);
  295|       |
  296|  8.27k|    temp5 = _mm_add_epi16(value_add, pred_r4);
  297|       |
  298|  8.27k|    temp6 = _mm_add_epi16(value_add, pred_r5);
  299|       |
  300|  8.27k|    temp7 = _mm_add_epi16(value_add, pred_r6);
  301|       |
  302|  8.27k|    temp8 = _mm_add_epi16(value_add, pred_r7);
  303|       |    /*------------------------------------------------------------------*/
  304|       |    //Clipping the results to 8 bits
  305|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); // sign check
  306|  8.27k|    temp1 = _mm_and_si128(temp1, sign_reg);
  307|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b); // sign check
  308|  8.27k|    temp2 = _mm_and_si128(temp2, sign_reg);
  309|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b); // sign check
  310|  8.27k|    temp3 = _mm_and_si128(temp3, sign_reg);
  311|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check
  312|  8.27k|    temp4 = _mm_and_si128(temp4, sign_reg);
  313|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check
  314|  8.27k|    temp5 = _mm_and_si128(temp5, sign_reg);
  315|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b); // sign check
  316|  8.27k|    temp6 = _mm_and_si128(temp6, sign_reg);
  317|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b); // sign check
  318|  8.27k|    temp7 = _mm_and_si128(temp7, sign_reg);
  319|  8.27k|    sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b); // sign check
  320|  8.27k|    temp8 = _mm_and_si128(temp8, sign_reg);
  321|       |
  322|  8.27k|    temp1 = _mm_packus_epi16(temp1, zero_8x16b);
  323|  8.27k|    temp2 = _mm_packus_epi16(temp2, zero_8x16b);
  324|  8.27k|    temp3 = _mm_packus_epi16(temp3, zero_8x16b);
  325|  8.27k|    temp4 = _mm_packus_epi16(temp4, zero_8x16b);
  326|  8.27k|    temp5 = _mm_packus_epi16(temp5, zero_8x16b);
  327|  8.27k|    temp6 = _mm_packus_epi16(temp6, zero_8x16b);
  328|  8.27k|    temp7 = _mm_packus_epi16(temp7, zero_8x16b);
  329|  8.27k|    temp8 = _mm_packus_epi16(temp8, zero_8x16b);
  330|       |
  331|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[0]), temp1);
  332|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[out_strd]), temp2);
  333|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[2 * out_strd]), temp3);
  334|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[3 * out_strd]), temp4);
  335|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[4 * out_strd]), temp5);
  336|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[5 * out_strd]), temp6);
  337|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[6 * out_strd]), temp7);
  338|  8.27k|    _mm_storel_epi64((__m128i *)(&pu1_out[7 * out_strd]), temp8);
  339|  8.27k|}
ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3:
  398|   181k| {
  399|   181k|    WORD16 q0 = pi2_dc_src[0];      // DC value won't be dequantized for chroma inverse transform
  400|   181k|    WORD16 i_macro = ((q0 + 32) >> 6);
  401|       |
  402|   181k|    __m128i pred_r0, pred_r1, pred_r2, pred_r3, sign_reg;
  403|   181k|    __m128i zero_8x16b = _mm_setzero_si128();          // all bits reset to zero
  404|   181k|    __m128i chroma_mask = _mm_set1_epi16 (0xFF);
  405|   181k|    __m128i value_add = _mm_set1_epi16(i_macro);
  406|   181k|    __m128i out_r0, out_r1, out_r2, out_r3;
  407|       |
  408|   181k|    UNUSED (pi2_src);
  ------------------
  |  |   45|   181k|#define UNUSED(x) ((void)(x))
  ------------------
  409|   181k|    UNUSED (pu2_iscal_mat);
  ------------------
  |  |   45|   181k|#define UNUSED(x) ((void)(x))
  ------------------
  410|   181k|    UNUSED (pu2_weigh_mat);
  ------------------
  |  |   45|   181k|#define UNUSED(x) ((void)(x))
  ------------------
  411|   181k|    UNUSED (u4_qp_div_6);
  ------------------
  |  |   45|   181k|#define UNUSED(x) ((void)(x))
  ------------------
  412|   181k|    UNUSED (pi2_tmp);
  ------------------
  |  |   45|   181k|#define UNUSED(x) ((void)(x))
  ------------------
  413|       |
  414|       |    //Load pred buffer
  415|   181k|    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
  416|   181k|    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
  417|   181k|    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
  418|   181k|    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
  419|       |
  420|   181k|    pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
  421|   181k|    pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
  422|   181k|    pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
  423|   181k|    pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
  424|       |
  425|   181k|    pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); //p00 p01 p02 p03 p10 p11 p12 p13
  426|   181k|    pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); //p20 p21 p22p p23 p30 p31 p32 p33
  427|       |
  428|   181k|    pred_r0 = _mm_add_epi16(value_add, pred_r0);
  429|   181k|    pred_r2 = _mm_add_epi16(value_add, pred_r2);
  430|       |
  431|       |    /*------------------------------------------------------------------*/
  432|       |    //Clipping the results to 8 bits
  433|   181k|    sign_reg = _mm_cmpgt_epi16(pred_r0, zero_8x16b);        // sign check
  434|   181k|    pred_r0 = _mm_and_si128(pred_r0, sign_reg);
  435|   181k|    sign_reg = _mm_cmpgt_epi16(pred_r2, zero_8x16b);
  436|   181k|    pred_r2 = _mm_and_si128(pred_r2, sign_reg);
  437|       |
  438|   181k|    pred_r0 = _mm_packus_epi16(pred_r0, pred_r2);
  439|   181k|    pred_r1 = _mm_srli_si128(pred_r0, 4);
  440|   181k|    pred_r2 = _mm_srli_si128(pred_r1, 4);
  441|   181k|    pred_r3 = _mm_srli_si128(pred_r2, 4);
  442|       |
  443|   181k|    pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- all 16 bits
  444|   181k|    pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- all 16 bits
  445|   181k|    pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- all 16 bits
  446|   181k|    pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- all 16 bits
  447|       |
  448|   181k|    chroma_mask = _mm_set1_epi16 (0xFF00);
  449|   181k|    out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
  450|   181k|    out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[out_strd]));
  451|   181k|    out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * out_strd]));
  452|   181k|    out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * out_strd]));
  453|       |
  454|   181k|    out_r0 = _mm_and_si128(out_r0, chroma_mask);
  455|   181k|    out_r1 = _mm_and_si128(out_r1, chroma_mask);
  456|   181k|    out_r2 = _mm_and_si128(out_r2, chroma_mask);
  457|   181k|    out_r3 = _mm_and_si128(out_r3, chroma_mask);
  458|       |
  459|   181k|    out_r0 = _mm_add_epi8(out_r0, pred_r0);
  460|   181k|    out_r1 = _mm_add_epi8(out_r1, pred_r1);
  461|   181k|    out_r2 = _mm_add_epi8(out_r2, pred_r2);
  462|   181k|    out_r3 = _mm_add_epi8(out_r3, pred_r3);
  463|       |
  464|   181k|    _mm_storel_epi64((__m128i *)(&pu1_out[0]), out_r0);
  465|   181k|    _mm_storel_epi64((__m128i *)(&pu1_out[out_strd]), out_r1);
  466|   181k|    _mm_storel_epi64((__m128i *)(&pu1_out[2 * out_strd]), out_r2);
  467|   181k|    _mm_storel_epi64((__m128i *)(&pu1_out[3 * out_strd]), out_r3);
  468|   181k|}

ih264_iquant_itrans_recon_4x4_sse42:
  111|   124k| {
  112|   124k|    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
  113|   124k|    __m128i src_r0_r1, src_r2_r3;
  114|   124k|    __m128i src_r0, src_r1, src_r2, src_r3;
  115|   124k|    __m128i scalemat_r0_r1, scalemat_r2_r3;
  116|   124k|    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
  117|   124k|    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
  118|   124k|    __m128i zero_8x16b = _mm_setzero_si128();          // all bits reset to zero
  119|   124k|    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  120|   124k|    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
  121|   124k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (121:41): [True: 21.3k, False: 103k]
  ------------------
  122|   124k|    __m128i value_32 = _mm_set1_epi32(32);
  123|   124k|    UNUSED (pi2_tmp);
  ------------------
  |  |   45|   124k|#define UNUSED(x) ((void)(x))
  ------------------
  124|       |
  125|       |    /*************************************************************/
  126|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  127|       |    /* operations on platform                                    */
  128|       |    /*************************************************************/
  129|   124k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
  130|   124k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
  131|   124k|    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
  132|   124k|    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
  133|   124k|    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
  134|   124k|    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
  135|       |
  136|   124k|    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  137|   124k|    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  138|       |
  139|   124k|    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  140|   124k|    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  141|   124k|    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  142|   124k|    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  143|       |
  144|   124k|    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  145|   124k|    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
  146|   124k|    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
  147|   124k|    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
  148|       |
  149|   124k|    temp4 = _mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long
  150|   124k|    temp5 = _mm_madd_epi16(src_r1, temp5);
  151|   124k|    temp6 = _mm_madd_epi16(src_r2, temp6);
  152|   124k|    temp7 = _mm_madd_epi16(src_r3, temp7);
  153|       |
  154|   124k|    if (u4_qp_div_6 >= 4) {
  ------------------
  |  Branch (154:9): [True: 103k, False: 21.3k]
  ------------------
  155|   103k|        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
  156|   103k|        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
  157|   103k|        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
  158|   103k|        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
  159|   103k|    } else {
  160|  21.3k|        temp4 = _mm_add_epi32(temp4, add_rshift);
  161|  21.3k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  162|  21.3k|        temp6 = _mm_add_epi32(temp6, add_rshift);
  163|  21.3k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  164|  21.3k|        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
  165|  21.3k|        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
  166|  21.3k|        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
  167|  21.3k|        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
  168|  21.3k|    }
  169|       |
  170|   124k|    if (iq_start_idx == 1)
  ------------------
  |  Branch (170:9): [True: 3.89k, False: 120k]
  ------------------
  171|  3.89k|        resq_r0 = _mm_insert_epi32(resq_r0,(WORD32)pi2_dc_ld_addr[0],0);
  172|       |    /* Perform Inverse transform */
  173|       |    /*-------------------------------------------------------------*/
  174|       |    /* IDCT [ Horizontal transformation ]                          */
  175|       |    /*-------------------------------------------------------------*/
  176|       |    // Matrix transpose
  177|       |    /*
  178|       |     *  a0 a1 a2 a3
  179|       |     *  b0 b1 b2 b3
  180|       |     *  c0 c1 c2 c3
  181|       |     *  d0 d1 d2 d3
  182|       |     */
  183|   124k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);                  //a0 b0 a1 b1
  184|   124k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);                  //c0 d0 c1 d1
  185|   124k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);                  //a2 b2 a3 b3
  186|   124k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);                  //c2 d2 c3 d3
  187|   124k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);                    //a0 b0 c0 d0
  188|   124k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);                    //a1 b1 c1 d1
  189|   124k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);                    //a2 b2 c2 d2
  190|   124k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);                    //a3 b3 c3 d3
  191|       |    //Transform starts -- horizontal transform
  192|       |    /*------------------------------------------------------------------*/
  193|       |    /* z0 = w0 + w2                                             */
  194|   124k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  195|       |    /* z1 = w0 - w2                                             */
  196|   124k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  197|       |    /* z2 = (w1 >> 1) - w3                                      */
  198|   124k|    temp2 = _mm_srai_epi32(resq_r1, 1);                         //(w1>>1)
  199|   124k|    temp2 = _mm_sub_epi32(temp2, resq_r3);                      //(w1>>1) - w3
  200|       |    /* z3 = w1 + (w3 >> 1)                                      */
  201|   124k|    temp3 = _mm_srai_epi32(resq_r3, 1);                         //(w3>>1) + w1
  202|   124k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  203|       |    /*----------------------------------------------------------*/
  204|       |    /* x0 = z0 + z3                                             */
  205|   124k|    resq_r0 = _mm_add_epi32(temp0, temp3);
  206|       |    /* x1 = z1 + z2                                             */
  207|   124k|    resq_r1 = _mm_add_epi32(temp1, temp2);
  208|       |    /* x2 = z1 - z2                                             */
  209|   124k|    resq_r2 = _mm_sub_epi32(temp1, temp2);
  210|       |    /* x3 = z0 - z3                                             */
  211|   124k|    resq_r3 = _mm_sub_epi32(temp0, temp3);
  212|       |    // Matrix transpose
  213|       |    /*
  214|       |     *  a0 b0 c0 d0
  215|       |     *  a1 b1 c1 d1
  216|       |     *  a2 b2 c2 d2
  217|       |     *  a3 b3 c3 d3
  218|       |     */
  219|   124k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);                  //a0 a1 b0 b1
  220|   124k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);                  //a2 a3 b2 b3
  221|   124k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);                  //c0 c1 d0 d1
  222|   124k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);                  //c2 c3 d2 d3
  223|   124k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);                    //a0 a1 a2 a3
  224|   124k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);                    //b0 b1 b2 b3
  225|   124k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);                    //c0 c1 c2 c3
  226|   124k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);                    //d0 d1 d2 d3
  227|       |    //Transform ends -- horizontal transform
  228|       |
  229|       |    //Load pred buffer
  230|   124k|    pred_r0 = loadu_32(&pu1_pred[0]); //p00 p01 p02 p03 -- all 8 bits
  231|   124k|    pred_r1 = loadu_32(&pu1_pred[pred_strd]); //p10 p11 p12 p13 -- all 8 bits
  232|   124k|    pred_r2 = loadu_32(&pu1_pred[2 * pred_strd]); //p20 p21 p22 p23 -- all 8 bits
  233|   124k|    pred_r3 = loadu_32(&pu1_pred[3 * pred_strd]); //p30 p31 p32 p33 -- all 8 bits
  234|       |
  235|   124k|    pred_r0 = _mm_cvtepu8_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits
  236|   124k|    pred_r1 = _mm_cvtepu8_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits
  237|   124k|    pred_r2 = _mm_cvtepu8_epi32(pred_r2); //p20 p21 p22 p23 -- all 32 bits
  238|   124k|    pred_r3 = _mm_cvtepu8_epi32(pred_r3); //p30 p31 p32 p33 -- all 32 bits
  239|       |
  240|       |    /*--------------------------------------------------------------*/
  241|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
  242|       |    /*                                                              */
  243|       |    /* Add the prediction and store it back to same buffer          */
  244|       |    /*--------------------------------------------------------------*/
  245|       |    /* z0j = y0j + y2j                                                        */
  246|   124k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  247|       |    /* z1j = y0j - y2j                                                        */
  248|   124k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  249|       |    /* z2j = (y1j>>1) - y3j                                                        */
  250|   124k|    temp2 = _mm_srai_epi32(resq_r1, 1);                             //(y1j>>1)
  251|   124k|    temp2 = _mm_sub_epi32(temp2, resq_r3);
  252|       |    /* z3j = y1j + (y3j>>1)                                                        */
  253|   124k|    temp3 = _mm_srai_epi32(resq_r3, 1);                             //(y3j>>1)
  254|   124k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  255|       |
  256|       |    /* x0j = z0j + z3j                                                        */
  257|   124k|    temp4 = _mm_add_epi32(temp0, temp3);
  258|   124k|    temp4 = _mm_add_epi32(temp4, value_32);
  259|   124k|    temp4 = _mm_srai_epi32(temp4, 6);
  260|   124k|    temp4 = _mm_add_epi32(temp4, pred_r0);
  261|       |    /* x1j = z1j + z2j                                                        */
  262|   124k|    temp5 = _mm_add_epi32(temp1, temp2);
  263|   124k|    temp5 = _mm_add_epi32(temp5, value_32);
  264|   124k|    temp5 = _mm_srai_epi32(temp5, 6);
  265|   124k|    temp5 = _mm_add_epi32(temp5, pred_r1);
  266|       |    /* x2j = z1j - z2j                                                        */
  267|   124k|    temp6 = _mm_sub_epi32(temp1, temp2);
  268|   124k|    temp6 = _mm_add_epi32(temp6, value_32);
  269|   124k|    temp6 = _mm_srai_epi32(temp6, 6);
  270|   124k|    temp6 = _mm_add_epi32(temp6, pred_r2);
  271|       |    /* x3j = z0j - z3j                                                        */
  272|   124k|    temp7 = _mm_sub_epi32(temp0, temp3);
  273|   124k|    temp7 = _mm_add_epi32(temp7, value_32);
  274|   124k|    temp7 = _mm_srai_epi32(temp7, 6);
  275|   124k|    temp7 = _mm_add_epi32(temp7, pred_r3);
  276|       |
  277|       |    // 32-bit to 16-bit conversion
  278|   124k|    temp0 = _mm_packs_epi32(temp4, temp5);
  279|   124k|    temp1 = _mm_packs_epi32(temp6, temp7);
  280|       |    /*------------------------------------------------------------------*/
  281|       |    //Clipping the results to 8 bits
  282|   124k|    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b);      // sign check
  283|   124k|    temp0 = _mm_and_si128(temp0, sign_reg);
  284|   124k|    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
  285|   124k|    temp1 = _mm_and_si128(temp1, sign_reg);
  286|       |
  287|   124k|    resq_r0 = _mm_packus_epi16(temp0, temp1);
  288|   124k|    resq_r1 = _mm_srli_si128(resq_r0, 4);
  289|   124k|    resq_r2 = _mm_srli_si128(resq_r1, 4);
  290|   124k|    resq_r3 = _mm_srli_si128(resq_r2, 4);
  291|       |
  292|   124k|    *pu4_out = _mm_cvtsi128_si32(resq_r0);
  293|   124k|    pu1_out += out_strd;
  294|   124k|    pu4_out = (UWORD32 *) (pu1_out);
  295|   124k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r1);
  296|   124k|    pu1_out += out_strd;
  297|   124k|    pu4_out = (UWORD32 *) (pu1_out);
  298|   124k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r2);
  299|   124k|    pu1_out += out_strd;
  300|   124k|    pu4_out = (UWORD32 *) (pu1_out);
  301|   124k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
  302|   124k|}
ih264_iquant_itrans_recon_chroma_4x4_sse42:
  361|  18.1k| {
  362|  18.1k|    __m128i src_r0_r1, src_r2_r3;
  363|  18.1k|    __m128i src_r0, src_r1, src_r2, src_r3;
  364|  18.1k|    __m128i scalemat_r0_r1, scalemat_r2_r3;
  365|  18.1k|    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
  366|  18.1k|    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
  367|  18.1k|    __m128i zero_8x16b = _mm_setzero_si128();          // all bits reset to zero
  368|  18.1k|    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  369|  18.1k|    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
  370|  18.1k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (370:41): [True: 7.46k, False: 10.7k]
  ------------------
  371|  18.1k|    __m128i value_32 = _mm_set1_epi32(32);
  372|  18.1k|    __m128i chroma_mask = _mm_set1_epi16 (0xFF);
  373|  18.1k|    __m128i out_r0, out_r1, out_r2, out_r3;
  374|  18.1k|    UNUSED (pi2_tmp);
  ------------------
  |  |   45|  18.1k|#define UNUSED(x) ((void)(x))
  ------------------
  375|       |
  376|       |    /*************************************************************/
  377|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  378|       |    /* operations on platform                                    */
  379|       |    /*************************************************************/
  380|  18.1k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
  381|  18.1k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
  382|  18.1k|    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
  383|  18.1k|    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
  384|  18.1k|    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
  385|  18.1k|    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
  386|       |
  387|  18.1k|    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  388|  18.1k|    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  389|       |
  390|  18.1k|    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  391|  18.1k|    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  392|  18.1k|    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  393|  18.1k|    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  394|       |
  395|  18.1k|    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  396|  18.1k|    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
  397|  18.1k|    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
  398|  18.1k|    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
  399|       |
  400|  18.1k|    temp4 = _mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long
  401|  18.1k|    temp5 = _mm_madd_epi16(src_r1, temp5);
  402|  18.1k|    temp6 = _mm_madd_epi16(src_r2, temp6);
  403|  18.1k|    temp7 = _mm_madd_epi16(src_r3, temp7);
  404|       |
  405|  18.1k|    if (u4_qp_div_6 >= 4) {
  ------------------
  |  Branch (405:9): [True: 10.7k, False: 7.46k]
  ------------------
  406|  10.7k|        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
  407|  10.7k|        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
  408|  10.7k|        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
  409|  10.7k|        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
  410|  10.7k|    } else {
  411|  7.46k|        temp4 = _mm_add_epi32(temp4, add_rshift);
  412|  7.46k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  413|  7.46k|        temp6 = _mm_add_epi32(temp6, add_rshift);
  414|  7.46k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  415|  7.46k|        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
  416|  7.46k|        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
  417|  7.46k|        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
  418|  7.46k|        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
  419|  7.46k|    }
  420|       |
  421|  18.1k|    resq_r0 = _mm_insert_epi32(resq_r0,(WORD32)pi2_dc_ld_addr[0],0);
  422|       |    /* Perform Inverse transform */
  423|       |    /*-------------------------------------------------------------*/
  424|       |    /* IDCT [ Horizontal transformation ]                          */
  425|       |    /*-------------------------------------------------------------*/
  426|       |    // Matrix transpose
  427|       |    /*
  428|       |     *  a0 a1 a2 a3
  429|       |     *  b0 b1 b2 b3
  430|       |     *  c0 c1 c2 c3
  431|       |     *  d0 d1 d2 d3
  432|       |     */
  433|  18.1k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);                  //a0 b0 a1 b1
  434|  18.1k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);                  //c0 d0 c1 d1
  435|  18.1k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);                  //a2 b2 a3 b3
  436|  18.1k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);                  //c2 d2 c3 d3
  437|  18.1k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);                    //a0 b0 c0 d0
  438|  18.1k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);                    //a1 b1 c1 d1
  439|  18.1k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);                    //a2 b2 c2 d2
  440|  18.1k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);                    //a3 b3 c3 d3
  441|       |    //Transform starts -- horizontal transform
  442|       |    /*------------------------------------------------------------------*/
  443|       |    /* z0 = w0 + w2                                             */
  444|  18.1k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  445|       |    /* z1 = w0 - w2                                             */
  446|  18.1k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  447|       |    /* z2 = (w1 >> 1) - w3                                      */
  448|  18.1k|    temp2 = _mm_srai_epi32(resq_r1, 1);                         //(w1>>1)
  449|  18.1k|    temp2 = _mm_sub_epi32(temp2, resq_r3);                      //(w1>>1) - w3
  450|       |    /* z3 = w1 + (w3 >> 1)                                      */
  451|  18.1k|    temp3 = _mm_srai_epi32(resq_r3, 1);                         //(w3>>1) + w1
  452|  18.1k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  453|       |    /*----------------------------------------------------------*/
  454|       |    /* x0 = z0 + z3                                             */
  455|  18.1k|    resq_r0 = _mm_add_epi32(temp0, temp3);
  456|       |    /* x1 = z1 + z2                                             */
  457|  18.1k|    resq_r1 = _mm_add_epi32(temp1, temp2);
  458|       |    /* x2 = z1 - z2                                             */
  459|  18.1k|    resq_r2 = _mm_sub_epi32(temp1, temp2);
  460|       |    /* x3 = z0 - z3                                             */
  461|  18.1k|    resq_r3 = _mm_sub_epi32(temp0, temp3);
  462|       |    // Matrix transpose
  463|       |    /*
  464|       |     *  a0 b0 c0 d0
  465|       |     *  a1 b1 c1 d1
  466|       |     *  a2 b2 c2 d2
  467|       |     *  a3 b3 c3 d3
  468|       |     */
  469|  18.1k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);                  //a0 a1 b0 b1
  470|  18.1k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);                  //a2 a3 b2 b3
  471|  18.1k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);                  //c0 c1 d0 d1
  472|  18.1k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);                  //c2 c3 d2 d3
  473|  18.1k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);                    //a0 a1 a2 a3
  474|  18.1k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);                    //b0 b1 b2 b3
  475|  18.1k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);                    //c0 c1 c2 c3
  476|  18.1k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);                    //d0 d1 d2 d3
  477|       |    //Transform ends -- horizontal transform
  478|       |
  479|       |    //Load pred buffer
  480|  18.1k|    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
  481|  18.1k|    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
  482|  18.1k|    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
  483|  18.1k|    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
  484|       |
  485|  18.1k|    pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
  486|  18.1k|    pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
  487|  18.1k|    pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
  488|  18.1k|    pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
  489|       |
  490|  18.1k|    pred_r0 = _mm_cvtepu16_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits
  491|  18.1k|    pred_r1 = _mm_cvtepu16_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits
  492|  18.1k|    pred_r2 = _mm_cvtepu16_epi32(pred_r2); //p20 p21 p22 p23 -- all 32 bits
  493|  18.1k|    pred_r3 = _mm_cvtepu16_epi32(pred_r3); //p30 p31 p32 p33 -- all 32 bits
  494|       |
  495|       |    /*--------------------------------------------------------------*/
  496|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
  497|       |    /*                                                              */
  498|       |    /* Add the prediction and store it back to same buffer          */
  499|       |    /*--------------------------------------------------------------*/
  500|       |    /* z0j = y0j + y2j                                                        */
  501|  18.1k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  502|       |    /* z1j = y0j - y2j                                                        */
  503|  18.1k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  504|       |    /* z2j = (y1j>>1) - y3j                                                        */
  505|  18.1k|    temp2 = _mm_srai_epi32(resq_r1, 1);                             //(y1j>>1)
  506|  18.1k|    temp2 = _mm_sub_epi32(temp2, resq_r3);
  507|       |    /* z3j = y1j + (y3j>>1)                                                        */
  508|  18.1k|    temp3 = _mm_srai_epi32(resq_r3, 1);                             //(y3j>>1)
  509|  18.1k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  510|       |
  511|       |    /* x0j = z0j + z3j                                                        */
  512|  18.1k|    temp4 = _mm_add_epi32(temp0, temp3);
  513|  18.1k|    temp4 = _mm_add_epi32(temp4, value_32);
  514|  18.1k|    temp4 = _mm_srai_epi32(temp4, 6);
  515|  18.1k|    temp4 = _mm_add_epi32(temp4, pred_r0);
  516|       |    /* x1j = z1j + z2j                                                        */
  517|  18.1k|    temp5 = _mm_add_epi32(temp1, temp2);
  518|  18.1k|    temp5 = _mm_add_epi32(temp5, value_32);
  519|  18.1k|    temp5 = _mm_srai_epi32(temp5, 6);
  520|  18.1k|    temp5 = _mm_add_epi32(temp5, pred_r1);
  521|       |    /* x2j = z1j - z2j                                                        */
  522|  18.1k|    temp6 = _mm_sub_epi32(temp1, temp2);
  523|  18.1k|    temp6 = _mm_add_epi32(temp6, value_32);
  524|  18.1k|    temp6 = _mm_srai_epi32(temp6, 6);
  525|  18.1k|    temp6 = _mm_add_epi32(temp6, pred_r2);
  526|       |    /* x3j = z0j - z3j                                                        */
  527|  18.1k|    temp7 = _mm_sub_epi32(temp0, temp3);
  528|  18.1k|    temp7 = _mm_add_epi32(temp7, value_32);
  529|  18.1k|    temp7 = _mm_srai_epi32(temp7, 6);
  530|  18.1k|    temp7 = _mm_add_epi32(temp7, pred_r3);
  531|       |
  532|       |    // 32-bit to 16-bit conversion
  533|  18.1k|    temp0 = _mm_packs_epi32(temp4, temp5);
  534|  18.1k|    temp1 = _mm_packs_epi32(temp6, temp7);
  535|       |    /*------------------------------------------------------------------*/
  536|       |    //Clipping the results to 8 bits
  537|  18.1k|    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b);      // sign check
  538|  18.1k|    temp0 = _mm_and_si128(temp0, sign_reg);
  539|  18.1k|    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
  540|  18.1k|    temp1 = _mm_and_si128(temp1, sign_reg);
  541|       |
  542|  18.1k|    resq_r0 = _mm_packus_epi16(temp0, temp1);
  543|  18.1k|    resq_r1 = _mm_srli_si128(resq_r0, 4);
  544|  18.1k|    resq_r2 = _mm_srli_si128(resq_r1, 4);
  545|  18.1k|    resq_r3 = _mm_srli_si128(resq_r2, 4);
  546|       |
  547|  18.1k|    resq_r0 = _mm_cvtepu8_epi16(resq_r0); //p00 p01 p02 p03 -- all 16 bits
  548|  18.1k|    resq_r1 = _mm_cvtepu8_epi16(resq_r1); //p10 p11 p12 p13 -- all 16 bits
  549|  18.1k|    resq_r2 = _mm_cvtepu8_epi16(resq_r2); //p20 p21 p22 p23 -- all 16 bits
  550|  18.1k|    resq_r3 = _mm_cvtepu8_epi16(resq_r3); //p30 p31 p32 p33 -- all 16 bits
  551|       |
  552|  18.1k|    chroma_mask = _mm_set1_epi16 (0xFF00);
  553|  18.1k|    out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
  554|  18.1k|    out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[out_strd]));
  555|  18.1k|    out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * out_strd]));
  556|  18.1k|    out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * out_strd]));
  557|       |
  558|  18.1k|    out_r0 = _mm_and_si128(out_r0, chroma_mask);
  559|  18.1k|    out_r1 = _mm_and_si128(out_r1, chroma_mask);
  560|  18.1k|    out_r2 = _mm_and_si128(out_r2, chroma_mask);
  561|  18.1k|    out_r3 = _mm_and_si128(out_r3, chroma_mask);
  562|       |
  563|  18.1k|    out_r0 = _mm_add_epi8(out_r0, resq_r0);
  564|  18.1k|    out_r1 = _mm_add_epi8(out_r1, resq_r1);
  565|  18.1k|    out_r2 = _mm_add_epi8(out_r2, resq_r2);
  566|  18.1k|    out_r3 = _mm_add_epi8(out_r3, resq_r3);
  567|       |
  568|  18.1k|    _mm_storel_epi64((__m128i *)(&pu1_out[0]), out_r0);
  569|  18.1k|    _mm_storel_epi64((__m128i *)(&pu1_out[out_strd]), out_r1);
  570|  18.1k|    _mm_storel_epi64((__m128i *)(&pu1_out[2 * out_strd]), out_r2);
  571|  18.1k|    _mm_storel_epi64((__m128i *)(&pu1_out[3 * out_strd]), out_r3);
  572|  18.1k|}

ih264_iquant_itrans_recon_4x4_ssse3:
  111|  12.8k|{
  112|  12.8k|    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
  113|  12.8k|    __m128i src_r0_r1, src_r2_r3;
  114|  12.8k|    __m128i src_r0, src_r1, src_r2, src_r3;
  115|  12.8k|    __m128i scalemat_r0_r1, scalemat_r2_r3, predload_r;
  116|  12.8k|    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
  117|  12.8k|    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
  118|  12.8k|    __m128i zero_8x16b = _mm_setzero_si128();          // all bits reset to zero
  119|  12.8k|    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  120|  12.8k|    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
  121|  12.8k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (121:41): [True: 3.04k, False: 9.79k]
  ------------------
  122|  12.8k|    __m128i value_32 = _mm_set1_epi32(32);
  123|  12.8k|    UNUSED (pi2_tmp);
  ------------------
  |  |   45|  12.8k|#define UNUSED(x) ((void)(x))
  ------------------
  124|  12.8k|    UNUSED (pi2_dc_ld_addr);
  ------------------
  |  |   45|  12.8k|#define UNUSED(x) ((void)(x))
  ------------------
  125|       |
  126|       |    /*************************************************************/
  127|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  128|       |    /* operations on platform                                    */
  129|       |    /*************************************************************/
  130|  12.8k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
  131|  12.8k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
  132|  12.8k|    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
  133|  12.8k|    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
  134|  12.8k|    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
  135|  12.8k|    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
  136|       |
  137|  12.8k|    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  138|  12.8k|    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  139|       |
  140|  12.8k|    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  141|  12.8k|    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  142|  12.8k|    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  143|  12.8k|    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  144|       |
  145|  12.8k|    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  146|  12.8k|    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
  147|  12.8k|    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
  148|  12.8k|    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
  149|       |
  150|  12.8k|    temp4 = _mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long
  151|  12.8k|    temp5 = _mm_madd_epi16(src_r1, temp5);
  152|  12.8k|    temp6 = _mm_madd_epi16(src_r2, temp6);
  153|  12.8k|    temp7 = _mm_madd_epi16(src_r3, temp7);
  154|       |
  155|  12.8k|    if (u4_qp_div_6 >= 4) {
  ------------------
  |  Branch (155:9): [True: 9.79k, False: 3.04k]
  ------------------
  156|  9.79k|        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
  157|  9.79k|        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
  158|  9.79k|        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
  159|  9.79k|        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
  160|  9.79k|    } else {
  161|  3.04k|        temp4 = _mm_add_epi32(temp4, add_rshift);
  162|  3.04k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  163|  3.04k|        temp6 = _mm_add_epi32(temp6, add_rshift);
  164|  3.04k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  165|  3.04k|        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
  166|  3.04k|        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
  167|  3.04k|        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
  168|  3.04k|        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
  169|  3.04k|    }
  170|       |
  171|  12.8k|    if (iq_start_idx == 1)
  ------------------
  |  Branch (171:9): [True: 1.32k, False: 11.5k]
  ------------------
  172|  1.32k|    {
  173|  1.32k|        resq_r0 = _mm_insert_epi16(resq_r0,(WORD32)pi2_src[0],0);
  174|  1.32k|        if (pi2_src[0] >= 0)
  ------------------
  |  Branch (174:13): [True: 982, False: 344]
  ------------------
  175|    982|            resq_r0 = _mm_insert_epi16(resq_r0,0,1);
  176|    344|        else
  177|    344|            resq_r0 = _mm_insert_epi16(resq_r0,-1,1);
  178|  1.32k|    }
  179|       |    /* Perform Inverse transform */
  180|       |    /*-------------------------------------------------------------*/
  181|       |    /* IDCT [ Horizontal transformation ]                          */
  182|       |    /*-------------------------------------------------------------*/
  183|       |    // Matrix transpose
  184|       |    /*
  185|       |     *  a0 a1 a2 a3
  186|       |     *  b0 b1 b2 b3
  187|       |     *  c0 c1 c2 c3
  188|       |     *  d0 d1 d2 d3
  189|       |     */
  190|  12.8k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);                  //a0 b0 a1 b1
  191|  12.8k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);                  //c0 d0 c1 d1
  192|  12.8k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);                  //a2 b2 a3 b3
  193|  12.8k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);                  //c2 d2 c3 d3
  194|  12.8k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);                    //a0 b0 c0 d0
  195|  12.8k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);                    //a1 b1 c1 d1
  196|  12.8k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);                    //a2 b2 c2 d2
  197|  12.8k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);                    //a3 b3 c3 d3
  198|       |    //Transform starts -- horizontal transform
  199|       |    /*------------------------------------------------------------------*/
  200|       |    /* z0 = w0 + w2                                             */
  201|  12.8k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  202|       |    /* z1 = w0 - w2                                             */
  203|  12.8k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  204|       |    /* z2 = (w1 >> 1) - w3                                      */
  205|  12.8k|    temp2 = _mm_srai_epi32(resq_r1, 1);                         //(w1>>1)
  206|  12.8k|    temp2 = _mm_sub_epi32(temp2, resq_r3);                      //(w1>>1) - w3
  207|       |    /* z3 = w1 + (w3 >> 1)                                      */
  208|  12.8k|    temp3 = _mm_srai_epi32(resq_r3, 1);                         //(w3>>1) + w1
  209|  12.8k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  210|       |    /*----------------------------------------------------------*/
  211|       |    /* x0 = z0 + z3                                             */
  212|  12.8k|    resq_r0 = _mm_add_epi32(temp0, temp3);
  213|       |    /* x1 = z1 + z2                                             */
  214|  12.8k|    resq_r1 = _mm_add_epi32(temp1, temp2);
  215|       |    /* x2 = z1 - z2                                             */
  216|  12.8k|    resq_r2 = _mm_sub_epi32(temp1, temp2);
  217|       |    /* x3 = z0 - z3                                             */
  218|  12.8k|    resq_r3 = _mm_sub_epi32(temp0, temp3);
  219|       |    // Matrix transpose
  220|       |    /*
  221|       |     *  a0 b0 c0 d0
  222|       |     *  a1 b1 c1 d1
  223|       |     *  a2 b2 c2 d2
  224|       |     *  a3 b3 c3 d3
  225|       |     */
  226|  12.8k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);                  //a0 a1 b0 b1
  227|  12.8k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);                  //a2 a3 b2 b3
  228|  12.8k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);                  //c0 c1 d0 d1
  229|  12.8k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);                  //c2 c3 d2 d3
  230|  12.8k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);                    //a0 a1 a2 a3
  231|  12.8k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);                    //b0 b1 b2 b3
  232|  12.8k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);                    //c0 c1 c2 c3
  233|  12.8k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);                    //d0 d1 d2 d3
  234|       |    //Transform ends -- horizontal transform
  235|       |
  236|  12.8k|    zero_8x16b = _mm_setzero_si128();                  // all bits reset to zero
  237|       |    //Load pred buffer
  238|  12.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
  239|  12.8k|    pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p00 p01 p02 p03 0 0 0 0 -- all 16 bits
  240|       |
  241|  12.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
  242|  12.8k|    pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p10 p11 p12 p13 0 0 0 0 -- all 16 bits
  243|       |
  244|  12.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
  245|  12.8k|    pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p20 p21 p22 p23 0 0 0 0 -- all 16 bits
  246|       |
  247|  12.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
  248|  12.8k|    pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p30 p31 p32 p33 0 0 0 0 -- all 16 bits
  249|  12.8k|    pred_r0 = _mm_unpacklo_epi16(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- 32 bits sign extended
  250|  12.8k|    pred_r1 = _mm_unpacklo_epi16(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- 32 bits sign extended
  251|  12.8k|    pred_r2 = _mm_unpacklo_epi16(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- 32 bits sign extended
  252|  12.8k|    pred_r3 = _mm_unpacklo_epi16(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- 32 bits sign extended
  253|       |
  254|       |    /*--------------------------------------------------------------*/
  255|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
  256|       |    /*                                                              */
  257|       |    /* Add the prediction and store it back to same buffer          */
  258|       |    /*--------------------------------------------------------------*/
  259|       |    /* z0j = y0j + y2j                                                        */
  260|  12.8k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  261|       |    /* z1j = y0j - y2j                                                        */
  262|  12.8k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  263|       |    /* z2j = (y1j>>1) - y3j                                                        */
  264|  12.8k|    temp2 = _mm_srai_epi32(resq_r1, 1);                             //(y1j>>1)
  265|  12.8k|    temp2 = _mm_sub_epi32(temp2, resq_r3);
  266|       |    /* z3j = y1j + (y3j>>1)                                                        */
  267|  12.8k|    temp3 = _mm_srai_epi32(resq_r3, 1);                             //(y3j>>1)
  268|  12.8k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  269|       |
  270|       |    /* x0j = z0j + z3j                                                        */
  271|  12.8k|    temp4 = _mm_add_epi32(temp0, temp3);
  272|  12.8k|    temp4 = _mm_add_epi32(temp4, value_32);
  273|  12.8k|    temp4 = _mm_srai_epi32(temp4, 6);
  274|  12.8k|    temp4 = _mm_add_epi32(temp4, pred_r0);
  275|       |    /* x1j = z1j + z2j                                                        */
  276|  12.8k|    temp5 = _mm_add_epi32(temp1, temp2);
  277|  12.8k|    temp5 = _mm_add_epi32(temp5, value_32);
  278|  12.8k|    temp5 = _mm_srai_epi32(temp5, 6);
  279|  12.8k|    temp5 = _mm_add_epi32(temp5, pred_r1);
  280|       |    /* x2j = z1j - z2j                                                        */
  281|  12.8k|    temp6 = _mm_sub_epi32(temp1, temp2);
  282|  12.8k|    temp6 = _mm_add_epi32(temp6, value_32);
  283|  12.8k|    temp6 = _mm_srai_epi32(temp6, 6);
  284|  12.8k|    temp6 = _mm_add_epi32(temp6, pred_r2);
  285|       |    /* x3j = z0j - z3j                                                        */
  286|  12.8k|    temp7 = _mm_sub_epi32(temp0, temp3);
  287|  12.8k|    temp7 = _mm_add_epi32(temp7, value_32);
  288|  12.8k|    temp7 = _mm_srai_epi32(temp7, 6);
  289|  12.8k|    temp7 = _mm_add_epi32(temp7, pred_r3);
  290|       |
  291|       |    // 32-bit to 16-bit conversion
  292|  12.8k|    temp0 = _mm_packs_epi32(temp4, temp5);
  293|  12.8k|    temp1 = _mm_packs_epi32(temp6, temp7);
  294|       |    /*------------------------------------------------------------------*/
  295|       |    //Clipping the results to 8 bits
  296|  12.8k|    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b);      // sign check
  297|  12.8k|    temp0 = _mm_and_si128(temp0, sign_reg);
  298|  12.8k|    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
  299|  12.8k|    temp1 = _mm_and_si128(temp1, sign_reg);
  300|       |
  301|  12.8k|    resq_r0 = _mm_packus_epi16(temp0, temp1);
  302|  12.8k|    resq_r1 = _mm_srli_si128(resq_r0, 4);
  303|  12.8k|    resq_r2 = _mm_srli_si128(resq_r1, 4);
  304|  12.8k|    resq_r3 = _mm_srli_si128(resq_r2, 4);
  305|       |
  306|  12.8k|    *pu4_out = _mm_cvtsi128_si32(resq_r0);
  307|  12.8k|    pu1_out += out_strd;
  308|  12.8k|    pu4_out = (UWORD32 *) (pu1_out);
  309|  12.8k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r1);
  310|  12.8k|    pu1_out += out_strd;
  311|  12.8k|    pu4_out = (UWORD32 *) (pu1_out);
  312|  12.8k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r2);
  313|  12.8k|    pu1_out += out_strd;
  314|  12.8k|    pu4_out = (UWORD32 *) (pu1_out);
  315|  12.8k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
  316|  12.8k|}
ih264_iquant_itrans_recon_8x8_ssse3:
  380|  48.8k|{
  381|  48.8k|    __m128i src_r0;
  382|  48.8k|    __m128i scalemat_r0;
  383|  48.8k|    __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
  384|       |    // __m128i one_8x16b = _mm_set1_epi8(255); // all bits set to 1
  385|       |    // __m128i one_zero_mask = _mm_unpacklo_epi16(one_8x16b, zero_8x16b); // 1 0 1 0 1 0 1 0 --- 16 bits size
  386|  48.8k|    __m128i value_32 = _mm_set1_epi32(32);
  387|  48.8k|    __m128i add_rshift = _mm_set1_epi32((qp_div < 6) ? (1 << (5 - qp_div)) : 0);
  ------------------
  |  Branch (387:41): [True: 40.5k, False: 8.21k]
  ------------------
  388|  48.8k|    __m128i dequant_r0;
  389|  48.8k|    __m128i predload_r;
  390|  48.8k|    __m128i pred_r0_1, pred_r1_1, pred_r2_1, pred_r3_1, pred_r4_1, pred_r5_1,
  391|  48.8k|            pred_r6_1, pred_r7_1;
  392|  48.8k|    __m128i sign_reg;
  393|  48.8k|    __m128i src_r0_1, src_r0_2;
  394|  48.8k|    __m128i scalemat_r0_1, scalemat_r0_2;
  395|  48.8k|    __m128i temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  396|  48.8k|    __m128i temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17,
  397|  48.8k|            temp18, temp19, temp20;
  398|       |    // To store dequantization results
  399|  48.8k|    __m128i resq_r0_1, resq_r0_2, resq_r1_1, resq_r1_2, resq_r2_1, resq_r2_2,
  400|  48.8k|            resq_r3_1, resq_r3_2, resq_r4_1, resq_r4_2, resq_r5_1, resq_r5_2,
  401|  48.8k|            resq_r6_1, resq_r6_2, resq_r7_1, resq_r7_2;
  402|  48.8k|    UNUSED (pi2_tmp);
  ------------------
  |  |   45|  48.8k|#define UNUSED(x) ((void)(x))
  ------------------
  403|  48.8k|    UNUSED (iq_start_idx);
  ------------------
  |  |   45|  48.8k|#define UNUSED(x) ((void)(x))
  ------------------
  404|  48.8k|    UNUSED (pi2_dc_ld_addr);
  ------------------
  |  |   45|  48.8k|#define UNUSED(x) ((void)(x))
  ------------------
  405|       |
  406|       |    /*************************************************************/
  407|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  408|       |    /* operations on platform. Note : DC coeff is not scaled     */
  409|       |    /*************************************************************/
  410|       |
  411|       |    // Row 0 processing
  412|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a04 a05 a06 a07 -- the source matrix 0th row
  413|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat)); //b00 b01 b02 b03 b04 b05 b06 b07 -- the scaling matrix 0th row
  414|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[0])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  415|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  416|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  417|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  418|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  419|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  420|       |
  421|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  422|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  423|       |
  424|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (424:9): [True: 8.21k, False: 40.5k]
  ------------------
  425|  8.21k|        resq_r0_1 = _mm_slli_epi32(temp5, qp_div - 6);
  426|  8.21k|        resq_r0_2 = _mm_slli_epi32(temp7, qp_div - 6);
  427|  40.5k|    } else {
  428|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  429|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  430|  40.5k|        resq_r0_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  431|  40.5k|        resq_r0_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  432|  40.5k|    }
  433|  48.8k|    resq_r0_1 = _mm_packs_epi32(resq_r0_1, resq_r0_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  434|       |    // Row 1 processing
  435|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 1st row
  436|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 8)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 1st row
  437|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[8])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  438|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  439|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  440|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  441|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  442|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  443|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  444|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  445|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (445:9): [True: 8.21k, False: 40.5k]
  ------------------
  446|  8.21k|        resq_r1_1 = _mm_slli_epi32(temp5, qp_div - 6);
  447|  8.21k|        resq_r1_2 = _mm_slli_epi32(temp7, qp_div - 6);
  448|  40.5k|    } else {
  449|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  450|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  451|  40.5k|        resq_r1_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  452|  40.5k|        resq_r1_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  453|  40.5k|    }
  454|  48.8k|    resq_r1_1 = _mm_packs_epi32(resq_r1_1, resq_r1_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  455|       |    // Row 2 processing
  456|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 16)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 2nd row
  457|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 16)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 2nd row
  458|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[16])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  459|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  460|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  461|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  462|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  463|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  464|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  465|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  466|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (466:9): [True: 8.21k, False: 40.5k]
  ------------------
  467|  8.21k|        resq_r2_1 = _mm_slli_epi32(temp5, qp_div - 6);
  468|  8.21k|        resq_r2_2 = _mm_slli_epi32(temp7, qp_div - 6);
  469|  40.5k|    } else {
  470|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  471|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  472|  40.5k|        resq_r2_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  473|  40.5k|        resq_r2_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  474|  40.5k|    }
  475|  48.8k|    resq_r2_1 = _mm_packs_epi32(resq_r2_1, resq_r2_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  476|       |    // Row 3 processing
  477|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 24)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 3rd row
  478|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 24)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 3rd row
  479|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[24])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  480|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  481|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  482|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  483|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  484|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  485|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 - 32 bits long
  486|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  487|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (487:9): [True: 8.21k, False: 40.5k]
  ------------------
  488|  8.21k|        resq_r3_1 = _mm_slli_epi32(temp5, qp_div - 6);
  489|  8.21k|        resq_r3_2 = _mm_slli_epi32(temp7, qp_div - 6);
  490|  40.5k|    } else {
  491|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  492|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  493|  40.5k|        resq_r3_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  494|  40.5k|        resq_r3_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  495|  40.5k|    }
  496|  48.8k|    resq_r3_1 = _mm_packs_epi32(resq_r3_1, resq_r3_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  497|       |    // Row 4 processing
  498|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 32)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 4th row
  499|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 32)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 4th row
  500|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[32])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  501|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  502|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  503|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  504|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  505|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  506|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  507|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  508|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (508:9): [True: 8.21k, False: 40.5k]
  ------------------
  509|  8.21k|        resq_r4_1 = _mm_slli_epi32(temp5, qp_div - 6);
  510|  8.21k|        resq_r4_2 = _mm_slli_epi32(temp7, qp_div - 6);
  511|       |
  512|  40.5k|    } else {
  513|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  514|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  515|  40.5k|        resq_r4_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  516|  40.5k|        resq_r4_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  517|  40.5k|    }
  518|  48.8k|    resq_r4_1 = _mm_packs_epi32(resq_r4_1, resq_r4_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  519|       |    // Row 5 processing
  520|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 40)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 5th row
  521|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 40)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 5th row
  522|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[40])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  523|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  524|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  525|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  526|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  527|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  528|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  529|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  530|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (530:9): [True: 8.21k, False: 40.5k]
  ------------------
  531|  8.21k|        resq_r5_1 = _mm_slli_epi32(temp5, qp_div - 6);
  532|  8.21k|        resq_r5_2 = _mm_slli_epi32(temp7, qp_div - 6);
  533|       |        //resq_r5_1 = _mm_and_si128(resq_r5_1,one_zero_mask);
  534|       |        //resq_r5_2 = _mm_and_si128(resq_r5_2,one_zero_mask);
  535|  40.5k|    } else {
  536|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  537|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  538|  40.5k|        resq_r5_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  539|  40.5k|        resq_r5_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  540|  40.5k|    }
  541|  48.8k|    resq_r5_1 = _mm_packs_epi32(resq_r5_1, resq_r5_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  542|       |    // Row 6 processing
  543|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 48)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 6th row
  544|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 48)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 6th row
  545|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[48])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  546|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  547|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  548|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  549|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  550|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  551|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  552|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  553|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (553:9): [True: 8.21k, False: 40.5k]
  ------------------
  554|  8.21k|        resq_r6_1 = _mm_slli_epi32(temp5, qp_div - 6);
  555|  8.21k|        resq_r6_2 = _mm_slli_epi32(temp7, qp_div - 6);
  556|       |        //resq_r6_1 = _mm_and_si128(resq_r6_1,one_zero_mask);
  557|       |        //resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask);
  558|  40.5k|    } else {
  559|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  560|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  561|  40.5k|        resq_r6_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  562|  40.5k|        resq_r6_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  563|       |        //resq_r6_1 = _mm_and_si128(resq_r6_1,one_zero_mask);
  564|       |        //resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask);
  565|  40.5k|    }
  566|  48.8k|    resq_r6_1 = _mm_packs_epi32(resq_r6_1, resq_r6_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  567|       |    // Row 7 processing
  568|  48.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 56)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 7th row
  569|  48.8k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 56)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 7th row
  570|  48.8k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[56])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  571|  48.8k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  572|  48.8k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  573|  48.8k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  574|  48.8k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  575|  48.8k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  576|  48.8k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  577|  48.8k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  578|  48.8k|    if (qp_div >= 6) {
  ------------------
  |  Branch (578:9): [True: 8.21k, False: 40.5k]
  ------------------
  579|  8.21k|        resq_r7_1 = _mm_slli_epi32(temp5, qp_div - 6);
  580|  8.21k|        resq_r7_2 = _mm_slli_epi32(temp7, qp_div - 6);
  581|  40.5k|    } else {
  582|  40.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  583|  40.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  584|  40.5k|        resq_r7_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  585|  40.5k|        resq_r7_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  586|  40.5k|    }
  587|  48.8k|    resq_r7_1 = _mm_packs_epi32(resq_r7_1, resq_r7_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
  588|       |    /* Perform Inverse transform */
  589|       |    /*--------------------------------------------------------------------*/
  590|       |    /* IDCT [ Horizontal transformation ]                                 */
  591|       |    /*--------------------------------------------------------------------*/
  592|       |    // Matrix transpose
  593|       |    /*
  594|       |     *  a0 a1 a2 a3 a4 a5 a6 a7
  595|       |     *  b0 b1 b2 b3 b4 b5 b6 b7
  596|       |     *  c0 c1 c2 c3 c4 c5 c6 c7
  597|       |     *  d0 d1 d2 d3 d4 d5 d6 d7
  598|       |     */
  599|  48.8k|    temp1 = _mm_unpacklo_epi16(resq_r0_1, resq_r1_1); //a0 b0 a1 b1 a2 b2 a3 b3
  600|  48.8k|    temp3 = _mm_unpacklo_epi16(resq_r2_1, resq_r3_1); //c0 d0 c1 d1 c2 d2 c3 d3
  601|  48.8k|    temp2 = _mm_unpackhi_epi16(resq_r0_1, resq_r1_1); //a4 b4 a5 b5 a6 b6 a7 b7
  602|  48.8k|    temp4 = _mm_unpackhi_epi16(resq_r2_1, resq_r3_1); //c4 d4 c5 d5 c6 d6 c7 d7
  603|  48.8k|    resq_r0_1 = _mm_unpacklo_epi32(temp1, temp3); //a0 b0 c0 d0 a1 b1 c1 d1
  604|  48.8k|    resq_r1_1 = _mm_unpackhi_epi32(temp1, temp3); //a2 b2 c2 d2 a3 b3 c3 d3
  605|  48.8k|    resq_r2_1 = _mm_unpacklo_epi32(temp2, temp4); //a4 b4 c4 d4 a5 b5 c5 d5
  606|  48.8k|    resq_r3_1 = _mm_unpackhi_epi32(temp2, temp4); //a6 b6 c6 d6 a7 b7 c7 d7
  607|       |    /*
  608|       |     * e0 e1 e2 e3 e4 e5 e6 e7
  609|       |     * f0 f1 f2 f3 f4 f5 f6 f7
  610|       |     * g0 g1 g2 g3 g4 g5 g6 g7
  611|       |     * h0 h1 h2 h3 h4 h5 h6 h7
  612|       |     */
  613|  48.8k|    temp1 = _mm_unpacklo_epi16(resq_r4_1, resq_r5_1); //e0 f0 e1 f1 e2 f2 e2 f3
  614|  48.8k|    temp3 = _mm_unpacklo_epi16(resq_r6_1, resq_r7_1); //g0 h0 g1 h1 g2 h2 g3 h3
  615|  48.8k|    temp2 = _mm_unpackhi_epi16(resq_r4_1, resq_r5_1); //e4 f4 e5 f5 e6 f6 e7 f7
  616|  48.8k|    temp4 = _mm_unpackhi_epi16(resq_r6_1, resq_r7_1); //g4 h4 g5 h5 g6 h6 g7 h7
  617|  48.8k|    resq_r4_1 = _mm_unpacklo_epi32(temp1, temp3); //e0 f0 g0 h0 e1 f1 g1 h1
  618|  48.8k|    resq_r5_1 = _mm_unpackhi_epi32(temp1, temp3); //e2 f2 g2 h2 e3 f3 g3 h3
  619|  48.8k|    resq_r6_1 = _mm_unpacklo_epi32(temp2, temp4); //e4 f4 g4 h4 e5 f5 g5 h5
  620|  48.8k|    resq_r7_1 = _mm_unpackhi_epi32(temp2, temp4); //e6 f6 g6 h6 e7 f7 g7 h7
  621|       |    /*
  622|       |     * a0 b0 c0 d0 a1 b1 c1 d1
  623|       |     * a2 b2 c2 d2 a3 b3 c3 d3
  624|       |     * a4 b4 c4 d4 a5 b5 c5 d5
  625|       |     * a6 b6 c6 d6 a7 b7 c7 d7
  626|       |     * e0 f0 g0 h0 e1 f1 g1 h1
  627|       |     * e2 f2 g2 h2 e3 f3 g3 h3
  628|       |     * e4 f4 g4 h4 e5 f5 g5 h5
  629|       |     * e6 f6 g6 h6 e7 f7 g7 h7
  630|       |     */
  631|  48.8k|    resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); //a0 b0 c0 d0 e0 f0 g0 h0
  632|  48.8k|    resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); //a1 b1 c1 d1 e1 f1 g1 h1
  633|  48.8k|    resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); //a2 b2 c2 d2 e2 f2 g2 h2
  634|  48.8k|    resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); //a3 b3 c3 d3 e3 f3 g3 h3
  635|  48.8k|    resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); //a4 b4 c4 d4 e4 f4 g4 h4
  636|  48.8k|    resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1); //a5 b5 c5 d5 e5 f5 g5 h5
  637|  48.8k|    resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1); //a6 b6 c6 d6 e6 f6 g6 h6
  638|  48.8k|    resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); //a7 b7 c7 d7 e7 f7 g7 h7
  639|       |
  640|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
  641|  48.8k|    resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); //a1 b1 c1 d1 -- 32 bit
  642|  48.8k|    resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); //e1 f1 g1 h1 -- 32 bit
  643|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
  644|  48.8k|    resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); //a3 b3 c3 d3 -- 32 bit
  645|  48.8k|    resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); //e3 f3 g3 h3 -- 32 bit
  646|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
  647|  48.8k|    resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); //a5 b5 c5 d5 -- 32 bit
  648|  48.8k|    resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg); //e5 f5 g5 h5 -- 32 bit
  649|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
  650|  48.8k|    resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); //a7 b7 c7 d7 -- 32 bit
  651|  48.8k|    resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); //e7 f7 g7 h7 -- 32 bit
  652|       |    //Transform starts -- horizontal transform
  653|       |    /*------------------------------------------------------------------*/
  654|       |    /* y0 = w0 + w4                                                     */
  655|  48.8k|    temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
  656|       |    /* y2 = w0 - w4                                                      */
  657|  48.8k|    temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
  658|       |    /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
  659|  48.8k|    temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5
  660|  48.8k|    temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
  661|  48.8k|    temp4 = _mm_sub_epi32(temp2, resq_r7_1); //-w3+w5-w7
  662|  48.8k|    temp12 = _mm_sub_epi32(temp10, resq_r7_2);
  663|  48.8k|    temp5 = _mm_srai_epi32(resq_r7_1, 1); //w7>>1
  664|  48.8k|    temp13 = _mm_srai_epi32(resq_r7_2, 1);
  665|  48.8k|    temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1)
  666|  48.8k|    temp10 = _mm_sub_epi32(temp12, temp13);
  667|  48.8k|    temp2 = _mm_packs_epi32(temp2, temp10);
  668|       |    /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
  669|  48.8k|    temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); //w1+w7
  670|  48.8k|    temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
  671|  48.8k|    temp4 = _mm_sub_epi32(temp4, resq_r3_1); //w1+w7-w3
  672|  48.8k|    temp12 = _mm_sub_epi32(temp12, resq_r3_2);
  673|  48.8k|    temp5 = _mm_srai_epi32(resq_r3_1, 1); //w3>>1
  674|  48.8k|    temp13 = _mm_srai_epi32(resq_r3_2, 1);
  675|  48.8k|    temp4 = _mm_sub_epi32(temp4, temp5); //w1+w7-w3-(w3>>1)
  676|  48.8k|    temp12 = _mm_sub_epi32(temp12, temp13);
  677|  48.8k|    temp4 = _mm_packs_epi32(temp4, temp12);
  678|       |    /* y4 = (w2 >> 1) - w6                                              */
  679|  48.8k|    temp5 = _mm_srai_epi16(resq_r2_2, 1); //w2>>1
  680|  48.8k|    temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6
  681|       |    /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
  682|  48.8k|    temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1); //w7-w1
  683|  48.8k|    temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
  684|  48.8k|    temp6 = _mm_add_epi32(temp6, resq_r5_1); //w7-w1+w5
  685|  48.8k|    temp14 = _mm_add_epi32(temp14, resq_r5_2);
  686|  48.8k|    temp7 = _mm_srai_epi32(resq_r5_1, 1); //w5>>1
  687|  48.8k|    temp15 = _mm_srai_epi32(resq_r5_2, 1);
  688|  48.8k|    temp6 = _mm_add_epi32(temp6, temp7); //w7-w1_w5+(w5>>1)
  689|  48.8k|    temp14 = _mm_add_epi32(temp14, temp15);
  690|  48.8k|    temp6 = _mm_packs_epi32(temp6, temp14);
  691|       |    /* y6 = w2 + (w6 >> 1)                                              */
  692|  48.8k|    temp7 = _mm_srai_epi16(resq_r6_2, 1); //w6>>1
  693|  48.8k|    temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2
  694|       |    /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
  695|  48.8k|    temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); //w3+w5
  696|  48.8k|    temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
  697|  48.8k|    temp8 = _mm_add_epi32(temp8, resq_r1_1); //w3+w5+w1
  698|  48.8k|    temp16 = _mm_add_epi32(temp16, resq_r1_2);
  699|  48.8k|    temp17 = _mm_srai_epi32(resq_r1_1, 1); //w1>>1
  700|  48.8k|    temp18 = _mm_srai_epi32(resq_r1_2, 1);
  701|  48.8k|    temp8 = _mm_add_epi32(temp8, temp17); //w3+w5+w1+(w1>>1)
  702|  48.8k|    temp16 = _mm_add_epi32(temp16, temp18);
  703|  48.8k|    temp8 = _mm_packs_epi32(temp8, temp16);
  704|       |    /*------------------------------------------------------------------*/
  705|       |    /*------------------------------------------------------------------*/
  706|       |    /* z0 = y0 + y6                                                        */
  707|  48.8k|    resq_r0_1 = _mm_add_epi16(temp1, temp7);
  708|       |    /* z1 = y1 + (y7 >> 2)                                                */
  709|  48.8k|    resq_r1_1 = _mm_srai_epi16(temp8, 2);
  710|  48.8k|    resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
  711|       |    /* z2 = y2 + y4                                                        */
  712|  48.8k|    resq_r2_1 = _mm_add_epi16(temp3, temp5);
  713|       |    /* z3 = y3 + (y5 >> 2)                                                */
  714|  48.8k|    resq_r3_1 = _mm_srai_epi16(temp6, 2);
  715|  48.8k|    resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
  716|       |    /* z4 = y2 - y4                                                        */
  717|  48.8k|    resq_r4_1 = _mm_sub_epi16(temp3, temp5);
  718|       |    /* z5 = (y3 >> 2) - y5                                                 */
  719|  48.8k|    resq_r5_1 = _mm_srai_epi16(temp4, 2);
  720|  48.8k|    resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
  721|       |    /* z6 = y0 - y6                                                     */
  722|  48.8k|    resq_r6_1 = _mm_sub_epi16(temp1, temp7);
  723|       |    /* z7 = y7 - (y1 >> 2)                                                 */
  724|  48.8k|    resq_r7_1 = _mm_srai_epi16(temp2, 2);
  725|  48.8k|    resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
  726|       |    /*------------------------------------------------------------------*/
  727|       |    /*------------------------------------------------------------------*/
  728|       |    /* x0 = z0 + z7                                                        */
  729|  48.8k|    temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
  730|       |    /* x1 = z2 + z5                                                        */
  731|  48.8k|    temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
  732|       |    /* x2 = z4 + z3                                                        */
  733|  48.8k|    temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
  734|       |    /* x3 = z6 + z1                                                        */
  735|  48.8k|    temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
  736|       |    /* x4 = z6 - z1                                                        */
  737|  48.8k|    temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
  738|       |    /* x5 = z4 - z3                                                        */
  739|  48.8k|    temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
  740|       |    /* x6 = z2 - z5                                                        */
  741|  48.8k|    temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
  742|       |    /* x7 = z0 - z7                                                        */
  743|  48.8k|    temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
  744|       |    /*------------------------------------------------------------------*/
  745|       |    // Matrix transpose
  746|       |    /*
  747|       |     *  a0 b0 c0 d0 e0 f0 g0 h0
  748|       |     *  a1 b1 c1 d1 e1 f1 g1 h1
  749|       |     *  a2 b2 c2 d2 e2 f2 g2 h2
  750|       |     *  a3 b3 c3 d3 e3 f3 g3 h3
  751|       |     */
  752|  48.8k|    temp17 = _mm_unpacklo_epi16(temp1, temp2); //a0 a1 b0 b1 c0 c1 d0 d1
  753|  48.8k|    temp19 = _mm_unpacklo_epi16(temp3, temp4); //a2 a3 b2 b3 c2 c3 d2 d3
  754|  48.8k|    temp18 = _mm_unpackhi_epi16(temp1, temp2); //e0 e1 f0 f1 g0 g1 h0 h1
  755|  48.8k|    temp20 = _mm_unpackhi_epi16(temp3, temp4); //e2 e3 f2 f3 g2 g3 h2 h3
  756|       |
  757|  48.8k|    resq_r0_1 = _mm_unpacklo_epi32(temp17, temp19); //a0 a1 a2 a3 b0 b1 b2 b3
  758|  48.8k|    resq_r1_1 = _mm_unpackhi_epi32(temp17, temp19); //c0 c1 c2 c3 d0 d1 d2 d3
  759|  48.8k|    resq_r2_1 = _mm_unpacklo_epi32(temp18, temp20); //e0 e1 e2 e3 f0 f1 f2 f3
  760|  48.8k|    resq_r3_1 = _mm_unpackhi_epi32(temp18, temp20); //g0 g2 g2 g3 h0 h1 h2 h3
  761|       |    /*
  762|       |     *  a4 b4 c4 d4 e4 f4 g4 h4
  763|       |     *  a5 b5 c5 d5 e5 f5 g5 h5
  764|       |     *  a6 b6 c6 d6 e6 f6 g6 h6
  765|       |     *  a7 b7 c7 d7 e7 f7 g7 h7
  766|       |     */
  767|  48.8k|    temp17 = _mm_unpacklo_epi16(temp5, temp6); //a4 a5 b4 b5 c4 c5 d4 d5
  768|  48.8k|    temp19 = _mm_unpacklo_epi16(temp7, temp8); //a6 a7 b6 b7 c6 c7 d6 d7
  769|  48.8k|    temp18 = _mm_unpackhi_epi16(temp5, temp6); //e4 e5 f4 f5 g4 g5 h4 h5
  770|  48.8k|    temp20 = _mm_unpackhi_epi16(temp7, temp8); //e6 e7 f6 f7 g6 g7 h6 h7
  771|       |
  772|  48.8k|    resq_r4_1 = _mm_unpacklo_epi32(temp17, temp19); //a4 a5 a6 a7 b4 b5 b6 b7
  773|  48.8k|    resq_r5_1 = _mm_unpackhi_epi32(temp17, temp19); //c4 c5 c6 c7 d4 d5 d6 d7
  774|  48.8k|    resq_r6_1 = _mm_unpacklo_epi32(temp18, temp20); //e4 e5 e6 e7 f4 f5 f6 f7
  775|  48.8k|    resq_r7_1 = _mm_unpackhi_epi32(temp18, temp20); //g4 g5 g6 g7 h4 h5 h6 h7
  776|       |    /*  a0 a1 a2 a3 b0 b1 b2 b3
  777|       |     *  c0 c1 c2 c3 d0 d1 d2 d3
  778|       |     *  e0 e1 e2 e3 f0 f1 f2 f3
  779|       |     *  g0 g1 g2 g3 h0 h1 h2 h3
  780|       |     *  a4 a5 a6 a7 b4 b5 b6 b7
  781|       |     *  c4 c5 c6 c7 d4 d5 d6 d7
  782|       |     *  e4 e5 e6 e7 f4 f5 f6 f7
  783|       |     *  g4 g5 g6 g7 h4 h5 h6 h7
  784|       |     */
  785|  48.8k|    resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); //a0 a1 a2 a3 a4 a5 a6 a7
  786|  48.8k|    resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); //b0 b1 b2 b3 b4 b5 b6 b7
  787|  48.8k|    resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); //c0 c1 c2 c3 c4 c5 c6 c7
  788|  48.8k|    resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); //d0 d1 d2 d3 d4 d5 d6 d7
  789|  48.8k|    resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); //e0 e1 e2 e3 e4 e5 e6 e7
  790|  48.8k|    resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1); //f0 f1 f2 f3 f4 f5 f6 f7
  791|  48.8k|    resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1); //g0 g1 g2 g3 g4 g5 g6 g7
  792|  48.8k|    resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); //h0 h1 h2 h3 h4 h5 h6 h7
  793|       |
  794|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
  795|  48.8k|    resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); //a1 b1 c1 d1 -- 32 bit
  796|  48.8k|    resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); //e1 f1 g1 h1 -- 32 bit
  797|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
  798|  48.8k|    resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); //a3 b3 c3 d3 -- 32 bit
  799|  48.8k|    resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); //e3 f3 g3 h3 -- 32 bit
  800|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
  801|  48.8k|    resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); //a5 b5 c5 d5 -- 32 bit
  802|  48.8k|    resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg); //e5 f5 g5 h5 -- 32 bit
  803|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
  804|  48.8k|    resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); //a7 b7 c7 d7 -- 32 bit
  805|  48.8k|    resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); //e7 f7 g7 h7 -- 32 bit
  806|       |
  807|  48.8k|    zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
  808|       |    //Load pred buffer row 0
  809|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  810|  48.8k|    pred_r0_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  811|       |    //Load pred buffer row 1
  812|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  813|  48.8k|    pred_r1_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  814|       |    //Load pred buffer row 2
  815|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  816|  48.8k|    pred_r2_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  817|       |    //Load pred buffer row 3
  818|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  819|  48.8k|    pred_r3_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  820|       |    //Load pred buffer row 4
  821|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[4 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  822|  48.8k|    pred_r4_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  823|       |    //Load pred buffer row 5
  824|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[5 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bit
  825|  48.8k|    pred_r5_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  826|       |    //Load pred buffer row 6
  827|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[6 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  828|  48.8k|    pred_r6_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  829|       |    //Load pred buffer row 7
  830|  48.8k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[7 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  831|  48.8k|    pred_r7_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  832|       |
  833|       |    /*--------------------------------------------------------------------*/
  834|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  835|       |    /*                                                                    */
  836|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  837|       |    /* [Prediction buffer itself in this case]                            */
  838|       |    /*--------------------------------------------------------------------*/
  839|       |
  840|       |    /* y0j = w0j + w4j                                                     */
  841|  48.8k|    temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
  842|       |    /* y2j = w0j - w4j                                                      */
  843|  48.8k|    temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
  844|       |    /* y1j = -w3j + w5j - w7j - (w7j >> 1)                                   */
  845|  48.8k|    temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5
  846|  48.8k|    temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
  847|  48.8k|    temp4 = _mm_sub_epi32(temp2, resq_r7_1); //-w3+w5-w7
  848|  48.8k|    temp12 = _mm_sub_epi32(temp10, resq_r7_2);
  849|  48.8k|    temp5 = _mm_srai_epi32(resq_r7_1, 1); //w7>>1
  850|  48.8k|    temp13 = _mm_srai_epi32(resq_r7_2, 1);
  851|  48.8k|    temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1)
  852|  48.8k|    temp10 = _mm_sub_epi32(temp12, temp13);
  853|  48.8k|    temp2 = _mm_packs_epi32(temp2, temp10);
  854|       |    /* y3j = w1j + w7j - w3j - (w3j >> 1)                                    */
  855|  48.8k|    temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); //w1+w7
  856|  48.8k|    temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
  857|  48.8k|    temp4 = _mm_sub_epi32(temp4, resq_r3_1); //w1+w7-w3
  858|  48.8k|    temp12 = _mm_sub_epi32(temp12, resq_r3_2);
  859|  48.8k|    temp5 = _mm_srai_epi32(resq_r3_1, 1); //w3>>1
  860|  48.8k|    temp13 = _mm_srai_epi32(resq_r3_2, 1);
  861|  48.8k|    temp4 = _mm_sub_epi32(temp4, temp5); //w1+w7-w3-(w3>>1)
  862|  48.8k|    temp12 = _mm_sub_epi32(temp12, temp13);
  863|  48.8k|    temp4 = _mm_packs_epi32(temp4, temp12);
  864|       |    /* y4j = (w2j >> 1) - w6j                                              */
  865|  48.8k|    temp5 = _mm_srai_epi16(resq_r2_2, 1); //w2>>1
  866|  48.8k|    temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6
  867|       |    /* y5j = -w1j + w7j + w5j + (w5j >> 1)                                   */
  868|  48.8k|    temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1); //w7-w1
  869|  48.8k|    temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
  870|  48.8k|    temp6 = _mm_add_epi32(temp6, resq_r5_1); //w7-w1+w5
  871|  48.8k|    temp14 = _mm_add_epi32(temp14, resq_r5_2);
  872|  48.8k|    temp7 = _mm_srai_epi32(resq_r5_1, 1); //w5>>1
  873|  48.8k|    temp15 = _mm_srai_epi32(resq_r5_2, 1);
  874|  48.8k|    temp6 = _mm_add_epi32(temp6, temp7); //w7-w1_w5+(w5>>1)
  875|  48.8k|    temp14 = _mm_add_epi32(temp14, temp15);
  876|  48.8k|    temp6 = _mm_packs_epi32(temp6, temp14);
  877|       |    /* y6j = w2j + (w6j >> 1)                                              */
  878|  48.8k|    temp7 = _mm_srai_epi16(resq_r6_2, 1); //w6>>1
  879|  48.8k|    temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2
  880|       |    /* y7j = w3j + w5j + w1j + (w1j >> 1)                                    */
  881|  48.8k|    temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); //w3+w5
  882|  48.8k|    temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
  883|  48.8k|    temp8 = _mm_add_epi32(temp8, resq_r1_1); //w3+w5+w1
  884|  48.8k|    temp16 = _mm_add_epi32(temp16, resq_r1_2);
  885|  48.8k|    temp17 = _mm_srai_epi32(resq_r1_1, 1); //w1>>1
  886|  48.8k|    temp18 = _mm_srai_epi32(resq_r1_2, 1);
  887|  48.8k|    temp8 = _mm_add_epi32(temp8, temp17); //w3+w5+w1+(w1>>1)
  888|  48.8k|    temp16 = _mm_add_epi32(temp16, temp18);
  889|  48.8k|    temp8 = _mm_packs_epi32(temp8, temp16);
  890|       |    /*------------------------------------------------------------------*/
  891|       |    /*------------------------------------------------------------------*/
  892|       |    /* z0j = y0j + y6j                                                        */
  893|  48.8k|    resq_r0_1 = _mm_add_epi16(temp1, temp7);
  894|       |    /* z1j = y1j + (y7j >> 2)                                                */
  895|  48.8k|    resq_r1_1 = _mm_srai_epi16(temp8, 2);
  896|  48.8k|    resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
  897|       |    /* z2j = y2j + y4j                                                        */
  898|  48.8k|    resq_r2_1 = _mm_add_epi16(temp3, temp5);
  899|       |    /* z3j = y3j + (y5j >> 2)                                                */
  900|  48.8k|    resq_r3_1 = _mm_srai_epi16(temp6, 2);
  901|  48.8k|    resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
  902|       |    /* z4j = y2j - y4j                                                        */
  903|  48.8k|    resq_r4_1 = _mm_sub_epi16(temp3, temp5);
  904|       |    /* z5j = (y3j >> 2) - y5j                                                 */
  905|  48.8k|    resq_r5_1 = _mm_srai_epi16(temp4, 2);
  906|  48.8k|    resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
  907|       |    /* z6j = y0j - y6j                                                     */
  908|  48.8k|    resq_r6_1 = _mm_sub_epi16(temp1, temp7);
  909|       |    /* z7j = y7j - (y1j >> 2)                                                 */
  910|  48.8k|    resq_r7_1 = _mm_srai_epi16(temp2, 2);
  911|  48.8k|    resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
  912|       |    /*------------------------------------------------------------------*/
  913|       |
  914|       |    /*------------------------------------------------------------------*/
  915|       |    /* x0j = z0j + z7j                                                        */
  916|  48.8k|    temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
  917|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp1);
  918|  48.8k|    temp10 = _mm_unpacklo_epi16(temp1, sign_reg);
  919|  48.8k|    temp11 = _mm_unpackhi_epi16(temp1, sign_reg);
  920|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  921|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  922|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
  923|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
  924|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
  925|  48.8k|    temp1 = _mm_add_epi16(temp10, pred_r0_1);
  926|       |    /* x1j = z2j + z5j                                                        */
  927|  48.8k|    temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
  928|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp2);
  929|  48.8k|    temp10 = _mm_unpacklo_epi16(temp2, sign_reg);
  930|  48.8k|    temp11 = _mm_unpackhi_epi16(temp2, sign_reg);
  931|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  932|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  933|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
  934|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
  935|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
  936|  48.8k|    temp2 = _mm_add_epi16(temp10, pred_r1_1);
  937|       |    /* x2j = z4j + z3j                                                        */
  938|  48.8k|    temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
  939|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp3);
  940|  48.8k|    temp10 = _mm_unpacklo_epi16(temp3, sign_reg);
  941|  48.8k|    temp11 = _mm_unpackhi_epi16(temp3, sign_reg);
  942|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  943|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  944|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
  945|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
  946|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
  947|  48.8k|    temp3 = _mm_add_epi16(temp10, pred_r2_1);
  948|       |    /* x3j = z6j + z1j                                                        */
  949|  48.8k|    temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
  950|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp4);
  951|  48.8k|    temp10 = _mm_unpacklo_epi16(temp4, sign_reg);
  952|  48.8k|    temp11 = _mm_unpackhi_epi16(temp4, sign_reg);
  953|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  954|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  955|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
  956|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
  957|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
  958|  48.8k|    temp4 = _mm_add_epi16(temp10, pred_r3_1);
  959|       |    /* x4j = z6j - z1j                                                        */
  960|  48.8k|    temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
  961|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp5);
  962|  48.8k|    temp10 = _mm_unpacklo_epi16(temp5, sign_reg);
  963|  48.8k|    temp11 = _mm_unpackhi_epi16(temp5, sign_reg);
  964|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  965|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  966|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
  967|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
  968|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
  969|  48.8k|    temp5 = _mm_add_epi16(temp10, pred_r4_1);
  970|       |    /* x5j = z4j - z3j                                                        */
  971|  48.8k|    temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
  972|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp6);
  973|  48.8k|    temp10 = _mm_unpacklo_epi16(temp6, sign_reg);
  974|  48.8k|    temp11 = _mm_unpackhi_epi16(temp6, sign_reg);
  975|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  976|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  977|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
  978|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
  979|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
  980|  48.8k|    temp6 = _mm_add_epi16(temp10, pred_r5_1);
  981|       |    /* x6j = z2j - z5j                                                        */
  982|  48.8k|    temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
  983|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp7);
  984|  48.8k|    temp10 = _mm_unpacklo_epi16(temp7, sign_reg);
  985|  48.8k|    temp11 = _mm_unpackhi_epi16(temp7, sign_reg);
  986|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  987|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  988|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
  989|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
  990|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
  991|  48.8k|    temp7 = _mm_add_epi16(temp10, pred_r6_1);
  992|       |    /* x7j = z0j - z7j                                                        */
  993|  48.8k|    temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
  994|  48.8k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp8);
  995|  48.8k|    temp10 = _mm_unpacklo_epi16(temp8, sign_reg);
  996|  48.8k|    temp11 = _mm_unpackhi_epi16(temp8, sign_reg);
  997|  48.8k|    temp10 = _mm_add_epi32(temp10, value_32);
  998|  48.8k|    temp11 = _mm_add_epi32(temp11, value_32);
  999|  48.8k|    temp10 = _mm_srai_epi32(temp10, 6);
 1000|  48.8k|    temp11 = _mm_srai_epi32(temp11, 6);
 1001|  48.8k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1002|  48.8k|    temp8 = _mm_add_epi16(temp10, pred_r7_1);
 1003|       |    /*------------------------------------------------------------------*/
 1004|       |    //Clipping the results to 8 bits
 1005|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); // sign check
 1006|  48.8k|    temp1 = _mm_and_si128(temp1, sign_reg);
 1007|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b); // sign check
 1008|  48.8k|    temp2 = _mm_and_si128(temp2, sign_reg);
 1009|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b); // sign check
 1010|  48.8k|    temp3 = _mm_and_si128(temp3, sign_reg);
 1011|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check
 1012|  48.8k|    temp4 = _mm_and_si128(temp4, sign_reg);
 1013|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check
 1014|  48.8k|    temp5 = _mm_and_si128(temp5, sign_reg);
 1015|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b); // sign check
 1016|  48.8k|    temp6 = _mm_and_si128(temp6, sign_reg);
 1017|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b); // sign check
 1018|  48.8k|    temp7 = _mm_and_si128(temp7, sign_reg);
 1019|  48.8k|    sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b); // sign check
 1020|  48.8k|    temp8 = _mm_and_si128(temp8, sign_reg);
 1021|       |
 1022|  48.8k|    resq_r0_2 = _mm_packus_epi16(temp1, zero_8x16b);
 1023|  48.8k|    resq_r1_2 = _mm_packus_epi16(temp2, zero_8x16b);
 1024|  48.8k|    resq_r2_2 = _mm_packus_epi16(temp3, zero_8x16b);
 1025|  48.8k|    resq_r3_2 = _mm_packus_epi16(temp4, zero_8x16b);
 1026|  48.8k|    resq_r4_2 = _mm_packus_epi16(temp5, zero_8x16b);
 1027|  48.8k|    resq_r5_2 = _mm_packus_epi16(temp6, zero_8x16b);
 1028|  48.8k|    resq_r6_2 = _mm_packus_epi16(temp7, zero_8x16b);
 1029|  48.8k|    resq_r7_2 = _mm_packus_epi16(temp8, zero_8x16b);
 1030|       |
 1031|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[0]), resq_r0_2);
 1032|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[out_strd]), resq_r1_2);
 1033|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[2 * out_strd]), resq_r2_2);
 1034|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[3 * out_strd]), resq_r3_2);
 1035|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[4 * out_strd]), resq_r4_2);
 1036|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[5 * out_strd]), resq_r5_2);
 1037|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[6 * out_strd]), resq_r6_2);
 1038|  48.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[7 * out_strd]), resq_r7_2);
 1039|  48.8k|}

ih264_intra_pred_luma_4x4_mode_vert_ssse3:
  122|  38.6k|{
  123|  38.6k|    UWORD8 *pu1_top;
  124|  38.6k|    WORD32 dst_strd2, dst_strd3;
  125|  38.6k|    WORD32 i4_top;
  126|       |    /* Vertical 4x4 luma intra prediction: one WORD32 read from the source is copied to four destination rows. */
  127|  38.6k|    UNUSED(src_strd); /* src_strd is not read by this routine */
  ------------------
  |  |   45|  38.6k|#define UNUSED(x) ((void)(x))
  ------------------
  128|  38.6k|    UNUSED(ngbr_avail); /* ngbr_avail is not read by this routine */
  ------------------
  |  |   45|  38.6k|#define UNUSED(x) ((void)(x))
  ------------------
  129|       |    /* Skip BLK_SIZE + 1 bytes of pu1_src; presumably the left and top-left neighbours precede the top row -- TODO confirm layout with caller. */
  130|  38.6k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  38.6k|#define BLK_SIZE            4
  ------------------
  131|       |    /* NOTE(review): WORD32 load through a cast byte pointer; pu1_src + 5 may not be WORD32-aligned, so this relies on the target tolerating unaligned, type-punned access. */
  132|  38.6k|    i4_top = *((WORD32 *)pu1_top);
  133|       |    /* Offsets of the third and fourth rows: 2 * dst_strd and 3 * dst_strd. */
  134|  38.6k|    dst_strd2 = dst_strd << 1;
  135|  38.6k|    dst_strd3 = dst_strd + dst_strd2;
  136|       |    /* Write the same top value to rows 0..3 of pu1_dst, one WORD32 store per row. */
  137|  38.6k|    *((WORD32 *)(pu1_dst)) = i4_top;
  138|  38.6k|    *((WORD32 *)(pu1_dst + dst_strd)) = i4_top;
  139|  38.6k|    *((WORD32 *)(pu1_dst + dst_strd2)) = i4_top;
  140|  38.6k|    *((WORD32 *)(pu1_dst + dst_strd3)) = i4_top;
  141|  38.6k|}
ih264_intra_pred_luma_4x4_mode_horz_ssse3:
  181|  14.4k|{
  182|  14.4k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  183|  14.4k|    WORD32 row1,row2,row3,row4;
  184|  14.4k|    UWORD8 val;
  185|  14.4k|    WORD32 dst_strd2, dst_strd3;
  186|       |
  187|  14.4k|    UNUSED(src_strd);
  ------------------
  |  |   45|  14.4k|#define UNUSED(x) ((void)(x))
  ------------------
  188|  14.4k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  14.4k|#define UNUSED(x) ((void)(x))
  ------------------
  189|  14.4k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  14.4k|#define BLK_SIZE            4
  ------------------
  190|       |
  191|  14.4k|    val  = *pu1_left;
  192|  14.4k|    row1 = val + (val << 8) + (val << 16) + (val << 24);
  193|  14.4k|    val  = *(pu1_left - 1);
  194|  14.4k|    row2 = val + (val << 8) + (val << 16) + (val << 24);
  195|  14.4k|    val  = *(pu1_left - 2);
  196|  14.4k|    row3 = val + (val << 8) + (val << 16) + (val << 24);
  197|  14.4k|    val  = *(pu1_left - 3);
  198|  14.4k|    row4 = val + (val << 8) + (val << 16) + (val << 24);
  199|       |
  200|  14.4k|    dst_strd2 = dst_strd << 1;
  201|  14.4k|    dst_strd3 = dst_strd + dst_strd2;
  202|       |
  203|  14.4k|    *((WORD32 *)(pu1_dst)) = row1;
  204|  14.4k|    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
  205|  14.4k|    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
  206|  14.4k|    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
  207|  14.4k|}
ih264_intra_pred_luma_4x4_mode_dc_ssse3:
  246|  68.2k|{
  247|  68.2k|    UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
  248|  68.2k|    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
  249|  68.2k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  250|  68.2k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  251|  68.2k|    WORD32 dst_strd2, dst_strd3;
  252|  68.2k|    WORD32 val = 0;
  253|  68.2k|    UNUSED(src_strd);
  ------------------
  |  |   45|  68.2k|#define UNUSED(x) ((void)(x))
  ------------------
  254|  68.2k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  68.2k|#define UNUSED(x) ((void)(x))
  ------------------
  255|  68.2k|    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  68.2k|#define BOOLEAN(x) (!!(x))
  ------------------
  256|  68.2k|    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  68.2k|#define BOOLEAN(x) (!!(x))
  ------------------
  257|  68.2k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  68.2k|#define BLK_SIZE            4
  ------------------
  258|  68.2k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  68.2k|#define BLK_SIZE            4
  ------------------
  259|       |
  260|  68.2k|    if(u1_useleft)
  ------------------
  |  Branch (260:8): [True: 56.2k, False: 11.9k]
  ------------------
  261|  56.2k|    {
  262|  56.2k|        val += *pu1_left--;
  263|  56.2k|        val += *pu1_left--;
  264|  56.2k|        val += *pu1_left--;
  265|  56.2k|        val += *pu1_left + 2;
  266|  56.2k|    }
  267|  68.2k|    if(u1_usetop)
  ------------------
  |  Branch (267:8): [True: 56.7k, False: 11.4k]
  ------------------
  268|  56.7k|    {
  269|  56.7k|        val += *pu1_top + *(pu1_top + 1) + *(pu1_top + 2) + *(pu1_top + 3)
  270|  56.7k|                        + 2;
  271|  56.7k|    }
  272|       |    /* Since 2 is added if either left/top pred is there,
  273|       |     val still being zero implies both preds are not there */
  274|  68.2k|    val = (val) ? (val >> (1 + u1_useleft + u1_usetop)) : 128;
  ------------------
  |  Branch (274:11): [True: 66.2k, False: 1.91k]
  ------------------
  275|       |
  276|  68.2k|    val = val + (val << 8) + (val << 16) + (val << 24);
  277|       |
  278|  68.2k|    dst_strd2 = dst_strd << 1;
  279|  68.2k|    dst_strd3 = dst_strd + dst_strd2;
  280|       |
  281|  68.2k|    *((WORD32 *)(pu1_dst)) = val;
  282|  68.2k|    *((WORD32 *)(pu1_dst + dst_strd)) = val;
  283|  68.2k|    *((WORD32 *)(pu1_dst + dst_strd2)) = val;
  284|  68.2k|    *((WORD32 *)(pu1_dst + dst_strd3)) = val;
  285|  68.2k|}
ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3:
  324|  12.1k|{
  325|  12.1k|    UWORD8 *pu1_top;
  326|  12.1k|    WORD32 dst_strd2, dst_strd3;
  327|       |
  328|  12.1k|    __m128i top_16x8b, top_8x16b, top_sh_8x16b;
  329|  12.1k|    __m128i res1_8x16b, res2_8x16b, res_16x8b;
  330|  12.1k|    __m128i zero_vector, const_2_8x16b;
  331|  12.1k|    WORD32 row1,row2,row3,row4;
  332|       |
  333|  12.1k|    UNUSED(src_strd);
  ------------------
  |  |   45|  12.1k|#define UNUSED(x) ((void)(x))
  ------------------
  334|  12.1k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  12.1k|#define UNUSED(x) ((void)(x))
  ------------------
  335|       |
  336|  12.1k|    pu1_top = pu1_src + BLK_SIZE + 1;
  ------------------
  |  |  511|  12.1k|#define BLK_SIZE            4
  ------------------
  337|       |
  338|  12.1k|    top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
  339|  12.1k|    zero_vector = _mm_setzero_si128();
  340|  12.1k|    top_8x16b = _mm_unpacklo_epi8(top_16x8b, zero_vector);    //t0 t1 t2 t3 t4 t5 t6 t7
  341|       |
  342|  12.1k|    top_sh_8x16b = _mm_srli_si128(top_8x16b, 2);              //t1 t2 t3 t4 t5 t6 t7 0
  343|  12.1k|    const_2_8x16b = _mm_set1_epi16(2);
  344|       |
  345|  12.1k|    top_sh_8x16b = _mm_shufflehi_epi16(top_sh_8x16b, 0xa4);   //t1 t2 t3 t4 t5 t6 t7 t7
  346|  12.1k|    res1_8x16b = _mm_add_epi16(top_8x16b, top_sh_8x16b);
  347|  12.1k|    res2_8x16b = _mm_srli_si128(res1_8x16b, 2);
  348|       |
  349|  12.1k|    res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
  350|  12.1k|    res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b);
  351|  12.1k|    res1_8x16b = _mm_srai_epi16(res1_8x16b, 2);
  352|       |
  353|  12.1k|    dst_strd2 = dst_strd << 1;
  354|  12.1k|    dst_strd3 = dst_strd + dst_strd2;
  355|       |
  356|  12.1k|    res_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
  357|  12.1k|    row1 = _mm_cvtsi128_si32(res_16x8b);
  358|  12.1k|    res_16x8b = _mm_srli_si128(res_16x8b, 1);
  359|  12.1k|    row2 = _mm_cvtsi128_si32(res_16x8b);
  360|  12.1k|    res_16x8b = _mm_srli_si128(res_16x8b, 1);
  361|  12.1k|    row3 = _mm_cvtsi128_si32(res_16x8b);
  362|  12.1k|    res_16x8b = _mm_srli_si128(res_16x8b, 1);
  363|  12.1k|    row4 = _mm_cvtsi128_si32(res_16x8b);
  364|       |
  365|  12.1k|    *((WORD32 *)(pu1_dst)) = row1;
  366|  12.1k|    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
  367|  12.1k|    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
  368|  12.1k|    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
  369|  12.1k|}
ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3:
  408|  2.27k|{
  409|  2.27k|    UWORD8 *pu1_left;
  410|  2.27k|    WORD32 dst_strd2, dst_strd3;
  411|       |
  412|  2.27k|    __m128i top_left_16x8b, top_left_8x16b;
  413|  2.27k|    __m128i top_left_sh_16x8b, top_left_sh_8x16b;
  414|  2.27k|    __m128i res1_8x16b, res2_8x16b;
  415|  2.27k|    __m128i res1_16x8b, res2_16x8b;
  416|  2.27k|    __m128i zero_vector, const_2_8x16b;
  417|  2.27k|    WORD32 row1,row2,row3,row4;
  418|       |
  419|  2.27k|    UNUSED(src_strd);
  ------------------
  |  |   45|  2.27k|#define UNUSED(x) ((void)(x))
  ------------------
  420|  2.27k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  2.27k|#define UNUSED(x) ((void)(x))
  ------------------
  421|       |
  422|  2.27k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  2.27k|#define BLK_SIZE            4
  ------------------
  423|       |
  424|  2.27k|    top_left_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 3));             //l3 l2 l1 l0 tl t0 t1 t2...
  425|  2.27k|    zero_vector = _mm_setzero_si128();
  426|  2.27k|    top_left_sh_16x8b = _mm_srli_si128(top_left_16x8b, 1);                   //l2 l1 l0 tl t0 t1 t2 t3...
  427|       |
  428|  2.27k|    top_left_8x16b = _mm_unpacklo_epi8(top_left_16x8b, zero_vector);
  429|  2.27k|    top_left_sh_8x16b = _mm_unpacklo_epi8(top_left_sh_16x8b, zero_vector);
  430|       |
  431|  2.27k|    res1_8x16b = _mm_add_epi16(top_left_8x16b, top_left_sh_8x16b);           //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
  432|  2.27k|    const_2_8x16b = _mm_set1_epi16(2);
  433|  2.27k|    res2_8x16b = _mm_srli_si128(res1_8x16b, 2);                              //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
  434|       |
  435|  2.27k|    res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
  436|  2.27k|    res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b);                      //l3+2*l2+l1+2 l2+2*l1+l0+2...
  437|  2.27k|    res1_8x16b = _mm_srai_epi16(res1_8x16b, 2);
  438|  2.27k|    res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
  439|       |
  440|  2.27k|    dst_strd2 = dst_strd << 1;
  441|  2.27k|    dst_strd3 = dst_strd + dst_strd2;
  442|       |
  443|  2.27k|    res2_16x8b = _mm_srli_si128(res1_16x8b, 3);
  444|       |
  445|  2.27k|    row1 = _mm_cvtsi128_si32(res2_16x8b);
  446|  2.27k|    res2_16x8b = _mm_srli_si128(res1_16x8b, 2);
  447|  2.27k|    row2 = _mm_cvtsi128_si32(res2_16x8b);
  448|  2.27k|    res2_16x8b = _mm_srli_si128(res1_16x8b, 1);
  449|  2.27k|    row3 = _mm_cvtsi128_si32(res2_16x8b);
  450|  2.27k|    row4 = _mm_cvtsi128_si32(res1_16x8b);
  451|       |
  452|  2.27k|    *((WORD32 *)(pu1_dst)) = row1;
  453|  2.27k|    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
  454|  2.27k|    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
  455|  2.27k|    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
  456|  2.27k|}
ih264_intra_pred_luma_4x4_mode_vert_r_ssse3:
  495|  1.57k|{
  496|  1.57k|    UWORD8 *pu1_left;
  497|  1.57k|    WORD32 dst_strd2, dst_strd3;
  498|       |
  499|  1.57k|    __m128i val_16x8b, temp_16x8b;
  500|  1.57k|    __m128i w11_a1_16x8b, w11_a2_16x8b;
  501|  1.57k|    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_sh_8x16b;
  502|  1.57k|    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
  503|  1.57k|    __m128i zero_vector, const_2_8x16b;
  504|  1.57k|    WORD32 row1,row2,row3,row4;
  505|       |
  506|  1.57k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.57k|#define UNUSED(x) ((void)(x))
  ------------------
  507|  1.57k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.57k|#define UNUSED(x) ((void)(x))
  ------------------
  508|       |
  509|  1.57k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  1.57k|#define BLK_SIZE            4
  ------------------
  510|       |
  511|  1.57k|    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 2));
  512|  1.57k|    zero_vector = _mm_setzero_si128();
  513|       |
  514|  1.57k|    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //l2 l1 l0 tl t0 t1 t2 t3
  515|  1.57k|    w11_a1_16x8b = _mm_srli_si128(val_16x8b, 3);
  516|  1.57k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1 l0 tl t0 t1 t2 t3 0
  517|  1.57k|    w11_a2_16x8b = _mm_srli_si128(val_16x8b, 4);
  518|       |
  519|  1.57k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3
  520|  1.57k|    row1_16x8b = _mm_avg_epu8(w11_a1_16x8b, w11_a2_16x8b);
  521|  1.57k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3    0
  522|       |
  523|  1.57k|    const_2_8x16b = _mm_set1_epi16(2);
  524|  1.57k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l2+2*l1+l0 l1+2*l0+tl ...
  525|  1.57k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
  526|  1.57k|    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
  527|       |
  528|  1.57k|    w121_sh_8x16b = _mm_shufflelo_epi16(w121_a1_8x16b, 0xe1);
  529|  1.57k|    w121_sh_8x16b = _mm_srli_si128(w121_sh_8x16b, 2);
  530|       |
  531|  1.57k|    row4_16x8b = _mm_packus_epi16(w121_sh_8x16b, w121_sh_8x16b);
  532|  1.57k|    temp_16x8b = _mm_slli_si128(w121_a1_8x16b, 13);
  533|  1.57k|    row2_16x8b = _mm_srli_si128(row4_16x8b, 1);
  534|  1.57k|    row3_16x8b = _mm_alignr_epi8(row1_16x8b, temp_16x8b, 15);
  535|       |
  536|  1.57k|    dst_strd2 = dst_strd << 1;
  537|  1.57k|    dst_strd3 = dst_strd + dst_strd2;
  538|       |
  539|  1.57k|    row1 = _mm_cvtsi128_si32(row1_16x8b);
  540|  1.57k|    row2 = _mm_cvtsi128_si32(row2_16x8b);
  541|  1.57k|    row3 = _mm_cvtsi128_si32(row3_16x8b);
  542|  1.57k|    row4 = _mm_cvtsi128_si32(row4_16x8b);
  543|       |
  544|  1.57k|    *((WORD32 *)(pu1_dst)) = row1;
  545|  1.57k|    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
  546|  1.57k|    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
  547|  1.57k|    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
  548|  1.57k|}
ih264_intra_pred_luma_4x4_mode_horz_d_ssse3:
  587|  4.36k|{
  588|  4.36k|    UWORD8 *pu1_left;
  589|  4.36k|    WORD32 dst_strd2, dst_strd3;
  590|  4.36k|    WORD32 val_121_t0t1;
  591|       |
  592|  4.36k|    __m128i val_16x8b, val_sh_16x8b;
  593|  4.36k|    __m128i w11_16x8b;
  594|  4.36k|    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b;
  595|  4.36k|    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
  596|       |
  597|  4.36k|    __m128i zero_vector, const_2_8x16b;
  598|  4.36k|    WORD32 row1,row2,row3,row4;
  599|       |
  600|  4.36k|    UNUSED(src_strd);
  ------------------
  |  |   45|  4.36k|#define UNUSED(x) ((void)(x))
  ------------------
  601|  4.36k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  4.36k|#define UNUSED(x) ((void)(x))
  ------------------
  602|       |
  603|  4.36k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  4.36k|#define BLK_SIZE            4
  ------------------
  604|       |
  605|  4.36k|    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));
  606|  4.36k|    zero_vector = _mm_setzero_si128();
  607|  4.36k|    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
  608|  4.36k|    w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b);
  609|       |
  610|  4.36k|    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //l3 l2 l1 l0 tl t0 t1 t2
  611|  4.36k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l2 l1 l0 tl t0 t1 t2 0
  612|  4.36k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2
  613|  4.36k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2    0
  614|       |
  615|  4.36k|    zero_vector = _mm_setzero_si128();
  616|  4.36k|    const_2_8x16b = _mm_set1_epi16(2);
  617|       |
  618|  4.36k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l3+2*l2+l1 l2+2*l1+l0 l1+2*l0+tl ...
  619|  4.36k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
  620|  4.36k|    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
  621|       |
  622|  4.36k|    w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
  623|       |
  624|  4.36k|    row4_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b);
  625|  4.36k|    val_121_t0t1 = _mm_extract_epi16(w121_16x8b, 2);
  626|  4.36k|    row4_16x8b = _mm_insert_epi16(row4_16x8b, val_121_t0t1, 4);
  627|       |
  628|  4.36k|    dst_strd2 = dst_strd << 1;
  629|  4.36k|    dst_strd3 = dst_strd + dst_strd2;
  630|       |
  631|  4.36k|    row1_16x8b = _mm_srli_si128(row4_16x8b, 6);
  632|  4.36k|    row2_16x8b = _mm_srli_si128(row4_16x8b, 4);
  633|  4.36k|    row3_16x8b = _mm_srli_si128(row4_16x8b, 2);
  634|       |
  635|  4.36k|    row1 = _mm_cvtsi128_si32(row1_16x8b);
  636|  4.36k|    row2 = _mm_cvtsi128_si32(row2_16x8b);
  637|  4.36k|    row3 = _mm_cvtsi128_si32(row3_16x8b);
  638|  4.36k|    row4 = _mm_cvtsi128_si32(row4_16x8b);
  639|       |
  640|  4.36k|    *((WORD32 *)(pu1_dst)) = row1;
  641|  4.36k|    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
  642|  4.36k|    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
  643|  4.36k|    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
  644|  4.36k|}
ih264_intra_pred_luma_4x4_mode_vert_l_ssse3:
  683|  8.09k|{
  684|  8.09k|    UWORD8 *pu1_top;
  685|  8.09k|    WORD32 dst_strd2, dst_strd3;
  686|       |
  687|  8.09k|    __m128i val_16x8b, val_sh_16x8b;
  688|  8.09k|    __m128i w121_a1_8x16b, w121_a2_8x16b;
  689|  8.09k|    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
  690|       |
  691|  8.09k|    __m128i zero_vector, const_2_8x16b;
  692|  8.09k|    WORD32 row1,row2,row3,row4;
  693|       |
  694|  8.09k|    UNUSED(src_strd);
  ------------------
  |  |   45|  8.09k|#define UNUSED(x) ((void)(x))
  ------------------
  695|  8.09k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  8.09k|#define UNUSED(x) ((void)(x))
  ------------------
  696|       |
  697|  8.09k|    pu1_top = pu1_src +BLK_SIZE + 1;
  ------------------
  |  |  511|  8.09k|#define BLK_SIZE            4
  ------------------
  698|       |
  699|  8.09k|    val_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
  700|  8.09k|    zero_vector = _mm_setzero_si128();
  701|  8.09k|    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
  702|  8.09k|    row1_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b);
  703|       |
  704|  8.09k|    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //t0 t1 t2 t3 t4 t5...
  705|  8.09k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //t1 t2 t3 t4 t5 t6...
  706|  8.09k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //t0+t1 t1+t2 t2+t3 t3+t4 t4+t5...
  707|  8.09k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //t1+t2 t2+t3 t3+t4 t4+t5 t5+t6...
  708|       |
  709|  8.09k|    zero_vector = _mm_setzero_si128();
  710|  8.09k|    const_2_8x16b = _mm_set1_epi16(2);
  711|       |
  712|  8.09k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //t0+2*t1+t2 t1+2*t2+t3 t2+2*t3+t4...
  713|  8.09k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
  714|  8.09k|    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
  715|       |
  716|  8.09k|    row2_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
  717|       |
  718|  8.09k|    dst_strd2 = dst_strd << 1;
  719|  8.09k|    dst_strd3 = dst_strd + dst_strd2;
  720|       |
  721|  8.09k|    row3_16x8b = _mm_srli_si128(row1_16x8b, 1);
  722|  8.09k|    row4_16x8b = _mm_srli_si128(row2_16x8b, 1);
  723|       |
  724|  8.09k|    row1 = _mm_cvtsi128_si32(row1_16x8b);
  725|  8.09k|    row2 = _mm_cvtsi128_si32(row2_16x8b);
  726|  8.09k|    row3 = _mm_cvtsi128_si32(row3_16x8b);
  727|  8.09k|    row4 = _mm_cvtsi128_si32(row4_16x8b);
  728|       |
  729|  8.09k|    *((WORD32 *)(pu1_dst)) = row1;
  730|  8.09k|    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
  731|  8.09k|    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
  732|  8.09k|    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
  733|  8.09k|}
ih264_intra_pred_luma_4x4_mode_horz_u_ssse3:
  772|  12.3k|{
  773|  12.3k|    UWORD8 *pu1_left;
  774|  12.3k|    WORD32 dst_strd2, dst_strd3;
  775|       |
  776|  12.3k|    __m128i val_16x8b, val_sh_16x8b;
  777|  12.3k|    __m128i w11_16x8b;
  778|  12.3k|    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b;
  779|  12.3k|    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
  780|       |
  781|  12.3k|    __m128i zero_vector, const_2_8x16b, rev_16x8b;
  782|  12.3k|    WORD32 row1,row2,row3,row4;
  783|       |
  784|  12.3k|    UNUSED(src_strd);
  ------------------
  |  |   45|  12.3k|#define UNUSED(x) ((void)(x))
  ------------------
  785|  12.3k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  12.3k|#define UNUSED(x) ((void)(x))
  ------------------
  786|       |
  787|  12.3k|    pu1_left = pu1_src + BLK_SIZE - 1;
  ------------------
  |  |  511|  12.3k|#define BLK_SIZE            4
  ------------------
  788|       |
  789|  12.3k|    zero_vector = _mm_setzero_si128();
  790|  12.3k|    rev_16x8b = _mm_setr_epi8(3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  791|       |
  792|  12.3k|    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));           //l3 l2 l1 l0 0  0  0...
  793|  12.3k|    val_16x8b = _mm_shuffle_epi8(val_16x8b, rev_16x8b);                //l0 l1 l2 l3 l3 l3 l3...
  794|       |
  795|  12.3k|    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
  796|  12.3k|    w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b);
  797|       |
  798|  12.3k|    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //l0 l1 l2 l3 l3 l3...
  799|  12.3k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1 l2 l3 l3 l3 l3...
  800|       |
  801|  12.3k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l0+t1 l1+l2 l2+l3 2*l3 2*l3...
  802|  12.3k|    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1+t2 l2+l3 2*l3  2*l3 2*l3...
  803|       |
  804|  12.3k|    zero_vector = _mm_setzero_si128();
  805|  12.3k|    const_2_8x16b = _mm_set1_epi16(2);
  806|       |
  807|  12.3k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l0+2*l1+l2 l1+2*l2+l3 l2+3*l3 4*l3 4*l3...
  808|  12.3k|    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
  809|  12.3k|    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
  810|       |
  811|  12.3k|    w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
  812|       |
  813|  12.3k|    dst_strd2 = dst_strd << 1;
  814|  12.3k|    dst_strd3 = dst_strd + dst_strd2;
  815|       |
  816|  12.3k|    row1_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b);
  817|  12.3k|    row2_16x8b = _mm_srli_si128(row1_16x8b, 2);
  818|  12.3k|    row3_16x8b = _mm_srli_si128(row1_16x8b, 4);
  819|  12.3k|    row4_16x8b = _mm_srli_si128(row1_16x8b, 6);
  820|       |
  821|  12.3k|    row1 = _mm_cvtsi128_si32(row1_16x8b);
  822|  12.3k|    row2 = _mm_cvtsi128_si32(row2_16x8b);
  823|  12.3k|    row3 = _mm_cvtsi128_si32(row3_16x8b);
  824|  12.3k|    row4 = _mm_cvtsi128_si32(row4_16x8b);
  825|       |
  826|  12.3k|    *((WORD32 *)(pu1_dst)) = row1;
  827|  12.3k|    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
  828|  12.3k|    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
  829|  12.3k|    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
  830|  12.3k|}
ih264_intra_pred_luma_8x8_mode_vert_ssse3:
  872|  16.4k|{
  873|  16.4k|    UWORD8 *pu1_top = NULL;
  874|  16.4k|    __m128i top_8x8b;
  875|  16.4k|    UNUSED(src_strd);
  ------------------
  |  |   45|  16.4k|#define UNUSED(x) ((void)(x))
  ------------------
  876|  16.4k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  16.4k|#define UNUSED(x) ((void)(x))
  ------------------
  877|  16.4k|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|  16.4k|#define BLK8x8SIZE          8
  ------------------
  878|       |
  879|  16.4k|    top_8x8b = _mm_loadl_epi64((__m128i *)pu1_top);
  880|       |
  881|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), top_8x8b);
  882|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), top_8x8b);
  883|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), top_8x8b);
  884|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), top_8x8b);
  885|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), top_8x8b);
  886|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), top_8x8b);
  887|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), top_8x8b);
  888|  16.4k|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), top_8x8b);
  889|  16.4k|}
ih264_intra_pred_luma_8x8_mode_horz_ssse3:
  929|  4.39k|{
  930|  4.39k|    UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  4.39k|#define BLK8x8SIZE          8
  ------------------
  931|  4.39k|    __m128i row1_8x8b, row2_8x8b, row3_8x8b, row4_8x8b;
  932|  4.39k|    __m128i row5_8x8b, row6_8x8b, row7_8x8b, row8_8x8b;
  933|       |
  934|  4.39k|    UNUSED(src_strd);
  ------------------
  |  |   45|  4.39k|#define UNUSED(x) ((void)(x))
  ------------------
  935|  4.39k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  4.39k|#define UNUSED(x) ((void)(x))
  ------------------
  936|       |
  937|  4.39k|    row1_8x8b = _mm_set1_epi8(pu1_left[0]);
  938|  4.39k|    row2_8x8b = _mm_set1_epi8(pu1_left[-1]);
  939|  4.39k|    row3_8x8b = _mm_set1_epi8(pu1_left[-2]);
  940|  4.39k|    row4_8x8b = _mm_set1_epi8(pu1_left[-3]);
  941|  4.39k|    row5_8x8b = _mm_set1_epi8(pu1_left[-4]);
  942|  4.39k|    row6_8x8b = _mm_set1_epi8(pu1_left[-5]);
  943|  4.39k|    row7_8x8b = _mm_set1_epi8(pu1_left[-6]);
  944|  4.39k|    row8_8x8b = _mm_set1_epi8(pu1_left[-7]);
  945|       |
  946|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), row1_8x8b);
  947|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), row2_8x8b);
  948|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), row3_8x8b);
  949|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), row4_8x8b);
  950|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), row5_8x8b);
  951|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), row6_8x8b);
  952|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), row7_8x8b);
  953|  4.39k|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), row8_8x8b);
  954|  4.39k|}
ih264_intra_pred_luma_8x8_mode_dc_ssse3:
  993|  31.9k|{
  994|  31.9k|    UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
  995|  31.9k|    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
  996|  31.9k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
  997|  31.9k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
  998|  31.9k|    __m128i dc_val_8x8b;
  999|  31.9k|    WORD32 dc_val = 0;
 1000|  31.9k|    UNUSED(src_strd);
  ------------------
  |  |   45|  31.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1001|       |
 1002|  31.9k|    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  31.9k|#define BOOLEAN(x) (!!(x))
  ------------------
 1003|  31.9k|    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  31.9k|#define BOOLEAN(x) (!!(x))
  ------------------
 1004|  31.9k|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|  31.9k|#define BLK8x8SIZE          8
  ------------------
 1005|  31.9k|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  31.9k|#define BLK8x8SIZE          8
  ------------------
 1006|       |
 1007|  31.9k|    if(u1_useleft || u1_usetop)
  ------------------
  |  Branch (1007:8): [True: 25.9k, False: 5.94k]
  |  Branch (1007:22): [True: 5.08k, False: 860]
  ------------------
 1008|  31.0k|    {
 1009|  31.0k|        WORD32 shft = 2;
 1010|  31.0k|        __m128i val_8x8b, zero_8x8b, sum_8x16b;
 1011|       |
 1012|  31.0k|        zero_8x8b = _mm_setzero_si128();
 1013|       |
 1014|  31.0k|        if(u1_useleft)
  ------------------
  |  Branch (1014:12): [True: 25.9k, False: 5.08k]
  ------------------
 1015|  25.9k|        {
 1016|  25.9k|            val_8x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 7));
 1017|  25.9k|            sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b);
 1018|       |
 1019|  25.9k|            shft++;
 1020|  25.9k|            dc_val += 4;
 1021|  25.9k|            dc_val += _mm_extract_epi16(sum_8x16b, 0);
 1022|  25.9k|        }
 1023|  31.0k|        if(u1_usetop)
  ------------------
  |  Branch (1023:12): [True: 23.2k, False: 7.84k]
  ------------------
 1024|  23.2k|        {
 1025|  23.2k|            val_8x8b = _mm_loadl_epi64((__m128i *)pu1_top);
 1026|  23.2k|            sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b);
 1027|       |
 1028|  23.2k|            shft++;
 1029|  23.2k|            dc_val += 4;
 1030|  23.2k|            dc_val += _mm_extract_epi16(sum_8x16b, 0);
 1031|  23.2k|        }
 1032|  31.0k|        dc_val = dc_val >> shft;
 1033|  31.0k|    }
 1034|    860|    else
 1035|    860|        dc_val = 128;
 1036|       |
 1037|  31.9k|    dc_val_8x8b = _mm_set1_epi8(dc_val);
 1038|       |
 1039|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), dc_val_8x8b);
 1040|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), dc_val_8x8b);
 1041|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), dc_val_8x8b);
 1042|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), dc_val_8x8b);
 1043|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), dc_val_8x8b);
 1044|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), dc_val_8x8b);
 1045|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), dc_val_8x8b);
 1046|  31.9k|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), dc_val_8x8b);
 1047|  31.9k|}
ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3:
 1086|    534|{
 1087|    534|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1088|    534|    __m128i top_16x8;
 1089|    534|    __m128i out_15x16;
 1090|    534|    __m128i a0_8x16, a1_8x16, a2_8x16;
 1091|    534|    __m128i temp1, temp2;
 1092|    534|    __m128i res1_8x16, res2_8x16;
 1093|    534|    __m128i zero = _mm_setzero_si128();
 1094|    534|    __m128i const_val2_8x16 = _mm_set1_epi16(2);
 1095|       |
 1096|    534|    UNUSED(src_strd);
  ------------------
  |  |   45|    534|#define UNUSED(x) ((void)(x))
  ------------------
 1097|    534|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|    534|#define UNUSED(x) ((void)(x))
  ------------------
 1098|       |
 1099|    534|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|    534|#define BLK8x8SIZE          8
  ------------------
 1100|       |
 1101|    534|    top_16x8 = _mm_loadu_si128((__m128i *)(pu1_top));
 1102|       |
 1103|    534|    temp1 = _mm_srli_si128(top_16x8, 1);
 1104|    534|    temp2 = _mm_srli_si128(top_16x8, 2);
 1105|    534|    a0_8x16 = _mm_unpacklo_epi8(top_16x8, zero);
 1106|    534|    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1107|    534|    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
 1108|       |
 1109|    534|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1110|    534|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1111|    534|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1112|    534|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1113|    534|    res1_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1114|       |
 1115|    534|    temp2 = _mm_srli_si128(top_16x8, 2);
 1116|    534|    temp1 = _mm_srli_si128(top_16x8, 1);
 1117|    534|    a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
 1118|    534|    a0_8x16 = _mm_unpackhi_epi8(top_16x8, zero);
 1119|    534|    a2_8x16 = _mm_shufflehi_epi16(a2_8x16, 0x14);
 1120|    534|    a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
 1121|       |
 1122|    534|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1123|    534|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1124|    534|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1125|    534|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1126|    534|    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1127|       |
 1128|    534|    out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16);
 1129|       |
 1130|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out_15x16);
 1131|    534|    out_15x16 = _mm_srli_si128(out_15x16, 1);
 1132|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out_15x16);
 1133|    534|    out_15x16 = _mm_srli_si128(out_15x16, 1);
 1134|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out_15x16);
 1135|    534|    out_15x16 = _mm_srli_si128(out_15x16, 1);
 1136|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out_15x16);
 1137|    534|    out_15x16 = _mm_srli_si128(out_15x16, 1);
 1138|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out_15x16);
 1139|    534|    out_15x16 = _mm_srli_si128(out_15x16, 1);
 1140|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out_15x16);
 1141|    534|    out_15x16 = _mm_srli_si128(out_15x16, 1);
 1142|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out_15x16);
 1143|       |    out_15x16 = _mm_srli_si128(out_15x16, 1);
 1144|    534|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16);
 1145|    534|}
ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3:
 1184|    666|{
 1185|    666|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1186|    666|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1187|    666|    __m128i top_8x8, left_16x8;
 1188|    666|    __m128i out_15x16;
 1189|    666|    __m128i a0_8x16, a1_8x16, a2_8x16;
 1190|    666|    __m128i temp1, temp2;
 1191|    666|    __m128i res1_8x16, res2_8x16;
 1192|    666|    __m128i zero = _mm_setzero_si128();
 1193|    666|    __m128i const_val2_8x16 = _mm_set1_epi16(2);
 1194|    666|    __m128i str_8x8;
 1195|       |
 1196|    666|    UNUSED(src_strd);
  ------------------
  |  |   45|    666|#define UNUSED(x) ((void)(x))
  ------------------
 1197|    666|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|    666|#define UNUSED(x) ((void)(x))
  ------------------
 1198|       |
 1199|    666|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|    666|#define BLK8x8SIZE          8
  ------------------
 1200|    666|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|    666|#define BLK8x8SIZE          8
  ------------------
 1201|       |
 1202|    666|    left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 7));
 1203|       |
 1204|    666|    temp1 = _mm_srli_si128(left_16x8, 1);
 1205|    666|    temp2 = _mm_srli_si128(left_16x8, 2);
 1206|    666|    a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero);
 1207|    666|    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1208|    666|    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
 1209|       |
 1210|    666|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1211|    666|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1212|    666|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1213|    666|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1214|    666|    res1_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1215|       |
 1216|    666|    top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1));
 1217|       |
 1218|    666|    temp1 = _mm_srli_si128(top_8x8, 1);
 1219|    666|    temp2 = _mm_srli_si128(top_8x8, 2);
 1220|    666|    a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero);
 1221|    666|    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1222|    666|    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
 1223|       |
 1224|    666|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1225|    666|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1226|    666|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1227|    666|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1228|    666|    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1229|       |
 1230|    666|    out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16);
 1231|       |
 1232|    666|    str_8x8 = _mm_srli_si128(out_15x16, 7);
 1233|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
 1234|    666|    str_8x8 = _mm_srli_si128(out_15x16, 6);
 1235|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
 1236|    666|    str_8x8 = _mm_srli_si128(out_15x16, 5);
 1237|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
 1238|    666|    str_8x8 = _mm_srli_si128(out_15x16, 4);
 1239|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
 1240|    666|    str_8x8 = _mm_srli_si128(out_15x16, 3);
 1241|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
 1242|    666|    str_8x8 = _mm_srli_si128(out_15x16, 2);
 1243|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
 1244|       |    str_8x8 = _mm_srli_si128(out_15x16, 1);
 1245|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
 1246|    666|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16);
 1247|    666|}
ih264_intra_pred_luma_8x8_mode_vert_r_ssse3:
 1286|    570|{
 1287|    570|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1288|    570|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1289|    570|    __m128i top_8x8, left_16x8;
 1290|    570|    __m128i out1_16x16, out2_16x16;
 1291|    570|    __m128i a0_8x16, a1_8x16, a2_8x16;
 1292|    570|    __m128i temp1, temp2;
 1293|    570|    __m128i res1_8x16, res2_8x16, res3_8x16;
 1294|    570|    __m128i zero = _mm_setzero_si128();
 1295|    570|    __m128i const_val2_8x16 = _mm_set1_epi16(2);
 1296|    570|    __m128i str_8x8;
 1297|    570|    __m128i mask = _mm_set1_epi32(0xFFFF);
 1298|       |
 1299|    570|    UNUSED(src_strd);
  ------------------
  |  |   45|    570|#define UNUSED(x) ((void)(x))
  ------------------
 1300|    570|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|    570|#define UNUSED(x) ((void)(x))
  ------------------
 1301|       |
 1302|    570|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|    570|#define BLK8x8SIZE          8
  ------------------
 1303|    570|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|    570|#define BLK8x8SIZE          8
  ------------------
 1304|       |
 1305|    570|    left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 6));
 1306|       |
 1307|    570|    temp1 = _mm_srli_si128(left_16x8, 1);
 1308|    570|    temp2 = _mm_srli_si128(left_16x8, 2);
 1309|    570|    a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero);
 1310|    570|    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1311|    570|    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
 1312|       |
 1313|    570|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1314|    570|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1315|    570|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1316|    570|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1317|    570|    res1_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1318|       |
 1319|    570|    top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1));
 1320|       |
 1321|    570|    temp1 = _mm_srli_si128(top_8x8, 1);
 1322|    570|    temp2 = _mm_srli_si128(top_8x8, 2);
 1323|    570|    a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero);
 1324|    570|    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1325|    570|    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
 1326|       |
 1327|    570|    res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
 1328|       |
 1329|    570|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1330|    570|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1331|    570|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1332|    570|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1333|    570|    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1334|       |
 1335|    570|    str_8x8 = _mm_packus_epi16(res3_8x16, zero);
 1336|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
 1337|       |
 1338|    570|    temp1 = _mm_and_si128(res1_8x16, mask);
 1339|    570|    temp1 = _mm_packs_epi32(temp1, temp1);
 1340|    570|    out1_16x16 = _mm_packus_epi16(temp1, res2_8x16);
 1341|       |
 1342|    570|    res1_8x16 = _mm_slli_si128(res1_8x16, 2);
 1343|    570|    temp1 = _mm_and_si128(res1_8x16, mask);
 1344|    570|    temp1 = _mm_packs_epi32(temp1, temp1);
 1345|    570|    out2_16x16 = _mm_packus_epi16(temp1, res3_8x16);
 1346|       |
 1347|    570|    str_8x8 = _mm_srli_si128(out1_16x16, 7);
 1348|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
 1349|       |
 1350|    570|    str_8x8 = _mm_srli_si128(out2_16x16, 7);
 1351|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
 1352|       |
 1353|    570|    str_8x8 = _mm_srli_si128(out1_16x16, 6);
 1354|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
 1355|       |
 1356|    570|    str_8x8 = _mm_srli_si128(out2_16x16, 6);
 1357|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
 1358|       |
 1359|    570|    str_8x8 = _mm_srli_si128(out1_16x16, 5);
 1360|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
 1361|       |
 1362|    570|    str_8x8 = _mm_srli_si128(out2_16x16, 5);
 1363|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
 1364|       |
 1365|       |    str_8x8 = _mm_srli_si128(out1_16x16, 4);
 1366|    570|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8);
 1367|    570|}
ih264_intra_pred_luma_8x8_mode_horz_d_ssse3:
 1406|  1.26k|{
 1407|  1.26k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1408|  1.26k|    __m128i pels_16x16;
 1409|  1.26k|    __m128i temp1, temp2, temp3, temp4;
 1410|  1.26k|    __m128i a0_8x16, a1_8x16, a2_8x16;
 1411|  1.26k|    __m128i zero = _mm_setzero_si128();
 1412|  1.26k|    __m128i const_val2_8x16 = _mm_set1_epi16(2);
 1413|  1.26k|    __m128i res1_8x16, res2_8x16;
 1414|  1.26k|    __m128i out1_16x16, out2_16x16;
 1415|  1.26k|    __m128i str_8x8;
 1416|  1.26k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.26k|#define UNUSED(x) ((void)(x))
  ------------------
 1417|  1.26k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.26k|#define UNUSED(x) ((void)(x))
  ------------------
 1418|       |
 1419|  1.26k|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  1.26k|#define BLK8x8SIZE          8
  ------------------
 1420|       |
 1421|  1.26k|    pels_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7));
 1422|       |
 1423|  1.26k|    temp1 = _mm_srli_si128(pels_16x16, 1);
 1424|  1.26k|    temp2 = _mm_srli_si128(pels_16x16, 2);
 1425|  1.26k|    a0_8x16 = _mm_unpacklo_epi8(pels_16x16, zero);
 1426|  1.26k|    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1427|  1.26k|    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
 1428|       |
 1429|  1.26k|    res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
 1430|       |
 1431|  1.26k|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1432|  1.26k|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1433|  1.26k|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1434|  1.26k|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1435|  1.26k|    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1436|       |
 1437|  1.26k|    temp3 = _mm_unpacklo_epi16(res1_8x16, res2_8x16);
 1438|  1.26k|    temp4 = _mm_unpackhi_epi16(res1_8x16, res2_8x16);
 1439|  1.26k|    out2_16x16 = _mm_packus_epi16(temp3, temp4);
 1440|       |
 1441|  1.26k|    a0_8x16 = _mm_unpackhi_epi8(pels_16x16, zero);
 1442|  1.26k|    a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
 1443|  1.26k|    a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
 1444|       |
 1445|  1.26k|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1446|  1.26k|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1447|  1.26k|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1448|  1.26k|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1449|  1.26k|    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1450|       |
 1451|  1.26k|    out1_16x16 = _mm_packus_epi16(res2_8x16, zero);
 1452|  1.26k|    temp1 = _mm_srli_si128(out2_16x16, 8);
 1453|  1.26k|    out1_16x16 = _mm_unpacklo_epi64(temp1, out1_16x16);
 1454|       |
 1455|  1.26k|    str_8x8 = _mm_srli_si128(out1_16x16, 6);
 1456|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
 1457|  1.26k|    str_8x8 = _mm_srli_si128(out1_16x16, 4);
 1458|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
 1459|  1.26k|    str_8x8 = _mm_srli_si128(out1_16x16, 2);
 1460|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
 1461|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out1_16x16);
 1462|       |
 1463|  1.26k|    str_8x8 = _mm_srli_si128(out2_16x16, 6);
 1464|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
 1465|  1.26k|    str_8x8 = _mm_srli_si128(out2_16x16, 4);
 1466|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
 1467|       |    str_8x8 = _mm_srli_si128(out2_16x16, 2);
 1468|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
 1469|  1.26k|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16);
 1470|  1.26k|}
ih264_intra_pred_luma_8x8_mode_vert_l_ssse3:
 1510|  2.11k|{
 1511|  2.11k|    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
 1512|  2.11k|    __m128i top_16x16;
 1513|  2.11k|    __m128i temp1, temp2;
 1514|  2.11k|    __m128i a0_8x16, a1_8x16, a2_8x16;
 1515|  2.11k|    __m128i zero = _mm_setzero_si128();
 1516|  2.11k|    __m128i const_val2_8x16 = _mm_set1_epi16(2);
 1517|  2.11k|    __m128i res1_8x16, res2_8x16, res3_8x16, res4_8x16;
 1518|  2.11k|    __m128i out1_16x16, out2_16x16;
 1519|  2.11k|    UNUSED(src_strd);
  ------------------
  |  |   45|  2.11k|#define UNUSED(x) ((void)(x))
  ------------------
 1520|  2.11k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  2.11k|#define UNUSED(x) ((void)(x))
  ------------------
 1521|  2.11k|    pu1_top = pu1_src + BLK8x8SIZE + 1;
  ------------------
  |  |  510|  2.11k|#define BLK8x8SIZE          8
  ------------------
 1522|       |
 1523|  2.11k|    top_16x16 = _mm_loadu_si128((__m128i *)(pu1_top));
 1524|  2.11k|    temp1 = _mm_srli_si128(top_16x16, 1);
 1525|  2.11k|    temp2 = _mm_srli_si128(top_16x16, 2);
 1526|  2.11k|    a0_8x16 = _mm_unpacklo_epi8(top_16x16, zero);
 1527|  2.11k|    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1528|  2.11k|    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
 1529|       |
 1530|  2.11k|    res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
 1531|       |
 1532|  2.11k|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1533|  2.11k|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1534|  2.11k|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1535|  2.11k|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1536|  2.11k|    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1537|       |
 1538|  2.11k|    a0_8x16 = _mm_unpackhi_epi8(top_16x16, zero);
 1539|  2.11k|    a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
 1540|  2.11k|    a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
 1541|       |
 1542|  2.11k|    res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
 1543|       |
 1544|  2.11k|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1545|  2.11k|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1546|  2.11k|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1547|  2.11k|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1548|  2.11k|    res4_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1549|       |
 1550|  2.11k|    out1_16x16 = _mm_packus_epi16(res1_8x16, res3_8x16);
 1551|  2.11k|    out2_16x16 = _mm_packus_epi16(res2_8x16, res4_8x16);
 1552|       |
 1553|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out1_16x16);
 1554|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out2_16x16);
 1555|  2.11k|    out1_16x16 = _mm_srli_si128(out1_16x16, 1);
 1556|  2.11k|    out2_16x16 = _mm_srli_si128(out2_16x16, 1);
 1557|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out1_16x16);
 1558|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out2_16x16);
 1559|  2.11k|    out1_16x16 = _mm_srli_si128(out1_16x16, 1);
 1560|  2.11k|    out2_16x16 = _mm_srli_si128(out2_16x16, 1);
 1561|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out1_16x16);
 1562|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out2_16x16);
 1563|  2.11k|    out1_16x16 = _mm_srli_si128(out1_16x16, 1);
 1564|       |    out2_16x16 = _mm_srli_si128(out2_16x16, 1);
 1565|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out1_16x16);
 1566|  2.11k|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16);
 1567|  2.11k|}
ih264_intra_pred_luma_8x8_mode_horz_u_ssse3:
 1606|  4.75k|{
 1607|  4.75k|    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
 1608|  4.75k|    __m128i left_16x16;
 1609|  4.75k|    __m128i temp1, temp2;
 1610|  4.75k|    __m128i a0_8x16, a1_8x16, a2_8x16;
 1611|  4.75k|    __m128i zero = _mm_setzero_si128();
 1612|  4.75k|    __m128i const_val2_8x16 = _mm_set1_epi16(2);
 1613|  4.75k|    __m128i res1_8x16, res2_8x16;
 1614|  4.75k|    __m128i out1_16x16;
 1615|  4.75k|    __m128i str_8x8;
 1616|  4.75k|    __m128i shuffle_16x16;
 1617|  4.75k|    UNUSED(src_strd);
  ------------------
  |  |   45|  4.75k|#define UNUSED(x) ((void)(x))
  ------------------
 1618|  4.75k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  4.75k|#define UNUSED(x) ((void)(x))
  ------------------
 1619|       |
 1620|  4.75k|    pu1_left = pu1_src + BLK8x8SIZE - 1;
  ------------------
  |  |  510|  4.75k|#define BLK8x8SIZE          8
  ------------------
 1621|  4.75k|    shuffle_16x16 = _mm_set_epi8(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 1622|  4.75k|                                 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
 1623|  4.75k|                                 0x0F);
 1624|       |
 1625|  4.75k|    left_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7));
 1626|  4.75k|    temp1 = _mm_srli_si128(left_16x16, 1);
 1627|  4.75k|    a0_8x16 = _mm_unpacklo_epi8(left_16x16, zero);
 1628|  4.75k|    a0_8x16 = _mm_slli_si128(a0_8x16, 2);
 1629|  4.75k|    a1_8x16 = _mm_unpacklo_epi8(left_16x16, zero);
 1630|  4.75k|    a0_8x16 = _mm_shufflelo_epi16(a0_8x16, 0xE5);
 1631|  4.75k|    a2_8x16 = _mm_unpacklo_epi8(temp1, zero);
 1632|       |
 1633|  4.75k|    res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
 1634|       |
 1635|  4.75k|    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
 1636|  4.75k|    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
 1637|  4.75k|    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
 1638|  4.75k|    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
 1639|  4.75k|    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
 1640|       |
 1641|  4.75k|    temp1 = _mm_unpacklo_epi16(res1_8x16, res2_8x16);
 1642|  4.75k|    temp2 = _mm_unpackhi_epi16(res1_8x16, res2_8x16);
 1643|  4.75k|    out1_16x16 = _mm_packus_epi16(temp1, temp2);
 1644|  4.75k|    out1_16x16 = _mm_shuffle_epi8(out1_16x16, shuffle_16x16);
 1645|       |
 1646|  4.75k|    str_8x8 = _mm_srli_si128(out1_16x16, 1);
 1647|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
 1648|  4.75k|    str_8x8 = _mm_srli_si128(out1_16x16, 3);
 1649|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
 1650|  4.75k|    str_8x8 = _mm_srli_si128(out1_16x16, 5);
 1651|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
 1652|  4.75k|    str_8x8 = _mm_srli_si128(out1_16x16, 7);
 1653|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
 1654|  4.75k|    temp1 = _mm_set1_epi8(pu1_left[-7]);
 1655|  4.75k|    str_8x8 = _mm_unpacklo_epi64(str_8x8, temp1);
 1656|  4.75k|    str_8x8 = _mm_srli_si128(str_8x8, 2);
 1657|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
 1658|  4.75k|    str_8x8 = _mm_srli_si128(str_8x8, 2);
 1659|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
 1660|  4.75k|    str_8x8 = _mm_srli_si128(str_8x8, 2);
 1661|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
 1662|  4.75k|    str_8x8 = _mm_srli_si128(str_8x8, 2);
 1663|  4.75k|    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8);
 1664|       |
 1665|  4.75k|}
ih264_intra_pred_luma_16x16_mode_vert_ssse3:
 1707|  13.7k|{
 1708|  13.7k|    UWORD8 *pu1_top;
 1709|  13.7k|    WORD32 dst_strd2, dst_strd3, dst_strd4;
 1710|       |
 1711|  13.7k|    __m128i top_16x8b;
 1712|       |
 1713|  13.7k|    UNUSED(src_strd);
  ------------------
  |  |   45|  13.7k|#define UNUSED(x) ((void)(x))
  ------------------
 1714|  13.7k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  13.7k|#define UNUSED(x) ((void)(x))
  ------------------
 1715|       |
 1716|  13.7k|    pu1_top = pu1_src + MB_SIZE + 1;
  ------------------
  |  |  509|  13.7k|#define MB_SIZE             16
  ------------------
 1717|       |
 1718|  13.7k|    dst_strd2 = dst_strd << 1;
 1719|  13.7k|    dst_strd4 = dst_strd << 2;
 1720|       |
 1721|  13.7k|    top_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
 1722|       |
 1723|  13.7k|    dst_strd3 = dst_strd + dst_strd2;
 1724|       |
 1725|  13.7k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
 1726|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
 1727|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
 1728|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
 1729|  13.7k|    pu1_dst += dst_strd4;
 1730|       |
 1731|  13.7k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
 1732|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
 1733|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
 1734|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
 1735|  13.7k|    pu1_dst += dst_strd4;
 1736|       |
 1737|  13.7k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
 1738|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
 1739|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
 1740|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
 1741|  13.7k|    pu1_dst += dst_strd4;
 1742|       |
 1743|  13.7k|    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
 1744|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
 1745|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
 1746|  13.7k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
 1747|  13.7k|}
ih264_intra_pred_luma_16x16_mode_horz_ssse3:
 1786|  8.50k|{
 1787|  8.50k|    UWORD8 *pu1_left;
 1788|  8.50k|    WORD32 dst_strd2, dst_strd3, dst_strd4;
 1789|       |
 1790|  8.50k|    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
 1791|       |
 1792|  8.50k|    UNUSED(src_strd);
  ------------------
  |  |   45|  8.50k|#define UNUSED(x) ((void)(x))
  ------------------
 1793|  8.50k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  8.50k|#define UNUSED(x) ((void)(x))
  ------------------
 1794|       |
 1795|  8.50k|    pu1_left = pu1_src + MB_SIZE - 1;
  ------------------
  |  |  509|  8.50k|#define MB_SIZE             16
  ------------------
 1796|       |
 1797|  8.50k|    dst_strd4 = dst_strd << 2;
 1798|       |
 1799|  8.50k|    dst_strd2 = dst_strd << 1;
 1800|  8.50k|    dst_strd3 = dst_strd4 - dst_strd;
 1801|       |
 1802|  8.50k|    row1_16x8b = _mm_set1_epi8(*(pu1_left));
 1803|  8.50k|    row2_16x8b = _mm_set1_epi8(*(pu1_left - 1));
 1804|  8.50k|    row3_16x8b = _mm_set1_epi8(*(pu1_left - 2));
 1805|  8.50k|    row4_16x8b = _mm_set1_epi8(*(pu1_left - 3));
 1806|       |
 1807|  8.50k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
 1808|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
 1809|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
 1810|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
 1811|       |
 1812|  8.50k|    pu1_dst += dst_strd4;
 1813|  8.50k|    row1_16x8b = _mm_set1_epi8(*(pu1_left - 4));
 1814|  8.50k|    row2_16x8b = _mm_set1_epi8(*(pu1_left - 5));
 1815|  8.50k|    row3_16x8b = _mm_set1_epi8(*(pu1_left - 6));
 1816|  8.50k|    row4_16x8b = _mm_set1_epi8(*(pu1_left - 7));
 1817|       |
 1818|  8.50k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
 1819|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
 1820|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
 1821|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
 1822|       |
 1823|  8.50k|    pu1_dst += dst_strd4;
 1824|  8.50k|    row1_16x8b = _mm_set1_epi8(*(pu1_left - 8));
 1825|  8.50k|    row2_16x8b = _mm_set1_epi8(*(pu1_left - 9));
 1826|  8.50k|    row3_16x8b = _mm_set1_epi8(*(pu1_left - 10));
 1827|  8.50k|    row4_16x8b = _mm_set1_epi8(*(pu1_left - 11));
 1828|       |
 1829|  8.50k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
 1830|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
 1831|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
 1832|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
 1833|       |
 1834|  8.50k|    pu1_dst += dst_strd4;
 1835|  8.50k|    row1_16x8b = _mm_set1_epi8(*(pu1_left - 12));
 1836|  8.50k|    row2_16x8b = _mm_set1_epi8(*(pu1_left - 13));
 1837|  8.50k|    row3_16x8b = _mm_set1_epi8(*(pu1_left - 14));
 1838|  8.50k|    row4_16x8b = _mm_set1_epi8(*(pu1_left - 15));
 1839|       |
 1840|  8.50k|    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
 1841|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
 1842|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
 1843|  8.50k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
 1844|  8.50k|}
ih264_intra_pred_luma_16x16_mode_dc_ssse3:
 1883|  12.2k|{
 1884|  12.2k|    WORD8 u1_useleft, u1_usetop;
 1885|  12.2k|    WORD32 dc_val;
 1886|       |
 1887|  12.2k|    WORD32 dst_strd2, dst_strd3, dst_strd4;
 1888|       |
 1889|  12.2k|    __m128i dc_val_16x8b;
 1890|       |
 1891|  12.2k|    UNUSED(src_strd);
  ------------------
  |  |   45|  12.2k|#define UNUSED(x) ((void)(x))
  ------------------
 1892|       |
 1893|  12.2k|    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  12.2k|#define BOOLEAN(x) (!!(x))
  ------------------
 1894|  12.2k|    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   84|  12.2k|#define BOOLEAN(x) (!!(x))
  ------------------
 1895|       |
 1896|  12.2k|    if(u1_useleft || u1_usetop)
  ------------------
  |  Branch (1896:8): [True: 6.19k, False: 6.02k]
  |  Branch (1896:22): [True: 2.23k, False: 3.79k]
  ------------------
 1897|  8.42k|    {
 1898|  8.42k|        WORD32 shft;
 1899|  8.42k|        __m128i val_16x8b, zero_16x8b, sum_8x16b;
 1900|       |
 1901|  8.42k|        dc_val = 0;
 1902|  8.42k|        shft = 3;
 1903|       |
 1904|  8.42k|        zero_16x8b = _mm_setzero_si128();
 1905|       |
 1906|  8.42k|        if(u1_useleft)
  ------------------
  |  Branch (1906:12): [True: 6.19k, False: 2.23k]
  ------------------
 1907|  6.19k|        {
 1908|  6.19k|            UWORD8 *pu1_left;
 1909|       |
 1910|  6.19k|            pu1_left = pu1_src + MB_SIZE - 1;
  ------------------
  |  |  509|  6.19k|#define MB_SIZE             16
  ------------------
 1911|       |
 1912|  6.19k|            val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15));
 1913|  6.19k|            sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b);
 1914|       |
 1915|  6.19k|            shft++;
 1916|  6.19k|            dc_val += 8;
 1917|  6.19k|            dc_val += _mm_extract_epi16(sum_8x16b, 0);
 1918|  6.19k|            dc_val += _mm_extract_epi16(sum_8x16b, 4);
 1919|  6.19k|        }
 1920|  8.42k|        if(u1_usetop)
  ------------------
  |  Branch (1920:12): [True: 6.87k, False: 1.55k]
  ------------------
 1921|  6.87k|        {
 1922|  6.87k|            UWORD8 *pu1_top;
 1923|       |
 1924|  6.87k|            pu1_top = pu1_src + MB_SIZE + 1;
  ------------------
  |  |  509|  6.87k|#define MB_SIZE             16
  ------------------
 1925|       |
 1926|  6.87k|            val_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
 1927|  6.87k|            sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b);
 1928|       |
 1929|  6.87k|            shft++;
 1930|  6.87k|            dc_val += 8;
 1931|  6.87k|            dc_val += _mm_extract_epi16(sum_8x16b, 0);
 1932|  6.87k|            dc_val += _mm_extract_epi16(sum_8x16b, 4);
 1933|  6.87k|        }
 1934|  8.42k|        dc_val = dc_val >> shft;
 1935|  8.42k|    }
 1936|  3.79k|    else
 1937|  3.79k|        dc_val = 128;
 1938|       |
 1939|  12.2k|    dc_val_16x8b =  _mm_set1_epi8(dc_val);
 1940|       |
 1941|  12.2k|    dst_strd2 = dst_strd << 1;
 1942|  12.2k|    dst_strd4 = dst_strd << 2;
 1943|  12.2k|    dst_strd3 = dst_strd + dst_strd2;
 1944|       |
 1945|  12.2k|    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
 1946|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
 1947|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
 1948|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
 1949|  12.2k|    pu1_dst += dst_strd4;
 1950|       |
 1951|  12.2k|    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
 1952|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
 1953|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
 1954|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
 1955|  12.2k|    pu1_dst += dst_strd4;
 1956|       |
 1957|  12.2k|    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
 1958|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
 1959|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
 1960|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
 1961|  12.2k|    pu1_dst += dst_strd4;
 1962|       |
 1963|  12.2k|    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
 1964|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
 1965|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
 1966|  12.2k|    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
 1967|  12.2k|}
ih264_intra_pred_luma_16x16_mode_plane_ssse3:
 2006|  1.28k|{
 2007|  1.28k|    UWORD8 *pu1_left, *pu1_top;
 2008|  1.28k|    WORD32 a, b, c;
 2009|       |
 2010|  1.28k|    __m128i rev_8x16b, mul_8x16b, zero_16x8b;
 2011|       |
 2012|  1.28k|    UNUSED(src_strd);
  ------------------
  |  |   45|  1.28k|#define UNUSED(x) ((void)(x))
  ------------------
 2013|  1.28k|    UNUSED(ngbr_avail);
  ------------------
  |  |   45|  1.28k|#define UNUSED(x) ((void)(x))
  ------------------
 2014|       |
 2015|  1.28k|    pu1_top = pu1_src + MB_SIZE + 1;
  ------------------
  |  |  509|  1.28k|#define MB_SIZE             16
  ------------------
 2016|  1.28k|    pu1_left = pu1_src + MB_SIZE - 1;
  ------------------
  |  |  509|  1.28k|#define MB_SIZE             16
  ------------------
 2017|       |
 2018|  1.28k|    rev_8x16b = _mm_setr_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100);
 2019|       |    //used to reverse the order of 16-bit values in a vector
 2020|       |
 2021|  1.28k|    mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
 2022|  1.28k|    zero_16x8b = _mm_setzero_si128();
 2023|       |
 2024|       |    //calculating a, b and c
 2025|  1.28k|    {
 2026|  1.28k|        WORD32 h, v;
 2027|       |
 2028|  1.28k|        __m128i h_val1_16x8b, h_val2_16x8b;
 2029|  1.28k|        __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b;
 2030|  1.28k|        __m128i v_val1_16x8b, v_val2_16x8b;
 2031|  1.28k|        __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b;
 2032|  1.28k|        __m128i hv_val_4x32b;
 2033|       |
 2034|  1.28k|        a = (pu1_top[15] + pu1_left[-15]) << 4;
 2035|       |
 2036|  1.28k|        h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8));
 2037|  1.28k|        h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 1));
 2038|  1.28k|        v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 15));
 2039|  1.28k|        v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 6));
 2040|       |
 2041|  1.28k|        h_val1_8x16b = _mm_unpacklo_epi8(h_val1_16x8b, zero_16x8b);
 2042|  1.28k|        h_val2_8x16b = _mm_unpacklo_epi8(h_val2_16x8b, zero_16x8b);
 2043|  1.28k|        v_val1_8x16b = _mm_unpacklo_epi8(v_val1_16x8b, zero_16x8b);
 2044|  1.28k|        v_val2_8x16b = _mm_unpacklo_epi8(v_val2_16x8b, zero_16x8b);
 2045|       |
 2046|  1.28k|        h_val2_8x16b = _mm_shuffle_epi8(h_val2_8x16b, rev_8x16b);
 2047|  1.28k|        v_val1_8x16b = _mm_shuffle_epi8(v_val1_8x16b, rev_8x16b);
 2048|       |
 2049|  1.28k|        h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b);
 2050|  1.28k|        v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b);
 2051|       |
 2052|  1.28k|        h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b);
 2053|  1.28k|        v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b);
 2054|       |
 2055|  1.28k|        hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b);
 2056|  1.28k|        hv_val_4x32b = _mm_hadd_epi32(hv_val_4x32b, hv_val_4x32b);
 2057|       |
 2058|  1.28k|        h = _mm_extract_epi16(hv_val_4x32b, 0);
 2059|  1.28k|        v = _mm_extract_epi16(hv_val_4x32b, 2);
 2060|  1.28k|        h = (h << 16) >> 16;
 2061|  1.28k|        v = (v << 16) >> 16;
 2062|       |
 2063|  1.28k|        b = ((h << 2) + h + 32) >> 6;
 2064|  1.28k|        c = ((v << 2) + v + 32) >> 6;
 2065|  1.28k|    }
 2066|       |
 2067|       |    //using a, b and c to compute the fitted plane values
 2068|  1.28k|    {
 2069|  1.28k|        __m128i const_8x16b, b_8x16b, c_8x16b, c2_8x16b;
 2070|  1.28k|        __m128i res1_l_8x16b, res1_h_8x16b;
 2071|  1.28k|        __m128i res2_l_8x16b, res2_h_8x16b;
 2072|  1.28k|        __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b;
 2073|  1.28k|        __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b;
 2074|       |
 2075|  1.28k|        b_8x16b = _mm_set1_epi16(b);
 2076|  1.28k|        c_8x16b = _mm_set1_epi16(c);
 2077|  1.28k|        c2_8x16b = _mm_set1_epi16(c << 1);
 2078|  1.28k|        const_8x16b = _mm_set1_epi16(a - c*7 + 16);
 2079|       |
 2080|  1.28k|        res1_h_8x16b = _mm_mullo_epi16(mul_8x16b, b_8x16b);
 2081|       |        //contains {b*1, b*2, b*3,... b*8}
 2082|       |
 2083|  1.28k|        res1_l_8x16b = _mm_shuffle_epi8(res1_h_8x16b, rev_8x16b);
 2084|  1.28k|        res1_l_8x16b = _mm_srli_si128(res1_l_8x16b, 2);
 2085|  1.28k|        res1_l_8x16b = _mm_sub_epi16(zero_16x8b, res1_l_8x16b);
 2086|       |        //contains {-b*7, -b*6,... -b*1, b*0}
 2087|       |
 2088|       |        // rows 1, 2
 2089|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b);
 2090|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b);
 2091|  1.28k|        res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c_8x16b);
 2092|  1.28k|        res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c_8x16b);
 2093|       |
 2094|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2095|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2096|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2097|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2098|       |
 2099|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2100|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2101|       |
 2102|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2103|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2104|       |
 2105|       |        // rows 3, 4
 2106|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
 2107|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
 2108|  1.28k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
 2109|  1.28k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
 2110|       |
 2111|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2112|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2113|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2114|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2115|       |
 2116|  1.28k|        pu1_dst += dst_strd << 1;
 2117|       |
 2118|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2119|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2120|       |
 2121|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2122|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2123|       |
 2124|       |        // rows 5, 6
 2125|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
 2126|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
 2127|  1.28k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
 2128|  1.28k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
 2129|       |
 2130|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2131|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2132|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2133|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2134|       |
 2135|  1.28k|        pu1_dst += dst_strd << 1;
 2136|       |
 2137|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2138|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2139|       |
 2140|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2141|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2142|       |
 2143|       |        // rows 7, 8
 2144|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
 2145|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
 2146|  1.28k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
 2147|  1.28k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
 2148|       |
 2149|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2150|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2151|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2152|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2153|       |
 2154|  1.28k|        pu1_dst += dst_strd << 1;
 2155|       |
 2156|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2157|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2158|       |
 2159|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2160|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2161|       |
 2162|       |        // rows 9, 10
 2163|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
 2164|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
 2165|  1.28k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
 2166|  1.28k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
 2167|       |
 2168|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2169|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2170|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2171|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2172|       |
 2173|  1.28k|        pu1_dst += dst_strd << 1;
 2174|       |
 2175|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2176|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2177|       |
 2178|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2179|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2180|       |
 2181|       |        // rows 11, 12
 2182|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
 2183|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
 2184|  1.28k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
 2185|  1.28k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
 2186|       |
 2187|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2188|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2189|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2190|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2191|       |
 2192|  1.28k|        pu1_dst += dst_strd << 1;
 2193|       |
 2194|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2195|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2196|       |
 2197|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2198|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2199|       |
 2200|       |        // rows 13, 14
 2201|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
 2202|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
 2203|  1.28k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
 2204|  1.28k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
 2205|       |
 2206|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2207|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2208|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2209|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2210|       |
 2211|  1.28k|        pu1_dst += dst_strd << 1;
 2212|       |
 2213|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2214|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2215|       |
 2216|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2217|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2218|       |
 2219|       |        // rows 15, 16
 2220|  1.28k|        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
 2221|  1.28k|        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
 2222|  1.28k|        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
 2223|  1.28k|        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
 2224|       |
 2225|  1.28k|        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
 2226|  1.28k|        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
 2227|  1.28k|        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
 2228|  1.28k|        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
 2229|       |
 2230|  1.28k|        pu1_dst += dst_strd << 1;
 2231|       |
 2232|  1.28k|        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
 2233|  1.28k|        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
 2234|       |
 2235|  1.28k|        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
 2236|  1.28k|        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
 2237|  1.28k|    }
 2238|  1.28k|}

ih264_pad_left_luma_ssse3:
   96|  70.6k|{
   97|  70.6k|    WORD32 row;
   98|  70.6k|    WORD32 i;
   99|  70.6k|    UWORD8 *pu1_dst;
  100|       |
  101|  70.6k|    ASSERT(pad_size % 8 == 0);
  ------------------
  |  |   56|  70.6k|#define ASSERT(x) assert((x))
  ------------------
  |  Branch (101:5): [True: 0, False: 70.6k]
  |  Branch (101:5): [True: 70.6k, False: 0]
  ------------------
  102|       |
  103|  18.9M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (103:18): [True: 18.9M, False: 70.6k]
  ------------------
  104|  18.9M|    {
  105|  18.9M|        __m128i src_temp0_16x8b;
  106|       |
  107|  18.9M|        pu1_dst = pu1_src - pad_size;
  108|  18.9M|        src_temp0_16x8b = _mm_set1_epi8(*pu1_src);
  109|  94.5M|        for(i = 0; i < pad_size; i += 8)
  ------------------
  |  Branch (109:20): [True: 75.6M, False: 18.9M]
  ------------------
  110|  75.6M|        {
  111|  75.6M|            _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
  112|  75.6M|        }
  113|  18.9M|        pu1_src += src_strd;
  114|  18.9M|    }
  115|       |
  116|  70.6k|}
ih264_pad_left_chroma_ssse3:
  163|  70.6k|{
  164|  70.6k|    WORD32 row;
  165|  70.6k|    WORD32 col;
  166|  70.6k|    UWORD8 *pu1_dst;
  167|       |
  168|  70.6k|    ASSERT(pad_size % 8 == 0);
  ------------------
  |  |   56|  70.6k|#define ASSERT(x) assert((x))
  ------------------
  |  Branch (168:5): [True: 0, False: 70.6k]
  |  Branch (168:5): [True: 70.6k, False: 0]
  ------------------
  169|  9.52M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (169:18): [True: 9.45M, False: 70.6k]
  ------------------
  170|  9.45M|    {
  171|  9.45M|        __m128i src_temp0_16x8b;
  172|       |
  173|  9.45M|        pu1_dst = pu1_src - pad_size;
  174|  9.45M|        src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)pu1_src));
  175|  47.2M|        for(col = 0; col < pad_size; col += 8)
  ------------------
  |  Branch (175:22): [True: 37.8M, False: 9.45M]
  ------------------
  176|  37.8M|        {
  177|  37.8M|            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
  178|  37.8M|        }
  179|  9.45M|        pu1_src += src_strd;
  180|  9.45M|    }
  181|       |
  182|  70.6k|}
ih264_pad_right_luma_ssse3:
  229|  70.6k|{
  230|  70.6k|    WORD32 row;
  231|  70.6k|    WORD32 col;
  232|  70.6k|    UWORD8 *pu1_dst;
  233|       |
  234|  70.6k|    ASSERT(pad_size % 8 == 0);
  ------------------
  |  |   56|  70.6k|#define ASSERT(x) assert((x))
  ------------------
  |  Branch (234:5): [True: 0, False: 70.6k]
  |  Branch (234:5): [True: 70.6k, False: 0]
  ------------------
  235|       |
  236|  18.9M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (236:18): [True: 18.9M, False: 70.6k]
  ------------------
  237|  18.9M|    {
  238|  18.9M|        __m128i src_temp0_16x8b;
  239|       |
  240|  18.9M|        pu1_dst = pu1_src;
  241|  18.9M|        src_temp0_16x8b = _mm_set1_epi8(*(pu1_src - 1));
  242|  94.5M|        for(col = 0; col < pad_size; col += 8)
  ------------------
  |  Branch (242:22): [True: 75.6M, False: 18.9M]
  ------------------
  243|  75.6M|        {
  244|  75.6M|            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
  245|  75.6M|        }
  246|  18.9M|        pu1_src += src_strd;
  247|  18.9M|    }
  248|       |
  249|  70.6k|}
ih264_pad_right_chroma_ssse3:
  296|  70.6k|{
  297|  70.6k|    WORD32 row;
  298|  70.6k|    WORD32 col;
  299|  70.6k|    UWORD8 *pu1_dst;
  300|       |
  301|  70.6k|    ASSERT(pad_size % 8 == 0);
  ------------------
  |  |   56|  70.6k|#define ASSERT(x) assert((x))
  ------------------
  |  Branch (301:5): [True: 0, False: 70.6k]
  |  Branch (301:5): [True: 70.6k, False: 0]
  ------------------
  302|       |
  303|  9.52M|    for(row = 0; row < ht; row++)
  ------------------
  |  Branch (303:18): [True: 9.45M, False: 70.6k]
  ------------------
  304|  9.45M|    {
  305|  9.45M|        __m128i src_temp0_16x8b;
  306|       |
  307|  9.45M|        pu1_dst = pu1_src;
  308|  9.45M|        src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)(pu1_src - 2)));
  309|  47.2M|        for(col = 0; col < pad_size; col += 8)
  ------------------
  |  Branch (309:22): [True: 37.8M, False: 9.45M]
  ------------------
  310|  37.8M|        {
  311|  37.8M|            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
  312|  37.8M|        }
  313|       |
  314|  9.45M|        pu1_src += src_strd;
  315|  9.45M|    }
  316|  70.6k|}

isvcd_parse_epslice.c:CLZ:
   97|   630k|{
   98|   630k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 620k, False: 10.2k]
  ------------------
   99|   620k|    return(__builtin_clz(u4_word));
  100|  10.2k|    else
  101|  10.2k|        return 31;
  102|   630k|}
ih264_iquant_itrans_recon_sse42.c:loadu_32:
   47|   499k|{
   48|   499k|  struct __loadu_si32 {
   49|   499k|    int __v;
   50|   499k|  } __attribute__((__packed__, __may_alias__));
   51|   499k|  int __u = ((struct __loadu_si32*)__a)->__v;
   52|   499k|  return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
   53|   499k|}
ih264d_cabac.c:CLZ:
   97|  6.87M|{
   98|  6.87M|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 6.87M, False: 0]
  ------------------
   99|  6.87M|    return(__builtin_clz(u4_word));
  100|      0|    else
  101|      0|        return 31;
  102|  6.87M|}
ih264d_parse_cabac.c:CLZ:
   97|  13.5M|{
   98|  13.5M|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 13.5M, False: 0]
  ------------------
   99|  13.5M|    return(__builtin_clz(u4_word));
  100|      0|    else
  101|      0|        return 31;
  102|  13.5M|}
ih264d_parse_cavlc.c:CLZ:
   97|  7.50M|{
   98|  7.50M|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 7.21M, False: 288k]
  ------------------
   99|  7.21M|    return(__builtin_clz(u4_word));
  100|   288k|    else
  101|   288k|        return 31;
  102|  7.50M|}
ih264d_parse_islice.c:CLZ:
   97|   117k|{
   98|   117k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 116k, False: 1.40k]
  ------------------
   99|   116k|    return(__builtin_clz(u4_word));
  100|  1.40k|    else
  101|  1.40k|        return 31;
  102|   117k|}
ih264d_parse_mb_header.c:CLZ:
   97|  6.89M|{
   98|  6.89M|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 6.89M, False: 0]
  ------------------
   99|  6.89M|    return(__builtin_clz(u4_word));
  100|      0|    else
  101|      0|        return 31;
  102|  6.89M|}
ih264d_parse_pslice.c:CLZ:
   97|   160k|{
   98|   160k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 154k, False: 6.06k]
  ------------------
   99|   154k|    return(__builtin_clz(u4_word));
  100|  6.06k|    else
  101|  6.06k|        return 31;
  102|   160k|}
ih264d_process_intra_mb.c:CLZ:
   97|  1.43M|{
   98|  1.43M|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 1.43M, False: 0]
  ------------------
   99|  1.43M|    return(__builtin_clz(u4_word));
  100|      0|    else
  101|      0|        return 31;
  102|  1.43M|}
ih264d_utils.c:CLZ:
   97|   264k|{
   98|   264k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 264k, False: 0]
  ------------------
   99|   264k|    return(__builtin_clz(u4_word));
  100|      0|    else
  101|      0|        return 31;
  102|   264k|}
isvcd_parse_cavlc.c:CLZ:
   97|  15.3k|{
   98|  15.3k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 15.0k, False: 261]
  ------------------
   99|  15.0k|    return(__builtin_clz(u4_word));
  100|    261|    else
  101|    261|        return 31;
  102|  15.3k|}
isvcd_parse_ebslice.c:CLZ:
   97|  90.5k|{
   98|  90.5k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 88.5k, False: 2.02k]
  ------------------
   99|  88.5k|    return(__builtin_clz(u4_word));
  100|  2.02k|    else
  101|  2.02k|        return 31;
  102|  90.5k|}
isvcd_parse_eislice.c:CLZ:
   97|   287k|{
   98|   287k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 285k, False: 1.96k]
  ------------------
   99|   285k|    return(__builtin_clz(u4_word));
  100|  1.96k|    else
  101|  1.96k|        return 31;
  102|   287k|}
ih264d_parse_bslice.c:CLZ:
   97|   162k|{
   98|   162k|    if(u4_word)
  ------------------
  |  Branch (98:8): [True: 158k, False: 4.84k]
  ------------------
   99|   158k|    return(__builtin_clz(u4_word));
  100|  4.84k|    else
  101|  4.84k|        return 31;
  102|   162k|}

ih264_default_weighted_pred_luma_sse42:
   93|   255k|{
   94|   255k|    __m128i y0_0_16x8b, y0_1_16x8b, y0_2_16x8b, y0_3_16x8b;
   95|   255k|    __m128i y1_0_16x8b, y1_1_16x8b, y1_2_16x8b, y1_3_16x8b;
   96|       |
   97|   255k|    if(wd == 4)
  ------------------
  |  Branch (97:8): [True: 3.83k, False: 251k]
  ------------------
   98|  3.83k|    {
   99|  3.83k|        do
  100|  5.44k|        {
  101|  5.44k|            y0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
  102|  5.44k|            y0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
  103|  5.44k|            y0_2_16x8b = _mm_loadl_epi64(
  104|  5.44k|                            (__m128i *)(pu1_src1 + (src_strd1 << 1)));
  105|  5.44k|            y0_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
  106|       |
  107|  5.44k|            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
  108|  5.44k|            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
  109|  5.44k|            y1_2_16x8b = _mm_loadl_epi64(
  110|  5.44k|                            (__m128i *)(pu1_src2 + (src_strd2 << 1)));
  111|  5.44k|            y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
  112|       |
  113|  5.44k|            y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b);
  114|  5.44k|            y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b);
  115|  5.44k|            y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b);
  116|  5.44k|            y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b);
  117|       |
  118|  5.44k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y0_0_16x8b);
  119|  5.44k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y0_1_16x8b);
  120|  5.44k|            *((WORD32 *)(pu1_dst + (dst_strd << 1))) = _mm_cvtsi128_si32(y0_2_16x8b);
  121|  5.44k|            *((WORD32 *)(pu1_dst + dst_strd * 3)) = _mm_cvtsi128_si32(y0_3_16x8b);
  122|       |
  123|  5.44k|            ht -= 4;
  124|  5.44k|            pu1_src1 += src_strd1 << 2;
  125|  5.44k|            pu1_src2 += src_strd2 << 2;
  126|  5.44k|            pu1_dst += dst_strd << 2;
  127|  5.44k|        }
  128|  5.44k|        while(ht > 0);
  ------------------
  |  Branch (128:15): [True: 1.61k, False: 3.83k]
  ------------------
  129|  3.83k|    }
  130|   251k|    else if(wd == 8)
  ------------------
  |  Branch (130:13): [True: 28.3k, False: 222k]
  ------------------
  131|  28.3k|    {
  132|  28.3k|        do
  133|  72.4k|        {
  134|  72.4k|            y0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
  135|  72.4k|            y0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
  136|  72.4k|            y0_2_16x8b = _mm_loadl_epi64(
  137|  72.4k|                            (__m128i *)(pu1_src1 + (src_strd1 << 1)));
  138|  72.4k|            y0_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
  139|       |
  140|  72.4k|            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
  141|  72.4k|            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
  142|  72.4k|            y1_2_16x8b = _mm_loadl_epi64(
  143|  72.4k|                            (__m128i *)(pu1_src2 + (src_strd2 << 1)));
  144|  72.4k|            y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
  145|       |
  146|  72.4k|            y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b);
  147|  72.4k|            y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b);
  148|  72.4k|            y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b);
  149|  72.4k|            y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b);
  150|       |
  151|  72.4k|            _mm_storel_epi64((__m128i *)pu1_dst, y0_0_16x8b);
  152|  72.4k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y0_1_16x8b);
  153|  72.4k|            _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y0_2_16x8b);
  154|  72.4k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y0_3_16x8b);
  155|       |
  156|  72.4k|            ht -= 4;
  157|  72.4k|            pu1_src1 += src_strd1 << 2;
  158|  72.4k|            pu1_src2 += src_strd2 << 2;
  159|  72.4k|            pu1_dst += dst_strd << 2;
  160|  72.4k|        }
  161|  72.4k|        while(ht > 0);
  ------------------
  |  Branch (161:15): [True: 44.0k, False: 28.3k]
  ------------------
  162|  28.3k|    }
  163|   222k|    else // wd == 16
  164|   222k|    {
  165|   222k|        __m128i y0_4_16x8b, y0_5_16x8b, y0_6_16x8b, y0_7_16x8b;
  166|   222k|        __m128i y1_4_16x8b, y1_5_16x8b, y1_6_16x8b, y1_7_16x8b;
  167|       |
  168|   222k|        do
  169|   429k|        {
  170|   429k|            y0_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
  171|   429k|            y0_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
  172|   429k|            y0_2_16x8b = _mm_loadu_si128(
  173|   429k|                            (__m128i *)(pu1_src1 + (src_strd1 << 1)));
  174|   429k|            y0_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 3));
  175|   429k|            y0_4_16x8b = _mm_loadu_si128(
  176|   429k|                            (__m128i *)(pu1_src1 + (src_strd1 << 2)));
  177|   429k|            y0_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 5));
  178|   429k|            y0_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 6));
  179|   429k|            y0_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 7));
  180|       |
  181|   429k|            y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
  182|   429k|            y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
  183|   429k|            y1_2_16x8b = _mm_loadu_si128(
  184|   429k|                            (__m128i *)(pu1_src2 + (src_strd2 << 1)));
  185|   429k|            y1_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 3));
  186|   429k|            y1_4_16x8b = _mm_loadu_si128(
  187|   429k|                            (__m128i *)(pu1_src2 + (src_strd2 << 2)));
  188|   429k|            y1_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 5));
  189|   429k|            y1_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 6));
  190|   429k|            y1_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 7));
  191|       |
  192|   429k|            y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b);
  193|   429k|            y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b);
  194|   429k|            y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b);
  195|   429k|            y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b);
  196|   429k|            y0_4_16x8b = _mm_avg_epu8(y0_4_16x8b, y1_4_16x8b);
  197|   429k|            y0_5_16x8b = _mm_avg_epu8(y0_5_16x8b, y1_5_16x8b);
  198|   429k|            y0_6_16x8b = _mm_avg_epu8(y0_6_16x8b, y1_6_16x8b);
  199|   429k|            y0_7_16x8b = _mm_avg_epu8(y0_7_16x8b, y1_7_16x8b);
  200|       |
  201|   429k|            _mm_storeu_si128((__m128i *)pu1_dst, y0_0_16x8b);
  202|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y0_1_16x8b);
  203|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y0_2_16x8b);
  204|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y0_3_16x8b);
  205|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 2)), y0_4_16x8b);
  206|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 5), y0_5_16x8b);
  207|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 6), y0_6_16x8b);
  208|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 7), y0_7_16x8b);
  209|       |
  210|   429k|            ht -= 8;
  211|   429k|            pu1_src1 += src_strd1 << 3;
  212|   429k|            pu1_src2 += src_strd2 << 3;
  213|   429k|            pu1_dst += dst_strd << 3;
  214|   429k|        }
  215|   429k|        while(ht > 0);
  ------------------
  |  Branch (215:15): [True: 206k, False: 222k]
  ------------------
  216|   222k|    }
  217|   255k|}
ih264_default_weighted_pred_chroma_sse42:
  256|   255k|{
  257|   255k|    __m128i uv0_0_16x8b, uv0_1_16x8b;
  258|   255k|    __m128i uv1_0_16x8b, uv1_1_16x8b;
  259|       |
  260|   255k|    if(wd == 2)
  ------------------
  |  Branch (260:8): [True: 3.83k, False: 251k]
  ------------------
  261|  3.83k|    {
  262|  3.83k|        do
  263|  5.44k|        {
  264|  5.44k|            uv0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
  265|  5.44k|            uv0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
  266|       |
  267|  5.44k|            uv1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
  268|  5.44k|            uv1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
  269|       |
  270|  5.44k|            uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b);
  271|  5.44k|            uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b);
  272|       |
  273|  5.44k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(uv0_0_16x8b);
  274|  5.44k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(uv0_1_16x8b);
  275|       |
  276|  5.44k|            ht -= 2;
  277|  5.44k|            pu1_src1 += src_strd1 << 1;
  278|  5.44k|            pu1_src2 += src_strd2 << 1;
  279|  5.44k|            pu1_dst += dst_strd << 1;
  280|  5.44k|        }
  281|  5.44k|        while(ht > 0);
  ------------------
  |  Branch (281:15): [True: 1.61k, False: 3.83k]
  ------------------
  282|  3.83k|    }
  283|   251k|    else if(wd == 4)
  ------------------
  |  Branch (283:13): [True: 28.3k, False: 222k]
  ------------------
  284|  28.3k|    {
  285|  28.3k|        do
  286|  72.4k|        {
  287|  72.4k|            uv0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
  288|  72.4k|            uv0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
  289|       |
  290|  72.4k|            uv1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
  291|  72.4k|            uv1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
  292|       |
  293|  72.4k|            uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b);
  294|  72.4k|            uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b);
  295|       |
  296|  72.4k|            _mm_storel_epi64((__m128i *)pu1_dst, uv0_0_16x8b);
  297|  72.4k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), uv0_1_16x8b);
  298|       |
  299|  72.4k|            ht -= 2;
  300|  72.4k|            pu1_src1 += src_strd1 << 1;
  301|  72.4k|            pu1_src2 += src_strd2 << 1;
  302|  72.4k|            pu1_dst += dst_strd << 1;
  303|  72.4k|        }
  304|  72.4k|        while(ht > 0);
  ------------------
  |  Branch (304:15): [True: 44.0k, False: 28.3k]
  ------------------
  305|  28.3k|    }
  306|   222k|    else // wd == 8
  307|   222k|    {
  308|   222k|        __m128i uv0_2_16x8b, uv0_3_16x8b;
  309|   222k|        __m128i uv1_2_16x8b, uv1_3_16x8b;
  310|       |
  311|   222k|        do
  312|   429k|        {
  313|   429k|            uv0_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
  314|   429k|            uv0_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
  315|   429k|            uv0_2_16x8b = _mm_loadu_si128(
  316|   429k|                            (__m128i *)(pu1_src1 + (src_strd1 << 1)));
  317|   429k|            uv0_3_16x8b = _mm_loadu_si128(
  318|   429k|                            (__m128i *)(pu1_src1 + src_strd1 * 3));
  319|       |
  320|   429k|            uv1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
  321|   429k|            uv1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
  322|   429k|            uv1_2_16x8b = _mm_loadu_si128(
  323|   429k|                            (__m128i *)(pu1_src2 + (src_strd2 << 1)));
  324|   429k|            uv1_3_16x8b = _mm_loadu_si128(
  325|   429k|                            (__m128i *)(pu1_src2 + src_strd2 * 3));
  326|       |
  327|   429k|            uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b);
  328|   429k|            uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b);
  329|   429k|            uv0_2_16x8b = _mm_avg_epu8(uv0_2_16x8b, uv1_2_16x8b);
  330|   429k|            uv0_3_16x8b = _mm_avg_epu8(uv0_3_16x8b, uv1_3_16x8b);
  331|       |
  332|   429k|            _mm_storeu_si128((__m128i *)pu1_dst, uv0_0_16x8b);
  333|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), uv0_1_16x8b);
  334|   429k|            _mm_storeu_si128(
  335|   429k|                            (__m128i *)(pu1_dst + (dst_strd << 1)), uv0_2_16x8b);
  336|   429k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), uv0_3_16x8b);
  337|       |
  338|   429k|            ht -= 4;
  339|   429k|            pu1_src1 += src_strd1 << 2;
  340|   429k|            pu1_src2 += src_strd2 << 2;
  341|   429k|            pu1_dst += dst_strd << 2;
  342|   429k|        }
  343|   429k|        while(ht > 0);
  ------------------
  |  Branch (343:15): [True: 206k, False: 222k]
  ------------------
  344|   222k|    }
  345|   255k|}
ih264_weighted_pred_luma_sse42:
  387|  4.01M|{
  388|  4.01M|    __m128i y_0_16x8b, y_1_16x8b, y_2_16x8b, y_3_16x8b;
  389|       |
  390|  4.01M|    __m128i wt_8x16b, round_8x16b, ofst_8x16b;
  391|       |
  392|  4.01M|    WORD32 round_val;
  393|       |
  394|  4.01M|    wt = (WORD16)(wt & 0xffff);
  395|  4.01M|    round_val = 1 << (log_wd - 1);
  396|  4.01M|    ofst = (WORD8)(ofst & 0xff);
  397|       |
  398|  4.01M|    wt_8x16b = _mm_set1_epi16(wt);
  399|  4.01M|    round_8x16b = _mm_set1_epi16(round_val);
  400|  4.01M|    ofst_8x16b = _mm_set1_epi16(ofst);
  401|       |
  402|  4.01M|    if(wd == 4)
  ------------------
  |  Branch (402:8): [True: 29.6k, False: 3.98M]
  ------------------
  403|  29.6k|    {
  404|  29.6k|        __m128i y_0_8x16b, y_2_8x16b;
  405|       |
  406|  29.6k|        do
  407|  55.4k|        {
  408|  55.4k|            y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  409|  55.4k|            y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
  410|  55.4k|            y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + (src_strd << 1)));
  411|  55.4k|            y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd * 3));
  412|       |
  413|  55.4k|            y_0_16x8b = _mm_unpacklo_epi32(y_0_16x8b, y_1_16x8b);
  414|  55.4k|            y_2_16x8b = _mm_unpacklo_epi32(y_2_16x8b, y_3_16x8b);
  415|       |
  416|  55.4k|            y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b);
  417|  55.4k|            y_2_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
  418|       |
  419|  55.4k|            y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b);
  420|  55.4k|            y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b);
  421|       |
  422|  55.4k|            y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b);
  423|  55.4k|            y_2_8x16b = _mm_adds_epi16(round_8x16b, y_2_8x16b);
  424|       |
  425|  55.4k|            y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd);
  426|  55.4k|            y_2_8x16b = _mm_srai_epi16(y_2_8x16b, log_wd);
  427|       |
  428|  55.4k|            y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b);
  429|  55.4k|            y_2_8x16b = _mm_adds_epi16(ofst_8x16b, y_2_8x16b);
  430|       |
  431|  55.4k|            y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_2_8x16b);
  432|  55.4k|            y_1_16x8b = _mm_srli_si128(y_0_16x8b, 4);
  433|  55.4k|            y_2_16x8b = _mm_srli_si128(y_0_16x8b, 8);
  434|  55.4k|            y_3_16x8b = _mm_srli_si128(y_0_16x8b, 12);
  435|       |
  436|  55.4k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y_0_16x8b);
  437|  55.4k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y_1_16x8b);
  438|  55.4k|            *((WORD32 *)(pu1_dst + (dst_strd << 1))) = _mm_cvtsi128_si32(y_2_16x8b);
  439|  55.4k|            *((WORD32 *)(pu1_dst + dst_strd * 3)) = _mm_cvtsi128_si32(y_3_16x8b);
  440|       |
  441|  55.4k|            ht -= 4;
  442|  55.4k|            pu1_src += src_strd << 2;
  443|  55.4k|            pu1_dst += dst_strd << 2;
  444|  55.4k|        }
  445|  55.4k|        while(ht > 0);
  ------------------
  |  Branch (445:15): [True: 25.7k, False: 29.6k]
  ------------------
  446|  29.6k|    }
  447|  3.98M|    else if(wd == 8)
  ------------------
  |  Branch (447:13): [True: 15.1k, False: 3.96M]
  ------------------
  448|  15.1k|    {
  449|  15.1k|        __m128i y_0_8x16b, y_1_8x16b, y_2_8x16b, y_3_8x16b;
  450|       |
  451|  15.1k|        do
  452|  33.0k|        {
  453|  33.0k|            y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  454|  33.0k|            y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
  455|  33.0k|            y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + (src_strd << 1)));
  456|  33.0k|            y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd * 3));
  457|       |
  458|  33.0k|            y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b);
  459|  33.0k|            y_1_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
  460|  33.0k|            y_2_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
  461|  33.0k|            y_3_8x16b = _mm_cvtepu8_epi16(y_3_16x8b);
  462|       |
  463|  33.0k|            y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b);
  464|  33.0k|            y_1_8x16b = _mm_mullo_epi16(y_1_8x16b, wt_8x16b);
  465|  33.0k|            y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b);
  466|  33.0k|            y_3_8x16b = _mm_mullo_epi16(y_3_8x16b, wt_8x16b);
  467|       |
  468|  33.0k|            y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b);
  469|  33.0k|            y_1_8x16b = _mm_adds_epi16(round_8x16b, y_1_8x16b);
  470|  33.0k|            y_2_8x16b = _mm_adds_epi16(round_8x16b, y_2_8x16b);
  471|  33.0k|            y_3_8x16b = _mm_adds_epi16(round_8x16b, y_3_8x16b);
  472|       |
  473|  33.0k|            y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd);
  474|  33.0k|            y_1_8x16b = _mm_srai_epi16(y_1_8x16b, log_wd);
  475|  33.0k|            y_2_8x16b = _mm_srai_epi16(y_2_8x16b, log_wd);
  476|  33.0k|            y_3_8x16b = _mm_srai_epi16(y_3_8x16b, log_wd);
  477|       |
  478|  33.0k|            y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b);
  479|  33.0k|            y_1_8x16b = _mm_adds_epi16(ofst_8x16b, y_1_8x16b);
  480|  33.0k|            y_2_8x16b = _mm_adds_epi16(ofst_8x16b, y_2_8x16b);
  481|  33.0k|            y_3_8x16b = _mm_adds_epi16(ofst_8x16b, y_3_8x16b);
  482|       |
  483|  33.0k|            y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_1_8x16b);
  484|  33.0k|            y_2_16x8b = _mm_packus_epi16(y_2_8x16b, y_3_8x16b);
  485|  33.0k|            y_1_16x8b = _mm_srli_si128(y_0_16x8b, 8);
  486|  33.0k|            y_3_16x8b = _mm_srli_si128(y_2_16x8b, 8);
  487|       |
  488|  33.0k|            _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b);
  489|  33.0k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
  490|  33.0k|            _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b);
  491|  33.0k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b);
  492|       |
  493|  33.0k|            ht -= 4;
  494|  33.0k|            pu1_src += src_strd << 2;
  495|  33.0k|            pu1_dst += dst_strd << 2;
  496|  33.0k|        }
  497|  33.0k|        while(ht > 0);
  ------------------
  |  Branch (497:15): [True: 17.9k, False: 15.1k]
  ------------------
  498|  15.1k|    }
  499|  3.96M|    else // wd == 16
  500|  3.96M|    {
  501|  3.96M|        __m128i y_0L_8x16b, y_1L_8x16b, y_2L_8x16b, y_3L_8x16b;
  502|  3.96M|        __m128i y_0H_8x16b, y_1H_8x16b, y_2H_8x16b, y_3H_8x16b;
  503|       |
  504|  3.96M|        __m128i zero_16x8b;
  505|  3.96M|        zero_16x8b = _mm_set1_epi8(0);
  506|       |
  507|  3.96M|        do
  508|  15.8M|        {
  509|  15.8M|            y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  510|  15.8M|            y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
  511|  15.8M|            y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + (src_strd << 1)));
  512|  15.8M|            y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd * 3));
  513|       |
  514|  15.8M|            y_0L_8x16b = _mm_cvtepu8_epi16(y_0_16x8b);
  515|  15.8M|            y_0H_8x16b = _mm_unpackhi_epi8(y_0_16x8b, zero_16x8b);
  516|  15.8M|            y_1L_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
  517|  15.8M|            y_1H_8x16b = _mm_unpackhi_epi8(y_1_16x8b, zero_16x8b);
  518|  15.8M|            y_2L_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
  519|  15.8M|            y_2H_8x16b = _mm_unpackhi_epi8(y_2_16x8b, zero_16x8b);
  520|  15.8M|            y_3L_8x16b = _mm_cvtepu8_epi16(y_3_16x8b);
  521|  15.8M|            y_3H_8x16b = _mm_unpackhi_epi8(y_3_16x8b, zero_16x8b);
  522|       |
  523|  15.8M|            y_0L_8x16b = _mm_mullo_epi16(y_0L_8x16b, wt_8x16b);
  524|  15.8M|            y_0H_8x16b = _mm_mullo_epi16(y_0H_8x16b, wt_8x16b);
  525|  15.8M|            y_1L_8x16b = _mm_mullo_epi16(y_1L_8x16b, wt_8x16b);
  526|  15.8M|            y_1H_8x16b = _mm_mullo_epi16(y_1H_8x16b, wt_8x16b);
  527|  15.8M|            y_2L_8x16b = _mm_mullo_epi16(y_2L_8x16b, wt_8x16b);
  528|  15.8M|            y_2H_8x16b = _mm_mullo_epi16(y_2H_8x16b, wt_8x16b);
  529|  15.8M|            y_3L_8x16b = _mm_mullo_epi16(y_3L_8x16b, wt_8x16b);
  530|  15.8M|            y_3H_8x16b = _mm_mullo_epi16(y_3H_8x16b, wt_8x16b);
  531|       |
  532|  15.8M|            y_0L_8x16b = _mm_adds_epi16(round_8x16b, y_0L_8x16b);
  533|  15.8M|            y_0H_8x16b = _mm_adds_epi16(round_8x16b, y_0H_8x16b);
  534|  15.8M|            y_1L_8x16b = _mm_adds_epi16(round_8x16b, y_1L_8x16b);
  535|  15.8M|            y_1H_8x16b = _mm_adds_epi16(round_8x16b, y_1H_8x16b);
  536|  15.8M|            y_2L_8x16b = _mm_adds_epi16(round_8x16b, y_2L_8x16b);
  537|  15.8M|            y_2H_8x16b = _mm_adds_epi16(round_8x16b, y_2H_8x16b);
  538|  15.8M|            y_3L_8x16b = _mm_adds_epi16(round_8x16b, y_3L_8x16b);
  539|  15.8M|            y_3H_8x16b = _mm_adds_epi16(round_8x16b, y_3H_8x16b);
  540|       |
  541|  15.8M|            y_0L_8x16b = _mm_srai_epi16(y_0L_8x16b, log_wd);
  542|  15.8M|            y_0H_8x16b = _mm_srai_epi16(y_0H_8x16b, log_wd);
  543|  15.8M|            y_1L_8x16b = _mm_srai_epi16(y_1L_8x16b, log_wd);
  544|  15.8M|            y_1H_8x16b = _mm_srai_epi16(y_1H_8x16b, log_wd);
  545|  15.8M|            y_2L_8x16b = _mm_srai_epi16(y_2L_8x16b, log_wd);
  546|  15.8M|            y_2H_8x16b = _mm_srai_epi16(y_2H_8x16b, log_wd);
  547|  15.8M|            y_3L_8x16b = _mm_srai_epi16(y_3L_8x16b, log_wd);
  548|  15.8M|            y_3H_8x16b = _mm_srai_epi16(y_3H_8x16b, log_wd);
  549|       |
  550|  15.8M|            y_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y_0L_8x16b);
  551|  15.8M|            y_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y_0H_8x16b);
  552|  15.8M|            y_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y_1L_8x16b);
  553|  15.8M|            y_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y_1H_8x16b);
  554|  15.8M|            y_2L_8x16b = _mm_adds_epi16(ofst_8x16b, y_2L_8x16b);
  555|  15.8M|            y_2H_8x16b = _mm_adds_epi16(ofst_8x16b, y_2H_8x16b);
  556|  15.8M|            y_3L_8x16b = _mm_adds_epi16(ofst_8x16b, y_3L_8x16b);
  557|  15.8M|            y_3H_8x16b = _mm_adds_epi16(ofst_8x16b, y_3H_8x16b);
  558|       |
  559|  15.8M|            y_0_16x8b = _mm_packus_epi16(y_0L_8x16b, y_0H_8x16b);
  560|  15.8M|            y_1_16x8b = _mm_packus_epi16(y_1L_8x16b, y_1H_8x16b);
  561|  15.8M|            y_2_16x8b = _mm_packus_epi16(y_2L_8x16b, y_2H_8x16b);
  562|  15.8M|            y_3_16x8b = _mm_packus_epi16(y_3L_8x16b, y_3H_8x16b);
  563|       |
  564|  15.8M|            _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b);
  565|  15.8M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
  566|  15.8M|            _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b);
  567|  15.8M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b);
  568|       |
  569|  15.8M|            ht -= 4;
  570|  15.8M|            pu1_src += src_strd << 2;
  571|  15.8M|            pu1_dst += dst_strd << 2;
  572|  15.8M|        }
  573|  15.8M|        while(ht > 0);
  ------------------
  |  Branch (573:15): [True: 11.8M, False: 3.96M]
  ------------------
  574|  3.96M|    }
  575|  4.01M|}
ih264_weighted_pred_chroma_sse42:
  617|  4.01M|{
  618|  4.01M|    __m128i y_0_16x8b, y_1_16x8b;
  619|       |
  620|  4.01M|    __m128i wt_8x16b, round_8x16b, ofst_8x16b;
  621|       |
  622|  4.01M|    WORD32 ofst_u, ofst_v;
  623|  4.01M|    WORD32 round_val;
  624|       |
  625|  4.01M|    ofst_u = (WORD8)(ofst & 0xff);
  626|  4.01M|    ofst_v = (WORD8)(ofst >> 8);
  627|  4.01M|    round_val = 1 << (log_wd - 1);
  628|  4.01M|    ofst = (ofst_u & 0xffff) | (ofst_v << 16);
  629|       |
  630|  4.01M|    wt_8x16b = _mm_set1_epi32(wt);
  631|  4.01M|    round_8x16b = _mm_set1_epi16(round_val);
  632|  4.01M|    ofst_8x16b = _mm_set1_epi32(ofst);
  633|       |
  634|  4.01M|    if(wd == 2)
  ------------------
  |  Branch (634:8): [True: 29.6k, False: 3.98M]
  ------------------
  635|  29.6k|    {
  636|  29.6k|        __m128i y_0_8x16b;
  637|       |
  638|  29.6k|        do
  639|  55.4k|        {
  640|  55.4k|            y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  641|  55.4k|            y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
  642|       |
  643|  55.4k|            y_0_16x8b = _mm_unpacklo_epi32(y_0_16x8b, y_1_16x8b);
  644|       |
  645|  55.4k|            y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b);
  646|       |
  647|  55.4k|            y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b);
  648|       |
  649|  55.4k|            y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b);
  650|       |
  651|  55.4k|            y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd);
  652|       |
  653|  55.4k|            y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b);
  654|       |
  655|  55.4k|            y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_0_8x16b);
  656|  55.4k|            y_1_16x8b = _mm_srli_si128(y_0_16x8b, 4);
  657|       |
  658|  55.4k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y_0_16x8b);
  659|  55.4k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y_1_16x8b);
  660|       |
  661|  55.4k|            ht -= 2;
  662|  55.4k|            pu1_src += src_strd << 1;
  663|  55.4k|            pu1_dst += dst_strd << 1;
  664|  55.4k|        }
  665|  55.4k|        while(ht > 0);
  ------------------
  |  Branch (665:15): [True: 25.7k, False: 29.6k]
  ------------------
  666|  29.6k|    }
  667|  3.98M|    else if(wd == 4)
  ------------------
  |  Branch (667:13): [True: 15.1k, False: 3.96M]
  ------------------
  668|  15.1k|    {
  669|  15.1k|        __m128i y_0_8x16b, y_1_8x16b;
  670|       |
  671|  15.1k|        do
  672|  33.0k|        {
  673|  33.0k|            y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
  674|  33.0k|            y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
  675|       |
  676|  33.0k|            y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b);
  677|  33.0k|            y_1_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
  678|       |
  679|  33.0k|            y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b);
  680|  33.0k|            y_1_8x16b = _mm_mullo_epi16(y_1_8x16b, wt_8x16b);
  681|       |
  682|  33.0k|            y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b);
  683|  33.0k|            y_1_8x16b = _mm_adds_epi16(round_8x16b, y_1_8x16b);
  684|       |
  685|  33.0k|            y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd);
  686|  33.0k|            y_1_8x16b = _mm_srai_epi16(y_1_8x16b, log_wd);
  687|       |
  688|  33.0k|            y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b);
  689|  33.0k|            y_1_8x16b = _mm_adds_epi16(ofst_8x16b, y_1_8x16b);
  690|       |
  691|  33.0k|            y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_1_8x16b);
  692|  33.0k|            y_1_16x8b = _mm_srli_si128(y_0_16x8b, 8);
  693|       |
  694|  33.0k|            _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b);
  695|  33.0k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
  696|       |
  697|  33.0k|            ht -= 2;
  698|  33.0k|            pu1_src += src_strd << 1;
  699|  33.0k|            pu1_dst += dst_strd << 1;
  700|  33.0k|        }
  701|  33.0k|        while(ht > 0);
  ------------------
  |  Branch (701:15): [True: 17.9k, False: 15.1k]
  ------------------
  702|  15.1k|    }
  703|  3.96M|    else // wd == 16
  704|  3.96M|    {
  705|  3.96M|        __m128i y_2_16x8b, y_3_16x8b;
  706|  3.96M|        __m128i y_0L_8x16b, y_1L_8x16b, y_2L_8x16b, y_3L_8x16b;
  707|  3.96M|        __m128i y_0H_8x16b, y_1H_8x16b, y_2H_8x16b, y_3H_8x16b;
  708|       |
  709|  3.96M|        __m128i zero_16x8b;
  710|  3.96M|        zero_16x8b = _mm_set1_epi8(0);
  711|       |
  712|  3.96M|        do
  713|  7.92M|        {
  714|  7.92M|            y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
  715|  7.92M|            y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
  716|  7.92M|            y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + (src_strd << 1)));
  717|  7.92M|            y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd * 3));
  718|       |
  719|  7.92M|            y_0L_8x16b = _mm_cvtepu8_epi16(y_0_16x8b);
  720|  7.92M|            y_0H_8x16b = _mm_unpackhi_epi8(y_0_16x8b, zero_16x8b);
  721|  7.92M|            y_1L_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
  722|  7.92M|            y_1H_8x16b = _mm_unpackhi_epi8(y_1_16x8b, zero_16x8b);
  723|  7.92M|            y_2L_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
  724|  7.92M|            y_2H_8x16b = _mm_unpackhi_epi8(y_2_16x8b, zero_16x8b);
  725|  7.92M|            y_3L_8x16b = _mm_cvtepu8_epi16(y_3_16x8b);
  726|  7.92M|            y_3H_8x16b = _mm_unpackhi_epi8(y_3_16x8b, zero_16x8b);
  727|       |
  728|  7.92M|            y_0L_8x16b = _mm_mullo_epi16(y_0L_8x16b, wt_8x16b);
  729|  7.92M|            y_0H_8x16b = _mm_mullo_epi16(y_0H_8x16b, wt_8x16b);
  730|  7.92M|            y_1L_8x16b = _mm_mullo_epi16(y_1L_8x16b, wt_8x16b);
  731|  7.92M|            y_1H_8x16b = _mm_mullo_epi16(y_1H_8x16b, wt_8x16b);
  732|  7.92M|            y_2L_8x16b = _mm_mullo_epi16(y_2L_8x16b, wt_8x16b);
  733|  7.92M|            y_2H_8x16b = _mm_mullo_epi16(y_2H_8x16b, wt_8x16b);
  734|  7.92M|            y_3L_8x16b = _mm_mullo_epi16(y_3L_8x16b, wt_8x16b);
  735|  7.92M|            y_3H_8x16b = _mm_mullo_epi16(y_3H_8x16b, wt_8x16b);
  736|       |
  737|  7.92M|            y_0L_8x16b = _mm_adds_epi16(round_8x16b, y_0L_8x16b);
  738|  7.92M|            y_0H_8x16b = _mm_adds_epi16(round_8x16b, y_0H_8x16b);
  739|  7.92M|            y_1L_8x16b = _mm_adds_epi16(round_8x16b, y_1L_8x16b);
  740|  7.92M|            y_1H_8x16b = _mm_adds_epi16(round_8x16b, y_1H_8x16b);
  741|  7.92M|            y_2L_8x16b = _mm_adds_epi16(round_8x16b, y_2L_8x16b);
  742|  7.92M|            y_2H_8x16b = _mm_adds_epi16(round_8x16b, y_2H_8x16b);
  743|  7.92M|            y_3L_8x16b = _mm_adds_epi16(round_8x16b, y_3L_8x16b);
  744|  7.92M|            y_3H_8x16b = _mm_adds_epi16(round_8x16b, y_3H_8x16b);
  745|       |
  746|  7.92M|            y_0L_8x16b = _mm_srai_epi16(y_0L_8x16b, log_wd);
  747|  7.92M|            y_0H_8x16b = _mm_srai_epi16(y_0H_8x16b, log_wd);
  748|  7.92M|            y_1L_8x16b = _mm_srai_epi16(y_1L_8x16b, log_wd);
  749|  7.92M|            y_1H_8x16b = _mm_srai_epi16(y_1H_8x16b, log_wd);
  750|  7.92M|            y_2L_8x16b = _mm_srai_epi16(y_2L_8x16b, log_wd);
  751|  7.92M|            y_2H_8x16b = _mm_srai_epi16(y_2H_8x16b, log_wd);
  752|  7.92M|            y_3L_8x16b = _mm_srai_epi16(y_3L_8x16b, log_wd);
  753|  7.92M|            y_3H_8x16b = _mm_srai_epi16(y_3H_8x16b, log_wd);
  754|       |
  755|  7.92M|            y_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y_0L_8x16b);
  756|  7.92M|            y_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y_0H_8x16b);
  757|  7.92M|            y_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y_1L_8x16b);
  758|  7.92M|            y_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y_1H_8x16b);
  759|  7.92M|            y_2L_8x16b = _mm_adds_epi16(ofst_8x16b, y_2L_8x16b);
  760|  7.92M|            y_2H_8x16b = _mm_adds_epi16(ofst_8x16b, y_2H_8x16b);
  761|  7.92M|            y_3L_8x16b = _mm_adds_epi16(ofst_8x16b, y_3L_8x16b);
  762|  7.92M|            y_3H_8x16b = _mm_adds_epi16(ofst_8x16b, y_3H_8x16b);
  763|       |
  764|  7.92M|            y_0_16x8b = _mm_packus_epi16(y_0L_8x16b, y_0H_8x16b);
  765|  7.92M|            y_1_16x8b = _mm_packus_epi16(y_1L_8x16b, y_1H_8x16b);
  766|  7.92M|            y_2_16x8b = _mm_packus_epi16(y_2L_8x16b, y_2H_8x16b);
  767|  7.92M|            y_3_16x8b = _mm_packus_epi16(y_3L_8x16b, y_3H_8x16b);
  768|       |
  769|  7.92M|            _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b);
  770|  7.92M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
  771|  7.92M|            _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b);
  772|  7.92M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b);
  773|       |
  774|  7.92M|            ht -= 4;
  775|  7.92M|            pu1_src += src_strd << 2;
  776|  7.92M|            pu1_dst += dst_strd << 2;
  777|  7.92M|        }
  778|  7.92M|        while(ht > 0);
  ------------------
  |  Branch (778:15): [True: 3.96M, False: 3.96M]
  ------------------
  779|  3.96M|    }
  780|  4.01M|}
ih264_weighted_bi_pred_luma_sse42:
  830|   266k|{
  831|   266k|    __m128i y1_0_16x8b, y1_1_16x8b;
  832|   266k|    __m128i y2_0_16x8b, y2_1_16x8b;
  833|       |
  834|   266k|    __m128i wt1_8x16b, wt2_8x16b;
  835|   266k|    __m128i ofst_8x16b, round_8x16b;
  836|       |
  837|   266k|    WORD32 ofst;
  838|   266k|    WORD32 round_val, shft;
  839|       |
  840|   266k|    wt1 = (WORD16)(wt1 & 0xffff);
  841|   266k|    wt2 = (WORD16)(wt2 & 0xffff);
  842|   266k|    round_val = 1 << log_wd;
  843|   266k|    shft = log_wd + 1;
  844|   266k|    ofst1 = (WORD8)(ofst1 & 0xff);
  845|   266k|    ofst2 = (WORD8)(ofst2 & 0xff);
  846|   266k|    ofst = (ofst1 + ofst2 + 1) >> 1;
  847|       |
  848|   266k|    wt1_8x16b = _mm_set1_epi16(wt1);
  849|   266k|    wt2_8x16b = _mm_set1_epi16(wt2);
  850|   266k|    round_8x16b = _mm_set1_epi16(round_val);
  851|   266k|    ofst_8x16b = _mm_set1_epi16(ofst);
  852|       |
  853|   266k|    if(wd == 4)
  ------------------
  |  Branch (853:8): [True: 1.30k, False: 265k]
  ------------------
  854|  1.30k|    {
  855|  1.30k|        __m128i y1_2_16x8b, y1_3_16x8b;
  856|  1.30k|        __m128i y2_2_16x8b, y2_3_16x8b;
  857|       |
  858|  1.30k|        __m128i y1_0_8x16b, y1_2_8x16b;
  859|  1.30k|        __m128i y2_0_8x16b, y2_2_8x16b;
  860|       |
  861|  1.30k|        do
  862|  1.68k|        {
  863|  1.68k|            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
  864|  1.68k|            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
  865|  1.68k|            y1_2_16x8b = _mm_loadl_epi64(
  866|  1.68k|                            (__m128i *)(pu1_src1 + (src_strd1 << 1)));
  867|  1.68k|            y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
  868|       |
  869|  1.68k|            y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
  870|  1.68k|            y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
  871|  1.68k|            y2_2_16x8b = _mm_loadl_epi64(
  872|  1.68k|                            (__m128i *)(pu1_src2 + (src_strd2 << 1)));
  873|  1.68k|            y2_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
  874|       |
  875|  1.68k|            y1_0_16x8b = _mm_unpacklo_epi32(y1_0_16x8b, y1_1_16x8b);
  876|  1.68k|            y1_2_16x8b = _mm_unpacklo_epi32(y1_2_16x8b, y1_3_16x8b);
  877|  1.68k|            y2_0_16x8b = _mm_unpacklo_epi32(y2_0_16x8b, y2_1_16x8b);
  878|  1.68k|            y2_2_16x8b = _mm_unpacklo_epi32(y2_2_16x8b, y2_3_16x8b);
  879|       |
  880|  1.68k|            y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
  881|  1.68k|            y1_2_8x16b = _mm_cvtepu8_epi16(y1_2_16x8b);
  882|  1.68k|            y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
  883|  1.68k|            y2_2_8x16b = _mm_cvtepu8_epi16(y2_2_16x8b);
  884|       |
  885|  1.68k|            y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
  886|  1.68k|            y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
  887|  1.68k|            y1_2_8x16b = _mm_mullo_epi16(y1_2_8x16b, wt1_8x16b);
  888|  1.68k|            y2_2_8x16b = _mm_mullo_epi16(y2_2_8x16b, wt2_8x16b);
  889|       |
  890|  1.68k|            y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
  891|  1.68k|            y1_2_8x16b = _mm_adds_epi16(y1_2_8x16b, y2_2_8x16b);
  892|       |
  893|  1.68k|            y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
  894|  1.68k|            y1_2_8x16b = _mm_adds_epi16(round_8x16b, y1_2_8x16b);
  895|       |
  896|  1.68k|            y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
  897|  1.68k|            y1_2_8x16b = _mm_srai_epi16(y1_2_8x16b, shft);
  898|       |
  899|  1.68k|            y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
  900|  1.68k|            y1_2_8x16b = _mm_adds_epi16(ofst_8x16b, y1_2_8x16b);
  901|       |
  902|  1.68k|            y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_2_8x16b);
  903|  1.68k|            y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 4);
  904|  1.68k|            y1_2_16x8b = _mm_srli_si128(y1_0_16x8b, 8);
  905|  1.68k|            y1_3_16x8b = _mm_srli_si128(y1_0_16x8b, 12);
  906|       |
  907|  1.68k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y1_0_16x8b);
  908|  1.68k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y1_1_16x8b);
  909|  1.68k|            *((WORD32 *)(pu1_dst + (dst_strd << 1))) = _mm_cvtsi128_si32(y1_2_16x8b);
  910|  1.68k|            *((WORD32 *)(pu1_dst + dst_strd * 3)) = _mm_cvtsi128_si32(y1_3_16x8b);
  911|       |
  912|       |
  913|  1.68k|            ht -= 4;
  914|  1.68k|            pu1_src1 += src_strd1 << 2;
  915|  1.68k|            pu1_src2 += src_strd2 << 2;
  916|  1.68k|            pu1_dst += dst_strd << 2;
  917|  1.68k|        }
  918|  1.68k|        while(ht > 0);
  ------------------
  |  Branch (918:15): [True: 380, False: 1.30k]
  ------------------
  919|  1.30k|    }
  920|   265k|    else if(wd == 8)
  ------------------
  |  Branch (920:13): [True: 20.5k, False: 244k]
  ------------------
  921|  20.5k|    {
  922|  20.5k|        __m128i y1_2_16x8b, y1_3_16x8b;
  923|  20.5k|        __m128i y2_2_16x8b, y2_3_16x8b;
  924|       |
  925|  20.5k|        __m128i y1_0_8x16b, y1_1_8x16b, y1_2_8x16b, y1_3_8x16b;
  926|  20.5k|        __m128i y2_0_8x16b, y2_1_8x16b, y2_2_8x16b, y2_3_8x16b;
  927|       |
  928|  20.5k|        do
  929|  50.3k|        {
  930|  50.3k|            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
  931|  50.3k|            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
  932|  50.3k|            y1_2_16x8b = _mm_loadl_epi64(
  933|  50.3k|                            (__m128i *)(pu1_src1 + (src_strd1 << 1)));
  934|  50.3k|            y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
  935|       |
  936|  50.3k|            y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
  937|  50.3k|            y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
  938|  50.3k|            y2_2_16x8b = _mm_loadl_epi64(
  939|  50.3k|                            (__m128i *)(pu1_src2 + (src_strd2 << 1)));
  940|  50.3k|            y2_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
  941|       |
  942|  50.3k|            y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
  943|  50.3k|            y1_1_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
  944|  50.3k|            y1_2_8x16b = _mm_cvtepu8_epi16(y1_2_16x8b);
  945|  50.3k|            y1_3_8x16b = _mm_cvtepu8_epi16(y1_3_16x8b);
  946|       |
  947|  50.3k|            y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
  948|  50.3k|            y2_1_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
  949|  50.3k|            y2_2_8x16b = _mm_cvtepu8_epi16(y2_2_16x8b);
  950|  50.3k|            y2_3_8x16b = _mm_cvtepu8_epi16(y2_3_16x8b);
  951|       |
  952|  50.3k|            y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
  953|  50.3k|            y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
  954|  50.3k|            y1_1_8x16b = _mm_mullo_epi16(y1_1_8x16b, wt1_8x16b);
  955|  50.3k|            y2_1_8x16b = _mm_mullo_epi16(y2_1_8x16b, wt2_8x16b);
  956|       |
  957|  50.3k|            y1_2_8x16b = _mm_mullo_epi16(y1_2_8x16b, wt1_8x16b);
  958|  50.3k|            y2_2_8x16b = _mm_mullo_epi16(y2_2_8x16b, wt2_8x16b);
  959|  50.3k|            y1_3_8x16b = _mm_mullo_epi16(y1_3_8x16b, wt1_8x16b);
  960|  50.3k|            y2_3_8x16b = _mm_mullo_epi16(y2_3_8x16b, wt2_8x16b);
  961|       |
  962|  50.3k|            y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
  963|  50.3k|            y1_1_8x16b = _mm_adds_epi16(y1_1_8x16b, y2_1_8x16b);
  964|  50.3k|            y1_2_8x16b = _mm_adds_epi16(y1_2_8x16b, y2_2_8x16b);
  965|  50.3k|            y1_3_8x16b = _mm_adds_epi16(y1_3_8x16b, y2_3_8x16b);
  966|       |
  967|  50.3k|            y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
  968|  50.3k|            y1_1_8x16b = _mm_adds_epi16(round_8x16b, y1_1_8x16b);
  969|  50.3k|            y1_2_8x16b = _mm_adds_epi16(round_8x16b, y1_2_8x16b);
  970|  50.3k|            y1_3_8x16b = _mm_adds_epi16(round_8x16b, y1_3_8x16b);
  971|       |
  972|  50.3k|            y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
  973|  50.3k|            y1_1_8x16b = _mm_srai_epi16(y1_1_8x16b, shft);
  974|  50.3k|            y1_2_8x16b = _mm_srai_epi16(y1_2_8x16b, shft);
  975|  50.3k|            y1_3_8x16b = _mm_srai_epi16(y1_3_8x16b, shft);
  976|       |
  977|  50.3k|            y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
  978|  50.3k|            y1_1_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1_8x16b);
  979|  50.3k|            y1_2_8x16b = _mm_adds_epi16(ofst_8x16b, y1_2_8x16b);
  980|  50.3k|            y1_3_8x16b = _mm_adds_epi16(ofst_8x16b, y1_3_8x16b);
  981|       |
  982|  50.3k|            y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_1_8x16b);
  983|  50.3k|            y1_2_16x8b = _mm_packus_epi16(y1_2_8x16b, y1_3_8x16b);
  984|  50.3k|            y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 8);
  985|  50.3k|            y1_3_16x8b = _mm_srli_si128(y1_2_16x8b, 8);
  986|       |
  987|  50.3k|            _mm_storel_epi64((__m128i *)pu1_dst, y1_0_16x8b);
  988|  50.3k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
  989|  50.3k|            _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y1_2_16x8b);
  990|  50.3k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y1_3_16x8b);
  991|       |
  992|  50.3k|            ht -= 4;
  993|  50.3k|            pu1_src1 += src_strd1 << 2;
  994|  50.3k|            pu1_src2 += src_strd2 << 2;
  995|  50.3k|            pu1_dst += dst_strd << 2;
  996|  50.3k|        }
  997|  50.3k|        while(ht > 0);
  ------------------
  |  Branch (997:15): [True: 29.7k, False: 20.5k]
  ------------------
  998|  20.5k|    }
  999|   244k|    else // wd == 16
 1000|   244k|    {
 1001|   244k|        __m128i y1_0L_8x16b, y1_0H_8x16b, y1_1L_8x16b, y1_1H_8x16b;
 1002|   244k|        __m128i y2_0L_8x16b, y2_0H_8x16b, y2_1L_8x16b, y2_1H_8x16b;
 1003|       |
 1004|   244k|        __m128i zero_16x8b;
 1005|   244k|        zero_16x8b = _mm_set1_epi8(0);
 1006|       |
 1007|   244k|        do
 1008|  1.95M|        {
 1009|  1.95M|            y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
 1010|  1.95M|            y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
 1011|  1.95M|            y2_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
 1012|  1.95M|            y2_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
 1013|       |
 1014|  1.95M|            y1_0L_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
 1015|  1.95M|            y1_0H_8x16b = _mm_unpackhi_epi8(y1_0_16x8b, zero_16x8b);
 1016|  1.95M|            y1_1L_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
 1017|  1.95M|            y1_1H_8x16b = _mm_unpackhi_epi8(y1_1_16x8b, zero_16x8b);
 1018|       |
 1019|  1.95M|            y2_0L_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
 1020|  1.95M|            y2_0H_8x16b = _mm_unpackhi_epi8(y2_0_16x8b, zero_16x8b);
 1021|  1.95M|            y2_1L_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
 1022|  1.95M|            y2_1H_8x16b = _mm_unpackhi_epi8(y2_1_16x8b, zero_16x8b);
 1023|       |
 1024|  1.95M|            y1_0L_8x16b = _mm_mullo_epi16(y1_0L_8x16b, wt1_8x16b);
 1025|  1.95M|            y1_0H_8x16b = _mm_mullo_epi16(y1_0H_8x16b, wt1_8x16b);
 1026|  1.95M|            y1_1L_8x16b = _mm_mullo_epi16(y1_1L_8x16b, wt1_8x16b);
 1027|  1.95M|            y1_1H_8x16b = _mm_mullo_epi16(y1_1H_8x16b, wt1_8x16b);
 1028|       |
 1029|  1.95M|            y2_0L_8x16b = _mm_mullo_epi16(y2_0L_8x16b, wt2_8x16b);
 1030|  1.95M|            y2_0H_8x16b = _mm_mullo_epi16(y2_0H_8x16b, wt2_8x16b);
 1031|  1.95M|            y2_1L_8x16b = _mm_mullo_epi16(y2_1L_8x16b, wt2_8x16b);
 1032|  1.95M|            y2_1H_8x16b = _mm_mullo_epi16(y2_1H_8x16b, wt2_8x16b);
 1033|       |
 1034|  1.95M|            y1_0L_8x16b = _mm_adds_epi16(y1_0L_8x16b, y2_0L_8x16b);
 1035|  1.95M|            y1_0H_8x16b = _mm_adds_epi16(y1_0H_8x16b, y2_0H_8x16b);
 1036|  1.95M|            y1_1L_8x16b = _mm_adds_epi16(y1_1L_8x16b, y2_1L_8x16b);
 1037|  1.95M|            y1_1H_8x16b = _mm_adds_epi16(y1_1H_8x16b, y2_1H_8x16b);
 1038|       |
 1039|  1.95M|            y1_0L_8x16b = _mm_adds_epi16(round_8x16b, y1_0L_8x16b);
 1040|  1.95M|            y1_0H_8x16b = _mm_adds_epi16(round_8x16b, y1_0H_8x16b);
 1041|  1.95M|            y1_1L_8x16b = _mm_adds_epi16(round_8x16b, y1_1L_8x16b);
 1042|  1.95M|            y1_1H_8x16b = _mm_adds_epi16(round_8x16b, y1_1H_8x16b);
 1043|       |
 1044|  1.95M|            y1_0L_8x16b = _mm_srai_epi16(y1_0L_8x16b, shft);
 1045|  1.95M|            y1_0H_8x16b = _mm_srai_epi16(y1_0H_8x16b, shft);
 1046|  1.95M|            y1_1L_8x16b = _mm_srai_epi16(y1_1L_8x16b, shft);
 1047|  1.95M|            y1_1H_8x16b = _mm_srai_epi16(y1_1H_8x16b, shft);
 1048|       |
 1049|  1.95M|            y1_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0L_8x16b);
 1050|  1.95M|            y1_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0H_8x16b);
 1051|  1.95M|            y1_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1L_8x16b);
 1052|  1.95M|            y1_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1H_8x16b);
 1053|       |
 1054|  1.95M|            y1_0_16x8b = _mm_packus_epi16(y1_0L_8x16b, y1_0H_8x16b);
 1055|  1.95M|            y1_1_16x8b = _mm_packus_epi16(y1_1L_8x16b, y1_1H_8x16b);
 1056|       |
 1057|  1.95M|            _mm_storeu_si128((__m128i *)pu1_dst, y1_0_16x8b);
 1058|  1.95M|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
 1059|       |
 1060|  1.95M|            ht -= 2;
 1061|  1.95M|            pu1_src1 += src_strd1 << 1;
 1062|  1.95M|            pu1_src2 += src_strd2 << 1;
 1063|  1.95M|            pu1_dst += dst_strd << 1;
 1064|  1.95M|        }
 1065|  1.95M|        while(ht > 0);
  ------------------
  |  Branch (1065:15): [True: 1.70M, False: 244k]
  ------------------
 1066|   244k|    }
 1067|   266k|}
ih264_weighted_bi_pred_chroma_sse42:
 1117|   266k|{
 1118|   266k|    __m128i y1_0_16x8b, y1_1_16x8b;
 1119|   266k|    __m128i y2_0_16x8b, y2_1_16x8b;
 1120|       |
 1121|   266k|    __m128i wt1_8x16b, wt2_8x16b;
 1122|   266k|    __m128i ofst_8x16b, round_8x16b;
 1123|       |
 1124|   266k|    WORD32 ofst1_u, ofst2_u, ofst_u;
 1125|   266k|    WORD32 ofst1_v, ofst2_v, ofst_v;
 1126|   266k|    WORD32 round_val, shft, ofst_val;
 1127|       |
 1128|   266k|    round_val = 1 << log_wd;
 1129|   266k|    shft = log_wd + 1;
 1130|       |
 1131|   266k|    ofst1_u = (WORD8)(ofst1 & 0xff);
 1132|   266k|    ofst1_v = (WORD8)(ofst1 >> 8);
 1133|   266k|    ofst2_u = (WORD8)(ofst2 & 0xff);
 1134|   266k|    ofst2_v = (WORD8)(ofst2 >> 8);
 1135|       |
 1136|   266k|    wt1_8x16b = _mm_set1_epi32(wt1);
 1137|   266k|    wt2_8x16b = _mm_set1_epi32(wt2);
 1138|       |
 1139|   266k|    ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
 1140|   266k|    ofst_v = (ofst1_v + ofst2_v + 1) >> 1;
 1141|   266k|    ofst_val = (ofst_u & 0xffff) | (ofst_v << 16);
 1142|       |
 1143|   266k|    round_8x16b = _mm_set1_epi16(round_val);
 1144|   266k|    ofst_8x16b = _mm_set1_epi32(ofst_val);
 1145|       |
 1146|   266k|    if(wd == 2)
  ------------------
  |  Branch (1146:8): [True: 1.30k, False: 265k]
  ------------------
 1147|  1.30k|    {
 1148|  1.30k|        __m128i y1_0_8x16b, y2_0_8x16b;
 1149|       |
 1150|  1.30k|        do
 1151|  1.68k|        {
 1152|  1.68k|            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
 1153|  1.68k|            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
 1154|       |
 1155|  1.68k|            y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
 1156|  1.68k|            y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
 1157|       |
 1158|  1.68k|            y1_0_16x8b = _mm_unpacklo_epi32(y1_0_16x8b, y1_1_16x8b);
 1159|  1.68k|            y2_0_16x8b = _mm_unpacklo_epi32(y2_0_16x8b, y2_1_16x8b);
 1160|       |
 1161|  1.68k|            y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
 1162|  1.68k|            y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
 1163|       |
 1164|  1.68k|            y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
 1165|  1.68k|            y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
 1166|       |
 1167|  1.68k|            y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
 1168|  1.68k|            y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
 1169|       |
 1170|  1.68k|            y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
 1171|  1.68k|            y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
 1172|       |
 1173|  1.68k|            y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_0_8x16b);
 1174|  1.68k|            y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 4);
 1175|       |
 1176|  1.68k|            *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y1_0_16x8b);
 1177|  1.68k|            *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y1_1_16x8b);
 1178|       |
 1179|  1.68k|            ht -= 2;
 1180|  1.68k|            pu1_src1 += src_strd1 << 1;
 1181|  1.68k|            pu1_src2 += src_strd2 << 1;
 1182|  1.68k|            pu1_dst += dst_strd << 1;
 1183|  1.68k|        }
 1184|  1.68k|        while(ht > 0);
  ------------------
  |  Branch (1184:15): [True: 380, False: 1.30k]
  ------------------
 1185|  1.30k|    }
 1186|   265k|    else if(wd == 4)
  ------------------
  |  Branch (1186:13): [True: 20.5k, False: 244k]
  ------------------
 1187|  20.5k|    {
 1188|  20.5k|        __m128i y1_0_8x16b, y1_1_8x16b;
 1189|  20.5k|        __m128i y2_0_8x16b, y2_1_8x16b;
 1190|       |
 1191|  20.5k|        do
 1192|  50.3k|        {
 1193|  50.3k|            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
 1194|  50.3k|            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
 1195|       |
 1196|  50.3k|            y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
 1197|  50.3k|            y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
 1198|       |
 1199|  50.3k|            y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
 1200|  50.3k|            y1_1_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
 1201|       |
 1202|  50.3k|            y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
 1203|  50.3k|            y2_1_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
 1204|       |
 1205|  50.3k|            y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
 1206|  50.3k|            y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
 1207|  50.3k|            y1_1_8x16b = _mm_mullo_epi16(y1_1_8x16b, wt1_8x16b);
 1208|  50.3k|            y2_1_8x16b = _mm_mullo_epi16(y2_1_8x16b, wt2_8x16b);
 1209|       |
 1210|  50.3k|            y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
 1211|  50.3k|            y1_1_8x16b = _mm_adds_epi16(y1_1_8x16b, y2_1_8x16b);
 1212|       |
 1213|  50.3k|            y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
 1214|  50.3k|            y1_1_8x16b = _mm_adds_epi16(round_8x16b, y1_1_8x16b);
 1215|       |
 1216|  50.3k|            y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
 1217|  50.3k|            y1_1_8x16b = _mm_srai_epi16(y1_1_8x16b, shft);
 1218|       |
 1219|  50.3k|            y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
 1220|  50.3k|            y1_1_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1_8x16b);
 1221|       |
 1222|  50.3k|            y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_1_8x16b);
 1223|  50.3k|            y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 8);
 1224|       |
 1225|  50.3k|            _mm_storel_epi64((__m128i *)pu1_dst, y1_0_16x8b);
 1226|  50.3k|            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
 1227|       |
 1228|  50.3k|            ht -= 2;
 1229|  50.3k|            pu1_src1 += src_strd1 << 1;
 1230|  50.3k|            pu1_src2 += src_strd2 << 1;
 1231|  50.3k|            pu1_dst += dst_strd << 1;
 1232|  50.3k|        }
 1233|  50.3k|        while(ht > 0);
  ------------------
  |  Branch (1233:15): [True: 29.7k, False: 20.5k]
  ------------------
 1234|  20.5k|    }
 1235|   244k|    else // wd == 8
 1236|   244k|    {
 1237|   244k|        __m128i y1_0L_8x16b, y1_0H_8x16b, y1_1L_8x16b, y1_1H_8x16b;
 1238|   244k|        __m128i y2_0L_8x16b, y2_0H_8x16b, y2_1L_8x16b, y2_1H_8x16b;
 1239|       |
 1240|   244k|        __m128i zero_16x8b;
 1241|   244k|        zero_16x8b = _mm_set1_epi8(0);
 1242|       |
 1243|   244k|        do
 1244|   976k|        {
 1245|   976k|            y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
 1246|   976k|            y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
 1247|   976k|            y2_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
 1248|   976k|            y2_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
 1249|       |
 1250|   976k|            y1_0L_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
 1251|   976k|            y1_0H_8x16b = _mm_unpackhi_epi8(y1_0_16x8b, zero_16x8b);
 1252|   976k|            y1_1L_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
 1253|   976k|            y1_1H_8x16b = _mm_unpackhi_epi8(y1_1_16x8b, zero_16x8b);
 1254|       |
 1255|   976k|            y2_0L_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
 1256|   976k|            y2_0H_8x16b = _mm_unpackhi_epi8(y2_0_16x8b, zero_16x8b);
 1257|   976k|            y2_1L_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
 1258|   976k|            y2_1H_8x16b = _mm_unpackhi_epi8(y2_1_16x8b, zero_16x8b);
 1259|       |
 1260|   976k|            y1_0L_8x16b = _mm_mullo_epi16(y1_0L_8x16b, wt1_8x16b);
 1261|   976k|            y1_0H_8x16b = _mm_mullo_epi16(y1_0H_8x16b, wt1_8x16b);
 1262|   976k|            y1_1L_8x16b = _mm_mullo_epi16(y1_1L_8x16b, wt1_8x16b);
 1263|   976k|            y1_1H_8x16b = _mm_mullo_epi16(y1_1H_8x16b, wt1_8x16b);
 1264|       |
 1265|   976k|            y2_0L_8x16b = _mm_mullo_epi16(y2_0L_8x16b, wt2_8x16b);
 1266|   976k|            y2_0H_8x16b = _mm_mullo_epi16(y2_0H_8x16b, wt2_8x16b);
 1267|   976k|            y2_1L_8x16b = _mm_mullo_epi16(y2_1L_8x16b, wt2_8x16b);
 1268|   976k|            y2_1H_8x16b = _mm_mullo_epi16(y2_1H_8x16b, wt2_8x16b);
 1269|       |
 1270|   976k|            y1_0L_8x16b = _mm_adds_epi16(y1_0L_8x16b, y2_0L_8x16b);
 1271|   976k|            y1_0H_8x16b = _mm_adds_epi16(y1_0H_8x16b, y2_0H_8x16b);
 1272|   976k|            y1_1L_8x16b = _mm_adds_epi16(y1_1L_8x16b, y2_1L_8x16b);
 1273|   976k|            y1_1H_8x16b = _mm_adds_epi16(y1_1H_8x16b, y2_1H_8x16b);
 1274|       |
 1275|   976k|            y1_0L_8x16b = _mm_adds_epi16(round_8x16b, y1_0L_8x16b);
 1276|   976k|            y1_0H_8x16b = _mm_adds_epi16(round_8x16b, y1_0H_8x16b);
 1277|   976k|            y1_1L_8x16b = _mm_adds_epi16(round_8x16b, y1_1L_8x16b);
 1278|   976k|            y1_1H_8x16b = _mm_adds_epi16(round_8x16b, y1_1H_8x16b);
 1279|       |
 1280|   976k|            y1_0L_8x16b = _mm_srai_epi16(y1_0L_8x16b, shft);
 1281|   976k|            y1_0H_8x16b = _mm_srai_epi16(y1_0H_8x16b, shft);
 1282|   976k|            y1_1L_8x16b = _mm_srai_epi16(y1_1L_8x16b, shft);
 1283|   976k|            y1_1H_8x16b = _mm_srai_epi16(y1_1H_8x16b, shft);
 1284|       |
 1285|   976k|            y1_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0L_8x16b);
 1286|   976k|            y1_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0H_8x16b);
 1287|   976k|            y1_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1L_8x16b);
 1288|   976k|            y1_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1H_8x16b);
 1289|       |
 1290|   976k|            y1_0_16x8b = _mm_packus_epi16(y1_0L_8x16b, y1_0H_8x16b);
 1291|   976k|            y1_1_16x8b = _mm_packus_epi16(y1_1L_8x16b, y1_1H_8x16b);
 1292|       |
 1293|   976k|            _mm_storeu_si128((__m128i *)pu1_dst, y1_0_16x8b);
 1294|   976k|            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
 1295|       |
 1296|   976k|            ht -= 2;
 1297|   976k|            pu1_src1 += src_strd1 << 1;
 1298|   976k|            pu1_src2 += src_strd2 << 1;
 1299|   976k|            pu1_dst += dst_strd << 1;
 1300|   976k|        }
 1301|   976k|        while(ht > 0);
  ------------------
  |  Branch (1301:15): [True: 731k, False: 244k]
  ------------------
 1302|   244k|    }
 1303|   266k|}

ih264d_export_sei_params:
  191|   195k|{
  192|   195k|    WORD32 i4_status = IV_SUCCESS;
  193|   195k|    sei *ps_sei = (sei *)ps_dec->pv_disp_sei_params;
  194|       |
  195|   195k|    i4_status = ih264d_export_sei_mdcv_params(ps_sei_decode_op, ps_sei, &ps_dec->s_sei_export);
  196|   195k|    i4_status = ih264d_export_sei_cll_params(ps_sei_decode_op, ps_sei, &ps_dec->s_sei_export);
  197|   195k|    i4_status = ih264d_export_sei_ave_params(ps_sei_decode_op, ps_sei, &ps_dec->s_sei_export);
  198|   195k|    i4_status = ih264d_export_sei_ccv_params(ps_sei_decode_op, ps_sei, &ps_dec->s_sei_export);
  199|   195k|    i4_status = ih264d_export_sei_sii_params(ps_sei_decode_op, ps_sei, &ps_dec->s_sei_export);
  200|   195k|    i4_status = ih264d_export_sei_fgc_params(ps_sei_decode_op, ps_sei, &ps_dec->s_sei_export);
  201|       |
  202|   195k|    UNUSED(i4_status);
  ------------------
  |  |   45|   195k|#define UNUSED(x) ((void)(x))
  ------------------
  203|   195k|}
ih264d_map_error:
 1927|  84.4k|{
 1928|  84.4k|    UWORD32 temp = 0;
 1929|       |
 1930|  84.4k|    switch(i4_err_status)
  ------------------
  |  Branch (1930:12): [True: 3.06k, False: 81.3k]
  ------------------
 1931|  84.4k|    {
 1932|      0|        case ERROR_MEM_ALLOC_ISRAM_T:
  ------------------
  |  Branch (1932:9): [True: 0, False: 84.4k]
  ------------------
 1933|      0|        case ERROR_MEM_ALLOC_SDRAM_T:
  ------------------
  |  Branch (1933:9): [True: 0, False: 84.4k]
  ------------------
 1934|      0|        case ERROR_BUF_MGR:
  ------------------
  |  Branch (1934:9): [True: 0, False: 84.4k]
  ------------------
 1935|      0|        case ERROR_MB_GROUP_ASSGN_T:
  ------------------
  |  Branch (1935:9): [True: 0, False: 84.4k]
  ------------------
 1936|      0|        case ERROR_FRAME_LIMIT_OVER:
  ------------------
  |  Branch (1936:9): [True: 0, False: 84.4k]
  ------------------
 1937|      0|        case ERROR_ACTUAL_RESOLUTION_GREATER_THAN_INIT:
  ------------------
  |  Branch (1937:9): [True: 0, False: 84.4k]
  ------------------
 1938|      0|        case ERROR_PROFILE_NOT_SUPPORTED:
  ------------------
  |  Branch (1938:9): [True: 0, False: 84.4k]
  ------------------
 1939|      0|        case ERROR_INIT_NOT_DONE:
  ------------------
  |  Branch (1939:9): [True: 0, False: 84.4k]
  ------------------
 1940|      0|        case IVD_MEM_ALLOC_FAILED:
  ------------------
  |  Branch (1940:9): [True: 0, False: 84.4k]
  ------------------
 1941|    566|        case ERROR_FEATURE_UNAVAIL:
  ------------------
  |  Branch (1941:9): [True: 566, False: 83.8k]
  ------------------
 1942|  1.07k|        case IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED:
  ------------------
  |  Branch (1942:9): [True: 513, False: 83.9k]
  ------------------
 1943|  1.07k|            temp = 1 << IVD_FATALERROR;
 1944|  1.07k|            H264_DEC_DEBUG_PRINT("\nFatal Error\n");
  ------------------
  |  |   39|  1.07k|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 1945|  1.07k|            break;
 1946|       |
 1947|      0|        case ERROR_DBP_MANAGER_T:
  ------------------
  |  Branch (1947:9): [True: 0, False: 84.4k]
  ------------------
 1948|      0|        case ERROR_GAPS_IN_FRM_NUM:
  ------------------
  |  Branch (1948:9): [True: 0, False: 84.4k]
  ------------------
 1949|      0|        case ERROR_UNKNOWN_NAL:
  ------------------
  |  Branch (1949:9): [True: 0, False: 84.4k]
  ------------------
 1950|      0|        case ERROR_INV_MB_SLC_GRP_T:
  ------------------
  |  Branch (1950:9): [True: 0, False: 84.4k]
  ------------------
 1951|      0|        case ERROR_MULTIPLE_SLC_GRP_T:
  ------------------
  |  Branch (1951:9): [True: 0, False: 84.4k]
  ------------------
 1952|      0|        case ERROR_UNKNOWN_LEVEL:
  ------------------
  |  Branch (1952:9): [True: 0, False: 84.4k]
  ------------------
 1953|      0|        case ERROR_UNAVAIL_PICBUF_T:
  ------------------
  |  Branch (1953:9): [True: 0, False: 84.4k]
  ------------------
 1954|      0|        case ERROR_UNAVAIL_MVBUF_T:
  ------------------
  |  Branch (1954:9): [True: 0, False: 84.4k]
  ------------------
 1955|      0|        case ERROR_UNAVAIL_DISPBUF_T:
  ------------------
  |  Branch (1955:9): [True: 0, False: 84.4k]
  ------------------
 1956|     87|        case ERROR_NUM_REF:
  ------------------
  |  Branch (1956:9): [True: 87, False: 84.3k]
  ------------------
 1957|     87|        case ERROR_REFIDX_ORDER_T:
  ------------------
  |  Branch (1957:9): [True: 0, False: 84.4k]
  ------------------
 1958|     87|        case ERROR_PIC0_NOT_FOUND_T:
  ------------------
  |  Branch (1958:9): [True: 0, False: 84.4k]
  ------------------
 1959|     87|        case ERROR_MB_TYPE:
  ------------------
  |  Branch (1959:9): [True: 0, False: 84.4k]
  ------------------
 1960|     87|        case ERROR_SUB_MB_TYPE:
  ------------------
  |  Branch (1960:9): [True: 0, False: 84.4k]
  ------------------
 1961|     87|        case ERROR_CBP:
  ------------------
  |  Branch (1961:9): [True: 0, False: 84.4k]
  ------------------
 1962|    262|        case ERROR_REF_IDX:
  ------------------
  |  Branch (1962:9): [True: 175, False: 84.2k]
  ------------------
 1963|    262|        case ERROR_NUM_MV:
  ------------------
  |  Branch (1963:9): [True: 0, False: 84.4k]
  ------------------
 1964|    262|        case ERROR_CHROMA_PRED_MODE:
  ------------------
  |  Branch (1964:9): [True: 0, False: 84.4k]
  ------------------
 1965|    262|        case ERROR_INTRAPRED:
  ------------------
  |  Branch (1965:9): [True: 0, False: 84.4k]
  ------------------
 1966|    262|        case ERROR_NEXT_MB_ADDRESS_T:
  ------------------
  |  Branch (1966:9): [True: 0, False: 84.4k]
  ------------------
 1967|    262|        case ERROR_MB_ADDRESS_T:
  ------------------
  |  Branch (1967:9): [True: 0, False: 84.4k]
  ------------------
 1968|    262|        case ERROR_PIC1_NOT_FOUND_T:
  ------------------
  |  Branch (1968:9): [True: 0, False: 84.4k]
  ------------------
 1969|    262|        case ERROR_CAVLC_NUM_COEFF_T:
  ------------------
  |  Branch (1969:9): [True: 0, False: 84.4k]
  ------------------
 1970|    262|        case ERROR_CAVLC_SCAN_POS_T:
  ------------------
  |  Branch (1970:9): [True: 0, False: 84.4k]
  ------------------
 1971|    262|        case ERROR_PRED_WEIGHT_TABLE_T:
  ------------------
  |  Branch (1971:9): [True: 0, False: 84.4k]
  ------------------
 1972|    268|        case ERROR_CORRUPTED_SLICE:
  ------------------
  |  Branch (1972:9): [True: 6, False: 84.4k]
  ------------------
 1973|    268|            temp = 1 << IVD_CORRUPTEDDATA;
 1974|    268|            break;
 1975|       |
 1976|      0|        case ERROR_NOT_SUPP_RESOLUTION:
  ------------------
  |  Branch (1976:9): [True: 0, False: 84.4k]
  ------------------
 1977|      0|        case ERROR_ACTUAL_LEVEL_GREATER_THAN_INIT:
  ------------------
  |  Branch (1977:9): [True: 0, False: 84.4k]
  ------------------
 1978|      0|            temp = 1 << IVD_UNSUPPORTEDINPUT;
 1979|      0|            break;
 1980|       |
 1981|      0|        case ERROR_INVALID_PIC_PARAM:
  ------------------
  |  Branch (1981:9): [True: 0, False: 84.4k]
  ------------------
 1982|      9|        case ERROR_INVALID_SEQ_PARAM:
  ------------------
  |  Branch (1982:9): [True: 9, False: 84.4k]
  ------------------
 1983|      9|        case ERROR_EGC_EXCEED_32_1_T:
  ------------------
  |  Branch (1983:9): [True: 0, False: 84.4k]
  ------------------
 1984|      9|        case ERROR_EGC_EXCEED_32_2_T:
  ------------------
  |  Branch (1984:9): [True: 0, False: 84.4k]
  ------------------
 1985|      9|        case ERROR_INV_RANGE_TEV_T:
  ------------------
  |  Branch (1985:9): [True: 0, False: 84.4k]
  ------------------
 1986|      9|        case ERROR_INV_SLC_TYPE_T:
  ------------------
  |  Branch (1986:9): [True: 0, False: 84.4k]
  ------------------
 1987|     60|        case ERROR_INV_POC_TYPE_T:
  ------------------
  |  Branch (1987:9): [True: 51, False: 84.3k]
  ------------------
 1988|    305|        case ERROR_INV_RANGE_QP_T:
  ------------------
  |  Branch (1988:9): [True: 245, False: 84.1k]
  ------------------
 1989|  1.27k|        case ERROR_INV_SPS_PPS_T:
  ------------------
  |  Branch (1989:9): [True: 970, False: 83.4k]
  ------------------
 1990|  1.27k|        case ERROR_INV_SLICE_HDR_T:
  ------------------
  |  Branch (1990:9): [True: 0, False: 84.4k]
  ------------------
 1991|  1.28k|        case ERROR_INV_SEI_MDCV_PARAMS:
  ------------------
  |  Branch (1991:9): [True: 9, False: 84.4k]
  ------------------
 1992|  1.28k|        case ERROR_INV_SEI_CLL_PARAMS:
  ------------------
  |  Branch (1992:9): [True: 0, False: 84.4k]
  ------------------
 1993|  1.28k|        case ERROR_INV_SEI_AVE_PARAMS:
  ------------------
  |  Branch (1993:9): [True: 0, False: 84.4k]
  ------------------
 1994|  1.29k|        case ERROR_INV_SEI_CCV_PARAMS:
  ------------------
  |  Branch (1994:9): [True: 13, False: 84.4k]
  ------------------
 1995|  1.29k|        case ERROR_INV_SEI_SII_PARAMS:
  ------------------
  |  Branch (1995:9): [True: 1, False: 84.4k]
  ------------------
 1996|       |
 1997|  1.29k|            temp = 1 << IVD_CORRUPTEDHEADER;
 1998|  1.29k|            break;
 1999|       |
 2000|      0|        case ERROR_EOB_FLUSHBITS_T:
  ------------------
  |  Branch (2000:9): [True: 0, False: 84.4k]
  ------------------
 2001|    420|        case ERROR_EOB_GETBITS_T:
  ------------------
  |  Branch (2001:9): [True: 420, False: 84.0k]
  ------------------
 2002|    420|        case ERROR_EOB_GETBIT_T:
  ------------------
  |  Branch (2002:9): [True: 0, False: 84.4k]
  ------------------
 2003|    420|        case ERROR_EOB_BYPASS_T:
  ------------------
  |  Branch (2003:9): [True: 0, False: 84.4k]
  ------------------
 2004|    420|        case ERROR_EOB_DECISION_T:
  ------------------
  |  Branch (2004:9): [True: 0, False: 84.4k]
  ------------------
 2005|    420|        case ERROR_EOB_TERMINATE_T:
  ------------------
  |  Branch (2005:9): [True: 0, False: 84.4k]
  ------------------
 2006|    420|        case ERROR_EOB_READCOEFF4X4CAB_T:
  ------------------
  |  Branch (2006:9): [True: 0, False: 84.4k]
  ------------------
 2007|    420|            temp = 1 << IVD_INSUFFICIENTDATA;
 2008|    420|            break;
 2009|      0|        case ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED:
  ------------------
  |  Branch (2009:9): [True: 0, False: 84.4k]
  ------------------
 2010|      0|        case ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH:
  ------------------
  |  Branch (2010:9): [True: 0, False: 84.4k]
  ------------------
 2011|      0|            temp = 1 << IVD_UNSUPPORTEDPARAM | 1 << IVD_FATALERROR;
 2012|      0|            break;
 2013|       |
 2014|      0|        case ERROR_DANGLING_FIELD_IN_PIC:
  ------------------
  |  Branch (2014:9): [True: 0, False: 84.4k]
  ------------------
 2015|      0|            temp = 1 << IVD_APPLIEDCONCEALMENT;
 2016|      0|            break;
 2017|       |
 2018|  84.4k|    }
 2019|       |
 2020|  84.4k|    return temp;
 2021|       |
 2022|  84.4k|}
ih264d_get_outbuf_size:
 2028|   135k|{
 2029|   135k|    UWORD32 u4_min_num_out_bufs = 0;
 2030|       |
 2031|   135k|    if(u1_chroma_format == IV_YUV_420P)
  ------------------
  |  Branch (2031:8): [True: 70.9k, False: 64.5k]
  ------------------
 2032|  70.9k|        u4_min_num_out_bufs = MIN_OUT_BUFS_420;
  ------------------
  |  |  120|  70.9k|#define MIN_OUT_BUFS_420        3
  ------------------
 2033|  64.5k|    else if(u1_chroma_format == IV_YUV_422ILE)
  ------------------
  |  Branch (2033:13): [True: 0, False: 64.5k]
  ------------------
 2034|      0|        u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE;
  ------------------
  |  |  121|      0|#define MIN_OUT_BUFS_422ILE     1
  ------------------
 2035|  64.5k|    else if(u1_chroma_format == IV_RGB_565)
  ------------------
  |  Branch (2035:13): [True: 0, False: 64.5k]
  ------------------
 2036|      0|        u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565;
  ------------------
  |  |  122|      0|#define MIN_OUT_BUFS_RGB565     1
  ------------------
 2037|  64.5k|    else if((u1_chroma_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (2037:13): [True: 37.3k, False: 27.1k]
  ------------------
 2038|  27.1k|                    || (u1_chroma_format == IV_YUV_420SP_VU))
  ------------------
  |  Branch (2038:24): [True: 27.1k, False: 0]
  ------------------
 2039|  64.5k|        u4_min_num_out_bufs = MIN_OUT_BUFS_420SP;
  ------------------
  |  |  123|  64.5k|#define MIN_OUT_BUFS_420SP      2
  ------------------
 2040|       |
 2041|   135k|    if(u1_chroma_format == IV_YUV_420P)
  ------------------
  |  Branch (2041:8): [True: 70.9k, False: 64.5k]
  ------------------
 2042|  70.9k|    {
 2043|  70.9k|        p_buf_size[0] = (pic_wd * pic_ht);
 2044|  70.9k|        p_buf_size[1] = (pic_wd * pic_ht) >> 2;
 2045|  70.9k|        p_buf_size[2] = (pic_wd * pic_ht) >> 2;
 2046|  70.9k|    }
 2047|  64.5k|    else if(u1_chroma_format == IV_YUV_422ILE)
  ------------------
  |  Branch (2047:13): [True: 0, False: 64.5k]
  ------------------
 2048|      0|    {
 2049|      0|        p_buf_size[0] = (pic_wd * pic_ht) * 2;
 2050|      0|        p_buf_size[1] = p_buf_size[2] = 0;
 2051|      0|    }
 2052|  64.5k|    else if(u1_chroma_format == IV_RGB_565)
  ------------------
  |  Branch (2052:13): [True: 0, False: 64.5k]
  ------------------
 2053|      0|    {
 2054|      0|        p_buf_size[0] = (pic_wd * pic_ht) * 2;
 2055|      0|        p_buf_size[1] = p_buf_size[2] = 0;
 2056|      0|    }
 2057|  64.5k|    else if((u1_chroma_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (2057:13): [True: 37.3k, False: 27.1k]
  ------------------
 2058|  27.1k|                    || (u1_chroma_format == IV_YUV_420SP_VU))
  ------------------
  |  Branch (2058:24): [True: 27.1k, False: 0]
  ------------------
 2059|  64.5k|    {
 2060|  64.5k|        p_buf_size[0] = (pic_wd * pic_ht);
 2061|  64.5k|        p_buf_size[1] = (pic_wd * pic_ht) >> 1;
 2062|  64.5k|        p_buf_size[2] = 0;
 2063|  64.5k|    }
 2064|       |
 2065|   135k|    return u4_min_num_out_bufs;
 2066|   135k|}
check_app_out_buf_size:
 2069|   135k|{
 2070|   135k|    UWORD32 au4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
 2071|   135k|    UWORD32 u4_min_num_out_bufs, i;
 2072|   135k|    UWORD32 pic_wd, pic_ht;
 2073|       |
 2074|   135k|    if(0 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (2074:8): [True: 135k, False: 0]
  ------------------
 2075|   135k|    {
 2076|   135k|        pic_wd = ps_dec->u2_disp_width;
 2077|   135k|        pic_ht = ps_dec->u2_disp_height;
 2078|       |
 2079|   135k|    }
 2080|      0|    else
 2081|      0|    {
 2082|      0|        pic_wd = ps_dec->u2_frm_wd_y;
 2083|      0|        pic_ht = ps_dec->u2_frm_ht_y;
 2084|      0|    }
 2085|       |
 2086|   135k|    if(ps_dec->u4_app_disp_width > pic_wd)
  ------------------
  |  Branch (2086:8): [True: 0, False: 135k]
  ------------------
 2087|      0|        pic_wd = ps_dec->u4_app_disp_width;
 2088|       |
 2089|   135k|    u4_min_num_out_bufs = ih264d_get_outbuf_size(pic_wd, pic_ht,
 2090|   135k|                                                 ps_dec->u1_chroma_format,
 2091|   135k|                                                 &au4_min_out_buf_size[0]);
 2092|       |
 2093|       |
 2094|   135k|    if(0 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (2094:8): [True: 135k, False: 0]
  ------------------
 2095|   135k|    {
 2096|   135k|        if(ps_dec->ps_out_buffer->u4_num_bufs < u4_min_num_out_bufs)
  ------------------
  |  Branch (2096:12): [True: 0, False: 135k]
  ------------------
 2097|      0|            return IV_FAIL;
 2098|       |
 2099|   473k|        for(i = 0; i < u4_min_num_out_bufs; i++)
  ------------------
  |  Branch (2099:20): [True: 339k, False: 134k]
  ------------------
 2100|   339k|        {
 2101|   339k|            if(ps_dec->ps_out_buffer->u4_min_out_buf_size[i]
  ------------------
  |  Branch (2101:16): [True: 1.37k, False: 337k]
  ------------------
 2102|   339k|                            < au4_min_out_buf_size[i])
 2103|  1.37k|                return (IV_FAIL);
 2104|   339k|        }
 2105|   135k|    }
 2106|      0|    else
 2107|      0|    {
 2108|      0|        if(ps_dec->disp_bufs[0].u4_num_bufs < u4_min_num_out_bufs)
  ------------------
  |  Branch (2108:12): [True: 0, False: 0]
  ------------------
 2109|      0|            return IV_FAIL;
 2110|       |
 2111|      0|        for(i = 0; i < u4_min_num_out_bufs; i++)
  ------------------
  |  Branch (2111:20): [True: 0, False: 0]
  ------------------
 2112|      0|        {
 2113|       |            /* We need to check only with the disp_buffer[0], because we have
 2114|       |             * already ensured that all the buffers are of the same size in
 2115|       |             * ih264d_set_display_frame.
 2116|       |             */
 2117|      0|            if(ps_dec->disp_bufs[0].u4_bufsize[i] < au4_min_out_buf_size[i])
  ------------------
  |  Branch (2117:16): [True: 0, False: 0]
  ------------------
 2118|      0|                return (IV_FAIL);
 2119|      0|        }
 2120|       |
 2121|      0|    }
 2122|       |
 2123|   134k|    return (IV_SUCCESS);
 2124|   135k|}

ih264d_get_bit_h264:
   64|  5.24M|{
   65|  5.24M|    UWORD32 u4_code;
   66|       |
   67|  5.24M|    GETBIT(u4_code, ps_stream->u4_ofst, ps_stream->pu4_buffer);
  ------------------
  |  |  105|  5.24M|#define   GETBIT(u4_code, u4_offset, pu4_bitstream)                         \
  |  |  106|  5.24M|{                                                                           \
  |  |  107|  5.24M|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  108|  5.24M|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  109|  5.24M|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  110|  5.24M|    u4_code = pu4_buf[u4_word_off] << u4_bit_off;                           \
  |  |  111|  5.24M|    (u4_offset)++;                                                          \
  |  |  112|  5.24M|    u4_code = (u4_code >> 31);                                              \
  |  |  113|  5.24M|}
  ------------------
   68|  5.24M|    return (u4_code);
   69|  5.24M|}
ih264d_get_bits_h264:
   91|  8.02M|{
   92|  8.02M|    UWORD32 u4_code = 0;
   93|  8.02M|    if(u4_num_bits)
  ------------------
  |  Branch (93:8): [True: 8.02M, False: 0]
  ------------------
   94|  8.02M|        GETBITS(u4_code, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, u4_num_bits);
  ------------------
  |  |  120|  8.02M|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  8.02M|{                                                                           \
  |  |  122|  8.02M|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  8.02M|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  8.02M|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  8.02M|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  8.02M|                                                                            \
  |  |  127|  8.02M|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 7.21M, False: 810k]
  |  |  ------------------
  |  |  128|  8.02M|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  7.21M|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  8.02M|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  8.02M|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  8.02M|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  8.02M|}                                                                           \
  ------------------
   95|  8.02M|    return (u4_code);
   96|  8.02M|}
ih264d_flush_bits_h264:
  151|  31.6k|{
  152|  31.6k|    ps_bitstrm->u4_ofst += u4_num_bits;
  153|       |
  154|  31.6k|    if(ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  ------------------
  |  Branch (154:8): [True: 0, False: 31.6k]
  ------------------
  155|      0|    {
  156|      0|        return ERROR_EOB_FLUSHBITS_T;
  157|      0|    }
  158|  31.6k|    return OK;
  ------------------
  |  |  114|  31.6k|#define OK        0
  ------------------
  159|  31.6k|}
ih264d_check_byte_aligned:
  176|   288k|{
  177|   288k|    if(ps_bitstrm->u4_ofst & 0x07)
  ------------------
  |  Branch (177:8): [True: 219k, False: 68.8k]
  ------------------
  178|   219k|        return (0);
  179|  68.8k|    else
  180|  68.8k|        return (1);
  181|   288k|}

ih264d_init_cabac_dec_envirnoment:
   64|  28.6k|{
   65|  28.6k|    UWORD32 u4_code_int_val_ofst;
   66|       |
   67|  28.6k|    ps_cab_env->u4_code_int_range = (HALF - 2) << 23;
  ------------------
  |  |   45|  28.6k|#define   HALF      (1 << (B_BITS-1))
  |  |  ------------------
  |  |  |  |   43|  28.6k|#define   B_BITS    10
  |  |  ------------------
  ------------------
   68|  28.6k|    NEXTBITS(u4_code_int_val_ofst, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer,
  ------------------
  |  |  137|  28.6k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  28.6k|{                                                                           \
  |  |  139|  28.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  28.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  28.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  28.6k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  28.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 19.8k, False: 8.80k]
  |  |  ------------------
  |  |  144|  28.6k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  19.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  28.6k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  28.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  28.6k|}
  ------------------
   69|  28.6k|             32);
   70|  28.6k|    FLUSHBITS(ps_bitstrm->u4_ofst, 9)
  ------------------
  |  |  193|  28.6k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  28.6k|{                                                                           \
  |  |  195|  28.6k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  28.6k|}
  ------------------
   71|       |
   72|  28.6k|    if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  28.6k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 2.26k, False: 26.4k]
  |  |  ------------------
  ------------------
   73|  2.26k|        return ERROR_EOB_FLUSHBITS_T;
   74|       |
   75|  26.4k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
   76|       |
   77|       |    /*brief description of the design adopted for CABAC*/
   78|       |    /*according to the standard the u4_code_int_range needs to be initialized 0x 1FE(10 bits) and
   79|       |     9 bits from the bit stream need to be read and into the u4_code_int_val_ofst.As and when the
   80|       |     u4_code_int_range becomes less than 10 bits we need to renormalize and read from the bitstream*
   81|       |
   82|       |     In the implemented design
   83|       |     initially
   84|       |
   85|       |     range_new = range <<23
   86|       |     valOffset_new = valOffset << 23 + 23 bits(read from the bit stream)
   87|       |
   88|       |     Thus we have read 23 more bits ahead of time.
   89|       |
   90|       |     It can be mathematical proved that even with the modified range and u4_ofst the operations
   91|       |     like comparison and subtraction needed for a bin decode are still valid(both in the regular case and the bypass case)
   92|       |
   93|       |     As bins are decoded..we consume the bits that we have already read into the valOffset.The clz of Range
   94|       |     gives us the number of bits we consumed of the 23 bits that we have read ahead of time.
   95|       |
   96|       |     when the number bits we have consumed exceeds 23 ,we renormalize..and  we read from the bitstream again*/
   97|       |
   98|  26.4k|RESET_BIN_COUNTS(ps_cab_env)
   99|       |
  100|  26.4k|    return OK;
  ------------------
  |  |  114|  26.4k|#define OK        0
  ------------------
  101|  28.6k|}
ih264d_init_cabac_contexts:
  124|  20.5k|{
  125|       |
  126|  20.5k|    bin_ctxt_model_t *p_cabac_ctxt_table_t = ps_dec->p_cabac_ctxt_table_t;
  127|  20.5k|    UWORD8 u1_qp_y = ps_dec->ps_cur_slice->u1_slice_qp;
  128|  20.5k|    UWORD8 u1_cabac_init_Idc = 0;
  129|       |
  130|  20.5k|    if(I_SLICE != u1_slice_type)
  ------------------
  |  |  370|  20.5k|#define I_SLICE  2
  ------------------
  |  Branch (130:8): [True: 17.1k, False: 3.45k]
  ------------------
  131|  17.1k|    {
  132|  17.1k|        u1_cabac_init_Idc = ps_dec->ps_cur_slice->u1_cabac_init_idc;
  133|  17.1k|    }
  134|       |
  135|  20.5k|    {
  136|       |        /* MAKING ps_dec->p_ctxt_inc_mb_map a scratch buffer */
  137|       |        /* 0th entry of CtxtIncMbMap will be always be containing default values
  138|       |         for CABAC context representing MB not available */
  139|  20.5k|        ctxt_inc_mb_info_t *p_DefCtxt = ps_dec->p_ctxt_inc_mb_map - 1;
  140|  20.5k|        UWORD8 *pu1_temp;
  141|  20.5k|        WORD8 i;
  142|  20.5k|        p_DefCtxt->u1_mb_type = CAB_SKIP;
  ------------------
  |  |  402|  20.5k|#define CAB_SKIP          0x10 /* 0001 0000 */
  ------------------
  143|       |
  144|  20.5k|        p_DefCtxt->u1_cbp = 0x0f;
  145|  20.5k|        p_DefCtxt->u1_intra_chroma_pred_mode = 0;
  146|       |
  147|  20.5k|        p_DefCtxt->u1_yuv_dc_csbp = 0x7;
  148|       |
  149|  20.5k|        p_DefCtxt->u1_transform8x8_ctxt = 0;
  150|       |
  151|  20.5k|        pu1_temp = (UWORD8*)p_DefCtxt->i1_ref_idx;
  152|   102k|        for(i = 0; i < 4; i++, pu1_temp++)
  ------------------
  |  Branch (152:20): [True: 82.2k, False: 20.5k]
  ------------------
  153|  82.2k|            (*pu1_temp) = 0;
  154|  20.5k|        pu1_temp = (UWORD8*)p_DefCtxt->u1_mv;
  155|   349k|        for(i = 0; i < 16; i++, pu1_temp++)
  ------------------
  |  Branch (155:20): [True: 329k, False: 20.5k]
  ------------------
  156|   329k|            (*pu1_temp) = 0;
  157|  20.5k|        ps_dec->ps_def_ctxt_mb_info = p_DefCtxt;
  158|  20.5k|    }
  159|       |
  160|  20.5k|    if(u1_slice_type == I_SLICE)
  ------------------
  |  |  370|  20.5k|#define I_SLICE  2
  ------------------
  |  Branch (160:8): [True: 3.45k, False: 17.1k]
  ------------------
  161|  3.45k|    {
  162|  3.45k|        u1_cabac_init_Idc = 3;
  163|  3.45k|        ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_I_SLICE;
  164|  3.45k|    }
  165|  17.1k|    else if(u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  17.1k|#define P_SLICE  0
  ------------------
  |  Branch (165:13): [True: 7.64k, False: 9.46k]
  ------------------
  166|  7.64k|    {
  167|  7.64k|        ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_P_SLICE;
  168|  7.64k|        ps_dec->p_mb_skip_flag_t = p_cabac_ctxt_table_t + MB_SKIP_FLAG_P_SLICE;
  169|  7.64k|        ps_dec->p_sub_mb_type_t = p_cabac_ctxt_table_t + SUB_MB_TYPE_P_SLICE;
  170|  7.64k|    }
  171|  9.46k|    else if(u1_slice_type == B_SLICE)
  ------------------
  |  |  369|  9.46k|#define B_SLICE  1
  ------------------
  |  Branch (171:13): [True: 9.46k, False: 0]
  ------------------
  172|  9.46k|    {
  173|  9.46k|        ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_B_SLICE;
  174|  9.46k|        ps_dec->p_mb_skip_flag_t = p_cabac_ctxt_table_t + MB_SKIP_FLAG_B_SLICE;
  175|  9.46k|        ps_dec->p_sub_mb_type_t = p_cabac_ctxt_table_t + SUB_MB_TYPE_B_SLICE;
  176|  9.46k|    }
  177|  20.5k|    {
  178|  20.5k|        bin_ctxt_model_t *p_cabac_ctxt_table_t_tmp = p_cabac_ctxt_table_t;
  179|  20.5k|        if(ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (179:12): [True: 0, False: 20.5k]
  ------------------
  180|      0|        {
  181|      0|            p_cabac_ctxt_table_t_tmp += SIGNIFICANT_COEFF_FLAG_FLD;
  182|       |
  183|      0|        }
  184|  20.5k|        else
  185|  20.5k|        {
  186|  20.5k|            p_cabac_ctxt_table_t_tmp += SIGNIFICANT_COEFF_FLAG_FRAME;
  187|  20.5k|        }
  188|  20.5k|        {
  189|  20.5k|            bin_ctxt_model_t * * p_significant_coeff_flag_t =
  190|  20.5k|                            ps_dec->p_significant_coeff_flag_t;
  191|  20.5k|            p_significant_coeff_flag_t[0] = p_cabac_ctxt_table_t_tmp
  192|  20.5k|                            + SIG_COEFF_CTXT_CAT_0_OFFSET;
  193|  20.5k|            p_significant_coeff_flag_t[1] = p_cabac_ctxt_table_t_tmp
  194|  20.5k|                            + SIG_COEFF_CTXT_CAT_1_OFFSET;
  195|  20.5k|            p_significant_coeff_flag_t[2] = p_cabac_ctxt_table_t_tmp
  196|  20.5k|                            + SIG_COEFF_CTXT_CAT_2_OFFSET;
  197|  20.5k|            p_significant_coeff_flag_t[3] = p_cabac_ctxt_table_t_tmp
  198|  20.5k|                            + SIG_COEFF_CTXT_CAT_3_OFFSET;
  199|  20.5k|            p_significant_coeff_flag_t[4] = p_cabac_ctxt_table_t_tmp
  200|  20.5k|                            + SIG_COEFF_CTXT_CAT_4_OFFSET;
  201|       |
  202|  20.5k|            p_significant_coeff_flag_t[5] = p_cabac_ctxt_table_t_tmp
  203|  20.5k|                            + SIG_COEFF_CTXT_CAT_5_OFFSET;
  204|       |
  205|  20.5k|        }
  206|  20.5k|    }
  207|       |
  208|  20.5k|    memcpy(p_cabac_ctxt_table_t,
  209|  20.5k|           gau1_ih264d_cabac_ctxt_init_table[u1_cabac_init_Idc][u1_qp_y],
  210|  20.5k|           NUM_CABAC_CTXTS * sizeof(bin_ctxt_model_t));
  ------------------
  |  |   81|  20.5k|#define NUM_CABAC_CTXTS 460
  ------------------
  211|  20.5k|}
ih264d_decode_bin:
  237|  3.65M|{
  238|       |
  239|  3.65M|    UWORD32 u4_qnt_int_range, u4_code_int_range, u4_code_int_val_ofst,
  240|  3.65M|                    u4_int_range_lps;
  241|       |
  242|  3.65M|    UWORD32 u4_symbol, u4_mps_state;
  243|       |
  244|  3.65M|    bin_ctxt_model_t *ps_bin_ctxt;
  245|       |
  246|  3.65M|    UWORD32 table_lookup;
  247|  3.65M|    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
  248|  3.65M|    UWORD32 u4_clz;
  249|       |
  250|  3.65M|    ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_inc;
  251|       |
  252|  3.65M|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  253|  3.65M|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  254|       |
  255|  3.65M|    u4_mps_state = (ps_bin_ctxt->u1_mps_state);
  256|  3.65M|    u4_clz = CLZ(u4_code_int_range);
  257|       |
  258|  3.65M|    u4_qnt_int_range = u4_code_int_range << u4_clz;
  259|  3.65M|    u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  260|       |
  261|  3.65M|    table_lookup = pu4_table[(u4_mps_state << 2) + u4_qnt_int_range];
  262|  3.65M|    u4_int_range_lps = table_lookup & 0xff;
  263|       |
  264|  3.65M|    u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  265|  3.65M|    u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  266|       |
  267|  3.65M|    u4_symbol = ((u4_mps_state >> 6) & 0x1);
  268|       |
  269|  3.65M|    u4_mps_state = (table_lookup >> 8) & 0x7F;
  270|       |
  271|  3.65M|    CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
  ------------------
  |  |  184|  3.65M|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|  3.65M|{                                                                                         \
  |  |  186|  3.65M|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 246k, False: 3.40M]
  |  |  ------------------
  |  |  187|  3.65M|  {                                                                                         \
  |  |  188|   246k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|   246k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|   246k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|   246k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|   246k|  }                                                                                         \
  |  |  193|  3.65M|}
  ------------------
  272|  3.65M|                 u4_int_range_lps, u4_mps_state, table_lookup)
  273|       |
  274|  3.65M|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
  ------------------
  |  |  113|  3.65M|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  ------------------
  |  Branch (274:8): [True: 9.08k, False: 3.64M]
  ------------------
  275|  9.08k|    {
  276|  9.08k|        UWORD32 *pu4_buffer, u4_offset;
  277|       |
  278|  9.08k|        pu4_buffer = ps_bitstrm->pu4_buffer;
  279|  9.08k|        u4_offset = ps_bitstrm->u4_ofst;
  280|       |
  281|  9.08k|        RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
  ------------------
  |  |  170|  9.08k|  {                                                                                         \
  |  |  171|  9.08k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  9.08k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  9.08k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  9.08k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  9.08k|{                                                                           \
  |  |  |  |  139|  9.08k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  9.08k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  9.08k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  9.08k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  9.08k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 8.41k, False: 666]
  |  |  |  |  ------------------
  |  |  |  |  144|  9.08k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  8.41k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  9.08k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  9.08k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  9.08k|}
  |  |  ------------------
  |  |  174|  9.08k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  9.08k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  9.08k|{                                                                           \
  |  |  |  |  195|  9.08k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  9.08k|}
  |  |  ------------------
  |  |  175|  9.08k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  9.08k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  9.08k|  }
  ------------------
  282|  9.08k|                            pu4_buffer)
  283|       |
  284|  9.08k|        ps_bitstrm->u4_ofst = u4_offset;
  285|  9.08k|    }
  286|       |
  287|  3.65M|    INC_BIN_COUNT(ps_cab_env)
  288|       |
  289|  3.65M|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  290|  3.65M|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  291|  3.65M|    ps_bin_ctxt->u1_mps_state = u4_mps_state;
  292|       |
  293|  3.65M|    return (u4_symbol);
  294|  3.65M|}
ih264d_decode_terminate:
  315|  1.87M|{
  316|  1.87M|    UWORD32 u4_symbol;
  317|  1.87M|    UWORD32 u4_code_int_val_ofst, u4_code_int_range;
  318|  1.87M|    UWORD32 u4_clz;
  319|       |
  320|  1.87M|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  321|  1.87M|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  322|       |
  323|  1.87M|    u4_clz = CLZ(u4_code_int_range);
  324|  1.87M|    u4_code_int_range -= (2 << (23 - u4_clz));
  325|       |
  326|  1.87M|    if(u4_code_int_val_ofst >= u4_code_int_range)
  ------------------
  |  Branch (326:8): [True: 6.52k, False: 1.87M]
  ------------------
  327|  6.52k|    {
  328|       |        /* S=1 */
  329|  6.52k|        u4_symbol = 1;
  330|       |
  331|  6.52k|        {
  332|       |
  333|       |            /*the u4_ofst needs to be updated before termination*/
  334|  6.52k|            ps_stream->u4_ofst += u4_clz;
  335|       |
  336|  6.52k|        }
  337|       |
  338|  6.52k|    }
  339|  1.87M|    else
  340|  1.87M|    {
  341|       |        /* S=0 */
  342|  1.87M|        u4_symbol = 0;
  343|       |
  344|  1.87M|        if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
  ------------------
  |  |  113|  1.87M|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  ------------------
  |  Branch (344:12): [True: 957, False: 1.87M]
  ------------------
  345|    957|        {
  346|    957|            UWORD32 *pu4_buffer, u4_offset;
  347|       |
  348|    957|            pu4_buffer = ps_stream->pu4_buffer;
  349|    957|            u4_offset = ps_stream->u4_ofst;
  350|       |
  351|    957|            RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
  ------------------
  |  |  170|    957|  {                                                                                         \
  |  |  171|    957|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|    957|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|    957|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|    957|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|    957|{                                                                           \
  |  |  |  |  139|    957|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|    957|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|    957|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|    957|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|    957|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 684, False: 273]
  |  |  |  |  ------------------
  |  |  |  |  144|    957|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|    684|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|    957|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|    957|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|    957|}
  |  |  ------------------
  |  |  174|    957|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|    957|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|    957|{                                                                           \
  |  |  |  |  195|    957|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|    957|}
  |  |  ------------------
  |  |  175|    957|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|    957|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|    957|  }
  ------------------
  352|    957|                                pu4_buffer)
  353|    957|            ps_stream->u4_ofst = u4_offset;
  354|    957|        }
  355|  1.87M|    }
  356|       |
  357|  1.87M|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  358|  1.87M|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  359|       |
  360|  1.87M|    INC_BIN_COUNT(ps_cab_env)
  361|       |
  362|  1.87M|    return (u4_symbol);
  363|  1.87M|}
ih264d_decode_bins_tunary:
  394|  88.0k|{
  395|  88.0k|    UWORD32 u4_value;
  396|  88.0k|    UWORD32 u4_symbol;
  397|  88.0k|    UWORD8 u4_ctx_Inc;
  398|  88.0k|    bin_ctxt_model_t *ps_bin_ctxt;
  399|  88.0k|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
  400|  88.0k|    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
  401|       |
  402|  88.0k|    u4_value = 0;
  403|       |
  404|       |    /*u1_max_bins has to be less than or equal to 4, u1_max_bins <= 4 for  this function*/
  405|       |
  406|       |    /*here the valid length is assumed to be equal to 3 ,so the calling function is expected
  407|       |     to duplicate CtxInc if valid lenth is 2 and cmaxbin is greater than2*/
  408|  88.0k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  409|  88.0k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  410|       |
  411|  88.0k|    do
  412|   144k|    {
  413|   144k|        u4_ctx_Inc = u4_ctx_inc & 0xF;
  414|   144k|        u4_ctx_inc = u4_ctx_inc >> 4;
  415|       |
  416|   144k|        ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc;
  417|       |
  418|   144k|        DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  217|   144k|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|   144k|{                                                                                       \
  |  |  219|   144k|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|   144k|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|   144k|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|   144k|                                                                                        \
  |  |  223|   144k|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|   144k|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|   144k|    UWORD32 table_lookup_m;                                                               \
  |  |  226|   144k|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|   144k|                                                                                        \
  |  |  228|   144k|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|   144k|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|   144k|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|   144k|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|   144k|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|   144k|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|   144k|                                                                                        \
  |  |  235|   144k|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|   144k|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|   144k|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|   144k|    /*if mps*/                                                                          \
  |  |  239|   144k|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|   144k|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 14.9k, False: 129k]
  |  |  ------------------
  |  |  241|   144k|  {                                                                                     \
  |  |  242|  14.9k|                                                                                        \
  |  |  243|  14.9k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|  14.9k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|  14.9k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|  14.9k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|  14.9k|  }                                                                                     \
  |  |  248|   144k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|   288k|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 1.76k, False: 142k]
  |  |  ------------------
  |  |  249|   144k|    {                                                                                   \
  |  |  250|  1.76k|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|  1.76k|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|  1.76k|                                                                                        \
  |  |  253|  1.76k|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|  1.76k|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|  1.76k|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|  1.76k|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  1.76k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  1.76k|{                                                                           \
  |  |  |  |  139|  1.76k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  1.76k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  1.76k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  1.76k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  1.76k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 1.47k, False: 285]
  |  |  |  |  ------------------
  |  |  |  |  144|  1.76k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  1.47k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  1.76k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  1.76k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  1.76k|}
  |  |  ------------------
  |  |  257|  1.76k|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|  1.76k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  1.76k|{                                                                           \
  |  |  |  |  195|  1.76k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  1.76k|}
  |  |  ------------------
  |  |  258|  1.76k|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|  1.76k|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|  1.76k|                                                                                        \
  |  |  261|  1.76k|                                                                                        \
  |  |  262|  1.76k|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|  1.76k|    }                                                                                   \
  |  |  264|   144k|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|   144k|}
  ------------------
  419|   144k|                             pu4_table, ps_bitstrm, u4_symbol)
  420|       |
  421|   144k|        INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
  422|       |
  423|   144k|        u4_value++;
  424|   144k|    }
  425|   144k|    while((u4_value < u1_max_bins) & (u4_symbol));
  ------------------
  |  Branch (425:11): [True: 56.2k, False: 88.0k]
  ------------------
  426|       |
  427|  88.0k|    u4_value = u4_value - 1 + u4_symbol;
  428|       |
  429|  88.0k|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  430|  88.0k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  431|       |
  432|  88.0k|    return (u4_value);
  433|       |
  434|  88.0k|}
ih264d_decode_bins:
  465|   179k|{
  466|   179k|    UWORD32 u4_value;
  467|   179k|    UWORD32 u4_symbol, i;
  468|   179k|    UWORD32 u4_ctxt_inc;
  469|   179k|    bin_ctxt_model_t *ps_bin_ctxt;
  470|   179k|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
  471|   179k|    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
  472|       |
  473|   179k|    i = 0;
  474|       |
  475|   179k|    u4_value = 0;
  476|       |
  477|       |    /* u1_max_bins has to be less than or equal to 4 (u1_max_bins <= 4) for this function */
  478|   179k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  479|   179k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  480|       |
  481|   179k|    do
  482|   414k|    {
  483|   414k|        u4_ctxt_inc = u4_ctx_inc & 0xf;
  484|   414k|        u4_ctx_inc = u4_ctx_inc >> 4;
  485|       |
  486|   414k|        ps_bin_ctxt = ps_src_bin_ctxt + u4_ctxt_inc;
  487|       |
  488|   414k|        DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  217|   414k|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|   414k|{                                                                                       \
  |  |  219|   414k|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|   414k|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|   414k|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|   414k|                                                                                        \
  |  |  223|   414k|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|   414k|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|   414k|    UWORD32 table_lookup_m;                                                               \
  |  |  226|   414k|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|   414k|                                                                                        \
  |  |  228|   414k|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|   414k|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|   414k|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|   414k|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|   414k|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|   414k|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|   414k|                                                                                        \
  |  |  235|   414k|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|   414k|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|   414k|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|   414k|    /*if mps*/                                                                          \
  |  |  239|   414k|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|   414k|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 69.2k, False: 345k]
  |  |  ------------------
  |  |  241|   414k|  {                                                                                     \
  |  |  242|  69.2k|                                                                                        \
  |  |  243|  69.2k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|  69.2k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|  69.2k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|  69.2k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|  69.2k|  }                                                                                     \
  |  |  248|   414k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|   829k|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 3.14k, False: 411k]
  |  |  ------------------
  |  |  249|   414k|    {                                                                                   \
  |  |  250|  3.14k|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|  3.14k|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|  3.14k|                                                                                        \
  |  |  253|  3.14k|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|  3.14k|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|  3.14k|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|  3.14k|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  3.14k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  3.14k|{                                                                           \
  |  |  |  |  139|  3.14k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  3.14k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  3.14k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  3.14k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  3.14k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 2.96k, False: 178]
  |  |  |  |  ------------------
  |  |  |  |  144|  3.14k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  2.96k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  3.14k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  3.14k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  3.14k|}
  |  |  ------------------
  |  |  257|  3.14k|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|  3.14k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  3.14k|{                                                                           \
  |  |  |  |  195|  3.14k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  3.14k|}
  |  |  ------------------
  |  |  258|  3.14k|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|  3.14k|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|  3.14k|                                                                                        \
  |  |  261|  3.14k|                                                                                        \
  |  |  262|  3.14k|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|  3.14k|    }                                                                                   \
  |  |  264|   414k|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|   414k|}
  ------------------
  489|   414k|                             pu4_table, ps_bitstrm, u4_symbol)
  490|       |
  491|   414k|        INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
  492|       |
  493|   414k|        u4_value = (u4_value << 1) | (u4_symbol);
  494|       |
  495|   414k|        i++;
  496|   414k|    }
  497|   414k|    while(i < u1_max_bins);
  ------------------
  |  Branch (497:11): [True: 235k, False: 179k]
  ------------------
  498|       |
  499|   179k|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  500|   179k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  501|       |
  502|   179k|    return (u4_value);
  503|       |
  504|   179k|}
ih264d_decode_bins_unary:
  533|   475k|{
  534|   475k|    UWORD32 u4_value;
  535|   475k|    UWORD32 u4_symbol;
  536|   475k|    bin_ctxt_model_t *ps_bin_ctxt;
  537|   475k|    UWORD32 u4_ctx_Inc;
  538|   475k|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
  539|   475k|    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
  540|       |
  541|       |    /* In this function the valid length for u4_ctx_inc is always taken to be 5, so if
  542|       |     the valid length is less than 5 the caller needs to duplicate accordingly */
  543|       |
  544|       |    /* u1_max_bins is always greater than or equal to 9, so the check against u1_max_bins is done only after the first loop */
  545|   475k|    u4_value = 0;
  546|   475k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  547|   475k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  548|       |
  549|   475k|    do
  550|   663k|    {
  551|   663k|        u4_ctx_Inc = u4_ctx_inc & 0xf;
  552|   663k|        u4_ctx_inc = u4_ctx_inc >> 4;
  553|       |
  554|   663k|        ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc;
  555|       |
  556|   663k|        DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  217|   663k|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|   663k|{                                                                                       \
  |  |  219|   663k|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|   663k|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|   663k|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|   663k|                                                                                        \
  |  |  223|   663k|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|   663k|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|   663k|    UWORD32 table_lookup_m;                                                               \
  |  |  226|   663k|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|   663k|                                                                                        \
  |  |  228|   663k|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|   663k|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|   663k|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|   663k|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|   663k|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|   663k|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|   663k|                                                                                        \
  |  |  235|   663k|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|   663k|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|   663k|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|   663k|    /*if mps*/                                                                          \
  |  |  239|   663k|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|   663k|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 94.4k, False: 569k]
  |  |  ------------------
  |  |  241|   663k|  {                                                                                     \
  |  |  242|  94.4k|                                                                                        \
  |  |  243|  94.4k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|  94.4k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|  94.4k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|  94.4k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|  94.4k|  }                                                                                     \
  |  |  248|   663k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|  1.32M|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 4.45k, False: 659k]
  |  |  ------------------
  |  |  249|   663k|    {                                                                                   \
  |  |  250|  4.45k|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|  4.45k|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|  4.45k|                                                                                        \
  |  |  253|  4.45k|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|  4.45k|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|  4.45k|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|  4.45k|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  4.45k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  4.45k|{                                                                           \
  |  |  |  |  139|  4.45k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  4.45k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  4.45k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  4.45k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  4.45k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 4.24k, False: 205]
  |  |  |  |  ------------------
  |  |  |  |  144|  4.45k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  4.24k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  4.45k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  4.45k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  4.45k|}
  |  |  ------------------
  |  |  257|  4.45k|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|  4.45k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  4.45k|{                                                                           \
  |  |  |  |  195|  4.45k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  4.45k|}
  |  |  ------------------
  |  |  258|  4.45k|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|  4.45k|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|  4.45k|                                                                                        \
  |  |  261|  4.45k|                                                                                        \
  |  |  262|  4.45k|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|  4.45k|    }                                                                                   \
  |  |  264|   663k|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|   663k|}
  ------------------
  557|   663k|                             pu4_table, ps_bitstrm, u4_symbol)
  558|       |
  559|   663k|        INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
  560|       |
  561|   663k|        u4_value++;
  562|       |
  563|   663k|    }
  564|   663k|    while(u4_symbol && u4_value < 4);
  ------------------
  |  Branch (564:11): [True: 194k, False: 468k]
  |  Branch (564:24): [True: 188k, False: 6.66k]
  ------------------
  565|       |
  566|   475k|    if(u4_symbol && (u4_value < u1_max_bins))
  ------------------
  |  Branch (566:8): [True: 6.66k, False: 468k]
  |  Branch (566:21): [True: 6.66k, False: 0]
  ------------------
  567|  6.66k|    {
  568|       |
  569|  6.66k|        u4_ctx_Inc = u4_ctx_inc & 0xf;
  570|       |
  571|  6.66k|        ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc;
  572|       |
  573|  6.66k|        do
  574|  55.1k|        {
  575|       |
  576|  55.1k|            DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  217|  55.1k|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|  55.1k|{                                                                                       \
  |  |  219|  55.1k|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|  55.1k|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|  55.1k|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|  55.1k|                                                                                        \
  |  |  223|  55.1k|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|  55.1k|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|  55.1k|    UWORD32 table_lookup_m;                                                               \
  |  |  226|  55.1k|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|  55.1k|                                                                                        \
  |  |  228|  55.1k|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|  55.1k|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|  55.1k|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|  55.1k|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|  55.1k|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|  55.1k|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|  55.1k|                                                                                        \
  |  |  235|  55.1k|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|  55.1k|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|  55.1k|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|  55.1k|    /*if mps*/                                                                          \
  |  |  239|  55.1k|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|  55.1k|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 5.40k, False: 49.7k]
  |  |  ------------------
  |  |  241|  55.1k|  {                                                                                     \
  |  |  242|  5.40k|                                                                                        \
  |  |  243|  5.40k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|  5.40k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|  5.40k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|  5.40k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|  5.40k|  }                                                                                     \
  |  |  248|  55.1k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|   110k|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 916, False: 54.2k]
  |  |  ------------------
  |  |  249|  55.1k|    {                                                                                   \
  |  |  250|    916|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|    916|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|    916|                                                                                        \
  |  |  253|    916|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|    916|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|    916|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|    916|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|    916|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|    916|{                                                                           \
  |  |  |  |  139|    916|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|    916|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|    916|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|    916|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|    916|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 649, False: 267]
  |  |  |  |  ------------------
  |  |  |  |  144|    916|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|    649|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|    916|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|    916|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|    916|}
  |  |  ------------------
  |  |  257|    916|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|    916|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|    916|{                                                                           \
  |  |  |  |  195|    916|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|    916|}
  |  |  ------------------
  |  |  258|    916|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|    916|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|    916|                                                                                        \
  |  |  261|    916|                                                                                        \
  |  |  262|    916|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|    916|    }                                                                                   \
  |  |  264|  55.1k|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|  55.1k|}
  ------------------
  577|  55.1k|                                 pu4_table, ps_bitstrm, u4_symbol)
  578|       |
  579|  55.1k|            INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
  580|       |
  581|  55.1k|            u4_value++;
  582|       |
  583|  55.1k|        }
  584|  55.1k|        while(u4_symbol && (u4_value < u1_max_bins));
  ------------------
  |  Branch (584:15): [True: 50.2k, False: 4.85k]
  |  Branch (584:28): [True: 48.4k, False: 1.81k]
  ------------------
  585|       |
  586|  6.66k|    }
  587|       |
  588|   475k|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  589|   475k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  590|       |
  591|   475k|    u4_value = u4_value - 1 + u4_symbol;
  592|       |
  593|   475k|    return (u4_value);
  594|       |
  595|   475k|}
ih264d_decode_bypass_bins_unary:
  622|   229k|{
  623|   229k|    UWORD32 u4_value;
  624|   229k|    UWORD32 u4_bin;
  625|   229k|    UWORD32 u4_code_int_val_ofst, u4_code_int_range;
  626|       |
  627|   229k|    UWORD32 u1_max_bins;
  628|       |
  629|   229k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  630|   229k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  631|       |
  632|   229k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
  ------------------
  |  |  114|   229k|#define ONE_RIGHT_SHIFTED_BY_9    1<<9
  ------------------
  |  Branch (632:8): [True: 3.71k, False: 225k]
  ------------------
  633|  3.71k|    {
  634|  3.71k|        UWORD32 *pu4_buffer, u4_offset;
  635|       |
  636|  3.71k|        pu4_buffer = ps_bitstrm->pu4_buffer;
  637|  3.71k|        u4_offset = ps_bitstrm->u4_ofst;
  638|       |
  639|  3.71k|        RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
  ------------------
  |  |  170|  3.71k|  {                                                                                         \
  |  |  171|  3.71k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  3.71k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  3.71k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  3.71k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  3.71k|{                                                                           \
  |  |  |  |  139|  3.71k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  3.71k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  3.71k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  3.71k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  3.71k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 3.56k, False: 158]
  |  |  |  |  ------------------
  |  |  |  |  144|  3.71k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  3.56k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  3.71k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  3.71k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  3.71k|}
  |  |  ------------------
  |  |  174|  3.71k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  3.71k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  3.71k|{                                                                           \
  |  |  |  |  195|  3.71k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  3.71k|}
  |  |  ------------------
  |  |  175|  3.71k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  3.71k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  3.71k|  }
  ------------------
  640|  3.71k|                            pu4_buffer)
  641|  3.71k|        ps_bitstrm->u4_ofst = u4_offset;
  642|  3.71k|    }
  643|       |
  644|       |    /*as it is called only from mvd*/
  645|   229k|    u1_max_bins = 32;
  646|   229k|    u4_value = 0;
  647|       |
  648|   229k|    do
  649|   274k|    {
  650|   274k|        u4_value++;
  651|       |
  652|   274k|        u4_code_int_range = u4_code_int_range >> 1;
  653|   274k|        if(u4_code_int_val_ofst >= u4_code_int_range)
  ------------------
  |  Branch (653:12): [True: 45.0k, False: 229k]
  ------------------
  654|  45.0k|        {
  655|       |            /* S=1 */
  656|  45.0k|            u4_bin = 1;
  657|  45.0k|            u4_code_int_val_ofst -= u4_code_int_range;
  658|  45.0k|        }
  659|   229k|        else
  660|   229k|        {
  661|       |            /* S=0 */
  662|   229k|            u4_bin = 0;
  663|   229k|        }
  664|       |
  665|   274k|        INC_BIN_COUNT(ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
  666|       |
  667|   274k|        if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
  ------------------
  |  |  114|   274k|#define ONE_RIGHT_SHIFTED_BY_9    1<<9
  ------------------
  |  Branch (667:12): [True: 18.3k, False: 256k]
  ------------------
  668|  18.3k|        {
  669|  18.3k|            UWORD32 *pu4_buffer, u4_offset;
  670|       |
  671|  18.3k|            pu4_buffer = ps_bitstrm->pu4_buffer;
  672|  18.3k|            u4_offset = ps_bitstrm->u4_ofst;
  673|       |
  674|  18.3k|            RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
  ------------------
  |  |  170|  18.3k|  {                                                                                         \
  |  |  171|  18.3k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  18.3k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  18.3k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  18.3k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  18.3k|{                                                                           \
  |  |  |  |  139|  18.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  18.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  18.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  18.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  18.3k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 17.2k, False: 1.03k]
  |  |  |  |  ------------------
  |  |  |  |  144|  18.3k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  17.2k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  18.3k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  18.3k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  18.3k|}
  |  |  ------------------
  |  |  174|  18.3k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  18.3k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  18.3k|{                                                                           \
  |  |  |  |  195|  18.3k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  18.3k|}
  |  |  ------------------
  |  |  175|  18.3k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  18.3k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  18.3k|  }
  ------------------
  675|  18.3k|                                pu4_buffer)
  676|       |
  677|  18.3k|            ps_bitstrm->u4_ofst = u4_offset;
  678|  18.3k|        }
  679|       |
  680|   274k|    }
  681|   274k|    while(u4_bin && (u4_value < u1_max_bins));
  ------------------
  |  Branch (681:11): [True: 45.0k, False: 229k]
  |  Branch (681:21): [True: 45.0k, False: 46]
  ------------------
  682|       |
  683|   229k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  684|   229k|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  685|   229k|    u4_value = (u4_value - 1 + u4_bin);
  686|       |
  687|   229k|return (u4_value);
  688|   229k|}
ih264d_decode_bypass_bins:
  716|   229k|{
  717|   229k|    UWORD32 u4_bins;
  718|   229k|    UWORD32 u4_bin;
  719|   229k|    UWORD32 u4_code_int_val_ofst, u4_code_int_range;
  720|       |
  721|   229k|    u4_bins = 0;
  722|   229k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  723|   229k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  724|       |
  725|   229k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
  ------------------
  |  |  114|   229k|#define ONE_RIGHT_SHIFTED_BY_9    1<<9
  ------------------
  |  Branch (725:8): [True: 0, False: 229k]
  ------------------
  726|      0|    {
  727|      0|        UWORD32 *pu4_buffer, u4_offset;
  728|       |
  729|      0|        pu4_buffer = ps_bitstrm->pu4_buffer;
  730|      0|        u4_offset = ps_bitstrm->u4_ofst;
  731|       |
  732|      0|        RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
  ------------------
  |  |  170|      0|  {                                                                                         \
  |  |  171|      0|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|      0|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|      0|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|      0|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|      0|{                                                                           \
  |  |  |  |  139|      0|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|      0|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|      0|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|      0|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|      0|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 0, False: 0]
  |  |  |  |  ------------------
  |  |  |  |  144|      0|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|      0|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|      0|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|      0|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|      0|}
  |  |  ------------------
  |  |  174|      0|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|      0|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|      0|{                                                                           \
  |  |  |  |  195|      0|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|      0|}
  |  |  ------------------
  |  |  175|      0|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|      0|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|      0|  }
  ------------------
  733|      0|                            pu4_buffer)
  734|      0|        ps_bitstrm->u4_ofst = u4_offset;
  735|      0|    }
  736|       |
  737|   229k|    do
  738|   733k|    {
  739|       |
  740|   733k|        u4_code_int_range = u4_code_int_range >> 1;
  741|       |
  742|   733k|        if(u4_code_int_val_ofst >= u4_code_int_range)
  ------------------
  |  Branch (742:12): [True: 89.3k, False: 644k]
  ------------------
  743|  89.3k|        {
  744|       |            /* S=1 */
  745|  89.3k|            u4_bin = 1;
  746|  89.3k|            u4_code_int_val_ofst -= u4_code_int_range;
  747|  89.3k|        }
  748|   644k|        else
  749|   644k|        {
  750|       |            /* S=0 */
  751|   644k|            u4_bin = 0;
  752|   644k|        }
  753|       |
  754|   733k|        INC_BIN_COUNT(ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
  755|       |
  756|   733k|        u4_bins = ((u4_bins << 1) | u4_bin);
  757|   733k|        u1_max_bins--;
  758|       |
  759|   733k|        if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
  ------------------
  |  |  114|   733k|#define ONE_RIGHT_SHIFTED_BY_9    1<<9
  ------------------
  |  Branch (759:12): [True: 24.8k, False: 708k]
  ------------------
  760|  24.8k|        {
  761|  24.8k|            UWORD32 *pu4_buffer, u4_offset;
  762|       |
  763|  24.8k|            pu4_buffer = ps_bitstrm->pu4_buffer;
  764|  24.8k|            u4_offset = ps_bitstrm->u4_ofst;
  765|       |
  766|  24.8k|            RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
  ------------------
  |  |  170|  24.8k|  {                                                                                         \
  |  |  171|  24.8k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  24.8k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  24.8k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  24.8k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  24.8k|{                                                                           \
  |  |  |  |  139|  24.8k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  24.8k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  24.8k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  24.8k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  24.8k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 23.7k, False: 1.09k]
  |  |  |  |  ------------------
  |  |  |  |  144|  24.8k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  23.7k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  24.8k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  24.8k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  24.8k|}
  |  |  ------------------
  |  |  174|  24.8k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  24.8k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  24.8k|{                                                                           \
  |  |  |  |  195|  24.8k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  24.8k|}
  |  |  ------------------
  |  |  175|  24.8k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  24.8k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  24.8k|  }
  ------------------
  767|  24.8k|                                pu4_buffer)
  768|  24.8k|            ps_bitstrm->u4_ofst = u4_offset;
  769|  24.8k|        }
  770|       |
  771|   733k|    }
  772|   733k|    while(u1_max_bins);
  ------------------
  |  Branch (772:11): [True: 503k, False: 229k]
  ------------------
  773|       |
  774|   229k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  775|   229k|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  776|       |
  777|   229k|    return (u4_bins);
  778|   229k|}

ih264d_update_csbp_8x8:
   32|   394k|{
   33|   394k|    UWORD16 u2_mod_csbp;
   34|       |
   35|   394k|    u2_mod_csbp = u2_luma_csbp;
   36|       |
   37|   394k|    if(u2_mod_csbp & 0x0033)
  ------------------
  |  Branch (37:8): [True: 40.1k, False: 354k]
  ------------------
   38|  40.1k|    {
   39|  40.1k|        u2_mod_csbp |= 0x0033;
   40|  40.1k|    }
   41|       |
   42|   394k|    if(u2_mod_csbp & 0x00CC)
  ------------------
  |  Branch (42:8): [True: 39.0k, False: 355k]
  ------------------
   43|  39.0k|    {
   44|  39.0k|        u2_mod_csbp |= 0x00CC;
   45|  39.0k|    }
   46|       |
   47|   394k|    if(u2_mod_csbp & 0x3300)
  ------------------
  |  Branch (47:8): [True: 40.4k, False: 353k]
  ------------------
   48|  40.4k|    {
   49|  40.4k|        u2_mod_csbp |= 0x3300;
   50|  40.4k|    }
   51|       |
   52|   394k|    if(u2_mod_csbp & 0xCC00)
  ------------------
  |  Branch (52:8): [True: 39.4k, False: 354k]
  ------------------
   53|  39.4k|    {
   54|  39.4k|        u2_mod_csbp |= 0xCC00;
   55|  39.4k|    }
   56|       |
   57|   394k|    return u2_mod_csbp;
   58|   394k|}
ih264d_fill_bs2_horz_vert:
  115|  6.72M|{
  116|       |    /*************************************************************************/
  117|       |    /*u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */
  118|       |    /*************************************************************************/
  119|  6.72M|    UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12);
  120|  6.72M|    UWORD32 u4_horz_bs2_dec = u4_cur_mb_csbp | u4_nbr_horz_csbp;
  121|       |
  122|       |    /*************************************************************************/
  123|       |    /*u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0         */
  124|       |    /*************************************************************************/
  125|  6.72M|    UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK;
  ------------------
  |  |  108|  6.72M|#define CSBP_RIGHT_BLOCK_MASK 0x8888
  ------------------
  126|       |
  127|       |    /*************************************************************************/
  128|       |    /*u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */
  129|       |    /*************************************************************************/
  130|  6.72M|    UWORD32 u4_cur_mb_masked_csbp = (u4_cur_mb_csbp << 1)
  131|  6.72M|                    & (~CSBP_LEFT_BLOCK_MASK);
  ------------------
  |  |  107|  6.72M|#define CSBP_LEFT_BLOCK_MASK 0x1111
  ------------------
  132|       |
  133|       |    /*************************************************************************/
  134|       |    /*u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */
  135|       |    /*************************************************************************/
  136|  6.72M|    UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp)
  137|  6.72M|                    | (u4_left_mb_masked_csbp >> 3);
  138|       |
  139|  6.72M|    UWORD32 u4_vert_bs2_dec = u4_cur_mb_csbp | u4_nbr_vert_csbp;
  140|       |
  141|  6.72M|    UWORD32 u4_reordered_vert_bs2_dec, u4_temp;
  142|       |
  143|  6.72M|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|  6.72M|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  144|       |
  145|       |    /*************************************************************************/
  146|       |    /* Fill horz edges (0,1,2,3) boundary strengths 2 using look up table    */
  147|       |    /*************************************************************************/
  148|  6.72M|    pu4_bs[0] = pu4_packed_bs2[u4_horz_bs2_dec & 0xF];
  149|  6.72M|    pu4_bs[1] = pu4_packed_bs2[(u4_horz_bs2_dec >> 4) & 0xF];
  150|  6.72M|    pu4_bs[2] = pu4_packed_bs2[(u4_horz_bs2_dec >> 8) & 0xF];
  151|  6.72M|    pu4_bs[3] = pu4_packed_bs2[(u4_horz_bs2_dec >> 12) & 0xF];
  152|       |
  153|       |    /*************************************************************************/
  154|       |    /* Do 4x4 transpose of u4_vert_bs2_dec by using lookup table for reorder */
  155|       |    /*************************************************************************/
  156|  6.72M|    u4_reordered_vert_bs2_dec = pu2_4x4_v2h_reorder[u4_vert_bs2_dec & 0xF];
  157|  6.72M|    u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 4) & 0xF];
  158|  6.72M|    u4_reordered_vert_bs2_dec |= (u4_temp << 1);
  159|  6.72M|    u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 8) & 0xF];
  160|  6.72M|    u4_reordered_vert_bs2_dec |= (u4_temp << 2);
  161|  6.72M|    u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 12) & 0xF];
  162|  6.72M|    u4_reordered_vert_bs2_dec |= (u4_temp << 3);
  163|       |
  164|       |    /*************************************************************************/
  165|       |    /* Fill vert edges (4,5,6,7) boundary strengths 2 using look up table    */
  166|       |    /*************************************************************************/
  167|  6.72M|    pu4_bs[4] = pu4_packed_bs2[u4_reordered_vert_bs2_dec & 0xF];
  168|  6.72M|    pu4_bs[5] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 4) & 0xF];
  169|  6.72M|    pu4_bs[6] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 8) & 0xF];
  170|  6.72M|    pu4_bs[7] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 12) & 0xF];
  171|  6.72M|}
ih264d_fill_bs1_16x16mb_pslice:
  206|  2.98M|{
  207|  2.98M|    WORD16 i2_q_mv0, i2_q_mv1;
  208|  2.98M|    WORD16 i2_p_mv0, i2_p_mv1;
  209|  2.98M|    void *pv_cur_pic_addr0, *pv_cur_pic_addr1;
  210|  2.98M|    void *pv_nbr_pic_addr0, *pv_nbr_pic_addr1;
  211|  2.98M|    void **ppv_map_ref_idx_to_poc_l0; //,*ppv_map_ref_idx_to_poc_l1;
  212|  2.98M|    UWORD32 i;
  213|  2.98M|    UWORD32 u4_bs_horz = pu4_bs_table[0];
  214|  2.98M|    UWORD32 u4_bs_vert = pu4_bs_table[4];
  215|       |
  216|  2.98M|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|  2.98M|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  217|       |
  218|  2.98M|    ppv_map_ref_idx_to_poc_l0 = ppv_map_ref_idx_to_poc;
  219|       |
  220|  2.98M|    i2_q_mv0 = ps_cur_mv_pred->i2_mv[0];
  221|  2.98M|    i2_q_mv1 = ps_cur_mv_pred->i2_mv[1];
  222|  2.98M|    pv_cur_pic_addr0 = ppv_map_ref_idx_to_poc_l0[ps_cur_mv_pred->i1_ref_frame[0]];
  223|  2.98M|    pv_cur_pic_addr1 = 0;
  224|       |
  225|       |    /*********************************/
  226|       |    /* Computing Bs for the top edge */
  227|       |    /*********************************/
  228|  14.9M|    for(i = 0; i < 4; i++, ps_top_mv_pred++)
  ------------------
  |  Branch (228:16): [True: 11.9M, False: 2.98M]
  ------------------
  229|  11.9M|    {
  230|  11.9M|        UWORD32 u4_idx = 24 - (i << 3);
  231|       |
  232|       |        /*********************************/
  233|       |        /* check if Bs is already set    */
  234|       |        /*********************************/
  235|  11.9M|        if(!((u4_bs_horz >> u4_idx) & 0xf))
  ------------------
  |  Branch (235:12): [True: 11.7M, False: 208k]
  ------------------
  236|  11.7M|        {
  237|       |            /************************************************************/
  238|       |            /* If Bs is not set, use left edge and current edge mvs and */
  239|       |            /* reference pictures addresses to evaluate Bs==1           */
  240|       |            /************************************************************/
  241|  11.7M|            UWORD32 u4_bs_temp1;
  242|  11.7M|            UWORD32 u4_bs;
  243|       |
  244|       |            /*********************************************************/
  245|       |            /* If any motion vector component differs by more than 1 */
  246|       |            /* integer pel or if reference pictures are different Bs */
  247|       |            /* is set to 1. Note that this condition shall be met for*/
  248|       |            /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  249|       |            /*********************************************************/
  250|  11.7M|            i2_p_mv0 = ps_top_mv_pred->i2_mv[0];
  251|  11.7M|            i2_p_mv1 = ps_top_mv_pred->i2_mv[1];
  252|  11.7M|            pv_nbr_pic_addr0 = u4_pic_addrress[i & 2];
  253|  11.7M|            pv_nbr_pic_addr1 = u4_pic_addrress[1 + (i & 2)];
  254|       |
  255|  11.7M|            u4_bs_temp1 = ((ABS((i2_p_mv0 - i2_q_mv0)) >= 4) ||
  ------------------
  |  |  100|  11.7M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 76.2k, False: 11.6M]
  |  |  ------------------
  ------------------
  |  Branch (255:28): [True: 65.3k, False: 11.6M]
  ------------------
  256|  11.6M|                           (ABS((i2_p_mv1 - i2_q_mv1)) >= i4_ver_mvlimit));
  ------------------
  |  |  100|  11.6M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 49.2k, False: 11.6M]
  |  |  ------------------
  ------------------
  |  Branch (256:28): [True: 39.1k, False: 11.6M]
  ------------------
  257|       |
  258|  11.7M|            u4_bs = ((pv_cur_pic_addr0 != pv_nbr_pic_addr0)
  ------------------
  |  Branch (258:22): [True: 506k, False: 11.2M]
  ------------------
  259|  11.2M|                            || (pv_cur_pic_addr1 != pv_nbr_pic_addr1)
  ------------------
  |  Branch (259:32): [True: 4.35k, False: 11.2M]
  ------------------
  260|  11.2M|                            || u4_bs_temp1);
  ------------------
  |  Branch (260:32): [True: 90.5k, False: 11.1M]
  ------------------
  261|       |
  262|  11.7M|            u4_bs_horz |= (u4_bs << u4_idx);
  263|  11.7M|        }
  264|  11.9M|    }
  265|  2.98M|    pu4_bs_table[0] = u4_bs_horz;
  266|       |
  267|       |    /***********************************/
  268|       |    /* Computing Bs for the left edge  */
  269|       |    /***********************************/
  270|  14.9M|    for(i = 0; i < 4; i++, ps_leftmost_mv_pred += 4)
  ------------------
  |  Branch (270:16): [True: 11.9M, False: 2.98M]
  ------------------
  271|  11.9M|    {
  272|  11.9M|        UWORD32 u4_idx = 24 - (i << 3);
  273|       |
  274|       |        /*********************************/
  275|       |        /* check if Bs is already set    */
  276|       |        /*********************************/
  277|  11.9M|        if(!((u4_bs_vert >> u4_idx) & 0xf))
  ------------------
  |  Branch (277:12): [True: 11.8M, False: 94.0k]
  ------------------
  278|  11.8M|        {
  279|       |            /****************************************************/
  280|       |            /* If Bs is not set, evalaute conditions for Bs=1   */
  281|       |            /****************************************************/
  282|  11.8M|            UWORD32 u4_bs_temp1;
  283|  11.8M|            UWORD32 u4_bs;
  284|       |            /*********************************************************/
  285|       |            /* If any motion vector component differs by more than 1 */
  286|       |            /* integer pel or if reference pictures are different Bs */
  287|       |            /* is set to 1. Note that this condition shall be met for*/
  288|       |            /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  289|       |            /*********************************************************/
  290|       |
  291|  11.8M|            i2_p_mv0 = ps_leftmost_mv_pred->i2_mv[0];
  292|  11.8M|            i2_p_mv1 = ps_leftmost_mv_pred->i2_mv[1];
  293|  11.8M|            pv_nbr_pic_addr0 = ps_left_addr->u4_add[i & 2];
  294|  11.8M|            pv_nbr_pic_addr1 = ps_left_addr->u4_add[1 + (i & 2)];
  295|       |
  296|  11.8M|            u4_bs_temp1 =
  297|  11.8M|                            ((ABS((i2_p_mv0 - i2_q_mv0))
  ------------------
  |  |  100|  11.8M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 54.3k, False: 11.7M]
  |  |  ------------------
  ------------------
  298|  11.8M|                                            >= 4)
  299|  11.8M|                                            | (ABS((i2_p_mv1 - i2_q_mv1))
  ------------------
  |  |  100|  11.8M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 49.4k, False: 11.7M]
  |  |  ------------------
  ------------------
  300|  11.8M|                                                            >= i4_ver_mvlimit));
  301|       |
  302|  11.8M|            u4_bs = ((pv_cur_pic_addr0 != pv_nbr_pic_addr0)
  ------------------
  |  Branch (302:22): [True: 50.1k, False: 11.7M]
  ------------------
  303|  11.7M|                            || (pv_cur_pic_addr1 != pv_nbr_pic_addr1)
  ------------------
  |  Branch (303:32): [True: 1.95k, False: 11.7M]
  ------------------
  304|  11.7M|                            || u4_bs_temp1);
  ------------------
  |  Branch (304:32): [True: 83.4k, False: 11.7M]
  ------------------
  305|       |
  306|  11.8M|            u4_bs_vert |= (u4_bs << u4_idx);
  307|  11.8M|        }
  308|  11.9M|    }
  309|  2.98M|    pu4_bs_table[4] = u4_bs_vert;
  310|       |
  311|  2.98M|    return;
  312|  2.98M|}
ih264d_fill_bs1_non16x16mb_pslice:
  347|  67.5k|{
  348|  67.5k|    UWORD32 edge;
  349|  67.5k|    void **ppv_map_ref_idx_to_poc_l0; //,*ppv_map_ref_idx_to_poc_l1;
  350|       |
  351|  67.5k|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|  67.5k|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  352|       |
  353|  67.5k|    ppv_map_ref_idx_to_poc_l0 = ppv_map_ref_idx_to_poc;
  354|       |
  355|       |
  356|   337k|    for(edge = 0; edge < 4; edge++, ps_top_mv_pred = ps_cur_mv_pred - 4)
  ------------------
  |  Branch (356:19): [True: 270k, False: 67.5k]
  ------------------
  357|   270k|    {
  358|       |        /*********************************************************************/
  359|       |        /* Each iteration of this loop fills the four BS values of one HORIZ */
  360|       |        /* edge and one BS value for each of the four VERT edges.            */
  361|       |        /*********************************************************************/
  362|   270k|        WORD32 i;
  363|   270k|        UWORD32 u4_vert_idx = 24 - (edge << 3);
  364|   270k|        UWORD32 u4_bs_horz = pu4_bs_table[edge];
  365|   270k|        mv_pred_t *ps_left_mv_pred = ps_leftmost_mv_pred + (edge << 2);
  366|       |
  367|  1.35M|        for(i = 0; i < 4; i++, ps_top_mv_pred++, ps_cur_mv_pred++)
  ------------------
  |  Branch (367:20): [True: 1.08M, False: 270k]
  ------------------
  368|  1.08M|        {
  369|  1.08M|            WORD16 i2_cur_mv0, i2_cur_mv1;
  370|  1.08M|            WORD8 i1_cur_ref0;
  371|  1.08M|            void *pv_cur_pic_addr0, *pv_cur_pic_addr1 = 0;
  372|  1.08M|            void *pv_nbr_pic_addr0, *pv_nbr_pic_addr1;
  373|       |
  374|       |            /******************************************************/
  375|       |            /* Each iteration of this inner loop computes a HORIZ */
  376|       |            /* and a VERT BS value for a 4x4 block                */
  377|       |            /******************************************************/
  378|  1.08M|            UWORD32 u4_bs_vert = (pu4_bs_table[i + 4] >> u4_vert_idx) & 0xf;
  379|  1.08M|            UWORD32 u4_horz_idx = 24 - (i << 3);
  380|       |
  381|       |            /*****************************************************/
  382|       |            /* check if vert Bs for this block is already set    */
  383|       |            /*****************************************************/
  384|  1.08M|            if(!u4_bs_vert)
  ------------------
  |  Branch (384:16): [True: 994k, False: 86.3k]
  ------------------
  385|   994k|            {
  386|   994k|                WORD16 i2_left_mv0, i2_left_mv1;
  387|       |                /************************************************************/
  388|       |                /* If Bs is not set, use left edge and current edge mvs and */
  389|       |                /* reference pictures addresses to evaluate Bs==1           */
  390|       |                /************************************************************/
  391|   994k|                i2_left_mv0 = ps_left_mv_pred->i2_mv[0];
  392|   994k|                i2_left_mv1 = ps_left_mv_pred->i2_mv[1];
  393|       |
  394|   994k|                i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0];
  395|   994k|                i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1];
  396|       |
  397|   994k|                i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
  398|       |
  399|   994k|                pv_cur_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0];
  400|   994k|                if(i)
  ------------------
  |  Branch (400:20): [True: 749k, False: 245k]
  ------------------
  401|   749k|                {
  402|   749k|                    WORD8 i1_left_ref0 = ps_left_mv_pred->i1_ref_frame[0];
  403|   749k|                    pv_nbr_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_left_ref0];
  404|   749k|                    pv_nbr_pic_addr1 = 0;
  405|   749k|                }
  406|   245k|                else
  407|   245k|                {
  408|   245k|                    pv_nbr_pic_addr0 = ps_left_addr->u4_add[edge & 2];
  409|   245k|                    pv_nbr_pic_addr1 = ps_left_addr->u4_add[1 + (edge & 2)];
  410|   245k|                }
  411|       |
  412|   994k|                {
  413|   994k|                    UWORD32 u4_bs_temp1;
  414|       |                    /*********************************************************/
  415|       |                    /* If any motion vector component differs by more than 1 */
  416|       |                    /* integer pel or if reference pictures are different Bs */
  417|       |                    /* is set to 1. Note that this condition shall be met for*/
  418|       |                    /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  419|       |                    /*********************************************************/
  420|       |
  421|   994k|                    u4_bs_temp1 =
  422|   994k|                                    ((ABS((i2_left_mv0 - i2_cur_mv0))
  ------------------
  |  |  100|   994k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 97.8k, False: 896k]
  |  |  ------------------
  ------------------
  423|   994k|                                                    >= 4)
  424|   994k|                                                    | (ABS((i2_left_mv1
  ------------------
  |  |  100|   994k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 103k, False: 890k]
  |  |  ------------------
  ------------------
  425|   994k|                                                                    - i2_cur_mv1))
  426|   994k|                                                                    >= i4_ver_mvlimit));
  427|       |
  428|   994k|                    u4_bs_vert = ((pv_nbr_pic_addr0 != pv_cur_pic_addr0)
  ------------------
  |  Branch (428:35): [True: 5.95k, False: 988k]
  ------------------
  429|   988k|                                    || (pv_nbr_pic_addr1 != pv_cur_pic_addr1)
  ------------------
  |  Branch (429:40): [True: 135, False: 988k]
  ------------------
  430|   988k|                                    || u4_bs_temp1);
  ------------------
  |  Branch (430:40): [True: 89.4k, False: 898k]
  ------------------
  431|       |
  432|   994k|                    pu4_bs_table[i + 4] |= (u4_bs_vert << u4_vert_idx);
  433|   994k|                }
  434|   994k|            }
  435|       |
  436|       |            /*****************************************************/
  437|       |            /* check if horz Bs for this block is already set    */
  438|       |            /*****************************************************/
  439|  1.08M|            if(!((u4_bs_horz >> u4_horz_idx) & 0xf))
  ------------------
  |  Branch (439:16): [True: 993k, False: 87.3k]
  ------------------
  440|   993k|            {
  441|   993k|                WORD16 i2_top_mv0, i2_top_mv1;
  442|       |                /************************************************************/
  443|       |                /* If Bs is not set, use top edge and current edge mvs and  */
  444|       |                /* reference pictures addresses to evaluate Bs==1           */
  445|       |                /************************************************************/
  446|   993k|                i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0];
  447|   993k|                i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1];
  448|       |
  449|   993k|                i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
  450|       |
  451|   993k|                i2_top_mv0 = ps_top_mv_pred->i2_mv[0];
  452|   993k|                i2_top_mv1 = ps_top_mv_pred->i2_mv[1];
  453|       |
  454|   993k|                pv_cur_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0];
  455|   993k|                if(edge)
  ------------------
  |  Branch (455:20): [True: 746k, False: 246k]
  ------------------
  456|   746k|                {
  457|   746k|                    WORD8 i1_top_ref0 = ps_top_mv_pred->i1_ref_frame[0];
  458|   746k|                    pv_nbr_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_top_ref0];
  459|   746k|                    pv_nbr_pic_addr1 = 0;
  460|   746k|                }
  461|   246k|                else
  462|   246k|                {
  463|   246k|                    pv_nbr_pic_addr0 = u4_pic_addrress[i & 2];
  464|   246k|                    pv_nbr_pic_addr1 = u4_pic_addrress[1 + (i & 2)];
  465|   246k|                }
  466|       |
  467|   993k|                {
  468|   993k|                    UWORD32 u4_bs_temp1;
  469|   993k|                    UWORD32 u4_bs;
  470|       |                    /*********************************************************/
  471|       |                    /* If any motion vector component differs by more than 1 */
  472|       |                    /* integer pel or if reference pictures are different Bs */
  473|       |                    /* is set to 1. Note that this condition shall be met for*/
  474|       |                    /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  475|       |                    /*********************************************************/
  476|       |
  477|   993k|                    u4_bs_temp1 =
  478|   993k|                                    ((ABS((i2_top_mv0 - i2_cur_mv0))
  ------------------
  |  |  100|   993k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 107k, False: 885k]
  |  |  ------------------
  ------------------
  479|   993k|                                                    >= 4)
  480|   993k|                                                    | (ABS((i2_top_mv1
  ------------------
  |  |  100|   993k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 110k, False: 883k]
  |  |  ------------------
  ------------------
  481|   993k|                                                                    - i2_cur_mv1))
  482|   993k|                                                                    >= i4_ver_mvlimit));
  483|       |
  484|   993k|                    u4_bs = ((pv_nbr_pic_addr0 != pv_cur_pic_addr0)
  ------------------
  |  Branch (484:30): [True: 11.7k, False: 981k]
  ------------------
  485|   981k|                                    || (pv_nbr_pic_addr1 != pv_cur_pic_addr1)
  ------------------
  |  Branch (485:40): [True: 449, False: 981k]
  ------------------
  486|   981k|                                    || u4_bs_temp1);
  ------------------
  |  Branch (486:40): [True: 77.3k, False: 903k]
  ------------------
  487|       |
  488|   993k|                    u4_bs_horz |= (u4_bs << u4_horz_idx);
  489|   993k|                }
  490|   993k|            }
  491|       |
  492|  1.08M|            ps_left_mv_pred = ps_cur_mv_pred;
  493|  1.08M|        }
  494|       |
  495|   270k|        pu4_bs_table[edge] = u4_bs_horz;
  496|   270k|    }
  497|  67.5k|}
ih264d_fill_bs1_16x16mb_bslice:
  532|  3.54M|{
  533|  3.54M|    WORD16 i2_q_mv0, i2_q_mv1, i2_q_mv2, i2_q_mv3;
  534|  3.54M|    WORD16 i2_p_mv0, i2_p_mv1, i2_p_mv2, i2_p_mv3;
  535|  3.54M|    void *pv_cur_pic_addr0, *pv_cur_pic_addr1;
  536|  3.54M|    void *pv_nbr_pic_addr0, *pv_nbr_pic_addr1;
  537|  3.54M|    void **ppv_map_ref_idx_to_poc_l0, **ppv_map_ref_idx_to_poc_l1;
  538|  3.54M|    UWORD32 i;
  539|  3.54M|    UWORD32 u4_bs_horz = pu4_bs_table[0];
  540|  3.54M|    UWORD32 u4_bs_vert = pu4_bs_table[4];
  541|       |
  542|  3.54M|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|  3.54M|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  543|       |
  544|  3.54M|    ppv_map_ref_idx_to_poc_l0 = ppv_map_ref_idx_to_poc;
  545|  3.54M|    ppv_map_ref_idx_to_poc_l1 = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  3.54M|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  3.54M|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  546|  3.54M|    i2_q_mv0 = ps_cur_mv_pred->i2_mv[0];
  547|  3.54M|    i2_q_mv1 = ps_cur_mv_pred->i2_mv[1];
  548|  3.54M|    i2_q_mv2 = ps_cur_mv_pred->i2_mv[2];
  549|  3.54M|    i2_q_mv3 = ps_cur_mv_pred->i2_mv[3];
  550|  3.54M|    pv_cur_pic_addr0 = ppv_map_ref_idx_to_poc_l0[ps_cur_mv_pred->i1_ref_frame[0]];
  551|  3.54M|    pv_cur_pic_addr1 = ppv_map_ref_idx_to_poc_l1[ps_cur_mv_pred->i1_ref_frame[1]];
  552|       |
  553|       |    /*********************************/
  554|       |    /* Computing Bs for the top edge */
  555|       |    /*********************************/
  556|  17.7M|    for(i = 0; i < 4; i++, ps_top_mv_pred++)
  ------------------
  |  Branch (556:16): [True: 14.1M, False: 3.54M]
  ------------------
  557|  14.1M|    {
  558|  14.1M|        UWORD32 u4_idx = 24 - (i << 3);
  559|       |
  560|       |        /*********************************/
  561|       |        /* check if Bs is already set    */
  562|       |        /*********************************/
  563|  14.1M|        if(!((u4_bs_horz >> u4_idx) & 0xf))
  ------------------
  |  Branch (563:12): [True: 14.1M, False: 50.8k]
  ------------------
  564|  14.1M|        {
  565|       |            /************************************************************/
  566|       |            /* If Bs is not set, use top edge and current edge mvs and  */
  567|       |            /* reference pictures addresses to evaluate Bs==1           */
  568|       |            /************************************************************/
  569|  14.1M|            UWORD32 u4_bs_temp1, u4_bs_temp2;
  570|  14.1M|            UWORD32 u4_bs;
  571|       |
  572|       |            /*********************************************************/
  573|       |            /* If any motion vector component differs by more than 1 */
  574|       |            /* integer pel or if reference pictures are different Bs */
  575|       |            /* is set to 1. Note that this condition shall be met for*/
  576|       |            /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  577|       |            /*********************************************************/
  578|  14.1M|            i2_p_mv0 = ps_top_mv_pred->i2_mv[0];
  579|  14.1M|            i2_p_mv1 = ps_top_mv_pred->i2_mv[1];
  580|  14.1M|            i2_p_mv2 = ps_top_mv_pred->i2_mv[2];
  581|  14.1M|            i2_p_mv3 = ps_top_mv_pred->i2_mv[3];
  582|  14.1M|            pv_nbr_pic_addr0 = u4_pic_addrress[i & 2];
  583|  14.1M|            pv_nbr_pic_addr1 = u4_pic_addrress[1 + (i & 2)];
  584|       |
  585|  14.1M|            u4_bs_temp1 =
  586|  14.1M|                            ((ABS((i2_p_mv0 - i2_q_mv0))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 56.0k, False: 14.0M]
  |  |  ------------------
  ------------------
  587|  14.1M|                                            >= 4)
  588|  14.1M|                                            | (ABS((i2_p_mv1 - i2_q_mv1))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 70.6k, False: 14.0M]
  |  |  ------------------
  ------------------
  589|  14.1M|                                                            >= i4_ver_mvlimit)
  590|  14.1M|                                            | (ABS((i2_p_mv2 - i2_q_mv2))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 69.8k, False: 14.0M]
  |  |  ------------------
  ------------------
  591|  14.1M|                                                            >= 4)
  592|  14.1M|                                            | (ABS((i2_p_mv3 - i2_q_mv3))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 49.2k, False: 14.0M]
  |  |  ------------------
  ------------------
  593|  14.1M|                                                            >= i4_ver_mvlimit));
  594|       |
  595|  14.1M|            u4_bs_temp2 =
  596|  14.1M|                            ((ABS((i2_p_mv0 - i2_q_mv2))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 339k, False: 13.8M]
  |  |  ------------------
  ------------------
  597|  14.1M|                                            >= 4)
  598|  14.1M|                                            | (ABS((i2_p_mv1 - i2_q_mv3))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 415k, False: 13.7M]
  |  |  ------------------
  ------------------
  599|  14.1M|                                                            >= i4_ver_mvlimit)
  600|  14.1M|                                            | (ABS((i2_p_mv2 - i2_q_mv0))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 9.23M, False: 4.90M]
  |  |  ------------------
  ------------------
  601|  14.1M|                                                            >= 4)
  602|  14.1M|                                            | (ABS((i2_p_mv3 - i2_q_mv1))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 9.27M, False: 4.86M]
  |  |  ------------------
  ------------------
  603|  14.1M|                                                            >= i4_ver_mvlimit));
  604|       |
  605|  14.1M|            u4_bs = ((pv_cur_pic_addr0 != pv_nbr_pic_addr0)
  ------------------
  |  Branch (605:22): [True: 486k, False: 13.6M]
  ------------------
  606|  13.6M|                            || (pv_cur_pic_addr1 != pv_nbr_pic_addr1)
  ------------------
  |  Branch (606:32): [True: 312k, False: 13.3M]
  ------------------
  607|  13.3M|                            || u4_bs_temp1)
  ------------------
  |  Branch (607:32): [True: 31.6k, False: 13.3M]
  ------------------
  608|   830k|                            && ((pv_cur_pic_addr0 != pv_nbr_pic_addr1)
  ------------------
  |  Branch (608:33): [True: 556k, False: 274k]
  ------------------
  609|   274k|                                            || (pv_cur_pic_addr1
  ------------------
  |  Branch (609:48): [True: 220k, False: 54.1k]
  ------------------
  610|   274k|                                                            != pv_nbr_pic_addr0)
  611|  54.1k|                                            || u4_bs_temp2);
  ------------------
  |  Branch (611:48): [True: 30.4k, False: 23.7k]
  ------------------
  612|       |
  613|  14.1M|            u4_bs_horz |= (u4_bs << u4_idx);
  614|  14.1M|        }
  615|  14.1M|    }
  616|  3.54M|    pu4_bs_table[0] = u4_bs_horz;
  617|       |
  618|       |    /***********************************/
  619|       |    /* Computing Bs for the left edge  */
  620|       |    /***********************************/
  621|  17.7M|    for(i = 0; i < 4; i++, ps_leftmost_mv_pred += 4)
  ------------------
  |  Branch (621:16): [True: 14.1M, False: 3.54M]
  ------------------
  622|  14.1M|    {
  623|  14.1M|        UWORD32 u4_idx = 24 - (i << 3);
  624|       |
  625|       |        /*********************************/
  626|       |        /* check if Bs is already set    */
  627|       |        /*********************************/
  628|  14.1M|        if(!((u4_bs_vert >> u4_idx) & 0xf))
  ------------------
  |  Branch (628:12): [True: 14.1M, False: 47.2k]
  ------------------
  629|  14.1M|        {
  630|       |            /****************************************************/
  631|       |            /* If Bs is not set, evaluate conditions for Bs=1   */
  632|       |            /****************************************************/
  633|  14.1M|            UWORD32 u4_bs_temp1, u4_bs_temp2;
  634|  14.1M|            UWORD32 u4_bs;
  635|       |            /*********************************************************/
  636|       |            /* If any motion vector component differs by more than 1 */
  637|       |            /* integer pel or if reference pictures are different Bs */
  638|       |            /* is set to 1. Note that this condition shall be met for*/
  639|       |            /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  640|       |            /*********************************************************/
  641|       |
  642|  14.1M|            i2_p_mv0 = ps_leftmost_mv_pred->i2_mv[0];
  643|  14.1M|            i2_p_mv1 = ps_leftmost_mv_pred->i2_mv[1];
  644|  14.1M|            i2_p_mv2 = ps_leftmost_mv_pred->i2_mv[2];
  645|  14.1M|            i2_p_mv3 = ps_leftmost_mv_pred->i2_mv[3];
  646|  14.1M|            pv_nbr_pic_addr0 = ps_left_addr->u4_add[i & 2];
  647|  14.1M|            pv_nbr_pic_addr1 = ps_left_addr->u4_add[1 + (i & 2)];
  648|       |
  649|  14.1M|            u4_bs_temp1 =
  650|  14.1M|                            ((ABS((i2_p_mv0 - i2_q_mv0))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 42.2k, False: 14.1M]
  |  |  ------------------
  ------------------
  651|  14.1M|                                            >= 4)
  652|  14.1M|                                            | (ABS((i2_p_mv1 - i2_q_mv1))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 50.7k, False: 14.0M]
  |  |  ------------------
  ------------------
  653|  14.1M|                                                            >= i4_ver_mvlimit)
  654|  14.1M|                                            | (ABS((i2_p_mv2 - i2_q_mv2))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 48.1k, False: 14.1M]
  |  |  ------------------
  ------------------
  655|  14.1M|                                                            >= 4)
  656|  14.1M|                                            | (ABS((i2_p_mv3 - i2_q_mv3))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 42.9k, False: 14.1M]
  |  |  ------------------
  ------------------
  657|  14.1M|                                                            >= i4_ver_mvlimit));
  658|       |
  659|  14.1M|            u4_bs_temp2 =
  660|  14.1M|                            ((ABS((i2_p_mv0 - i2_q_mv2))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 339k, False: 13.8M]
  |  |  ------------------
  ------------------
  661|  14.1M|                                            >= 4)
  662|  14.1M|                                            | (ABS((i2_p_mv1 - i2_q_mv3))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 412k, False: 13.7M]
  |  |  ------------------
  ------------------
  663|  14.1M|                                                            >= i4_ver_mvlimit)
  664|  14.1M|                                            | (ABS((i2_p_mv2 - i2_q_mv0))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 9.58M, False: 4.56M]
  |  |  ------------------
  ------------------
  665|  14.1M|                                                            >= 4)
  666|  14.1M|                                            | (ABS((i2_p_mv3 - i2_q_mv1))
  ------------------
  |  |  100|  14.1M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 9.62M, False: 4.51M]
  |  |  ------------------
  ------------------
  667|  14.1M|                                                            >= i4_ver_mvlimit));
  668|       |
  669|  14.1M|            u4_bs = ((pv_cur_pic_addr0 != pv_nbr_pic_addr0)
  ------------------
  |  Branch (669:22): [True: 152k, False: 13.9M]
  ------------------
  670|  13.9M|                            || (pv_cur_pic_addr1 != pv_nbr_pic_addr1)
  ------------------
  |  Branch (670:32): [True: 121k, False: 13.8M]
  ------------------
  671|  13.8M|                            || u4_bs_temp1)
  ------------------
  |  Branch (671:32): [True: 49.3k, False: 13.8M]
  ------------------
  672|   323k|                            && ((pv_cur_pic_addr0 != pv_nbr_pic_addr1)
  ------------------
  |  Branch (672:33): [True: 183k, False: 140k]
  ------------------
  673|   140k|                                            || (pv_cur_pic_addr1
  ------------------
  |  Branch (673:48): [True: 87.4k, False: 52.9k]
  ------------------
  674|   140k|                                                            != pv_nbr_pic_addr0)
  675|  52.9k|                                            || u4_bs_temp2);
  ------------------
  |  Branch (675:48): [True: 44.1k, False: 8.78k]
  ------------------
  676|       |
  677|  14.1M|            u4_bs_vert |= (u4_bs << u4_idx);
  678|  14.1M|        }
  679|  14.1M|    }
  680|  3.54M|    pu4_bs_table[4] = u4_bs_vert;
  681|       |
  682|  3.54M|    return;
  683|  3.54M|}
ih264d_fill_bs1_non16x16mb_bslice:
  718|   126k|{
  719|   126k|    UWORD32 edge;
  720|   126k|    void **ppv_map_ref_idx_to_poc_l0, **ppv_map_ref_idx_to_poc_l1;
  721|   126k|    ppv_map_ref_idx_to_poc_l0 = ppv_map_ref_idx_to_poc;
  722|   126k|    ppv_map_ref_idx_to_poc_l1 = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|   126k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|   126k|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  723|       |
  724|   126k|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|   126k|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  725|       |
  726|   632k|    for(edge = 0; edge < 4; edge++, ps_top_mv_pred = ps_cur_mv_pred - 4)
  ------------------
  |  Branch (726:19): [True: 506k, False: 126k]
  ------------------
  727|   506k|    {
  728|       |        /*********************************************************************/
  729|       |        /* Each iteration of this loop fills the four BS values of one HORIZ */
  730|       |        /* edge and one BS value for each of the four VERT edges.            */
  731|       |        /*********************************************************************/
  732|   506k|        WORD32 i;
  733|   506k|        UWORD32 u4_vert_idx = 24 - (edge << 3);
  734|   506k|        UWORD32 u4_bs_horz = pu4_bs_table[edge];
  735|   506k|        mv_pred_t *ps_left_mv_pred = ps_leftmost_mv_pred + (edge << 2);
  736|       |
  737|  2.53M|        for(i = 0; i < 4; i++, ps_top_mv_pred++, ps_cur_mv_pred++)
  ------------------
  |  Branch (737:20): [True: 2.02M, False: 506k]
  ------------------
  738|  2.02M|        {
  739|  2.02M|            WORD16 i2_cur_mv0, i2_cur_mv1, i16_curMv2, i16_curMv3;
  740|  2.02M|            WORD8 i1_cur_ref0, i1_cur_ref1;
  741|  2.02M|            void *pv_cur_pic_addr0, *pv_cur_pic_addr1;
  742|  2.02M|            void *pv_nbr_pic_addr0, *pv_nbr_pic_addr1;
  743|       |
  744|       |            /******************************************************/
  745|       |            /* Each iteration of this inner loop computes a HORIZ */
  746|       |            /* and a VERT BS value for a 4x4 block                */
  747|       |            /******************************************************/
  748|  2.02M|            UWORD32 u4_bs_vert = (pu4_bs_table[i + 4] >> u4_vert_idx) & 0xf;
  749|  2.02M|            UWORD32 u4_horz_idx = 24 - (i << 3);
  750|       |
  751|       |            /*****************************************************/
  752|       |            /* check if vert Bs for this block is already set    */
  753|       |            /*****************************************************/
  754|  2.02M|            if(!u4_bs_vert)
  ------------------
  |  Branch (754:16): [True: 1.91M, False: 113k]
  ------------------
  755|  1.91M|            {
  756|  1.91M|                WORD16 i2_left_mv0, i2_left_mv1, i2_left_mv2, i2_left_mv3;
  757|       |                /************************************************************/
  758|       |                /* If Bs is not set, use left edge and current edge mvs and */
  759|       |                /* reference pictures addresses to evaluate Bs==1           */
  760|       |                /************************************************************/
  761|  1.91M|                i2_left_mv0 = ps_left_mv_pred->i2_mv[0];
  762|  1.91M|                i2_left_mv1 = ps_left_mv_pred->i2_mv[1];
  763|  1.91M|                i2_left_mv2 = ps_left_mv_pred->i2_mv[2];
  764|  1.91M|                i2_left_mv3 = ps_left_mv_pred->i2_mv[3];
  765|       |
  766|  1.91M|                i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0];
  767|  1.91M|                i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1];
  768|  1.91M|                i16_curMv2 = ps_cur_mv_pred->i2_mv[2];
  769|  1.91M|                i16_curMv3 = ps_cur_mv_pred->i2_mv[3];
  770|  1.91M|                i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
  771|  1.91M|                i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1];
  772|  1.91M|                pv_cur_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0];
  773|  1.91M|                pv_cur_pic_addr1 = ppv_map_ref_idx_to_poc_l1[i1_cur_ref1];
  774|       |
  775|  1.91M|                if(i)
  ------------------
  |  Branch (775:20): [True: 1.43M, False: 474k]
  ------------------
  776|  1.43M|                {
  777|  1.43M|                    WORD8 i1_left_ref0, i1_left_ref1;
  778|  1.43M|                    i1_left_ref0 = ps_left_mv_pred->i1_ref_frame[0];
  779|  1.43M|                    i1_left_ref1 = ps_left_mv_pred->i1_ref_frame[1];
  780|  1.43M|                    pv_nbr_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_left_ref0];
  781|  1.43M|                    pv_nbr_pic_addr1 = ppv_map_ref_idx_to_poc_l1[i1_left_ref1];
  782|  1.43M|                }
  783|   474k|                else
  784|   474k|                {
  785|   474k|                    pv_nbr_pic_addr0 = ps_left_addr->u4_add[edge & 2];
  786|   474k|                    pv_nbr_pic_addr1 = ps_left_addr->u4_add[1 + (edge & 2)];
  787|   474k|                }
  788|       |
  789|  1.91M|                {
  790|  1.91M|                    UWORD32 u4_bs_temp1, u4_bs_temp2;
  791|       |                    /*********************************************************/
  792|       |                    /* If any motion vector component differs by more than 1 */
  793|       |                    /* integer pel or if reference pictures are different Bs */
  794|       |                    /* is set to 1. Note that this condition shall be met for*/
  795|       |                    /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  796|       |                    /*********************************************************/
  797|       |
  798|  1.91M|                    u4_bs_temp1 =
  799|  1.91M|                                    ((ABS((i2_left_mv0 - i2_cur_mv0))
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 263k, False: 1.64M]
  |  |  ------------------
  ------------------
  800|  1.91M|                                                    >= 4)
  801|  1.91M|                                                    | (ABS((i2_left_mv1
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 256k, False: 1.65M]
  |  |  ------------------
  ------------------
  802|  1.91M|                                                                    - i2_cur_mv1))
  803|  1.91M|                                                                    >= i4_ver_mvlimit)
  804|  1.91M|                                                    | (ABS((i2_left_mv2
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 73.2k, False: 1.83M]
  |  |  ------------------
  ------------------
  805|  1.91M|                                                                    - i16_curMv2))
  806|  1.91M|                                                                    >= 4)
  807|  1.91M|                                                    | (ABS((i2_left_mv3
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 52.4k, False: 1.85M]
  |  |  ------------------
  ------------------
  808|  1.91M|                                                                    - i16_curMv3))
  809|  1.91M|                                                                    >= i4_ver_mvlimit));
  810|       |
  811|  1.91M|                    u4_bs_temp2 =
  812|  1.91M|                                    ((ABS((i2_left_mv0 - i16_curMv2))
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 259k, False: 1.65M]
  |  |  ------------------
  ------------------
  813|  1.91M|                                                    >= 4)
  814|  1.91M|                                                    | (ABS((i2_left_mv1
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 261k, False: 1.64M]
  |  |  ------------------
  ------------------
  815|  1.91M|                                                                    - i16_curMv3))
  816|  1.91M|                                                                    >= i4_ver_mvlimit)
  817|  1.91M|                                                    | (ABS((i2_left_mv2
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 1.07M, False: 833k]
  |  |  ------------------
  ------------------
  818|  1.91M|                                                                    - i2_cur_mv0))
  819|  1.91M|                                                                    >= 4)
  820|  1.91M|                                                    | (ABS((i2_left_mv3
  ------------------
  |  |  100|  1.91M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 1.03M, False: 872k]
  |  |  ------------------
  ------------------
  821|  1.91M|                                                                    - i2_cur_mv1))
  822|  1.91M|                                                                    >= i4_ver_mvlimit));
  823|       |
  824|  1.91M|                    u4_bs_vert =
  825|  1.91M|                                    ((pv_nbr_pic_addr0 != pv_cur_pic_addr0)
  ------------------
  |  Branch (825:38): [True: 105k, False: 1.80M]
  ------------------
  826|  1.80M|                                                    || (pv_nbr_pic_addr1
  ------------------
  |  Branch (826:56): [True: 78.2k, False: 1.72M]
  ------------------
  827|  1.80M|                                                                    != pv_cur_pic_addr1)
  828|  1.72M|                                                    || u4_bs_temp1)
  ------------------
  |  Branch (828:56): [True: 282k, False: 1.44M]
  ------------------
  829|   465k|                                                    && ((pv_nbr_pic_addr0
  ------------------
  |  Branch (829:57): [True: 275k, False: 190k]
  ------------------
  830|   465k|                                                                    != pv_cur_pic_addr1)
  831|   190k|                                                                    || (pv_nbr_pic_addr1
  ------------------
  |  Branch (831:72): [True: 62.6k, False: 127k]
  ------------------
  832|   190k|                                                                                    != pv_cur_pic_addr0)
  833|   127k|                                                                    || u4_bs_temp2);
  ------------------
  |  Branch (833:72): [True: 116k, False: 11.2k]
  ------------------
  834|       |
  835|  1.91M|                    pu4_bs_table[i + 4] |= (u4_bs_vert << u4_vert_idx);
  836|  1.91M|                }
  837|  1.91M|            }
  838|       |
  839|       |            /*****************************************************/
  840|       |            /* check if horz Bs for this block is already set    */
  841|       |            /*****************************************************/
  842|  2.02M|            if(!((u4_bs_horz >> u4_horz_idx) & 0xf))
  ------------------
  |  Branch (842:16): [True: 1.90M, False: 117k]
  ------------------
  843|  1.90M|            {
  844|  1.90M|                WORD16 i2_top_mv0, i2_top_mv1, i16_topMv2, i16_topMv3;
  845|       |                /************************************************************/
  846|       |                /* If Bs is not set, use top edge and current edge mvs and  */
  847|       |                /* reference pictures addresses to evaluate Bs==1           */
  848|       |                /************************************************************/
  849|  1.90M|                i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0];
  850|  1.90M|                i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1];
  851|  1.90M|                i16_curMv2 = ps_cur_mv_pred->i2_mv[2];
  852|  1.90M|                i16_curMv3 = ps_cur_mv_pred->i2_mv[3];
  853|  1.90M|                i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
  854|  1.90M|                i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1];
  855|       |
  856|  1.90M|                i2_top_mv0 = ps_top_mv_pred->i2_mv[0];
  857|  1.90M|                i2_top_mv1 = ps_top_mv_pred->i2_mv[1];
  858|  1.90M|                i16_topMv2 = ps_top_mv_pred->i2_mv[2];
  859|  1.90M|                i16_topMv3 = ps_top_mv_pred->i2_mv[3];
  860|  1.90M|                pv_cur_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0];
  861|  1.90M|                pv_cur_pic_addr1 = ppv_map_ref_idx_to_poc_l1[i1_cur_ref1];
  862|  1.90M|                if(edge)
  ------------------
  |  Branch (862:20): [True: 1.43M, False: 473k]
  ------------------
  863|  1.43M|                {
  864|  1.43M|                    WORD8 i1_top_ref0, i1_top_ref1;
  865|  1.43M|                    i1_top_ref0 = ps_top_mv_pred->i1_ref_frame[0];
  866|  1.43M|                    i1_top_ref1 = ps_top_mv_pred->i1_ref_frame[1];
  867|  1.43M|                    pv_nbr_pic_addr0 = ppv_map_ref_idx_to_poc_l0[i1_top_ref0];
  868|  1.43M|                    pv_nbr_pic_addr1 = ppv_map_ref_idx_to_poc_l1[i1_top_ref1];
  869|  1.43M|                }
  870|   473k|                else
  871|   473k|                {
  872|   473k|                    pv_nbr_pic_addr0 = u4_pic_addrress[i & 2];
  873|   473k|                    pv_nbr_pic_addr1 = u4_pic_addrress[1 + (i & 2)];
  874|   473k|                }
  875|       |
  876|  1.90M|                {
  877|  1.90M|                    UWORD32 u4_bs_temp1, u4_bs_temp2;
  878|  1.90M|                    UWORD32 u4_bs;
  879|       |                    /*********************************************************/
  880|       |                    /* If any motion vector component differs by more than 1 */
  881|       |                    /* integer pel or if reference pictures are different Bs */
  882|       |                    /* is set to 1. Note that this condition shall be met for*/
  883|       |                    /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/
  884|       |                    /*********************************************************/
  885|       |
  886|  1.90M|                    u4_bs_temp1 =
  887|  1.90M|                                    ((ABS((i2_top_mv0 - i2_cur_mv0))
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 442k, False: 1.46M]
  |  |  ------------------
  ------------------
  888|  1.90M|                                                    >= 4)
  889|  1.90M|                                                    | (ABS((i2_top_mv1
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 397k, False: 1.50M]
  |  |  ------------------
  ------------------
  890|  1.90M|                                                                    - i2_cur_mv1))
  891|  1.90M|                                                                    >= i4_ver_mvlimit)
  892|  1.90M|                                                    | (ABS((i16_topMv2
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 93.5k, False: 1.81M]
  |  |  ------------------
  ------------------
  893|  1.90M|                                                                    - i16_curMv2))
  894|  1.90M|                                                                    >= 4)
  895|  1.90M|                                                    | (ABS((i16_topMv3
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 63.6k, False: 1.84M]
  |  |  ------------------
  ------------------
  896|  1.90M|                                                                    - i16_curMv3))
  897|  1.90M|                                                                    >= i4_ver_mvlimit));
  898|       |
  899|  1.90M|                    u4_bs_temp2 =
  900|  1.90M|                                    ((ABS((i2_top_mv0 - i16_curMv2))
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 258k, False: 1.64M]
  |  |  ------------------
  ------------------
  901|  1.90M|                                                    >= 4)
  902|  1.90M|                                                    | (ABS((i2_top_mv1
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 254k, False: 1.65M]
  |  |  ------------------
  ------------------
  903|  1.90M|                                                                    - i16_curMv3))
  904|  1.90M|                                                                    >= i4_ver_mvlimit)
  905|  1.90M|                                                    | (ABS((i16_topMv2
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 1.06M, False: 838k]
  |  |  ------------------
  ------------------
  906|  1.90M|                                                                    - i2_cur_mv0))
  907|  1.90M|                                                                    >= 4)
  908|  1.90M|                                                    | (ABS((i16_topMv3
  ------------------
  |  |  100|  1.90M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 1.03M, False: 872k]
  |  |  ------------------
  ------------------
  909|  1.90M|                                                                    - i2_cur_mv1))
  910|  1.90M|                                                                    >= i4_ver_mvlimit));
  911|       |
  912|  1.90M|                    u4_bs =
  913|  1.90M|                                    ((pv_nbr_pic_addr0 != pv_cur_pic_addr0)
  ------------------
  |  Branch (913:38): [True: 109k, False: 1.79M]
  ------------------
  914|  1.79M|                                                    || (pv_nbr_pic_addr1
  ------------------
  |  Branch (914:56): [True: 121k, False: 1.67M]
  ------------------
  915|  1.79M|                                                                    != pv_cur_pic_addr1)
  916|  1.67M|                                                    || u4_bs_temp1)
  ------------------
  |  Branch (916:56): [True: 383k, False: 1.29M]
  ------------------
  917|   614k|                                                    && ((pv_nbr_pic_addr0
  ------------------
  |  Branch (917:57): [True: 348k, False: 265k]
  ------------------
  918|   614k|                                                                    != pv_cur_pic_addr1)
  919|   265k|                                                                    || (pv_nbr_pic_addr1
  ------------------
  |  Branch (919:72): [True: 98.7k, False: 166k]
  ------------------
  920|   265k|                                                                                    != pv_cur_pic_addr0)
  921|   166k|                                                                    || u4_bs_temp2);
  ------------------
  |  Branch (921:72): [True: 148k, False: 18.1k]
  ------------------
  922|       |
  923|  1.90M|                    u4_bs_horz |= (u4_bs << u4_horz_idx);
  924|  1.90M|                }
  925|  1.90M|            }
  926|       |
  927|  2.02M|            ps_left_mv_pred = ps_cur_mv_pred;
  928|  2.02M|        }
  929|       |
  930|   506k|        pu4_bs_table[edge] = u4_bs_horz;
  931|   506k|    }
  932|   126k|}
ih264d_compute_bs_non_mbaff:
 1146|  6.82M|{
 1147|       |    /* Mvpred and Nnz for top and Courrent */
 1148|  6.82M|    mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred;
 1149|       |    /* deblk_mb_t Params */
 1150|  6.82M|    deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
 1151|  6.82M|    deblkmb_neighbour_t *ps_deblk_top_mb;
 1152|       |
 1153|       |    /* Reference Index to POC mapping*/
 1154|  6.82M|    void ** apv_map_ref_idx_to_poc;
 1155|  6.82M|    UWORD32 u4_leftmbtype;
 1156|       |
 1157|  6.82M|    UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp;
 1158|       |
 1159|       |    /* Set of flags */
 1160|  6.82M|    UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld;
 1161|  6.82M|    UWORD32 u1_cur_mb_type;
 1162|  6.82M|    UWORD32 * pu4_bs_table;
 1163|       |
 1164|       |    /* Neighbour availability */
 1165|       |    /* Initialization */
 1166|  6.82M|    const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
 1167|  6.82M|    const UWORD32 u2_mby = ps_cur_mb_info->u2_mby;
 1168|  6.82M|    const UWORD32 u1_pingpong = u2_mbx & 0x01;
 1169|       |
 1170|  6.82M|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|  6.82M|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
 1171|       |
 1172|  6.82M|    ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
 1173|       |
 1174|       |
 1175|       |    /* Pointer assignment for Current DeblkMB, Current Mv Pred  */
 1176|  6.82M|    ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb;
 1177|  6.82M|    ps_cur_mv_pred = ps_dec->ps_mv_cur + (u2_mbxn_mb << 4);
 1178|       |
 1179|  6.82M|    apv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc + 1;
 1180|  6.82M|    u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
 1181|  6.82M|    u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type;
 1182|  6.82M|    ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type;
 1183|       |
 1184|  6.82M|    {
 1185|  6.82M|        UWORD8 mb_qp_temp;
 1186|       |
 1187|  6.82M|        ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
 1188|  6.82M|        ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
 1189|       |
 1190|  6.82M|        ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
 1191|  6.82M|        ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
 1192|       |
 1193|  6.82M|    }
 1194|       |
 1195|       |    /* if no deblocking required for current Mb then continue */
 1196|       |    /* Check next Mbs   in Mb group                           */
 1197|  6.82M|    if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
  ------------------
  |  |   70|  6.82M|#define MB_DISABLE_FILTERING          0x01
  ------------------
  |  Branch (1197:8): [True: 1.19M, False: 5.62M]
  ------------------
 1198|  1.19M|    {
 1199|  1.19M|        void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
 1200|  1.19M|        POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  1.19M|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  1.19M|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
 1201|  1.19M|        {
 1202|       |            /* Store Parameter for Top MvPred refernce frame Address */
 1203|       |
 1204|  1.19M|            void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
 1205|  1.19M|            WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
 1206|  1.19M|            WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
 1207|       |
 1208|       |            /* Store Left addresses for Next Mb   */
 1209|  1.19M|            void ** ppv_left_mv_pred_addr =
 1210|  1.19M|                            ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
 1211|  1.19M|            WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
 1212|       |
 1213|       |
 1214|  1.19M|            ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
 1215|  1.19M|            ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
 1216|       |
 1217|  1.19M|            ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
 1218|  1.19M|            ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
 1219|  1.19M|            ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
 1220|  1.19M|            ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
 1221|       |
 1222|  1.19M|            ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
 1223|  1.19M|            ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
 1224|       |            //}
 1225|       |            /* Storing the leftMbtype for next Mb */
 1226|  1.19M|            ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
 1227|  1.19M|        }
 1228|       |
 1229|  1.19M|        return;
 1230|  1.19M|    }
 1231|       |
 1232|       |    /* Flag for extra left Edge */
 1233|  5.62M|    ps_cur_mb_params->u1_single_call = 1;
 1234|       |
 1235|       |    /* Update the Left deblk_mb_t and Left MvPred Parameters           */
 1236|  5.62M|    if(!u2_mbx)
  ------------------
  |  Branch (1236:8): [True: 698k, False: 4.92M]
  ------------------
 1237|   698k|    {
 1238|   698k|        u4_leftmbtype = 0;
 1239|       |
 1240|       |        /* Initialize the ps_left_mv_pred with Junk but Valid Location */
 1241|       |        /* to avoid invalid memory access                           */
 1242|       |        /* this is read only pointer                                */
 1243|   698k|        ps_left_mv_pred = ps_dec->ps_mv_cur + 3;
 1244|   698k|    }
 1245|  4.92M|    else
 1246|  4.92M|    {
 1247|  4.92M|        u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type;
 1248|       |
 1249|       |        /* Come to Left Most Edge of the MB */
 1250|  4.92M|        ps_left_mv_pred = (u2_mbxn_mb) ?
  ------------------
  |  Branch (1250:27): [True: 4.90M, False: 22.5k]
  ------------------
 1251|  4.90M|                        ps_dec->ps_mv_cur + ((u2_mbxn_mb - 1) << 4) + 3 :
 1252|  4.92M|                        ps_dec->ps_mv_left + 3;
 1253|  4.92M|    }
 1254|       |
 1255|  5.62M|    if(!u2_mby)
  ------------------
  |  Branch (1255:8): [True: 212k, False: 5.40M]
  ------------------
 1256|   212k|        u1_top_mb_typ = 0;
 1257|       |
 1258|       |    /* MvPred Pointer Calculation */
 1259|       |    /* CHANGED CODE */
 1260|  5.62M|    ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12;
 1261|       |
 1262|  5.62M|    u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB;
  ------------------
  |  |  382|  5.62M|#define D_INTRA_MB        1
  ------------------
 1263|  5.62M|    u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB);
  ------------------
  |  |  386|  5.62M|#define D_FLD_MB          0x80
  ------------------
 1264|       |    /* Compute BS function */
 1265|  5.62M|    pu4_bs_table = ps_cur_mb_params->u4_bs_table;
 1266|       |
 1267|  5.62M|    u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
 1268|  5.62M|    u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
 1269|  5.62M|    u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
 1270|       |    /* Compute BS function */
 1271|  5.62M|    if(ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC)
  ------------------
  |  |  278|  5.62M|#define HIGH_PROFILE_IDC   100
  ------------------
  |  Branch (1271:8): [True: 1.22M, False: 4.39M]
  ------------------
 1272|  1.22M|    {
 1273|  1.22M|        if(ps_cur_mb_info->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (1273:12): [True: 128k, False: 1.09M]
  ------------------
 1274|   128k|        {
 1275|   128k|            u2_cur_csbp = ih264d_update_csbp_8x8(
 1276|   128k|                            ps_cur_mb_info->ps_curmb->u2_luma_csbp);
 1277|   128k|        }
 1278|       |
 1279|  1.22M|        if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (1279:12): [True: 86.8k, False: 1.14M]
  ------------------
 1280|  86.8k|        {
 1281|  86.8k|            u2_left_csbp = ih264d_update_csbp_8x8(
 1282|  86.8k|                            ps_cur_mb_info->ps_left_mb->u2_luma_csbp);
 1283|  86.8k|        }
 1284|       |
 1285|  1.22M|        if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (1285:12): [True: 107k, False: 1.11M]
  ------------------
 1286|   107k|        {
 1287|   107k|            u2_top_csbp = ih264d_update_csbp_8x8(
 1288|   107k|                            ps_cur_mb_info->ps_top_mb->u2_luma_csbp);
 1289|   107k|        }
 1290|  1.22M|    }
 1291|  5.62M|    if(u4_cur_mb_intra)
  ------------------
  |  Branch (1291:8): [True: 31.2k, False: 5.59M]
  ------------------
 1292|  31.2k|    {
 1293|       |
 1294|  31.2k|        pu4_bs_table[4] = 0x04040404;
 1295|  31.2k|        pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  Branch (1295:27): [True: 0, False: 31.2k]
  ------------------
 1296|  31.2k|        pu4_bs_table[1] = 0x03030303;
 1297|  31.2k|        pu4_bs_table[2] = 0x03030303;
 1298|  31.2k|        pu4_bs_table[3] = 0x03030303;
 1299|  31.2k|        pu4_bs_table[5] = 0x03030303;
 1300|  31.2k|        pu4_bs_table[6] = 0x03030303;
 1301|  31.2k|        pu4_bs_table[7] = 0x03030303;
 1302|  31.2k|    }
 1303|  5.59M|    else
 1304|  5.59M|    {
 1305|  5.59M|        UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16);
  ------------------
  |  |  383|  5.59M|#define D_PRED_NON_16x16  2
  ------------------
 1306|  5.59M|        UWORD32 u4_is_b = ps_dec->u1_B;
 1307|       |
 1308|  5.59M|        ih264d_fill_bs2_horz_vert(
 1309|  5.59M|                        pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp,
 1310|  5.59M|                        (const UWORD32 *)(gau4_ih264d_packed_bs2),
 1311|  5.59M|                        (const UWORD16 *)(gau2_ih264d_4x4_v2h_reorder));
 1312|       |
 1313|  5.59M|        if(u4_leftmbtype & D_INTRA_MB)
  ------------------
  |  |  382|  5.59M|#define D_INTRA_MB        1
  ------------------
  |  Branch (1313:12): [True: 5.67k, False: 5.58M]
  ------------------
 1314|  5.67k|            pu4_bs_table[4] = 0x04040404;
 1315|       |
 1316|  5.59M|        if(u1_top_mb_typ & D_INTRA_MB)
  ------------------
  |  |  382|  5.59M|#define D_INTRA_MB        1
  ------------------
  |  Branch (1316:12): [True: 26.9k, False: 5.56M]
  ------------------
 1317|  26.9k|            pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  Branch (1317:31): [True: 0, False: 26.9k]
  ------------------
 1318|       |
 1319|  5.59M|        ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16](
 1320|  5.59M|                        ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc,
 1321|  5.59M|                        pu4_bs_table, ps_left_mv_pred,
 1322|  5.59M|                        &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]),
 1323|  5.59M|                        ps_cur_mb_info->ps_top_mb->u4_pic_addrress,
 1324|  5.59M|                        (4 >> u4_cur_mb_fld));
 1325|  5.59M|    }
 1326|       |
 1327|  5.62M|    {
 1328|  5.62M|        void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
 1329|  5.62M|        POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  5.62M|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  5.62M|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
 1330|  5.62M|        {
 1331|       |            /* Store Parameter for Top MvPred refernce frame Address */
 1332|       |
 1333|  5.62M|            void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
 1334|  5.62M|            WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
 1335|  5.62M|            WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
 1336|       |
 1337|       |            /* Store Left addresses for Next Mb   */
 1338|  5.62M|            void ** ppv_left_mv_pred_addr =
 1339|  5.62M|                            ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
 1340|  5.62M|            WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
 1341|       |
 1342|  5.62M|            ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
 1343|  5.62M|            ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
 1344|       |
 1345|  5.62M|            ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
 1346|  5.62M|            ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
 1347|  5.62M|            ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
 1348|  5.62M|            ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
 1349|       |
 1350|  5.62M|            ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
 1351|  5.62M|            ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
 1352|       |
 1353|       |            /* Storing the leftMbtype for next Mb */
 1354|  5.62M|            ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
 1355|       |
 1356|  5.62M|        }
 1357|  5.62M|    }
 1358|       |
 1359|       |    /* For transform 8x8 disable deblocking of the intrernal edges of a 8x8 block */
 1360|  5.62M|    if(ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (1360:8): [True: 141k, False: 5.48M]
  ------------------
 1361|   141k|    {
 1362|   141k|        pu4_bs_table[1] = 0;
 1363|   141k|        pu4_bs_table[3] = 0;
 1364|   141k|        pu4_bs_table[5] = 0;
 1365|   141k|        pu4_bs_table[7] = 0;
 1366|   141k|    }
 1367|  5.62M|}

ih264d_filter_boundary_left_nonmbaff:
   87|  4.88M|{
   88|  4.88M|    UWORD8 *pu1_y, *pu1_u, *pu1_v;
   89|  4.88M|    WORD32 uc_tmp, qp_avg;
   90|  4.88M|    WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0;
   91|  4.88M|    WORD32 alpha_y = 0, beta_y = 0;
   92|       |
   93|  4.88M|    WORD32 idx_b_u, idx_a_u, idx_b_v, idx_a_v;
   94|  4.88M|    WORD32 idx_b_y, idx_a_y;
   95|       |
   96|  4.88M|    UWORD32 u4_bs_val;
   97|       |
   98|  4.88M|    UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y;
   99|       |
  100|  4.88M|    UWORD8 u1_double_cl = !ps_cur_mb->u1_single_call;
  101|  4.88M|    WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
  102|  4.88M|    WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset;
  103|       |
  104|  4.88M|    PROFILE_DISABLE_DEBLK()
  ------------------
  |  |   91|  4.88M|#define PROFILE_DISABLE_DEBLK() ;
  ------------------
  105|       |
  106|  4.88M|    pu1_y = ps_tfr_cxt->pu1_mb_y;
  107|  4.88M|    pu1_u = ps_tfr_cxt->pu1_mb_u;
  108|  4.88M|    pu1_v = ps_tfr_cxt->pu1_mb_v;
  109|       |
  110|       |    /* LUMA values */
  111|       |    /* Deblock rounding change */
  112|  4.88M|    qp_avg =
  113|  4.88M|                    (UWORD8)((ps_cur_mb->u1_left_mb_qp + ps_cur_mb->u1_mb_qp + 1)
  114|  4.88M|                                    >> 1);
  115|       |
  116|  4.88M|    idx_a_y = qp_avg + ofst_a;
  117|  4.88M|    alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
  118|  4.88M|    idx_b_y = qp_avg + ofst_b;
  119|  4.88M|    beta_y = gau1_ih264d_beta_table[12 + idx_b_y];
  120|       |
  121|       |    /* Chroma cb values */
  122|  4.88M|    {
  123|  4.88M|        WORD32 mb_qp1, mb_qp2;
  124|  4.88M|        mb_qp1 = (ps_cur_mb->u1_left_mb_qp + i1_cb_qp_idx_ofst);
  125|  4.88M|        mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst);
  126|  4.88M|        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
  127|  4.88M|                        + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
  128|  4.88M|    }
  129|  4.88M|    idx_a_u = qp_avg + ofst_a;
  130|  4.88M|    alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
  131|  4.88M|    idx_b_u = qp_avg + ofst_b;
  132|  4.88M|    beta_u = gau1_ih264d_beta_table[12 + idx_b_u];
  133|       |    /* Chroma cr values */
  134|  4.88M|    {
  135|  4.88M|        WORD32 mb_qp1, mb_qp2;
  136|  4.88M|        mb_qp1 = (ps_cur_mb->u1_left_mb_qp + i1_cr_qp_idx_ofst);
  137|  4.88M|        mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst);
  138|  4.88M|        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
  139|  4.88M|                        + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
  140|  4.88M|    }
  141|  4.88M|    idx_a_v = qp_avg + ofst_a;
  142|  4.88M|    alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
  143|  4.88M|    idx_b_v = qp_avg + ofst_b;
  144|  4.88M|    beta_v = gau1_ih264d_beta_table[12 + idx_b_v];
  145|       |
  146|  4.88M|    if(u1_double_cl == 0)
  ------------------
  |  Branch (146:8): [True: 3.83M, False: 1.04M]
  ------------------
  147|  3.83M|    {
  148|  3.83M|        u4_bs_val = pu4_bs_tab[4];
  149|       |
  150|  3.83M|        if(0x04040404 == u4_bs_val)
  ------------------
  |  Branch (150:12): [True: 75.6k, False: 3.75M]
  ------------------
  151|  75.6k|        {
  152|  75.6k|            ps_dec->pf_deblk_luma_vert_bs4(pu1_y, i4_strd_y, alpha_y, beta_y);
  153|  75.6k|            ps_dec->pf_deblk_chroma_vert_bs4(pu1_u, i4_strd_uv, alpha_u,
  154|  75.6k|                                             beta_u, alpha_v, beta_v);
  155|  75.6k|        }
  156|  3.75M|        else
  157|  3.75M|        {
  158|  3.75M|            if(u4_bs_val)
  ------------------
  |  Branch (158:16): [True: 141k, False: 3.61M]
  ------------------
  159|   141k|            {
  160|       |
  161|   141k|                pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
  162|   141k|                pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
  163|   141k|                pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
  164|   141k|                ps_dec->pf_deblk_luma_vert_bslt4(pu1_y, i4_strd_y, alpha_y,
  165|   141k|                                                 beta_y, u4_bs_val,
  166|   141k|                                                 pu1_cliptab_y);
  167|   141k|                ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u, i4_strd_uv, alpha_u,
  168|   141k|                                                   beta_u, alpha_v, beta_v,
  169|   141k|                                                   u4_bs_val, pu1_cliptab_u,
  170|   141k|                                                   pu1_cliptab_v);
  171|       |
  172|   141k|            }
  173|  3.75M|        }
  174|       |
  175|  3.83M|    }
  176|  1.04M|    else
  177|  1.04M|    {
  178|       |
  179|  1.04M|        i4_strd_y <<= (!u1_cur_fld);
  180|  1.04M|        u4_bs_val = pu4_bs_tab[4];
  181|  1.04M|        i4_strd_uv <<= (!u1_cur_fld);
  182|       |
  183|  1.04M|        if(0x04040404 == u4_bs_val)
  ------------------
  |  Branch (183:12): [True: 0, False: 1.04M]
  ------------------
  184|      0|        {
  185|       |
  186|      0|            ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y,
  187|      0|                                                 beta_y);
  188|      0|            ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u,
  189|      0|                                                   beta_u, alpha_v, beta_v);
  190|       |
  191|      0|        }
  192|  1.04M|        else
  193|  1.04M|        {
  194|  1.04M|            if(u4_bs_val)
  ------------------
  |  Branch (194:16): [True: 0, False: 1.04M]
  ------------------
  195|      0|            {
  196|       |
  197|      0|                pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
  198|      0|                pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
  199|      0|                pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
  200|       |
  201|      0|                ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y,
  202|      0|                                                       alpha_y, beta_y,
  203|      0|                                                       u4_bs_val,
  204|      0|                                                       pu1_cliptab_y);
  205|      0|                ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv,
  206|      0|                                                         alpha_u, beta_u,
  207|      0|                                                         alpha_v, beta_v,
  208|      0|                                                         u4_bs_val,
  209|      0|                                                         pu1_cliptab_u,
  210|      0|                                                         pu1_cliptab_v);
  211|      0|            }
  212|  1.04M|        }
  213|       |
  214|  1.04M|        {
  215|       |
  216|  1.04M|            UWORD16 u2_shift = (i4_strd_y >> 1) << (u1_cur_fld ? 4 : 0);
  ------------------
  |  Branch (216:53): [True: 0, False: 1.04M]
  ------------------
  217|  1.04M|            pu1_y += u2_shift;
  218|  1.04M|            u2_shift = (i4_strd_uv >> 1) << (u1_cur_fld ? 3 : 0);
  ------------------
  |  Branch (218:46): [True: 0, False: 1.04M]
  ------------------
  219|  1.04M|            pu1_u += u2_shift;
  220|  1.04M|            pu1_v += u2_shift;
  221|  1.04M|        }
  222|       |
  223|  1.04M|        qp_avg = (((ps_left_mb + 1)->u1_mb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1);
  224|       |
  225|  1.04M|        idx_a_y = qp_avg + ofst_a;
  226|  1.04M|        alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
  227|  1.04M|        idx_b_y = qp_avg + ofst_b;
  228|  1.04M|        beta_y = gau1_ih264d_beta_table[12 + idx_b_y];
  229|  1.04M|        u4_bs_val = pu4_bs_tab[9];
  230|       |
  231|  1.04M|        {
  232|  1.04M|            WORD32 mb_qp1, mb_qp2;
  233|  1.04M|            mb_qp1 = ((ps_left_mb + 1)->u1_mb_qp + i1_cb_qp_idx_ofst);
  234|  1.04M|            mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst);
  235|  1.04M|            qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
  236|  1.04M|                            + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
  237|  1.04M|        }
  238|  1.04M|        idx_a_u = qp_avg + ofst_a;
  239|  1.04M|        alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
  240|  1.04M|        idx_b_u = qp_avg + ofst_b;
  241|  1.04M|        beta_u = gau1_ih264d_beta_table[12 + idx_b_u];
  242|  1.04M|        u4_bs_val = pu4_bs_tab[9];
  243|  1.04M|        {
  244|  1.04M|            WORD32 mb_qp1, mb_qp2;
  245|  1.04M|            mb_qp1 = ((ps_left_mb + 1)->u1_mb_qp + i1_cr_qp_idx_ofst);
  246|  1.04M|            mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst);
  247|  1.04M|            qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
  248|  1.04M|                            + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
  249|  1.04M|        }
  250|  1.04M|        idx_a_v = qp_avg + ofst_a;
  251|  1.04M|        alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
  252|  1.04M|        idx_b_v = qp_avg + ofst_b;
  253|  1.04M|        beta_v = gau1_ih264d_beta_table[12 + idx_b_v];
  254|       |
  255|  1.04M|        if(0x04040404 == u4_bs_val)
  ------------------
  |  Branch (255:12): [True: 0, False: 1.04M]
  ------------------
  256|      0|        {
  257|      0|            ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y,
  258|      0|                                                 beta_y);
  259|      0|            ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u,
  260|      0|                                                   beta_u, alpha_v, beta_v);
  261|       |
  262|      0|        }
  263|  1.04M|        else
  264|  1.04M|        {
  265|  1.04M|            if(u4_bs_val)
  ------------------
  |  Branch (265:16): [True: 0, False: 1.04M]
  ------------------
  266|      0|            {
  267|       |
  268|      0|                pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
  269|      0|                pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
  270|      0|                pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
  271|       |
  272|      0|                ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y,
  273|      0|                                                       alpha_y, beta_y,
  274|      0|                                                       u4_bs_val,
  275|      0|                                                       pu1_cliptab_y);
  276|      0|                ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv,
  277|      0|                                                         alpha_u, beta_u,
  278|      0|                                                         alpha_v, beta_v,
  279|      0|                                                         u4_bs_val,
  280|      0|                                                         pu1_cliptab_u,
  281|      0|                                                         pu1_cliptab_v);
  282|       |
  283|      0|            }
  284|  1.04M|        }
  285|  1.04M|    }
  286|       |
  287|  4.88M|}
ih264d_filter_boundary_top_nonmbaff:
  310|  5.52M|{
  311|  5.52M|    UWORD8 *pu1_y, *pu1_u;
  312|  5.52M|    WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0;
  313|  5.52M|    WORD32 alpha_y = 0, beta_y = 0;
  314|  5.52M|    WORD32 qp_avg;
  315|  5.52M|    WORD32 idx_b_u, idx_a_u, idx_b_v, idx_a_v;
  316|  5.52M|    WORD32 idx_b_y, idx_a_y;
  317|  5.52M|    UWORD16 uc_tmp;
  318|       |
  319|  5.52M|    UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y;
  320|  5.52M|    WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
  321|  5.52M|    WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset;
  322|       |
  323|  5.52M|    UNUSED(ps_top_mb);
  ------------------
  |  |   45|  5.52M|#define UNUSED(x) ((void)(x))
  ------------------
  324|       |    /* LUMA values */
  325|       |    /* Deblock rounding change */
  326|  5.52M|    uc_tmp = ((ps_cur_mb->u1_topmb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1);
  327|  5.52M|    qp_avg = (UWORD8)uc_tmp;
  328|  5.52M|    idx_a_y = qp_avg + ofst_a;
  329|  5.52M|    alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
  330|  5.52M|    idx_b_y = qp_avg + ofst_b;
  331|  5.52M|    beta_y = gau1_ih264d_beta_table[12 + idx_b_y];
  332|  5.52M|    pu1_y = ps_tfr_cxt->pu1_mb_y;
  333|       |
  334|       |    /* CHROMA cb values */
  335|  5.52M|    {
  336|  5.52M|        WORD32 mb_qp1, mb_qp2;
  337|  5.52M|        mb_qp1 = (ps_cur_mb->u1_topmb_qp + i1_cb_qp_idx_ofst);
  338|  5.52M|        mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst);
  339|  5.52M|        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
  340|  5.52M|                        + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
  341|  5.52M|    }
  342|       |
  343|  5.52M|    idx_a_u = qp_avg + ofst_a;
  344|  5.52M|    alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
  345|  5.52M|    idx_b_u = qp_avg + ofst_b;
  346|  5.52M|    beta_u = gau1_ih264d_beta_table[12 + idx_b_u];
  347|       |    /* CHROMA cr values */
  348|  5.52M|    {
  349|  5.52M|        WORD32 mb_qp1, mb_qp2;
  350|  5.52M|        mb_qp1 = (ps_cur_mb->u1_topmb_qp + i1_cr_qp_idx_ofst);
  351|  5.52M|        mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst);
  352|  5.52M|        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
  353|  5.52M|                        + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
  354|  5.52M|    }
  355|       |
  356|  5.52M|    idx_a_v = qp_avg + ofst_a;
  357|  5.52M|    alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
  358|  5.52M|    idx_b_v = qp_avg + ofst_b;
  359|  5.52M|    beta_v = gau1_ih264d_beta_table[12 + idx_b_v];
  360|  5.52M|    pu1_u = ps_tfr_cxt->pu1_mb_u;
  361|       |
  362|  5.52M|    if(u4_bs == 0x04040404)
  ------------------
  |  Branch (362:8): [True: 85.3k, False: 5.44M]
  ------------------
  363|  85.3k|    {
  364|       |        /* Code specific to the assembly module */
  365|       |
  366|  85.3k|        ps_dec->pf_deblk_luma_horz_bs4(pu1_y, i4_strd_y, alpha_y, beta_y);
  367|  85.3k|        ps_dec->pf_deblk_chroma_horz_bs4(pu1_u, i4_strd_uv, alpha_u, beta_u,
  368|  85.3k|                                         alpha_v, beta_v);
  369|  85.3k|    }
  370|  5.44M|    else
  371|  5.44M|    {
  372|  5.44M|        if(u4_bs)
  ------------------
  |  Branch (372:12): [True: 188k, False: 5.25M]
  ------------------
  373|   188k|        {
  374|       |
  375|   188k|            pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
  376|   188k|            pu1_cliptab_u =
  377|   188k|                            (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
  378|   188k|            pu1_cliptab_v =
  379|   188k|                            (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
  380|       |
  381|   188k|            ps_dec->pf_deblk_luma_horz_bslt4(pu1_y, i4_strd_y, alpha_y, beta_y,
  382|   188k|                                             u4_bs, pu1_cliptab_y);
  383|   188k|            ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u, i4_strd_uv, alpha_u,
  384|   188k|                                               beta_u, alpha_v, beta_v,
  385|   188k|                                               u4_bs, pu1_cliptab_u,
  386|   188k|                                               pu1_cliptab_v);
  387|       |
  388|   188k|        }
  389|  5.44M|    }
  390|       |
  391|  5.52M|}
ih264d_deblock_mb_nonmbaff:
  399|  7.17M|{
  400|  7.17M|    UWORD8 *pu1_y, *pu1_u;
  401|  7.17M|    UWORD32 u4_bs;
  402|       |
  403|  7.17M|    WORD32 alpha, beta, alpha_u, beta_u, alpha_v, beta_v;
  404|       |
  405|  7.17M|    UWORD8 *pu1_cliptab_u;
  406|  7.17M|    UWORD8 *pu1_cliptab_v;
  407|  7.17M|    UWORD8 *pu1_cliptab_y;
  408|       |
  409|  7.17M|    UWORD32 * pu4_bs_tab;
  410|  7.17M|    WORD32 idx_a_y, idx_a_u, idx_a_v;
  411|  7.17M|    UWORD32 u4_deb_mode, u4_mbs_next;
  412|  7.17M|    UWORD32 u4_image_wd_mb;
  413|  7.17M|    deblk_mb_t *ps_top_mb,*ps_left_mb,*ps_cur_mb;
  414|       |
  415|  7.17M|    PROFILE_DISABLE_DEBLK()
  ------------------
  |  |   91|  7.17M|#define PROFILE_DISABLE_DEBLK() ;
  ------------------
  416|       |    /* Return from here to switch off deblocking */
  417|       |
  418|  7.17M|    u4_image_wd_mb = ps_dec->u2_frm_wd_in_mbs;
  419|       |
  420|  7.17M|    ps_cur_mb = ps_dec->ps_cur_deblk_mb;
  421|  7.17M|    pu4_bs_tab = ps_cur_mb->u4_bs_table;
  422|  7.17M|    u4_deb_mode = ps_cur_mb->u1_deblocking_mode;
  423|  7.17M|     if(!(u4_deb_mode & MB_DISABLE_FILTERING))
  ------------------
  |  |   70|  7.17M|#define MB_DISABLE_FILTERING          0x01
  ------------------
  |  Branch (423:9): [True: 5.84M, False: 1.32M]
  ------------------
  424|  5.84M|     {
  425|       |
  426|  5.84M|         if(ps_dec->u4_deblk_mb_x)
  ------------------
  |  Branch (426:13): [True: 4.88M, False: 966k]
  ------------------
  427|  4.88M|         {
  428|  4.88M|             ps_left_mb = ps_cur_mb - 1;
  429|       |
  430|  4.88M|         }
  431|   966k|         else
  432|   966k|         {
  433|   966k|             ps_left_mb = NULL;
  434|       |
  435|   966k|         }
  436|  5.84M|         if(ps_dec->u4_deblk_mb_y != 0)
  ------------------
  |  Branch (436:13): [True: 5.53M, False: 310k]
  ------------------
  437|  5.53M|         {
  438|  5.53M|             ps_top_mb = ps_cur_mb - (u4_image_wd_mb);
  439|  5.53M|         }
  440|   310k|         else
  441|   310k|         {
  442|   310k|             ps_top_mb = NULL;
  443|   310k|         }
  444|       |
  445|  5.84M|         if(u4_deb_mode & MB_DISABLE_LEFT_EDGE)
  ------------------
  |  |   72|  5.84M|#define MB_DISABLE_LEFT_EDGE          0x04
  ------------------
  |  Branch (445:13): [True: 95.3k, False: 5.75M]
  ------------------
  446|  95.3k|             ps_left_mb = NULL;
  447|  5.84M|         if(u4_deb_mode & MB_DISABLE_TOP_EDGE)
  ------------------
  |  |   71|  5.84M|#define MB_DISABLE_TOP_EDGE           0x02
  ------------------
  |  Branch (447:13): [True: 37.5k, False: 5.81M]
  ------------------
  448|  37.5k|             ps_top_mb = NULL;
  449|       |
  450|       |        /*---------------------------------------------------------------------*/
  451|       |        /* Filter wrt Left edge                                                */
  452|       |        /* except                                                              */
  453|       |        /*      - Left Edge is Picture Boundary                                */
  454|       |        /*      - Left Edge is part of Slice Boundary and Deblocking           */
  455|       |        /*        parameters of slice disable Filtering of Slice Boundary Edges*/
  456|       |        /*---------------------------------------------------------------------*/
  457|  5.84M|        if(ps_left_mb)
  ------------------
  |  Branch (457:12): [True: 4.88M, False: 966k]
  ------------------
  458|  4.88M|            ih264d_filter_boundary_left_nonmbaff(ps_dec, ps_tfr_cxt,
  459|  4.88M|                                                 i1_cb_qp_idx_ofst,
  460|  4.88M|                                                 i1_cr_qp_idx_ofst, ps_cur_mb,
  461|  4.88M|                                                 i4_strd_y, i4_strd_uv, ps_left_mb,
  462|  4.88M|                                                 pu4_bs_tab, 0);
  463|       |
  464|       |        /*--------------------------------------------------------------------*/
  465|       |        /* Filter wrt Other Vertical Edges                                    */
  466|       |        /*--------------------------------------------------------------------*/
  467|  5.84M|        {
  468|  5.84M|            WORD32 ofst_a, ofst_b, idx_b_y, idx_b_u,
  469|  5.84M|                            idx_b_v;
  470|  5.84M|            WORD32 qp_avg, qp_avg_u, qp_avg_v;
  471|  5.84M|            ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
  472|  5.84M|            ofst_b = ps_cur_mb->i1_slice_beta_offset;
  473|       |
  474|  5.84M|            qp_avg = ps_cur_mb->u1_mb_qp;
  475|       |
  476|  5.84M|            idx_a_y = qp_avg + ofst_a;
  477|  5.84M|            alpha = gau1_ih264d_alpha_table[12 + idx_a_y];
  478|  5.84M|            idx_b_y = qp_avg + ofst_b;
  479|  5.84M|            beta = gau1_ih264d_beta_table[12 + idx_b_y];
  480|       |
  481|       |            /* CHROMA values */
  482|       |            /* CHROMA Cb values */
  483|  5.84M|            qp_avg_u = (qp_avg + i1_cb_qp_idx_ofst);
  484|  5.84M|            qp_avg_u = gau1_ih264d_qp_scale_cr[12 + qp_avg_u];
  485|  5.84M|            idx_a_u = qp_avg_u + ofst_a;
  486|  5.84M|            alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
  487|  5.84M|            idx_b_u = qp_avg_u + ofst_b;
  488|  5.84M|            beta_u = gau1_ih264d_beta_table[12 + idx_b_u];
  489|       |            /* CHROMA Cr values */
  490|  5.84M|            qp_avg_v = (qp_avg + i1_cr_qp_idx_ofst);
  491|  5.84M|            qp_avg_v = gau1_ih264d_qp_scale_cr[12 + qp_avg_v];
  492|  5.84M|            idx_a_v = qp_avg_v + ofst_a;
  493|  5.84M|            alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
  494|  5.84M|            idx_b_v = qp_avg_v + ofst_b;
  495|  5.84M|            beta_v = gau1_ih264d_beta_table[12 + idx_b_v];
  496|  5.84M|        }
  497|       |
  498|  5.84M|        pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y]; //this for Luma
  499|  5.84M|        pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u]; //this for chroma
  500|  5.84M|        pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v]; //this for chroma
  501|       |
  502|       |        //edge=1
  503|       |
  504|       |
  505|  5.84M|        u4_bs = pu4_bs_tab[5];
  506|  5.84M|        pu1_y = ps_tfr_cxt->pu1_mb_y;
  507|  5.84M|        pu1_u = ps_tfr_cxt->pu1_mb_u;
  508|       |
  509|  5.84M|        if(u4_bs)
  ------------------
  |  Branch (509:12): [True: 112k, False: 5.73M]
  ------------------
  510|   112k|        {
  511|       |
  512|   112k|            ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 4, i4_strd_y, alpha, beta,
  513|   112k|                                             u4_bs, pu1_cliptab_y);
  514|       |
  515|   112k|        }
  516|       |        //edge=2
  517|       |
  518|  5.84M|        u4_bs = pu4_bs_tab[6];
  519|  5.84M|        if(u4_bs)
  ------------------
  |  Branch (519:12): [True: 182k, False: 5.66M]
  ------------------
  520|   182k|        {
  521|   182k|            ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 8, i4_strd_y, alpha, beta,
  522|   182k|                                             u4_bs, pu1_cliptab_y);
  523|   182k|            ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u + 4 * YUV420SP_FACTOR,
  ------------------
  |  |  119|   182k|#define YUV420SP_FACTOR 2
  ------------------
  524|   182k|                                               i4_strd_uv, alpha_u, beta_u,
  525|   182k|                                               alpha_v, beta_v, u4_bs,
  526|   182k|                                               pu1_cliptab_u, pu1_cliptab_v);
  527|       |
  528|   182k|        }
  529|       |        //edge=3
  530|       |
  531|  5.84M|        u4_bs = pu4_bs_tab[7];
  532|  5.84M|        if(u4_bs)
  ------------------
  |  Branch (532:12): [True: 102k, False: 5.74M]
  ------------------
  533|   102k|        {
  534|   102k|            ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 12, i4_strd_y, alpha, beta,
  535|   102k|                                             u4_bs, pu1_cliptab_y);
  536|       |
  537|   102k|        }
  538|       |
  539|       |        /*--------------------------------------------------------------------*/
  540|       |        /* Filter wrt Top edge                                                */
  541|       |        /* except                                                             */
  542|       |        /*      - Top Edge is Picture Boundary                                */
  543|       |        /*      - Top Edge is part of Slice Boundary and Deblocking           */
  544|       |        /*        parameters of slice disable Filtering of Slice Boundary Edges*/
  545|       |        /*--------------------------------------------------------------------*/
  546|  5.84M|        if(ps_top_mb)
  ------------------
  |  Branch (546:12): [True: 5.52M, False: 319k]
  ------------------
  547|  5.52M|        {
  548|       |            /** if top MB and MB AFF and cur MB is frame and top is field then  */
  549|       |            /*  one extra top edge needs to be deblocked                        */
  550|       |
  551|  5.52M|            ih264d_filter_boundary_top_nonmbaff(ps_dec, ps_tfr_cxt,
  552|  5.52M|                                                i1_cb_qp_idx_ofst,
  553|  5.52M|                                                i1_cr_qp_idx_ofst, ps_cur_mb,
  554|  5.52M|                                                i4_strd_y, i4_strd_uv, ps_top_mb,
  555|  5.52M|                                                pu4_bs_tab[0]);
  556|       |
  557|  5.52M|        }
  558|       |
  559|       |        /*--------------------------------------------------------------------*/
  560|       |        /* Filter wrt Other Horizontal Edges                                  */
  561|       |        /*--------------------------------------------------------------------*/
  562|       |
  563|       |        //edge1
  564|  5.84M|        u4_bs = pu4_bs_tab[1];
  565|       |
  566|  5.84M|        if(u4_bs)
  ------------------
  |  Branch (566:12): [True: 108k, False: 5.74M]
  ------------------
  567|   108k|        {
  568|   108k|            ps_dec->pf_deblk_luma_horz_bslt4(pu1_y + (i4_strd_y << 2), i4_strd_y,
  569|   108k|                                             alpha, beta, u4_bs, pu1_cliptab_y);
  570|       |
  571|   108k|        }
  572|       |        //edge2
  573|  5.84M|        u4_bs = pu4_bs_tab[2];
  574|       |
  575|  5.84M|        if(u4_bs)
  ------------------
  |  Branch (575:12): [True: 192k, False: 5.65M]
  ------------------
  576|   192k|        {
  577|       |
  578|   192k|            ps_dec->pf_deblk_luma_horz_bslt4(pu1_y + (i4_strd_y << 3), i4_strd_y,
  579|   192k|                                             alpha, beta, u4_bs, pu1_cliptab_y);
  580|   192k|            ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u + (i4_strd_uv << 2),
  581|   192k|                                               i4_strd_uv, alpha_u, beta_u,
  582|   192k|                                               alpha_v, beta_v, u4_bs,
  583|   192k|                                               pu1_cliptab_u, pu1_cliptab_v);
  584|       |
  585|   192k|        }
  586|       |        //edge3
  587|  5.84M|        u4_bs = pu4_bs_tab[3];
  588|  5.84M|        if(u4_bs)
  ------------------
  |  Branch (588:12): [True: 103k, False: 5.74M]
  ------------------
  589|   103k|        {
  590|   103k|            ps_dec->pf_deblk_luma_horz_bslt4(
  591|   103k|                            (pu1_y + (i4_strd_y << 3) + (i4_strd_y << 2)),
  592|   103k|                            i4_strd_y, alpha, beta, u4_bs, pu1_cliptab_y);
  593|       |
  594|   103k|        }
  595|  5.84M|     }
  596|       |
  597|  7.17M|     ps_dec->u4_deblk_mb_x++;
  598|  7.17M|     ps_dec->ps_cur_deblk_mb++;
  599|  7.17M|     ps_dec->u4_cur_deblk_mb_num++;
  600|  7.17M|     u4_mbs_next = u4_image_wd_mb - ps_dec->u4_deblk_mb_x;
  601|       |
  602|  7.17M|     ps_tfr_cxt->pu1_mb_y += 16;
  603|  7.17M|     ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR;
  ------------------
  |  |  119|  7.17M|#define YUV420SP_FACTOR 2
  ------------------
  604|  7.17M|     ps_tfr_cxt->pu1_mb_v += 8;
  605|       |
  606|  7.17M|     if(!u4_mbs_next)
  ------------------
  |  Branch (606:9): [True: 1.23M, False: 5.93M]
  ------------------
  607|  1.23M|     {
  608|  1.23M|         ps_tfr_cxt->pu1_mb_y += ps_tfr_cxt->u4_y_inc;
  609|  1.23M|         ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc;
  610|  1.23M|         ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc;
  611|  1.23M|         ps_dec->u4_deblk_mb_y++;
  612|  1.23M|         ps_dec->u4_deblk_mb_x = 0;
  613|  1.23M|     }
  614|       |
  615|  7.17M|}
ih264d_init_deblk_tfr_ctxt:
  634|   266k|{
  635|       |
  636|   266k|    UWORD32 i4_wd_y;
  637|   266k|    UWORD32 i4_wd_uv;
  638|   266k|    UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; /*< Field u4_flag  */
  639|   266k|    UNUSED(u2_image_wd_mb);
  ------------------
  |  |   45|   266k|#define UNUSED(x) ((void)(x))
  ------------------
  640|   266k|    ps_tfr_cxt->pu1_src_y = ps_dec->s_cur_pic.pu1_buf1 - 4;
  641|   266k|    ps_tfr_cxt->pu1_src_u = ps_dec->s_cur_pic.pu1_buf2 - 4;
  642|   266k|    ps_tfr_cxt->pu1_src_v = ps_dec->s_cur_pic.pu1_buf3 - 4;
  643|   266k|    ps_tfr_cxt->pu1_dest_y = ps_tfr_cxt->pu1_src_y;
  644|   266k|    ps_tfr_cxt->pu1_dest_u = ps_tfr_cxt->pu1_src_u;
  645|   266k|    ps_tfr_cxt->pu1_dest_v = ps_tfr_cxt->pu1_src_v;
  646|       |
  647|   266k|    ps_tfr_cxt->pu1_mb_y = ps_tfr_cxt->pu1_src_y + 4;
  648|   266k|    ps_tfr_cxt->pu1_mb_u = ps_tfr_cxt->pu1_src_u + 4;
  649|   266k|    ps_tfr_cxt->pu1_mb_v = ps_tfr_cxt->pu1_src_v + 4;
  650|       |
  651|   266k|    i4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
  652|   266k|    i4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
  653|   266k|    ps_tfr_cxt->u4_y_inc = ((i4_wd_y << u1_mbaff) * 16
  654|   266k|                    - (ps_dec->u2_frm_wd_in_mbs << 4));
  655|       |
  656|   266k|    ps_tfr_cxt->u4_uv_inc = (i4_wd_uv << u1_mbaff) * 8
  657|   266k|                    - (ps_dec->u2_frm_wd_in_mbs << 4);
  658|       |
  659|       |    /* padding related initialisations */
  660|   266k|    if(ps_dec->ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (660:8): [True: 231k, False: 35.1k]
  ------------------
  661|   231k|    {
  662|   231k|        ps_pad_mgr->u1_vert_pad_top = !(ps_dec->ps_cur_slice->u1_field_pic_flag
  ------------------
  |  Branch (662:41): [True: 0, False: 231k]
  ------------------
  663|      0|                        && ps_dec->ps_cur_slice->u1_bottom_field_flag);
  ------------------
  |  Branch (663:28): [True: 0, False: 0]
  ------------------
  664|   231k|        ps_pad_mgr->u1_vert_pad_bot =
  665|   231k|                        ((!ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (665:26): [True: 231k, False: 0]
  ------------------
  666|      0|                                        || ps_dec->ps_cur_slice->u1_bottom_field_flag);
  ------------------
  |  Branch (666:44): [True: 0, False: 0]
  ------------------
  667|   231k|        ps_pad_mgr->u1_horz_pad = 1;
  668|   231k|    }
  669|  35.1k|    else
  670|  35.1k|    {
  671|  35.1k|        ps_pad_mgr->u1_horz_pad = 0;
  672|  35.1k|        ps_pad_mgr->u1_vert_pad_top = 0;
  673|  35.1k|        ps_pad_mgr->u1_vert_pad_bot = 0;
  674|  35.1k|    }
  675|   266k|}
ih264d_deblock_picture_progressive:
 1103|   126k|{
 1104|   126k|    deblk_mb_t *ps_cur_mb;
 1105|       |
 1106|   126k|    UWORD8 u1_vert_pad_top = 1;
 1107|   126k|    UWORD8 u1_mbs_next;
 1108|   126k|    UWORD8 u1_deb_mode;
 1109|   126k|    WORD32 i4_wd_y, i4_wd_uv;
 1110|       |
 1111|       |
 1112|       |    /**************************************************/
 1113|       |    /* one time loads from ps_dec which will be used  */
 1114|       |    /* frequently throughout the deblocking procedure */
 1115|       |    /**************************************************/
 1116|   126k|    pad_mgr_t * ps_pad_mgr = &ps_dec->s_pad_mgr;
 1117|       |
 1118|   126k|    tfr_ctxt_t s_tfr_ctxt;
 1119|   126k|    tfr_ctxt_t * ps_tfr_cxt = &s_tfr_ctxt; // = &ps_dec->s_tran_addrecon;
 1120|   126k|    UWORD16 u2_image_wd_mb = ps_dec->u2_frm_wd_in_mbs;
 1121|   126k|    UWORD16 u2_image_ht_mb = ps_dec->u2_frm_ht_in_mbs;
 1122|   126k|    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 1123|       |
 1124|   126k|    WORD8 i1_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
 1125|   126k|    WORD8 i1_cr_qp_idx_ofst =
 1126|   126k|                    ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
 1127|       |
 1128|       |    /* Set up Parameter for  deblocking */
 1129|   126k|    ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt, u2_image_wd_mb,
 1130|   126k|                               0);
 1131|       |
 1132|       |    /* Pic level Initialisations */
 1133|       |
 1134|   126k|    i4_wd_y = ps_dec->u2_frm_wd_y;
 1135|   126k|    i4_wd_uv = ps_dec->u2_frm_wd_uv;
 1136|       |    /* Initial filling of the buffers with deblocking data */
 1137|   126k|    ps_cur_mb = ps_dec->ps_deblk_pic;
 1138|       |
 1139|   126k|    if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1139:8): [True: 126k, False: 0]
  ------------------
 1140|   126k|    {
 1141|   126k|        if(ps_dec->ps_cur_sps->u1_mb_aff_flag == 1)
  ------------------
  |  Branch (1141:12): [True: 0, False: 126k]
  ------------------
 1142|      0|        {
 1143|      0|            while( ps_dec->u4_deblk_mb_y < u2_image_ht_mb)
  ------------------
  |  Branch (1143:20): [True: 0, False: 0]
  ------------------
 1144|      0|            {
 1145|      0|                ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt,
 1146|      0|                                           i1_cb_qp_idx_ofst,
 1147|      0|                                           i1_cr_qp_idx_ofst,
 1148|      0|                                           i4_wd_y, i4_wd_uv);
 1149|      0|                ps_cur_mb++;
 1150|      0|            }
 1151|      0|        }
 1152|       |
 1153|   126k|    }
 1154|       |
 1155|       |    //Pad the Picture
 1156|       |    //Horizontal Pad
 1157|   126k|    if(ps_pad_mgr->u1_horz_pad)
  ------------------
  |  Branch (1157:8): [True: 109k, False: 16.6k]
  ------------------
 1158|   109k|    {
 1159|   109k|        UWORD32 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
 1160|   109k|        ps_dec->pf_pad_left_luma(ps_tfr_cxt->pu1_src_y + 4,
 1161|   109k|                                 ps_dec->u2_frm_wd_y << u1_field_pic_flag,
 1162|   109k|                                 ps_dec->u2_pic_ht >> u1_field_pic_flag,
 1163|   109k|                                 PAD_LEN_Y_H);
  ------------------
  |  |  571|   109k|#define PAD_LEN_Y_H                   32
  ------------------
 1164|   109k|        ps_dec->pf_pad_right_luma(
 1165|   109k|                        ps_tfr_cxt->pu1_src_y + 4
 1166|   109k|                                        + (ps_dec->u2_frm_wd_in_mbs << 4),
 1167|   109k|                        ps_dec->u2_frm_wd_y << u1_field_pic_flag,
 1168|   109k|                        ps_dec->u2_pic_ht >> u1_field_pic_flag, PAD_LEN_Y_H);
  ------------------
  |  |  571|   109k|#define PAD_LEN_Y_H                   32
  ------------------
 1169|       |
 1170|   109k|        ps_dec->pf_pad_left_chroma(ps_tfr_cxt->pu1_src_u + 4,
 1171|   109k|                                   ps_dec->u2_frm_wd_uv << u1_field_pic_flag,
 1172|   109k|                                   (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag,
 1173|   109k|                                   PAD_LEN_UV_H * YUV420SP_FACTOR);
  ------------------
  |  |  573|   109k|#define PAD_LEN_UV_H                  16
  ------------------
                                                 PAD_LEN_UV_H * YUV420SP_FACTOR);
  ------------------
  |  |  119|   109k|#define YUV420SP_FACTOR 2
  ------------------
 1174|   109k|        ps_dec->pf_pad_right_chroma(
 1175|   109k|                        ps_tfr_cxt->pu1_src_u + 4
 1176|   109k|                                        + (ps_dec->u2_frm_wd_in_mbs << 4),
 1177|   109k|                        ps_dec->u2_frm_wd_uv << u1_field_pic_flag,
 1178|   109k|                        (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag,
 1179|   109k|                        PAD_LEN_UV_H * YUV420SP_FACTOR);
  ------------------
  |  |  573|   109k|#define PAD_LEN_UV_H                  16
  ------------------
                                      PAD_LEN_UV_H * YUV420SP_FACTOR);
  ------------------
  |  |  119|   109k|#define YUV420SP_FACTOR 2
  ------------------
 1180|       |
 1181|   109k|    }
 1182|       |
 1183|       |//Vertical Pad Top
 1184|   126k|    if(ps_pad_mgr->u1_vert_pad_top)
  ------------------
  |  Branch (1184:8): [True: 109k, False: 16.6k]
  ------------------
 1185|   109k|    {
 1186|   109k|        ps_dec->pf_pad_top(ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H,
  ------------------
  |  |  571|   109k|#define PAD_LEN_Y_H                   32
  ------------------
 1187|   109k|                           ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y,
 1188|   109k|                           ps_pad_mgr->u1_pad_len_y_v);
 1189|   109k|        ps_dec->pf_pad_top(
 1190|   109k|                        ps_dec->ps_cur_pic->pu1_buf2
 1191|   109k|                                        - PAD_LEN_UV_H * YUV420SP_FACTOR,
  ------------------
  |  |  573|   109k|#define PAD_LEN_UV_H                  16
  ------------------
                                                      - PAD_LEN_UV_H * YUV420SP_FACTOR,
  ------------------
  |  |  119|   109k|#define YUV420SP_FACTOR 2
  ------------------
 1192|   109k|                        ps_dec->u2_frm_wd_uv, ps_dec->u2_frm_wd_uv,
 1193|   109k|                        ps_pad_mgr->u1_pad_len_cr_v);
 1194|       |
 1195|   109k|    }
 1196|       |
 1197|       |//Vertical Pad Bottom
 1198|   126k|    if(ps_pad_mgr->u1_vert_pad_bot)
  ------------------
  |  Branch (1198:8): [True: 109k, False: 16.6k]
  ------------------
 1199|   109k|    {
 1200|       |
 1201|   109k|        UWORD8 *pu1_buf;
 1202|   109k|        pu1_buf = ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H;
  ------------------
  |  |  571|   109k|#define PAD_LEN_Y_H                   32
  ------------------
 1203|   109k|        pu1_buf += ps_dec->u2_pic_ht * ps_dec->u2_frm_wd_y;
 1204|   109k|        ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y,
 1205|   109k|                              ps_pad_mgr->u1_pad_len_y_v);
 1206|   109k|        pu1_buf = ps_dec->ps_cur_pic->pu1_buf2 - PAD_LEN_UV_H * YUV420SP_FACTOR;
  ------------------
  |  |  573|   109k|#define PAD_LEN_UV_H                  16
  ------------------
                      pu1_buf = ps_dec->ps_cur_pic->pu1_buf2 - PAD_LEN_UV_H * YUV420SP_FACTOR;
  ------------------
  |  |  119|   109k|#define YUV420SP_FACTOR 2
  ------------------
 1207|   109k|        pu1_buf += (ps_dec->u2_pic_ht >> 1) * ps_dec->u2_frm_wd_uv;
 1208|       |
 1209|   109k|        ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_uv,
 1210|   109k|                              ps_dec->u2_frm_wd_uv,
 1211|   109k|                              ps_pad_mgr->u1_pad_len_cr_v);
 1212|       |
 1213|   109k|    }
 1214|   126k|}
ih264d_set_deblocking_parameters:
 1237|  13.7M|{
 1238|       |    /*------------------------------------------------------------------*/
 1239|       |    /* Set the deblocking parameters                                  */
 1240|       |    /*------------------------------------------------------------------*/
 1241|  13.7M|    ps_cur_mb->i1_slice_alpha_c0_offset = ps_slice->i1_slice_alpha_c0_offset;
 1242|  13.7M|    ps_cur_mb->i1_slice_beta_offset = ps_slice->i1_slice_beta_offset;
 1243|  13.7M|    ps_cur_mb->u1_mb_type = (u1_mb_field_decoding_flag << 7);
 1244|       |
 1245|  13.7M|    switch(ps_slice->u1_disable_dblk_filter_idc)
  ------------------
  |  Branch (1245:12): [True: 13.6M, False: 1.09k]
  ------------------
 1246|  13.7M|    {
 1247|  11.7M|        case DBLK_ENABLED:
  ------------------
  |  |  549|  11.7M|#define DBLK_ENABLED                  0
  ------------------
  |  Branch (1247:9): [True: 11.7M, False: 1.90M]
  ------------------
 1248|  11.7M|            ps_cur_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
  ------------------
  |  |   69|  11.7M|#define MB_ENABLE_FILTERING           0x00
  ------------------
 1249|  11.7M|            break;
 1250|   518k|        case DBLK_DISABLED:
  ------------------
  |  |  548|   518k|#define DBLK_DISABLED                 1
  ------------------
  |  Branch (1250:9): [True: 518k, False: 13.1M]
  ------------------
 1251|   518k|            ps_cur_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|   518k|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1252|   518k|            break;
 1253|  1.38M|        case SLICE_BOUNDARY_DBLK_DISABLED:
  ------------------
  |  |  547|  1.38M|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (1253:9): [True: 1.38M, False: 12.3M]
  ------------------
 1254|  1.38M|        {
 1255|  1.38M|            ps_cur_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
  ------------------
  |  |   69|  1.38M|#define MB_ENABLE_FILTERING           0x00
  ------------------
 1256|  1.38M|            if(!(u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK))
  ------------------
  |  |   53|  1.38M|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  |  Branch (1256:16): [True: 172k, False: 1.20M]
  ------------------
 1257|   172k|                ps_cur_mb->u1_deblocking_mode |= MB_DISABLE_LEFT_EDGE;
  ------------------
  |  |   72|   172k|#define MB_DISABLE_LEFT_EDGE          0x04
  ------------------
 1258|  1.38M|            if(!(u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK))
  ------------------
  |  |   55|  1.38M|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  |  Branch (1258:16): [True: 103k, False: 1.27M]
  ------------------
 1259|   103k|                ps_cur_mb->u1_deblocking_mode |= MB_DISABLE_TOP_EDGE;
  ------------------
  |  |   71|   103k|#define MB_DISABLE_TOP_EDGE           0x02
  ------------------
 1260|  1.38M|            break;
 1261|      0|        }
 1262|  13.7M|    }
 1263|       |
 1264|  13.7M|    return (0);
 1265|  13.7M|}
ih264d_copy_intra_pred_line:
 1270|  13.5M|{
 1271|  13.5M|    UWORD8 *pu1_mb_last_row, u1_mb_field_decoding_flag;
 1272|  13.5M|    UWORD32 u4_recWidth, u4_recwidth_cr;
 1273|       |
 1274|  13.5M|    u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
 1275|       |
 1276|  13.5M|    u4_recWidth = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
 1277|  13.5M|    u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
 1278|       |
 1279|  13.5M|    pu1_mb_last_row = ps_dec->ps_frame_buf_ip_recon->pu1_dest_y
 1280|  13.5M|                    + (u4_recWidth * (MB_SIZE - 1));
  ------------------
  |  |  554|  13.5M|#define MB_SIZE             16
  ------------------
 1281|  13.5M|    pu1_mb_last_row += MB_SIZE * nmb_index;
  ------------------
  |  |  554|  13.5M|#define MB_SIZE             16
  ------------------
 1282|  13.5M|    MEMCPY_16BYTES(ps_dec->pu1_cur_y_intra_pred_line, pu1_mb_last_row);
  ------------------
  |  |  657|  13.5M|#define MEMCPY_16BYTES(dst,src)                                 \
  |  |  658|  13.5M|{                                                               \
  |  |  659|  13.5M|    memcpy(dst,src,16);                                         \
  |  |  660|  13.5M|}
  ------------------
 1283|       |
 1284|  13.5M|    pu1_mb_last_row = ps_dec->ps_frame_buf_ip_recon->pu1_dest_u
 1285|  13.5M|                    + (u4_recwidth_cr * (BLK8x8SIZE - 1));
  ------------------
  |  |  555|  13.5M|#define BLK8x8SIZE          8
  ------------------
 1286|  13.5M|    pu1_mb_last_row += BLK8x8SIZE * nmb_index * YUV420SP_FACTOR;
  ------------------
  |  |  555|  13.5M|#define BLK8x8SIZE          8
  ------------------
                  pu1_mb_last_row += BLK8x8SIZE * nmb_index * YUV420SP_FACTOR;
  ------------------
  |  |  119|  13.5M|#define YUV420SP_FACTOR 2
  ------------------
 1287|       |
 1288|  13.5M|    MEMCPY_16BYTES(ps_dec->pu1_cur_u_intra_pred_line, pu1_mb_last_row);
  ------------------
  |  |  657|  13.5M|#define MEMCPY_16BYTES(dst,src)                                 \
  |  |  658|  13.5M|{                                                               \
  |  |  659|  13.5M|    memcpy(dst,src,16);                                         \
  |  |  660|  13.5M|}
  ------------------
 1289|       |
 1290|  13.5M|    ps_dec->pu1_cur_y_intra_pred_line = ps_dec->pu1_cur_y_intra_pred_line_base
 1291|  13.5M|                    + (MB_SIZE * (ps_cur_mb_info->u2_mbx + 1));
  ------------------
  |  |  554|  13.5M|#define MB_SIZE             16
  ------------------
 1292|  13.5M|    ps_dec->pu1_cur_u_intra_pred_line = ps_dec->pu1_cur_u_intra_pred_line_base
 1293|  13.5M|                    + (BLK8x8SIZE * (ps_cur_mb_info->u2_mbx + 1))
  ------------------
  |  |  555|  13.5M|#define BLK8x8SIZE          8
  ------------------
 1294|  13.5M|                                    * YUV420SP_FACTOR;
  ------------------
  |  |  119|  13.5M|#define YUV420SP_FACTOR 2
  ------------------
 1295|  13.5M|    ps_dec->pu1_cur_v_intra_pred_line = ps_dec->pu1_cur_v_intra_pred_line_base
 1296|  13.5M|                    + (BLK8x8SIZE * (ps_cur_mb_info->u2_mbx + 1));
  ------------------
  |  |  555|  13.5M|#define BLK8x8SIZE          8
  ------------------
 1297|       |
 1298|  13.5M|    if(ps_cur_mb_info->u2_mbx == (ps_dec->u2_frm_wd_in_mbs - 1))
  ------------------
  |  Branch (1298:8): [True: 2.22M, False: 11.3M]
  ------------------
 1299|  2.22M|    {
 1300|  2.22M|        UWORD8* pu1_temp;
 1301|       |
 1302|  2.22M|        ps_dec->pu1_cur_y_intra_pred_line =
 1303|  2.22M|                        ps_dec->pu1_cur_y_intra_pred_line_base;
 1304|  2.22M|        ps_dec->pu1_cur_u_intra_pred_line =
 1305|  2.22M|                        ps_dec->pu1_cur_u_intra_pred_line_base;
 1306|  2.22M|        ps_dec->pu1_cur_v_intra_pred_line =
 1307|  2.22M|                        ps_dec->pu1_cur_v_intra_pred_line_base;
 1308|       |
 1309|       |        /*swap current and previous rows*/
 1310|  2.22M|        pu1_temp = ps_dec->pu1_cur_y_intra_pred_line;
 1311|  2.22M|        ps_dec->pu1_cur_y_intra_pred_line = ps_dec->pu1_prev_y_intra_pred_line;
 1312|  2.22M|        ps_dec->pu1_prev_y_intra_pred_line = pu1_temp;
 1313|       |
 1314|  2.22M|        pu1_temp = ps_dec->pu1_cur_u_intra_pred_line;
 1315|  2.22M|        ps_dec->pu1_cur_u_intra_pred_line = ps_dec->pu1_prev_u_intra_pred_line;
 1316|  2.22M|        ps_dec->pu1_prev_u_intra_pred_line = pu1_temp;
 1317|       |
 1318|  2.22M|        pu1_temp = ps_dec->pu1_cur_v_intra_pred_line;
 1319|  2.22M|        ps_dec->pu1_cur_v_intra_pred_line = ps_dec->pu1_prev_v_intra_pred_line;
 1320|  2.22M|        ps_dec->pu1_prev_v_intra_pred_line = pu1_temp;
 1321|       |
 1322|  2.22M|        ps_dec->pu1_cur_y_intra_pred_line_base =
 1323|  2.22M|                        ps_dec->pu1_cur_y_intra_pred_line;
 1324|  2.22M|        ps_dec->pu1_cur_u_intra_pred_line_base =
 1325|  2.22M|                        ps_dec->pu1_cur_u_intra_pred_line;
 1326|  2.22M|        ps_dec->pu1_cur_v_intra_pred_line_base =
 1327|  2.22M|                        ps_dec->pu1_cur_v_intra_pred_line;
 1328|       |
 1329|       |
 1330|       |
 1331|       |
 1332|       |
 1333|  2.22M|    }
 1334|       |
 1335|  13.5M|}

ih264d_init_ref_bufs:
   81|   107k|{
   82|   107k|    UWORD32 i;
   83|   107k|    struct dpb_info_t *ps_dpb_info = ps_dpb_mgr->as_dpb_info;
   84|  3.56M|    for(i = 0; i < MAX_REF_BUFS; i++)
  ------------------
  |  |   75|  3.56M|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (84:16): [True: 3.45M, False: 107k]
  ------------------
   85|  3.45M|    {
   86|  3.45M|        ps_dpb_info[i].u1_used_as_ref = UNUSED_FOR_REF;
  ------------------
  |  |  595|  3.45M|#define UNUSED_FOR_REF 0
  ------------------
   87|  3.45M|        ps_dpb_info[i].u1_lt_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  3.45M|#define MAX_REF_BUFS    32
  ------------------
   88|  3.45M|        ps_dpb_info[i].ps_prev_short = NULL;
   89|  3.45M|        ps_dpb_info[i].ps_prev_long = NULL;
   90|  3.45M|        ps_dpb_info[i].ps_pic_buf = NULL;
   91|  3.45M|        ps_dpb_info[i].s_top_field.u1_reference_info = UNUSED_FOR_REF;
  ------------------
  |  |  595|  3.45M|#define UNUSED_FOR_REF 0
  ------------------
   92|  3.45M|        ps_dpb_info[i].s_bot_field.u1_reference_info = UNUSED_FOR_REF;
  ------------------
  |  |  595|  3.45M|#define UNUSED_FOR_REF 0
  ------------------
   93|  3.45M|        ps_dpb_info[i].s_top_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  3.45M|#define MAX_REF_BUFS    32
  ------------------
   94|  3.45M|        ps_dpb_info[i].s_bot_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  3.45M|#define MAX_REF_BUFS    32
  ------------------
   95|       |
   96|  3.45M|    }
   97|   107k|    ps_dpb_mgr->u1_num_st_ref_bufs = ps_dpb_mgr->u1_num_lt_ref_bufs = 0;
   98|   107k|    ps_dpb_mgr->ps_dpb_st_head = NULL;
   99|   107k|    ps_dpb_mgr->ps_dpb_ht_head = NULL;
  100|   107k|    ps_dpb_mgr->i1_gaps_deleted = 0;
  101|   107k|    ps_dpb_mgr->i1_poc_buf_id_entries = 0;
  102|   107k|    ps_dpb_mgr->u1_mmco_error_in_seq = 0;
  103|       |
  104|   107k|    ps_dpb_mgr->u1_num_gaps = 0;
  105|  1.83M|    for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|  1.83M|#define MAX_FRAMES              16
  ------------------
  |  Branch (105:16): [True: 1.72M, False: 107k]
  ------------------
  106|  1.72M|    {
  107|  1.72M|        ps_dpb_mgr->ai4_gaps_start_frm_num[i] = INVALID_FRAME_NUM;
  ------------------
  |  |  601|  1.72M|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  108|  1.72M|        ps_dpb_mgr->ai4_gaps_end_frm_num[i] = 0;
  109|  1.72M|        ps_dpb_mgr->ai1_gaps_per_seq[i] = 0;
  110|  1.72M|        ps_dpb_mgr->ai4_poc_buf_id_map[i][0] = -1;
  111|  1.72M|        ps_dpb_mgr->ai4_poc_buf_id_map[i][1] = 0x7fffffff;
  112|  1.72M|        ps_dpb_mgr->ai4_poc_buf_id_map[i][2] = 0;
  113|  1.72M|    }
  114|       |
  115|   107k|}
ih264d_free_ref_pic_mv_bufs:
  118|  85.0k|{
  119|  85.0k|    dec_struct_t *ps_dec = (dec_struct_t *)pv_dec;
  120|       |
  121|  85.0k|    if((pic_buf_id == ps_dec->u1_pic_buf_id) &&
  ------------------
  |  Branch (121:8): [True: 38.0k, False: 46.9k]
  ------------------
  122|  38.0k|                    ps_dec->ps_cur_slice->u1_field_pic_flag &&
  ------------------
  |  Branch (122:21): [True: 0, False: 38.0k]
  ------------------
  123|      0|                    (ps_dec->u1_top_bottom_decoded == 0))
  ------------------
  |  Branch (123:21): [True: 0, False: 0]
  ------------------
  124|      0|    {
  125|      0|        return;
  126|      0|    }
  127|       |
  128|  85.0k|    ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
  129|  85.0k|                          pic_buf_id,
  130|  85.0k|                          BUF_MGR_REF);
  ------------------
  |  |   50|  85.0k|#define BUF_MGR_REF          (1 << 2)
  ------------------
  131|  85.0k|    ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
  132|  85.0k|                          ps_dec->as_buf_id_info_map[pic_buf_id].mv_buf_id,
  133|  85.0k|                          BUF_MGR_REF);
  ------------------
  |  |   50|  85.0k|#define BUF_MGR_REF          (1 << 2)
  ------------------
  134|  85.0k|}
ih264d_delete_lt_node:
  151|  19.7k|{
  152|  19.7k|    *pi4_status = 0;
  153|  19.7k|    if(ps_dpb_mgr->u1_num_lt_ref_bufs > 0)
  ------------------
  |  Branch (153:8): [True: 3.01k, False: 16.7k]
  ------------------
  154|  3.01k|    {
  155|  3.01k|        WORD32 i;
  156|  3.01k|        struct dpb_info_t *ps_next_dpb;
  157|       |        /* ps_unmark_node points to the node to be removed */
  158|       |        /* from long term list.                            */
  159|  3.01k|        struct dpb_info_t *ps_unmark_node;
  160|       |        //Find the node with matching LTIndex
  161|  3.01k|        ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
  162|  3.01k|        if(ps_next_dpb->u1_lt_idx == u4_lt_idx)
  ------------------
  |  Branch (162:12): [True: 1.30k, False: 1.71k]
  ------------------
  163|  1.30k|        {
  164|  1.30k|            ps_unmark_node = ps_next_dpb;
  165|  1.30k|        }
  166|  1.71k|        else
  167|  1.71k|        {
  168|  2.17k|            for(i = 1; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (168:24): [True: 823, False: 1.35k]
  ------------------
  169|    823|            {
  170|    823|                if(ps_next_dpb->ps_prev_long->u1_lt_idx == u4_lt_idx)
  ------------------
  |  Branch (170:20): [True: 358, False: 465]
  ------------------
  171|    358|                    break;
  172|    465|                ps_next_dpb = ps_next_dpb->ps_prev_long;
  173|    465|            }
  174|  1.71k|            if(i == ps_dpb_mgr->u1_num_lt_ref_bufs)
  ------------------
  |  Branch (174:16): [True: 1.35k, False: 358]
  ------------------
  175|  1.35k|                *pi4_status = 1;
  176|    358|            else
  177|    358|                ps_unmark_node = ps_next_dpb->ps_prev_long;
  178|  1.71k|        }
  179|       |
  180|  3.01k|        if(*pi4_status == 0)
  ------------------
  |  Branch (180:12): [True: 1.66k, False: 1.35k]
  ------------------
  181|  1.66k|        {
  182|  1.66k|            if(u1_fld_pic_flag)
  ------------------
  |  Branch (182:16): [True: 0, False: 1.66k]
  ------------------
  183|      0|            {
  184|      0|                if(ps_lt_node_to_insert != ps_unmark_node)
  ------------------
  |  Branch (184:20): [True: 0, False: 0]
  ------------------
  185|      0|                {
  186|      0|                    UWORD8 u1_deleted = 0;
  187|       |                    /* for the ps_unmark_node mark the corresponding field */
  188|       |                    /* field as unused for reference                       */
  189|       |
  190|      0|                    if(ps_unmark_node->s_top_field.u1_long_term_frame_idx
  ------------------
  |  Branch (190:24): [True: 0, False: 0]
  ------------------
  191|      0|                                    == u4_lt_idx)
  192|      0|                    {
  193|      0|                        ps_unmark_node->s_top_field.u1_reference_info =
  194|      0|                                        UNUSED_FOR_REF;
  ------------------
  |  |  595|      0|#define UNUSED_FOR_REF 0
  ------------------
  195|      0|                        ps_unmark_node->s_top_field.u1_long_term_frame_idx =
  196|      0|                        MAX_REF_BUFS + 1;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  197|      0|                        u1_deleted = 1;
  198|      0|                    }
  199|      0|                    if(ps_unmark_node->s_bot_field.u1_long_term_frame_idx
  ------------------
  |  Branch (199:24): [True: 0, False: 0]
  ------------------
  200|      0|                                    == u4_lt_idx)
  201|      0|                    {
  202|      0|                        ps_unmark_node->s_bot_field.u1_reference_info =
  203|      0|                                        UNUSED_FOR_REF;
  ------------------
  |  |  595|      0|#define UNUSED_FOR_REF 0
  ------------------
  204|      0|                        ps_unmark_node->s_bot_field.u1_long_term_frame_idx =
  205|      0|                        MAX_REF_BUFS + 1;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  206|      0|                        u1_deleted = 1;
  207|      0|                    }
  208|       |
  209|      0|                    if(!u1_deleted)
  ------------------
  |  Branch (209:24): [True: 0, False: 0]
  ------------------
  210|      0|                    {
  211|       |
  212|      0|                        UWORD32 i4_error_code;
  213|      0|                        i4_error_code = ERROR_DBP_MANAGER_T;
  214|       |
  215|      0|                        return i4_error_code;
  216|      0|                    }
  217|      0|                }
  218|       |
  219|      0|                ps_unmark_node->u1_used_as_ref =
  220|      0|                                ps_unmark_node->s_top_field.u1_reference_info
  221|      0|                                                | ps_unmark_node->s_bot_field.u1_reference_info;
  222|      0|            }
  223|  1.66k|            else
  224|  1.66k|                ps_unmark_node->u1_used_as_ref = UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.66k|#define UNUSED_FOR_REF 0
  ------------------
  225|       |
  226|  1.66k|            if(UNUSED_FOR_REF == ps_unmark_node->u1_used_as_ref)
  ------------------
  |  |  595|  1.66k|#define UNUSED_FOR_REF 0
  ------------------
  |  Branch (226:16): [True: 1.66k, False: 0]
  ------------------
  227|  1.66k|            {
  228|  1.66k|                if(ps_unmark_node == ps_dpb_mgr->ps_dpb_ht_head)
  ------------------
  |  Branch (228:20): [True: 1.30k, False: 358]
  ------------------
  229|  1.30k|                    ps_dpb_mgr->ps_dpb_ht_head = ps_next_dpb->ps_prev_long;
  230|       |
  231|  1.66k|                ps_unmark_node->u1_lt_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  1.66k|#define MAX_REF_BUFS    32
  ------------------
  232|  1.66k|                ps_unmark_node->s_top_field.u1_reference_info =
  233|  1.66k|                UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.66k|#define UNUSED_FOR_REF 0
  ------------------
  234|  1.66k|                ps_unmark_node->s_bot_field.u1_reference_info =
  235|  1.66k|                UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.66k|#define UNUSED_FOR_REF 0
  ------------------
  236|       |                // Release the physical buffer
  237|  1.66k|                ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
  238|  1.66k|                                            ps_unmark_node->u1_buf_id);
  239|  1.66k|                ps_next_dpb->ps_prev_long = ps_unmark_node->ps_prev_long; //update link
  240|  1.66k|                ps_unmark_node->ps_prev_long = NULL;
  241|  1.66k|                ps_dpb_mgr->u1_num_lt_ref_bufs--; //decrement LT buf count
  242|  1.66k|            }
  243|  1.66k|        }
  244|  3.01k|    }
  245|  19.7k|    return OK;
  ------------------
  |  |  114|  19.7k|#define OK        0
  ------------------
  246|  19.7k|}
ih264d_insert_lt_node:
  263|  17.7k|{
  264|  17.7k|    UWORD8 u1_mark_top_field_long_term = 0;
  265|  17.7k|    UWORD8 u1_mark_bot_field_long_term = 0;
  266|       |
  267|  17.7k|    {
  268|  17.7k|        if(u1_fld_pic_flag)
  ------------------
  |  Branch (268:12): [True: 0, False: 17.7k]
  ------------------
  269|      0|        {
  270|       |            /* Assign corresponding field (top or bottom) long_term_frame_idx */
  271|       |
  272|      0|            if((ps_mov_node->s_top_field.u1_reference_info == IS_LONG_TERM)
  ------------------
  |  |  597|      0|#define IS_LONG_TERM   2
  ------------------
  |  Branch (272:16): [True: 0, False: 0]
  ------------------
  273|      0|                            && (ps_mov_node->s_bot_field.u1_reference_info
  ------------------
  |  Branch (273:32): [True: 0, False: 0]
  ------------------
  274|      0|                                            == IS_LONG_TERM))
  ------------------
  |  |  597|      0|#define IS_LONG_TERM   2
  ------------------
  275|      0|            {
  276|      0|                if(ps_mov_node->u1_lt_idx == u4_lt_idx)
  ------------------
  |  Branch (276:20): [True: 0, False: 0]
  ------------------
  277|      0|                    u1_mark_bot_field_long_term = 1;
  278|      0|                else
  279|      0|                {
  280|       |
  281|      0|                    UWORD32 i4_error_code;
  282|      0|                    i4_error_code = ERROR_DBP_MANAGER_T;
  283|       |
  284|      0|                    return i4_error_code;
  285|       |
  286|      0|                }
  287|      0|            }
  288|      0|            else if(ps_mov_node->s_top_field.u1_reference_info == IS_LONG_TERM)
  ------------------
  |  |  597|      0|#define IS_LONG_TERM   2
  ------------------
  |  Branch (288:21): [True: 0, False: 0]
  ------------------
  289|      0|            {
  290|      0|                u1_mark_top_field_long_term = 1;
  291|      0|            }
  292|       |
  293|      0|            if(!(u1_mark_top_field_long_term || u1_mark_bot_field_long_term))
  ------------------
  |  Branch (293:18): [True: 0, False: 0]
  |  Branch (293:49): [True: 0, False: 0]
  ------------------
  294|      0|            {
  295|      0|                UWORD32 i4_error_code;
  296|      0|                i4_error_code = ERROR_DBP_MANAGER_T;
  297|      0|                return i4_error_code;
  298|      0|            }
  299|      0|        }
  300|  17.7k|        else
  301|  17.7k|        {
  302|  17.7k|            ps_mov_node->s_top_field.u1_reference_info = IS_LONG_TERM;
  ------------------
  |  |  597|  17.7k|#define IS_LONG_TERM   2
  ------------------
  303|  17.7k|            ps_mov_node->s_bot_field.u1_reference_info = IS_LONG_TERM;
  ------------------
  |  |  597|  17.7k|#define IS_LONG_TERM   2
  ------------------
  304|  17.7k|            ps_mov_node->s_top_field.u1_long_term_frame_idx = u4_lt_idx;
  305|  17.7k|            ps_mov_node->s_bot_field.u1_long_term_frame_idx = u4_lt_idx;
  306|  17.7k|            u1_mark_bot_field_long_term = 1;
  307|  17.7k|            u1_mark_top_field_long_term = 1;
  308|  17.7k|        }
  309|       |
  310|  17.7k|        ps_mov_node->u1_lt_idx = u4_lt_idx; //Assign the LT index to the node
  311|  17.7k|        ps_mov_node->ps_pic_buf->u1_long_term_frm_idx = u4_lt_idx;
  312|  17.7k|        ps_mov_node->u1_used_as_ref = IS_LONG_TERM;
  ------------------
  |  |  597|  17.7k|#define IS_LONG_TERM   2
  ------------------
  313|       |
  314|       |        /* Insert the new long term in the LT list with  u4_lt_idx    */
  315|       |        /* in ascending order.                                         */
  316|  17.7k|        if(ps_dpb_mgr->u1_num_lt_ref_bufs > 0)
  ------------------
  |  Branch (316:12): [True: 1.81k, False: 15.9k]
  ------------------
  317|  1.81k|        {
  318|  1.81k|            struct dpb_info_t *ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
  319|  1.81k|            if(u4_lt_idx < ps_next_dpb->u1_lt_idx)
  ------------------
  |  Branch (319:16): [True: 551, False: 1.26k]
  ------------------
  320|    551|            {
  321|       |                //LTIndex to be inserted is the smallest LT index
  322|       |                //Update head and point prev to the next higher index
  323|    551|                ps_mov_node->ps_prev_long = ps_next_dpb;
  324|    551|                ps_dpb_mgr->ps_dpb_ht_head = ps_mov_node;
  325|    551|            }
  326|  1.26k|            else
  327|  1.26k|            {
  328|  1.26k|                WORD32 i;
  329|  1.26k|                struct dpb_info_t *ps_nxtDPB = ps_next_dpb;
  330|  1.26k|                ps_next_dpb = ps_next_dpb->ps_prev_long;
  331|  1.57k|                for(i = 1; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (331:28): [True: 577, False: 996]
  ------------------
  332|    577|                {
  333|    577|                    if(ps_next_dpb->u1_lt_idx > u4_lt_idx)
  ------------------
  |  Branch (333:24): [True: 266, False: 311]
  ------------------
  334|    266|                        break;
  335|    311|                    ps_nxtDPB = ps_next_dpb;
  336|    311|                    ps_next_dpb = ps_next_dpb->ps_prev_long;
  337|    311|                }
  338|       |
  339|  1.26k|                ps_nxtDPB->ps_prev_long = ps_mov_node;
  340|  1.26k|                ps_mov_node->ps_prev_long = ps_next_dpb;
  341|  1.26k|            }
  342|  1.81k|        }
  343|  15.9k|        else
  344|  15.9k|        {
  345|  15.9k|            ps_dpb_mgr->ps_dpb_ht_head = ps_mov_node;
  346|  15.9k|            ps_mov_node->ps_prev_long = NULL;
  347|  15.9k|        }
  348|       |        /* Identify the picture buffer as a long term picture buffer */
  349|  17.7k|        ps_mov_node->ps_pic_buf->u1_is_short = 0;
  350|       |
  351|       |        /* Increment LT buf count only if new LT node inserted    */
  352|       |        /* If Increment during top_field is done, don't increment */
  353|       |        /* for bottom field, as both them are part of same pic.   */
  354|  17.7k|        if(u1_mark_bot_field_long_term)
  ------------------
  |  Branch (354:12): [True: 17.7k, False: 0]
  ------------------
  355|  17.7k|            ps_dpb_mgr->u1_num_lt_ref_bufs++;
  356|       |
  357|  17.7k|    }
  358|  17.7k|    return OK;
  ------------------
  |  |  114|  17.7k|#define OK        0
  ------------------
  359|  17.7k|}
ih264d_insert_st_node:
  379|   109k|{
  380|   109k|    WORD32 i;
  381|   109k|    struct dpb_info_t *ps_dpb_info = ps_dpb_mgr->as_dpb_info;
  382|   109k|    UWORD8 u1_picture_type = ps_pic_buf->u1_picturetype;
  383|       |    /* Find an unused dpb location */
  384|   122k|    for(i = 0; i < MAX_REF_BUFS; i++)
  ------------------
  |  |   75|   122k|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (384:16): [True: 122k, False: 0]
  ------------------
  385|   122k|    {
  386|   122k|        if((ps_dpb_info[i].ps_pic_buf == ps_pic_buf)
  ------------------
  |  Branch (386:12): [True: 689, False: 122k]
  ------------------
  387|    689|                        && ps_dpb_info[i].u1_used_as_ref)
  ------------------
  |  Branch (387:28): [True: 165, False: 524]
  ------------------
  388|    165|        {
  389|       |            /*signal an error in the case of frame pic*/
  390|    165|            if(ps_dpb_info[i].ps_pic_buf->u1_pic_type == FRM_PIC)
  ------------------
  |  |  352|    165|#define FRM_PIC         0x00
  ------------------
  |  Branch (390:16): [True: 165, False: 0]
  ------------------
  391|    165|            {
  392|    165|                return ERROR_DBP_MANAGER_T;
  393|    165|            }
  394|      0|            else
  395|      0|            {
  396|       |                /* Can occur only for field bottom pictures */
  397|      0|                ps_dpb_info[i].s_bot_field.u1_reference_info = IS_SHORT_TERM;
  ------------------
  |  |  596|      0|#define IS_SHORT_TERM  1
  ------------------
  398|      0|                return OK;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  399|      0|            }
  400|    165|        }
  401|       |
  402|   122k|        if((ps_dpb_info[i].u1_used_as_ref == UNUSED_FOR_REF)
  ------------------
  |  |  595|   122k|#define UNUSED_FOR_REF 0
  ------------------
  |  Branch (402:12): [True: 109k, False: 13.3k]
  ------------------
  403|   109k|                        && (ps_dpb_info[i].s_top_field.u1_reference_info
  ------------------
  |  Branch (403:28): [True: 109k, False: 0]
  ------------------
  404|   109k|                                        == UNUSED_FOR_REF)
  ------------------
  |  |  595|   109k|#define UNUSED_FOR_REF 0
  ------------------
  405|   109k|                        && (ps_dpb_info[i].s_bot_field.u1_reference_info
  ------------------
  |  Branch (405:28): [True: 109k, False: 0]
  ------------------
  406|   109k|                                        == UNUSED_FOR_REF))
  ------------------
  |  |  595|   109k|#define UNUSED_FOR_REF 0
  ------------------
  407|   109k|            break;
  408|   122k|    }
  409|   109k|    if(i == MAX_REF_BUFS)
  ------------------
  |  |   75|   109k|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (409:8): [True: 0, False: 109k]
  ------------------
  410|      0|    {
  411|      0|        UWORD32 i4_error_code;
  412|      0|        i4_error_code = ERROR_DBP_MANAGER_T;
  413|      0|        return i4_error_code;
  414|      0|    }
  415|       |
  416|       |    /* Create dpb info */
  417|   109k|    ps_dpb_info[i].ps_pic_buf = ps_pic_buf;
  418|   109k|    ps_dpb_info[i].ps_prev_short = ps_dpb_mgr->ps_dpb_st_head;
  419|   109k|    ps_dpb_info[i].u1_buf_id = u1_buf_id;
  420|   109k|    ps_dpb_info[i].u1_used_as_ref = TRUE;
  ------------------
  |  |  591|   109k|#define TRUE    1
  ------------------
  421|   109k|    ps_dpb_info[i].u1_lt_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|   109k|#define MAX_REF_BUFS    32
  ------------------
  422|   109k|    ps_dpb_info[i].i4_frame_num = u4_cur_pic_num;
  423|   109k|    ps_dpb_info[i].ps_pic_buf->i4_frame_num = u4_cur_pic_num;
  424|       |
  425|       |    /* update the head node of linked list to point to the cur Pic */
  426|   109k|    ps_dpb_mgr->ps_dpb_st_head = ps_dpb_info + i;
  427|       |
  428|       |    // Increment Short term bufCount
  429|   109k|    ps_dpb_mgr->u1_num_st_ref_bufs++;
  430|       |    /* Identify the picture as a short term picture buffer */
  431|   109k|    ps_pic_buf->u1_is_short = IS_SHORT_TERM;
  ------------------
  |  |  596|   109k|#define IS_SHORT_TERM  1
  ------------------
  432|       |
  433|   109k|    if((u1_picture_type & 0x03) == FRM_PIC)
  ------------------
  |  |  352|   109k|#define FRM_PIC         0x00
  ------------------
  |  Branch (433:8): [True: 109k, False: 0]
  ------------------
  434|   109k|    {
  435|   109k|        ps_dpb_info[i].u1_used_as_ref = IS_SHORT_TERM;
  ------------------
  |  |  596|   109k|#define IS_SHORT_TERM  1
  ------------------
  436|   109k|        ps_dpb_info[i].s_top_field.u1_reference_info = IS_SHORT_TERM;
  ------------------
  |  |  596|   109k|#define IS_SHORT_TERM  1
  ------------------
  437|   109k|        ps_dpb_info[i].s_bot_field.u1_reference_info = IS_SHORT_TERM;
  ------------------
  |  |  596|   109k|#define IS_SHORT_TERM  1
  ------------------
  438|   109k|    }
  439|       |
  440|   109k|    if((u1_picture_type & 0x03) == TOP_FLD)
  ------------------
  |  |  353|   109k|#define TOP_FLD         0x01
  ------------------
  |  Branch (440:8): [True: 0, False: 109k]
  ------------------
  441|      0|        ps_dpb_info[i].s_top_field.u1_reference_info = IS_SHORT_TERM;
  ------------------
  |  |  596|      0|#define IS_SHORT_TERM  1
  ------------------
  442|       |
  443|   109k|    if((u1_picture_type & 0x03) == BOT_FLD)
  ------------------
  |  |  354|   109k|#define BOT_FLD         0x02
  ------------------
  |  Branch (443:8): [True: 0, False: 109k]
  ------------------
  444|      0|        ps_dpb_info[i].s_bot_field.u1_reference_info = IS_SHORT_TERM;
  ------------------
  |  |  596|      0|#define IS_SHORT_TERM  1
  ------------------
  445|       |
  446|   109k|    return OK;
  ------------------
  |  |  114|   109k|#define OK        0
  ------------------
  447|   109k|}
ih264d_delete_st_node_or_make_lt:
  468|  18.4k|{
  469|  18.4k|    WORD32 i;
  470|  18.4k|    struct dpb_info_t *ps_next_dpb;
  471|  18.4k|    WORD32 i4_frame_num = i4_pic_num;
  472|  18.4k|    struct dpb_info_t *ps_unmark_node = NULL;
  473|  18.4k|    UWORD8 u1_del_node = 0, u1_del_st = 0;
  474|  18.4k|    UWORD8 u1_reference_type = UNUSED_FOR_REF;
  ------------------
  |  |  595|  18.4k|#define UNUSED_FOR_REF 0
  ------------------
  475|  18.4k|    WORD32 ret;
  476|       |
  477|  18.4k|    if(u1_fld_pic_flag)
  ------------------
  |  Branch (477:8): [True: 0, False: 18.4k]
  ------------------
  478|      0|    {
  479|      0|        i4_frame_num = i4_frame_num >> 1;
  480|       |
  481|      0|        if(u4_lt_idx == (MAX_REF_BUFS + 1))
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (481:12): [True: 0, False: 0]
  ------------------
  482|      0|            u1_reference_type = UNUSED_FOR_REF;
  ------------------
  |  |  595|      0|#define UNUSED_FOR_REF 0
  ------------------
  483|      0|        else
  484|      0|            u1_reference_type = IS_LONG_TERM;
  ------------------
  |  |  597|      0|#define IS_LONG_TERM   2
  ------------------
  485|      0|    }
  486|       |
  487|       |    //Find the node with matching picNum
  488|  18.4k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
  489|  18.4k|    if((WORD32)ps_next_dpb->i4_frame_num == i4_frame_num)
  ------------------
  |  Branch (489:8): [True: 17.9k, False: 550]
  ------------------
  490|  17.9k|    {
  491|  17.9k|        ps_unmark_node = ps_next_dpb;
  492|  17.9k|    }
  493|    550|    else
  494|    550|    {
  495|    839|        for(i = 1; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
  ------------------
  |  Branch (495:20): [True: 412, False: 427]
  ------------------
  496|    412|        {
  497|    412|            if((WORD32)ps_next_dpb->ps_prev_short->i4_frame_num == i4_frame_num)
  ------------------
  |  Branch (497:16): [True: 123, False: 289]
  ------------------
  498|    123|                break;
  499|    289|            ps_next_dpb = ps_next_dpb->ps_prev_short;
  500|    289|        }
  501|       |
  502|    550|        if(i == ps_dpb_mgr->u1_num_st_ref_bufs)
  ------------------
  |  Branch (502:12): [True: 427, False: 123]
  ------------------
  503|    427|        {
  504|    427|            if(ps_dpb_mgr->u1_num_gaps)
  ------------------
  |  Branch (504:16): [True: 0, False: 427]
  ------------------
  505|      0|            {
  506|      0|                ret = ih264d_delete_gap_frm_mmco(ps_dpb_mgr, i4_frame_num, &u1_del_st);
  507|      0|                if(ret != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (507:20): [True: 0, False: 0]
  ------------------
  508|      0|                    return ret;
  509|      0|            }
  510|    427|            else
  511|    427|            {
  512|    427|                UWORD32 i4_error_code;
  513|    427|                i4_error_code = ERROR_DBP_MANAGER_T;
  514|       |
  515|    427|                return i4_error_code;
  516|    427|            }
  517|       |
  518|      0|            if(u1_del_st)
  ------------------
  |  Branch (518:16): [True: 0, False: 0]
  ------------------
  519|      0|            {
  520|      0|                UWORD32 i4_error_code;
  521|      0|                i4_error_code = ERROR_DBP_MANAGER_T;
  522|      0|                return i4_error_code;
  523|      0|            }
  524|      0|            else
  525|      0|            {
  526|      0|                return 0;
  527|      0|            }
  528|      0|        }
  529|    123|        else
  530|    123|            ps_unmark_node = ps_next_dpb->ps_prev_short;
  531|    550|    }
  532|       |
  533|  18.0k|    if(u1_fld_pic_flag)
  ------------------
  |  Branch (533:8): [True: 0, False: 18.0k]
  ------------------
  534|      0|    {
  535|       |        /* Mark the corresponding field ( top or bot) as  */
  536|       |        /* UNUSED_FOR_REF or IS_LONG_TERM depending on    */
  537|       |        /* u1_reference_type.                             */
  538|      0|        if(ps_unmark_node->s_top_field.i4_pic_num == i4_pic_num)
  ------------------
  |  Branch (538:12): [True: 0, False: 0]
  ------------------
  539|      0|        {
  540|      0|            ps_unmark_node->s_top_field.u1_reference_info = u1_reference_type;
  541|      0|            ps_unmark_node->s_top_field.u1_long_term_frame_idx = u4_lt_idx;
  542|      0|            {
  543|      0|                UWORD8 *pu1_src = ps_unmark_node->ps_pic_buf->pu1_col_zero_flag;
  544|      0|                WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
  545|      0|                                * ps_dpb_mgr->u2_pic_ht) >> 5);
  546|       |                /* memset the colocated zero u4_flag buffer */
  547|      0|                memset(pu1_src, 0, i4_size);
  548|      0|            }
  549|      0|        }
  550|       |
  551|      0|        else if(ps_unmark_node->s_bot_field.i4_pic_num == i4_pic_num)
  ------------------
  |  Branch (551:17): [True: 0, False: 0]
  ------------------
  552|      0|        {
  553|       |
  554|      0|            ps_unmark_node->s_bot_field.u1_reference_info = u1_reference_type;
  555|      0|            ps_unmark_node->s_bot_field.u1_long_term_frame_idx = u4_lt_idx;
  556|      0|            {
  557|      0|                UWORD8 *pu1_src =
  558|      0|                                ps_unmark_node->ps_pic_buf->pu1_col_zero_flag
  559|      0|                                                + ((ps_dpb_mgr->u2_pic_wd
  560|      0|                                                                * ps_dpb_mgr->u2_pic_ht)
  561|      0|                                                                >> 5);
  562|      0|                WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
  563|      0|                                * ps_dpb_mgr->u2_pic_ht) >> 5);
  564|       |                /* memset the colocated zero u4_flag buffer */
  565|      0|                memset(pu1_src, 0, i4_size);
  566|      0|            }
  567|      0|        }
  568|      0|        ps_unmark_node->u1_used_as_ref =
  569|      0|                        ps_unmark_node->s_top_field.u1_reference_info
  570|      0|                                        | ps_unmark_node->s_bot_field.u1_reference_info;
  571|      0|    }
  572|  18.0k|    else
  573|  18.0k|    {
  574|  18.0k|        ps_unmark_node->u1_used_as_ref = UNUSED_FOR_REF;
  ------------------
  |  |  595|  18.0k|#define UNUSED_FOR_REF 0
  ------------------
  575|  18.0k|        ps_unmark_node->s_top_field.u1_reference_info = UNUSED_FOR_REF;
  ------------------
  |  |  595|  18.0k|#define UNUSED_FOR_REF 0
  ------------------
  576|  18.0k|        ps_unmark_node->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
  ------------------
  |  |  595|  18.0k|#define UNUSED_FOR_REF 0
  ------------------
  577|       |
  578|  18.0k|        {
  579|  18.0k|            UWORD8 *pu1_src = ps_unmark_node->ps_pic_buf->pu1_col_zero_flag;
  580|       |
  581|  18.0k|            WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
  582|  18.0k|                            * ps_dpb_mgr->u2_pic_ht) >> 4);
  583|       |            /* memset the colocated zero u4_flag buffer */
  584|  18.0k|            memset(pu1_src, 0, i4_size);
  585|  18.0k|        }
  586|  18.0k|    }
  587|       |
  588|  18.0k|    if(!(ps_unmark_node->u1_used_as_ref & IS_SHORT_TERM))
  ------------------
  |  |  596|  18.0k|#define IS_SHORT_TERM  1
  ------------------
  |  Branch (588:8): [True: 18.0k, False: 0]
  ------------------
  589|  18.0k|    {
  590|  18.0k|        if(ps_unmark_node == ps_dpb_mgr->ps_dpb_st_head)
  ------------------
  |  Branch (590:12): [True: 17.9k, False: 123]
  ------------------
  591|  17.9k|            ps_dpb_mgr->ps_dpb_st_head = ps_next_dpb->ps_prev_short;
  592|    123|        else
  593|    123|            ps_next_dpb->ps_prev_short = ps_unmark_node->ps_prev_short; //update link
  594|  18.0k|        ps_dpb_mgr->u1_num_st_ref_bufs--; //decrement ST buf count
  595|  18.0k|        u1_del_node = 1;
  596|  18.0k|    }
  597|       |
  598|  18.0k|    if(u4_lt_idx == MAX_REF_BUFS + 1)
  ------------------
  |  |   75|  18.0k|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (598:8): [True: 273, False: 17.7k]
  ------------------
  599|    273|    {
  600|    273|        if(u1_del_node)
  ------------------
  |  Branch (600:12): [True: 273, False: 0]
  ------------------
  601|    273|        {
  602|       |            // Release the physical buffer
  603|    273|            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
  604|    273|                                        ps_unmark_node->u1_buf_id);
  605|    273|            ps_unmark_node->ps_prev_short = NULL;
  606|    273|        }
  607|    273|    }
  608|  17.7k|    else
  609|  17.7k|    {
  610|  17.7k|        WORD32 i4_status;
  611|       |        //If another node has the same LT index, delete that node
  612|  17.7k|        ret = ih264d_delete_lt_node(ps_dpb_mgr, u4_lt_idx,
  613|  17.7k|                              u1_fld_pic_flag, ps_unmark_node, &i4_status);
  614|  17.7k|        if(ret != OK)
  ------------------
  |  |  114|  17.7k|#define OK        0
  ------------------
  |  Branch (614:12): [True: 0, False: 17.7k]
  ------------------
  615|      0|            return ret;
  616|       |        // Now insert the short term node as a long term node
  617|  17.7k|        ret = ih264d_insert_lt_node(ps_dpb_mgr, ps_unmark_node, u4_lt_idx,
  618|  17.7k|                              u1_fld_pic_flag);
  619|  17.7k|        if(ret != OK)
  ------------------
  |  |  114|  17.7k|#define OK        0
  ------------------
  |  Branch (619:12): [True: 0, False: 17.7k]
  ------------------
  620|      0|            return ret;
  621|  17.7k|    }
  622|  18.0k|    return OK;
  ------------------
  |  |  114|  18.0k|#define OK        0
  ------------------
  623|  18.0k|}
ih264d_reset_ref_bufs:
  636|   194k|{
  637|   194k|    WORD32 i;
  638|   194k|    struct dpb_info_t *ps_dpb_info = ps_dpb_mgr->as_dpb_info;
  639|       |
  640|  6.40M|    for(i = 0; i < MAX_REF_BUFS; i++)
  ------------------
  |  |   75|  6.40M|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (640:16): [True: 6.21M, False: 194k]
  ------------------
  641|  6.21M|    {
  642|  6.21M|        if(ps_dpb_info[i].u1_used_as_ref)
  ------------------
  |  Branch (642:12): [True: 78.7k, False: 6.13M]
  ------------------
  643|  78.7k|        {
  644|  78.7k|            ps_dpb_info[i].u1_used_as_ref = UNUSED_FOR_REF;
  ------------------
  |  |  595|  78.7k|#define UNUSED_FOR_REF 0
  ------------------
  645|  78.7k|            ps_dpb_info[i].u1_lt_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  78.7k|#define MAX_REF_BUFS    32
  ------------------
  646|  78.7k|            ps_dpb_info[i].ps_prev_short = NULL;
  647|  78.7k|            ps_dpb_info[i].ps_prev_long = NULL;
  648|  78.7k|            ps_dpb_info[i].ps_pic_buf = NULL;
  649|  78.7k|            ps_dpb_info[i].s_top_field.u1_reference_info = UNUSED_FOR_REF;
  ------------------
  |  |  595|  78.7k|#define UNUSED_FOR_REF 0
  ------------------
  650|  78.7k|            ps_dpb_info[i].s_bot_field.u1_reference_info = UNUSED_FOR_REF;
  ------------------
  |  |  595|  78.7k|#define UNUSED_FOR_REF 0
  ------------------
  651|  78.7k|            ps_dpb_info[i].s_top_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  78.7k|#define MAX_REF_BUFS    32
  ------------------
  652|  78.7k|            ps_dpb_info[i].s_bot_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  78.7k|#define MAX_REF_BUFS    32
  ------------------
  653|       |
  654|       |            //Release physical buffer
  655|  78.7k|            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
  656|  78.7k|                                        ps_dpb_info[i].u1_buf_id);
  657|  78.7k|        }
  658|  6.21M|    }
  659|   194k|    ps_dpb_mgr->u1_num_st_ref_bufs = ps_dpb_mgr->u1_num_lt_ref_bufs = 0;
  660|   194k|    ps_dpb_mgr->ps_dpb_st_head = NULL;
  661|   194k|    ps_dpb_mgr->ps_dpb_ht_head = NULL;
  662|   194k|    ps_dpb_mgr->u1_mmco_error_in_seq = 0;
  663|       |
  664|       |    /* release all gaps */
  665|   194k|    ps_dpb_mgr->u1_num_gaps = 0;
  666|  3.30M|    for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|  3.30M|#define MAX_FRAMES              16
  ------------------
  |  Branch (666:16): [True: 3.10M, False: 194k]
  ------------------
  667|  3.10M|    {
  668|  3.10M|        ps_dpb_mgr->ai4_gaps_start_frm_num[i] = INVALID_FRAME_NUM;
  ------------------
  |  |  601|  3.10M|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  669|  3.10M|        ps_dpb_mgr->ai4_gaps_end_frm_num[i] = 0;
  670|  3.10M|        ps_dpb_mgr->ai1_gaps_per_seq[i] = 0;
  671|  3.10M|    }
  672|   194k|}
ih264d_update_default_index_list:
  687|   109k|{
  688|   109k|    WORD32 i;
  689|   109k|    struct dpb_info_t *ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
  690|       |
  691|   214k|    for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
  ------------------
  |  Branch (691:16): [True: 104k, False: 109k]
  ------------------
  692|   104k|    {
  693|   104k|        ps_dpb_mgr->ps_def_dpb[i] = ps_next_dpb->ps_pic_buf;
  694|   104k|        ps_next_dpb = ps_next_dpb->ps_prev_short;
  695|   104k|    }
  696|       |
  697|   109k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
  698|   127k|    for(;i< ps_dpb_mgr->u1_num_st_ref_bufs + ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (698:10): [True: 17.7k, False: 109k]
  ------------------
  699|  17.7k|    {
  700|  17.7k|        ps_dpb_mgr->ps_def_dpb[i] = ps_next_dpb->ps_pic_buf;
  701|  17.7k|        ps_next_dpb = ps_next_dpb->ps_prev_long;
  702|  17.7k|    }
  703|   109k|    return 0;
  704|   109k|}
ih264d_ref_idx_reordering:
  729|  67.7k|{
  730|  67.7k|    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
  731|  67.7k|    UWORD16 u4_cur_pic_num = ps_dec->ps_cur_slice->u2_frame_num;
  732|       |    /*< Maximum Picture Number Minus 1 */
  733|  67.7k|    UWORD32 ui_max_frame_num =
  734|  67.7k|                    ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1;
  735|       |
  736|  67.7k|    WORD32 i, count = 0;
  737|  67.7k|    UWORD32 ui_remapIdc, ui_nextUev;
  738|  67.7k|    WORD16 u2_pred_frame_num = u4_cur_pic_num;
  739|  67.7k|    WORD32 i_temp;
  740|  67.7k|    UWORD16 u2_def_mod_flag = 0; /* Flag to keep track of which indices have been remapped */
  741|  67.7k|    UWORD8 modCount = 0;
  742|  67.7k|    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
  743|  67.7k|    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
  744|  67.7k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
  745|  67.7k|    UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
  746|       |
  747|  67.7k|    if(u1_field_pic_flag)
  ------------------
  |  Branch (747:8): [True: 0, False: 67.7k]
  ------------------
  748|      0|    {
  749|      0|        u4_cur_pic_num = u4_cur_pic_num * 2 + 1;
  750|      0|        ui_max_frame_num = ui_max_frame_num * 2;
  751|      0|    }
  752|       |
  753|  67.7k|    u2_pred_frame_num = u4_cur_pic_num;
  754|       |
  755|  67.7k|    ui_remapIdc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  756|       |
  757|  69.6k|    while((ui_remapIdc != 3)
  ------------------
  |  Branch (757:11): [True: 68.4k, False: 1.16k]
  ------------------
  758|  68.4k|                    && (count < ps_cur_slice->u1_num_ref_idx_lx_active[uc_lx]))
  ------------------
  |  Branch (758:24): [True: 67.6k, False: 833]
  ------------------
  759|  67.6k|    {
  760|  67.6k|        ui_nextUev = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  761|  67.6k|        if(ui_remapIdc != 2)
  ------------------
  |  Branch (761:12): [True: 62.7k, False: 4.91k]
  ------------------
  762|  62.7k|        {
  763|  62.7k|            if(ui_nextUev > ui_max_frame_num)
  ------------------
  |  Branch (763:16): [True: 3.83k, False: 58.8k]
  ------------------
  764|  3.83k|                return ERROR_DBP_MANAGER_T;
  765|       |
  766|  58.8k|            ui_nextUev = ui_nextUev + 1;
  767|       |
  768|  58.8k|            if(ui_remapIdc == 0)
  ------------------
  |  Branch (768:16): [True: 43.1k, False: 15.7k]
  ------------------
  769|  43.1k|            {
  770|       |                // diffPicNum is -ve
  771|  43.1k|                i_temp = (WORD32)u2_pred_frame_num - (WORD32)ui_nextUev;
  772|  43.1k|                if(i_temp < 0)
  ------------------
  |  Branch (772:20): [True: 7.64k, False: 35.5k]
  ------------------
  773|  7.64k|                    i_temp += ui_max_frame_num;
  774|  43.1k|            }
  775|  15.7k|            else
  776|  15.7k|            {
  777|       |                // diffPicNum is +ve
  778|  15.7k|                i_temp = (WORD32)u2_pred_frame_num + (WORD32)ui_nextUev;
  779|  15.7k|                if(i_temp >= (WORD32)ui_max_frame_num)
  ------------------
  |  Branch (779:20): [True: 3.19k, False: 12.5k]
  ------------------
  780|  3.19k|                    i_temp -= ui_max_frame_num;
  781|  15.7k|            }
  782|       |            /* Find the dpb with the matching picNum (picNum==frameNum for framePic) */
  783|       |
  784|  58.8k|            if(i_temp > u4_cur_pic_num)
  ------------------
  |  Branch (784:16): [True: 19.8k, False: 39.0k]
  ------------------
  785|  19.8k|                i_temp = i_temp - ui_max_frame_num;
  786|       |
  787|   109k|            for(i = 0; i < (ps_cur_slice->u1_initial_list_size[uc_lx]); i++)
  ------------------
  |  Branch (787:24): [True: 52.1k, False: 57.5k]
  ------------------
  788|  52.1k|            {
  789|  52.1k|                if(ps_dpb_mgr->ps_init_dpb[uc_lx][i]->i4_pic_num == i_temp)
  ------------------
  |  Branch (789:20): [True: 1.32k, False: 50.8k]
  ------------------
  790|  1.32k|                    break;
  791|  52.1k|            }
  792|  58.8k|            if(i == (ps_cur_slice->u1_initial_list_size[uc_lx]))
  ------------------
  |  Branch (792:16): [True: 57.5k, False: 1.32k]
  ------------------
  793|  57.5k|            {
  794|  57.5k|                UWORD32 i4_error_code;
  795|  57.5k|                i4_error_code = ERROR_DBP_MANAGER_T;
  796|  57.5k|                return i4_error_code;
  797|  57.5k|            }
  798|       |
  799|  1.32k|            u2_def_mod_flag |= (1 << i);
  800|  1.32k|            ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] =
  801|  1.32k|                            ps_dpb_mgr->ps_init_dpb[uc_lx][i];
  802|  1.32k|            u2_pred_frame_num = i_temp; //update predictor to be the picNum just obtained
  803|  1.32k|        }
  804|  4.91k|        else //2
  805|  4.91k|        {
  806|  4.91k|            UWORD8 u1_lt_idx;
  807|       |
  808|  4.91k|            if(ui_nextUev > (MAX_REF_BUFS + 1))
  ------------------
  |  |   75|  4.91k|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (808:16): [True: 436, False: 4.47k]
  ------------------
  809|    436|                return ERROR_DBP_MANAGER_T;
  810|       |
  811|  4.47k|            u1_lt_idx = (UWORD8)ui_nextUev;
  812|       |
  813|  8.89k|            for(i = 0; i < (ps_cur_slice->u1_initial_list_size[uc_lx]); i++)
  ------------------
  |  Branch (813:24): [True: 4.98k, False: 3.91k]
  ------------------
  814|  4.98k|            {
  815|  4.98k|                if(!ps_dpb_mgr->ps_init_dpb[uc_lx][i]->u1_is_short)
  ------------------
  |  Branch (815:20): [True: 1.00k, False: 3.98k]
  ------------------
  816|  1.00k|                {
  817|  1.00k|                    if(ps_dpb_mgr->ps_init_dpb[uc_lx][i]->u1_long_term_pic_num
  ------------------
  |  Branch (817:24): [True: 565, False: 441]
  ------------------
  818|  1.00k|                                    == u1_lt_idx)
  819|    565|                        break;
  820|  1.00k|                }
  821|  4.98k|            }
  822|  4.47k|            if(i == (ps_cur_slice->u1_initial_list_size[uc_lx]))
  ------------------
  |  Branch (822:16): [True: 3.91k, False: 565]
  ------------------
  823|  3.91k|            {
  824|  3.91k|                UWORD32 i4_error_code;
  825|  3.91k|                i4_error_code = ERROR_DBP_MANAGER_T;
  826|  3.91k|                return i4_error_code;
  827|  3.91k|            }
  828|       |
  829|    565|            u2_def_mod_flag |= (1 << i);
  830|    565|            ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] =
  831|    565|                            ps_dpb_mgr->ps_init_dpb[uc_lx][i];
  832|    565|        }
  833|       |
  834|  1.89k|        ui_remapIdc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  835|       |        /* Get the remapping_idc - 0/1/2/3 */
  836|  1.89k|        count++;
  837|  1.89k|    }
  838|       |
  839|       |    //Handle the ref indices that were not remapped
  840|  7.01k|    for(i = 0; i < (ps_cur_slice->u1_num_ref_idx_lx_active[uc_lx]); i++)
  ------------------
  |  Branch (840:16): [True: 5.02k, False: 1.99k]
  ------------------
  841|  5.02k|    {
  842|  5.02k|        if(!(u2_def_mod_flag & (1 << i)))
  ------------------
  |  Branch (842:12): [True: 4.08k, False: 932]
  ------------------
  843|  4.08k|            ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] =
  844|  4.08k|                            ps_dpb_mgr->ps_init_dpb[uc_lx][i];
  845|  5.02k|    }
  846|  1.99k|    return OK;
  ------------------
  |  |  114|  1.99k|#define OK        0
  ------------------
  847|  67.7k|}
ih264d_read_mmco_commands:
  866|   133k|{
  867|   133k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
  868|   133k|    dec_seq_params_t *ps_sps = ps_pps->ps_sps;
  869|   133k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
  870|   133k|    dpb_commands_t *ps_dpb_cmds = &(ps_dec->s_dpb_cmds_scratch);
  871|   133k|    dec_slice_params_t * ps_slice = ps_dec->ps_cur_slice;
  872|   133k|    WORD32 j;
  873|   133k|    UWORD8 u1_buf_mode;
  874|   133k|    struct MMCParams *ps_mmc_params;
  875|   133k|    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
  876|   133k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  877|   133k|    UWORD32 u4_bit_ofst = ps_dec->ps_bitstrm->u4_ofst;
  878|       |
  879|   133k|    ps_slice->u1_mmco_equalto5 = 0;
  880|   133k|    {
  881|   133k|        if(ps_dec->u1_nal_unit_type == IDR_SLICE_NAL)
  ------------------
  |  |  328|   133k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (881:12): [True: 118k, False: 15.1k]
  ------------------
  882|   118k|        {
  883|   118k|            ps_slice->u1_no_output_of_prior_pics_flag =
  884|   118k|                            ih264d_get_bit_h264(ps_bitstrm);
  885|   118k|            COPYTHECONTEXT("SH: no_output_of_prior_pics_flag",
  886|   118k|                            ps_slice->u1_no_output_of_prior_pics_flag);
  887|   118k|            ps_slice->u1_long_term_reference_flag = ih264d_get_bit_h264(
  888|   118k|                            ps_bitstrm);
  889|   118k|            COPYTHECONTEXT("SH: long_term_reference_flag",
  890|   118k|                            ps_slice->u1_long_term_reference_flag);
  891|   118k|            ps_dpb_cmds->u1_idr_pic = 1;
  892|   118k|            ps_dpb_cmds->u1_no_output_of_prior_pics_flag =
  893|   118k|                            ps_slice->u1_no_output_of_prior_pics_flag;
  894|   118k|            ps_dpb_cmds->u1_long_term_reference_flag =
  895|   118k|                            ps_slice->u1_long_term_reference_flag;
  896|   118k|        }
  897|  15.1k|        else
  898|  15.1k|        {
  899|  15.1k|            u1_buf_mode = ih264d_get_bit_h264(ps_bitstrm); //0 - sliding window; 1 - arbitrary
  900|  15.1k|            COPYTHECONTEXT("SH: adaptive_ref_pic_buffering_flag", u1_buf_mode);
  901|  15.1k|            ps_dpb_cmds->u1_buf_mode = u1_buf_mode;
  902|  15.1k|            j = 0;
  903|       |
  904|  15.1k|            if(u1_buf_mode == 1)
  ------------------
  |  Branch (904:16): [True: 7.44k, False: 7.66k]
  ------------------
  905|  7.44k|            {
  906|  7.44k|                UWORD32 u4_mmco;
  907|  7.44k|                UWORD32 u4_diff_pic_num;
  908|  7.44k|                UWORD32 u4_lt_idx, u4_max_lt_idx_plus1;
  909|       |
  910|  7.44k|                u4_mmco = ih264d_uev(pu4_bitstrm_ofst,
  911|  7.44k|                                     pu4_bitstrm_buf);
  912|  51.2k|                while(u4_mmco != END_OF_MMCO)
  ------------------
  |  |   43|  51.2k|#define END_OF_MMCO                 0
  ------------------
  |  Branch (912:23): [True: 44.6k, False: 6.54k]
  ------------------
  913|  44.6k|                {
  914|  44.6k|                    if (j >= MAX_REF_BUFS)
  ------------------
  |  |   75|  44.6k|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (914:25): [True: 716, False: 43.9k]
  ------------------
  915|    716|                    {
  916|       |#ifdef __ANDROID__
  917|       |                        ALOGE("b/25818142");
  918|       |                        android_errorWriteLog(0x534e4554, "25818142");
  919|       |#endif
  920|    716|                        ps_dpb_cmds->u1_num_of_commands = 0;
  921|    716|                        return -1;
  922|    716|                    }
  923|  43.9k|                    ps_mmc_params = &ps_dpb_cmds->as_mmc_params[j];
  924|  43.9k|                    ps_mmc_params->u4_mmco = u4_mmco;
  925|  43.9k|                    switch(u4_mmco)
  926|  43.9k|                    {
  927|  3.13k|                        case MARK_ST_PICNUM_AS_NONREF:
  ------------------
  |  |   44|  3.13k|#define MARK_ST_PICNUM_AS_NONREF    1
  ------------------
  |  Branch (927:25): [True: 3.13k, False: 40.8k]
  ------------------
  928|  3.13k|                            u4_diff_pic_num = ih264d_uev(pu4_bitstrm_ofst,
  929|  3.13k|                                                         pu4_bitstrm_buf);
  930|       |                            //Get absDiffPicnumMinus1
  931|  3.13k|                            ps_mmc_params->u4_diff_pic_num = u4_diff_pic_num;
  932|  3.13k|                            break;
  933|       |
  934|  2.70k|                        case MARK_LT_INDEX_AS_NONREF:
  ------------------
  |  |   45|  2.70k|#define MARK_LT_INDEX_AS_NONREF     2
  ------------------
  |  Branch (934:25): [True: 2.70k, False: 41.2k]
  ------------------
  935|  2.70k|                            u4_lt_idx = ih264d_uev(pu4_bitstrm_ofst,
  936|  2.70k|                                                   pu4_bitstrm_buf);
  937|  2.70k|                            ps_mmc_params->u4_lt_idx = u4_lt_idx;
  938|  2.70k|                            break;
  939|       |
  940|  1.56k|                        case MARK_ST_PICNUM_AS_LT_INDEX:
  ------------------
  |  |   46|  1.56k|#define MARK_ST_PICNUM_AS_LT_INDEX  3
  ------------------
  |  Branch (940:25): [True: 1.56k, False: 42.4k]
  ------------------
  941|  1.56k|                            u4_diff_pic_num = ih264d_uev(pu4_bitstrm_ofst,
  942|  1.56k|                                                         pu4_bitstrm_buf);
  943|  1.56k|                            ps_mmc_params->u4_diff_pic_num = u4_diff_pic_num;
  944|  1.56k|                            u4_lt_idx = ih264d_uev(pu4_bitstrm_ofst,
  945|  1.56k|                                                   pu4_bitstrm_buf);
  946|  1.56k|                            ps_mmc_params->u4_lt_idx = u4_lt_idx;
  947|  1.56k|                            break;
  948|       |
  949|  3.73k|                        case SET_MAX_LT_INDEX:
  ------------------
  |  |   47|  3.73k|#define SET_MAX_LT_INDEX            4
  ------------------
  |  Branch (949:25): [True: 3.73k, False: 40.2k]
  ------------------
  950|  3.73k|                        {
  951|  3.73k|                            u4_max_lt_idx_plus1 = ih264d_uev(pu4_bitstrm_ofst,
  952|  3.73k|                                                             pu4_bitstrm_buf);
  953|  3.73k|                            if (u4_max_lt_idx_plus1 > ps_sps->u1_num_ref_frames)
  ------------------
  |  Branch (953:33): [True: 180, False: 3.55k]
  ------------------
  954|    180|                            {
  955|       |                                /* Invalid max LT ref index */
  956|    180|                                return -1;
  957|    180|                            }
  958|  3.55k|                            ps_mmc_params->u4_max_lt_idx_plus1 = u4_max_lt_idx_plus1;
  959|  3.55k|                            break;
  960|  3.73k|                        }
  961|  2.64k|                        case RESET_REF_PICTURES:
  ------------------
  |  |   48|  2.64k|#define RESET_REF_PICTURES          5
  ------------------
  |  Branch (961:25): [True: 2.64k, False: 41.3k]
  ------------------
  962|  2.64k|                        {
  963|  2.64k|                            ps_slice->u1_mmco_equalto5 = 1;
  964|  2.64k|                            break;
  965|  3.73k|                        }
  966|       |
  967|  1.65k|                        case SET_LT_INDEX:
  ------------------
  |  |   49|  1.65k|#define SET_LT_INDEX                6
  ------------------
  |  Branch (967:25): [True: 1.65k, False: 42.3k]
  ------------------
  968|  1.65k|                            u4_lt_idx = ih264d_uev(pu4_bitstrm_ofst,
  969|  1.65k|                                                   pu4_bitstrm_buf);
  970|  1.65k|                            ps_mmc_params->u4_lt_idx = u4_lt_idx;
  971|  1.65k|                            break;
  972|       |
  973|  28.5k|                        default:
  ------------------
  |  Branch (973:25): [True: 28.5k, False: 15.4k]
  ------------------
  974|  28.5k|                            break;
  975|  43.9k|                    }
  976|  43.7k|                    u4_mmco = ih264d_uev(pu4_bitstrm_ofst,
  977|  43.7k|                                         pu4_bitstrm_buf);
  978|       |
  979|  43.7k|                    j++;
  980|  43.7k|                }
  981|  6.54k|                ps_dpb_cmds->u1_num_of_commands = j;
  982|  6.54k|            }
  983|  15.1k|        }
  984|   132k|        ps_dpb_cmds->u1_dpb_commands_read = 1;
  985|   132k|        ps_dpb_cmds->u1_dpb_commands_read_slc = 1;
  986|       |
  987|   132k|    }
  988|      0|    u4_bit_ofst = ps_dec->ps_bitstrm->u4_ofst - u4_bit_ofst;
  989|   132k|    return u4_bit_ofst;
  990|   133k|}
ih264d_do_mmco_buffer:
 1017|  11.3k|{
 1018|  11.3k|    WORD32 i;
 1019|  11.3k|    UWORD8 u1_buf_mode, u1_marked_lt;
 1020|  11.3k|    struct dpb_info_t *ps_next_dpb;
 1021|  11.3k|    UWORD8 u1_num_gaps;
 1022|  11.3k|    UWORD8 u1_del_node = 1;
 1023|  11.3k|    UWORD8 u1_insert_st_pic = 1;
 1024|  11.3k|    WORD32 ret;
 1025|  11.3k|    UNUSED(u1_nal_unit_type);
  ------------------
  |  |   45|  11.3k|#define UNUSED(x) ((void)(x))
  ------------------
 1026|  11.3k|    UNUSED(u2_u4_max_pic_num_minus1);
  ------------------
  |  |   45|  11.3k|#define UNUSED(x) ((void)(x))
  ------------------
 1027|  11.3k|    u1_buf_mode = ps_dpb_cmds->u1_buf_mode; //0 - sliding window; 1 - Adaptive
 1028|  11.3k|    u1_marked_lt = 0;
 1029|  11.3k|    u1_num_gaps = ps_dpb_mgr->u1_num_gaps;
 1030|       |
 1031|  11.3k|    if(!u1_buf_mode)
  ------------------
  |  Branch (1031:8): [True: 6.52k, False: 4.82k]
  ------------------
 1032|  6.52k|    {
 1033|       |        //Sliding window - implements 8.2.5.3
 1034|  6.52k|        if((ps_dpb_mgr->u1_num_st_ref_bufs
  ------------------
  |  Branch (1034:12): [True: 2.86k, False: 3.66k]
  ------------------
 1035|  6.52k|                        + ps_dpb_mgr->u1_num_lt_ref_bufs + u1_num_gaps)
 1036|  6.52k|                        == u1_numRef_frames_for_seq)
 1037|  2.86k|        {
 1038|  2.86k|            UWORD8 u1_new_node_flag = 1;
 1039|  2.86k|            if((0 == ps_dpb_mgr->u1_num_st_ref_bufs) && (0 == u1_num_gaps))
  ------------------
  |  Branch (1039:16): [True: 224, False: 2.64k]
  |  Branch (1039:57): [True: 224, False: 0]
  ------------------
 1040|    224|            {
 1041|    224|                UWORD32 i4_error_code;
 1042|    224|                i4_error_code = ERROR_DBP_MANAGER_T;
 1043|    224|                return i4_error_code;
 1044|    224|            }
 1045|       |
 1046|       |            // Chase the links to reach the last but one picNum, if available
 1047|  2.64k|            ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
 1048|       |
 1049|  2.64k|            if(ps_dpb_mgr->u1_num_st_ref_bufs > 1)
  ------------------
  |  Branch (1049:16): [True: 897, False: 1.74k]
  ------------------
 1050|    897|            {
 1051|    897|                if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
  ------------------
  |  Branch (1051:20): [True: 86, False: 811]
  ------------------
 1052|     86|                {
 1053|       |                    /* Incase of  filed pictures top_field has been allocated   */
 1054|       |                    /* picture buffer and complementary bottom field pair comes */
 1055|       |                    /* then the sliding window mechanism should not allocate a  */
 1056|       |                    /* new node                                                 */
 1057|     86|                    u1_new_node_flag = 0;
 1058|     86|                }
 1059|       |
 1060|  1.75k|                for(i = 1; i < (ps_dpb_mgr->u1_num_st_ref_bufs - 1); i++)
  ------------------
  |  Branch (1060:28): [True: 853, False: 897]
  ------------------
 1061|    853|                {
 1062|    853|                    if(ps_next_dpb == NULL)
  ------------------
  |  Branch (1062:24): [True: 0, False: 853]
  ------------------
 1063|      0|                    {
 1064|      0|                        UWORD32 i4_error_code;
 1065|      0|                        i4_error_code = ERROR_DBP_MANAGER_T;
 1066|      0|                        return i4_error_code;
 1067|      0|                    }
 1068|    853|                    if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
  ------------------
  |  Branch (1068:24): [True: 157, False: 696]
  ------------------
 1069|    157|                    {
 1070|       |                        /* Incase of  field pictures top_field has been allocated   */
 1071|       |                        /* picture buffer and complementary bottom field pair comes */
 1072|       |                        /* then the sliding window mechanism should not allocate a  */
 1073|       |                        /* new node                                                 */
 1074|    157|                        u1_new_node_flag = 0;
 1075|    157|                    }
 1076|    853|                    ps_next_dpb = ps_next_dpb->ps_prev_short;
 1077|    853|                }
 1078|       |
 1079|    897|                if(ps_next_dpb->ps_prev_short->ps_prev_short != NULL)
  ------------------
  |  Branch (1079:20): [True: 34, False: 863]
  ------------------
 1080|     34|                {
 1081|     34|                    UWORD32 i4_error_code;
 1082|     34|                    i4_error_code = ERROR_DBP_MANAGER_T;
 1083|     34|                    return i4_error_code;
 1084|     34|                }
 1085|       |
 1086|    863|                if(u1_new_node_flag)
  ------------------
  |  Branch (1086:20): [True: 753, False: 110]
  ------------------
 1087|    753|                {
 1088|    753|                    if(u1_num_gaps)
  ------------------
  |  Branch (1088:24): [True: 0, False: 753]
  ------------------
 1089|      0|                    {
 1090|      0|                        ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
 1091|      0|                                                            ps_next_dpb->ps_prev_short->i4_frame_num,
 1092|      0|                                                            &u1_del_node);
 1093|      0|                        if(ret != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1093:28): [True: 0, False: 0]
  ------------------
 1094|      0|                            return ret;
 1095|      0|                    }
 1096|       |
 1097|    753|                    if(u1_del_node)
  ------------------
  |  Branch (1097:24): [True: 753, False: 0]
  ------------------
 1098|    753|                    {
 1099|    753|                        ps_dpb_mgr->u1_num_st_ref_bufs--;
 1100|    753|                        ps_next_dpb->ps_prev_short->u1_used_as_ref =
 1101|    753|                                        UNUSED_FOR_REF;
  ------------------
  |  |  595|    753|#define UNUSED_FOR_REF 0
  ------------------
 1102|    753|                        ps_next_dpb->ps_prev_short->s_top_field.u1_reference_info =
 1103|    753|                                        UNUSED_FOR_REF;
  ------------------
  |  |  595|    753|#define UNUSED_FOR_REF 0
  ------------------
 1104|    753|                        ps_next_dpb->ps_prev_short->s_bot_field.u1_reference_info =
 1105|    753|                                        UNUSED_FOR_REF;
  ------------------
  |  |  595|    753|#define UNUSED_FOR_REF 0
  ------------------
 1106|    753|                        ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
 1107|    753|                                                    ps_next_dpb->ps_prev_short->u1_buf_id);
 1108|    753|                        ps_next_dpb->ps_prev_short->ps_pic_buf = NULL;
 1109|    753|                        ps_next_dpb->ps_prev_short = NULL;
 1110|    753|                    }
 1111|    753|                }
 1112|    863|            }
 1113|  1.74k|            else
 1114|  1.74k|            {
 1115|  1.74k|                if(ps_dpb_mgr->u1_num_st_ref_bufs)
  ------------------
  |  Branch (1115:20): [True: 1.74k, False: 0]
  ------------------
 1116|  1.74k|                {
 1117|  1.74k|                    ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
 1118|  1.74k|                                                       ps_next_dpb->i4_frame_num,
 1119|  1.74k|                                                       &u1_del_node);
 1120|  1.74k|                    if(ret != OK)
  ------------------
  |  |  114|  1.74k|#define OK        0
  ------------------
  |  Branch (1120:24): [True: 0, False: 1.74k]
  ------------------
 1121|      0|                        return ret;
 1122|  1.74k|                    if((ps_next_dpb->i4_frame_num != (WORD32)u4_cur_pic_num)
  ------------------
  |  Branch (1122:24): [True: 389, False: 1.35k]
  ------------------
 1123|    389|                                    && u1_del_node)
  ------------------
  |  Branch (1123:40): [True: 389, False: 0]
  ------------------
 1124|    389|                    {
 1125|    389|                        ps_dpb_mgr->u1_num_st_ref_bufs--;
 1126|    389|                        ps_next_dpb->u1_used_as_ref = FALSE;
  ------------------
  |  |  592|    389|#define FALSE   0
  ------------------
 1127|    389|                        ps_next_dpb->s_top_field.u1_reference_info =
 1128|    389|                                        UNUSED_FOR_REF;
  ------------------
  |  |  595|    389|#define UNUSED_FOR_REF 0
  ------------------
 1129|    389|                        ps_next_dpb->s_bot_field.u1_reference_info =
 1130|    389|                                        UNUSED_FOR_REF;
  ------------------
  |  |  595|    389|#define UNUSED_FOR_REF 0
  ------------------
 1131|    389|                        ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
 1132|    389|                                                    ps_next_dpb->u1_buf_id);
 1133|    389|                        ps_next_dpb->ps_pic_buf = NULL;
 1134|    389|                        ps_next_dpb->ps_prev_short = NULL;
 1135|    389|                        ps_dpb_mgr->ps_dpb_st_head = NULL;
 1136|    389|                        ps_next_dpb = NULL;
 1137|    389|                    }
 1138|  1.35k|                    else if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
  ------------------
  |  Branch (1138:29): [True: 1.35k, False: 0]
  ------------------
 1139|  1.35k|                    {
 1140|  1.35k|                        if(u1_curr_pic_in_err)
  ------------------
  |  Branch (1140:28): [True: 0, False: 1.35k]
  ------------------
 1141|      0|                        {
 1142|      0|                            u1_insert_st_pic = 0;
 1143|      0|                        }
 1144|  1.35k|                        else if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
  ------------------
  |  Branch (1144:33): [True: 1.35k, False: 0]
  ------------------
 1145|  1.35k|                        {
 1146|  1.35k|                            ps_dpb_mgr->u1_num_st_ref_bufs--;
 1147|  1.35k|                            ps_next_dpb->u1_used_as_ref = FALSE;
  ------------------
  |  |  592|  1.35k|#define FALSE   0
  ------------------
 1148|  1.35k|                            ps_next_dpb->s_top_field.u1_reference_info =
 1149|  1.35k|                                            UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.35k|#define UNUSED_FOR_REF 0
  ------------------
 1150|  1.35k|                            ps_next_dpb->s_bot_field.u1_reference_info =
 1151|  1.35k|                                            UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.35k|#define UNUSED_FOR_REF 0
  ------------------
 1152|  1.35k|                            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
 1153|  1.35k|                                                        ps_next_dpb->u1_buf_id);
 1154|  1.35k|                            ps_next_dpb->ps_pic_buf = NULL;
 1155|  1.35k|                            ps_next_dpb = NULL;
 1156|  1.35k|                        }
 1157|  1.35k|                    }
 1158|  1.74k|                }
 1159|      0|                else
 1160|      0|                {
 1161|      0|                    ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
 1162|      0|                                                        INVALID_FRAME_NUM,
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
 1163|      0|                                                        &u1_del_node);
 1164|      0|                    if(ret != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1164:24): [True: 0, False: 0]
  ------------------
 1165|      0|                        return ret;
 1166|      0|                    if(u1_del_node)
  ------------------
  |  Branch (1166:24): [True: 0, False: 0]
  ------------------
 1167|      0|                    {
 1168|      0|                        UWORD32 i4_error_code;
 1169|      0|                        i4_error_code = ERROR_DBP_MANAGER_T;
 1170|      0|                        return i4_error_code;
 1171|      0|                    }
 1172|      0|                }
 1173|  1.74k|            }
 1174|  2.64k|        }
 1175|  6.52k|    }
 1176|  4.82k|    else
 1177|  4.82k|    {
 1178|       |        //Adaptive memory control - implements 8.2.5.4
 1179|  4.82k|        UWORD32 u4_mmco;
 1180|  4.82k|        UWORD32 u4_diff_pic_num;
 1181|  4.82k|        WORD32 i4_pic_num;
 1182|  4.82k|        UWORD32 u4_lt_idx;
 1183|  4.82k|        WORD32 j;
 1184|  4.82k|        struct MMCParams *ps_mmc_params;
 1185|       |
 1186|  36.5k|        for(j = 0; j < ps_dpb_cmds->u1_num_of_commands; j++)
  ------------------
  |  Branch (1186:20): [True: 33.1k, False: 3.40k]
  ------------------
 1187|  33.1k|        {
 1188|  33.1k|            ps_mmc_params = &ps_dpb_cmds->as_mmc_params[j];
 1189|  33.1k|            u4_mmco = ps_mmc_params->u4_mmco; //Get MMCO
 1190|       |
 1191|  33.1k|            switch(u4_mmco)
 1192|  33.1k|            {
 1193|  2.35k|                case MARK_ST_PICNUM_AS_NONREF:
  ------------------
  |  |   44|  2.35k|#define MARK_ST_PICNUM_AS_NONREF    1
  ------------------
  |  Branch (1193:17): [True: 2.35k, False: 30.8k]
  ------------------
 1194|  2.35k|                {
 1195|       |
 1196|  2.35k|                    {
 1197|  2.35k|                        UWORD32 i4_cur_pic_num = u4_cur_pic_num;
 1198|  2.35k|                        WORD64 i8_pic_num;
 1199|  2.35k|                        u4_diff_pic_num = ps_mmc_params->u4_diff_pic_num; //Get absDiffPicnumMinus1
 1200|  2.35k|                        if(u1_fld_pic_flag)
  ------------------
  |  Branch (1200:28): [True: 0, False: 2.35k]
  ------------------
 1201|      0|                            i4_cur_pic_num = i4_cur_pic_num * 2 + 1;
 1202|  2.35k|                        i8_pic_num = ((WORD64)i4_cur_pic_num - ((WORD64)u4_diff_pic_num + 1));
 1203|  2.35k|                        if(IS_OUT_OF_RANGE_S32(i8_pic_num))
  ------------------
  |  |   58|  2.35k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 166, False: 2.18k]
  |  |  |  Branch (58:54): [True: 0, False: 2.18k]
  |  |  ------------------
  ------------------
 1204|    166|                        {
 1205|    166|                            return ERROR_DBP_MANAGER_T;
 1206|    166|                        }
 1207|  2.18k|                        i4_pic_num = i8_pic_num;
 1208|  2.18k|                    }
 1209|       |
 1210|  2.18k|                    if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
  ------------------
  |  Branch (1210:24): [True: 614, False: 1.57k]
  ------------------
 1211|    614|                    {
 1212|    614|                        ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
 1213|    614|                                                               i4_pic_num,
 1214|    614|                                                               MAX_REF_BUFS + 1,
  ------------------
  |  |   75|    614|#define MAX_REF_BUFS    32
  ------------------
 1215|    614|                                                               u1_fld_pic_flag);
 1216|    614|                        if(ret != OK)
  ------------------
  |  |  114|    614|#define OK        0
  ------------------
  |  Branch (1216:28): [True: 341, False: 273]
  ------------------
 1217|    341|                            return ret;
 1218|    614|                    }
 1219|  1.57k|                    else
 1220|  1.57k|                    {
 1221|  1.57k|                        UWORD8 u1_dummy;
 1222|  1.57k|                        ret = ih264d_delete_gap_frm_mmco(ps_dpb_mgr, i4_pic_num, &u1_dummy);
 1223|  1.57k|                        if(ret != OK)
  ------------------
  |  |  114|  1.57k|#define OK        0
  ------------------
  |  Branch (1223:28): [True: 0, False: 1.57k]
  ------------------
 1224|      0|                            return ret;
 1225|  1.57k|                    }
 1226|  1.84k|                    break;
 1227|  2.18k|                }
 1228|  1.97k|                case MARK_LT_INDEX_AS_NONREF:
  ------------------
  |  |   45|  1.97k|#define MARK_LT_INDEX_AS_NONREF     2
  ------------------
  |  Branch (1228:17): [True: 1.97k, False: 31.1k]
  ------------------
 1229|  1.97k|                {
 1230|  1.97k|                    WORD32 i4_status;
 1231|  1.97k|                    u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
 1232|  1.97k|                    ret = ih264d_delete_lt_node(ps_dpb_mgr,
 1233|  1.97k|                                                u4_lt_idx,
 1234|  1.97k|                                                u1_fld_pic_flag,
 1235|  1.97k|                                                0, &i4_status);
 1236|  1.97k|                    if(ret != OK)
  ------------------
  |  |  114|  1.97k|#define OK        0
  ------------------
  |  Branch (1236:24): [True: 0, False: 1.97k]
  ------------------
 1237|      0|                        return ret;
 1238|  1.97k|                    if(i4_status)
  ------------------
  |  Branch (1238:24): [True: 134, False: 1.83k]
  ------------------
 1239|    134|                    {
 1240|    134|                        UWORD32 i4_error_code;
 1241|    134|                        i4_error_code = ERROR_DBP_MANAGER_T;
 1242|    134|                        return i4_error_code;
 1243|    134|                    }
 1244|  1.83k|                    break;
 1245|  1.97k|                }
 1246|       |
 1247|  1.83k|                case MARK_ST_PICNUM_AS_LT_INDEX:
  ------------------
  |  |   46|  1.48k|#define MARK_ST_PICNUM_AS_LT_INDEX  3
  ------------------
  |  Branch (1247:17): [True: 1.48k, False: 31.6k]
  ------------------
 1248|  1.48k|                {
 1249|  1.48k|                    {
 1250|  1.48k|                        UWORD32 i4_cur_pic_num = u4_cur_pic_num;
 1251|  1.48k|                        WORD64 i8_pic_num;
 1252|  1.48k|                        u4_diff_pic_num = ps_mmc_params->u4_diff_pic_num; //Get absDiffPicnumMinus1
 1253|  1.48k|                        if(u1_fld_pic_flag)
  ------------------
  |  Branch (1253:28): [True: 0, False: 1.48k]
  ------------------
 1254|      0|                            i4_cur_pic_num = i4_cur_pic_num * 2 + 1;
 1255|       |
 1256|  1.48k|                        i8_pic_num = (WORD64)i4_cur_pic_num - ((WORD64)u4_diff_pic_num + 1);
 1257|  1.48k|                        if(IS_OUT_OF_RANGE_S32(i8_pic_num))
  ------------------
  |  |   58|  1.48k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 147, False: 1.33k]
  |  |  |  Branch (58:54): [True: 0, False: 1.33k]
  |  |  ------------------
  ------------------
 1258|    147|                        {
 1259|    147|                            return ERROR_DBP_MANAGER_T;
 1260|    147|                        }
 1261|  1.33k|                        i4_pic_num = i8_pic_num;
 1262|  1.33k|                    }
 1263|       |
 1264|      0|                    u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
 1265|       |
 1266|  1.33k|                    if((ps_dpb_mgr->u1_max_lt_frame_idx == NO_LONG_TERM_INDICIES) ||
  ------------------
  |  |   53|  1.33k|#define NO_LONG_TERM_INDICIES      255
  ------------------
  |  Branch (1266:24): [True: 150, False: 1.18k]
  ------------------
 1267|  1.18k|                        (u4_lt_idx > ps_dpb_mgr->u1_max_lt_frame_idx))
  ------------------
  |  Branch (1267:25): [True: 83, False: 1.10k]
  ------------------
 1268|    233|                    {
 1269|    233|                        return ERROR_DBP_MANAGER_T;
 1270|    233|                    }
 1271|       |
 1272|  1.10k|                    if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
  ------------------
  |  Branch (1272:24): [True: 271, False: 829]
  ------------------
 1273|    271|                    {
 1274|    271|                        ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
 1275|    271|                                                               i4_pic_num, u4_lt_idx,
 1276|    271|                                                               u1_fld_pic_flag);
 1277|    271|                        if(ret != OK)
  ------------------
  |  |  114|    271|#define OK        0
  ------------------
  |  Branch (1277:28): [True: 86, False: 185]
  ------------------
 1278|     86|                            return ret;
 1279|    271|                    }
 1280|  1.01k|                    break;
 1281|  1.10k|                }
 1282|  11.5k|                case SET_MAX_LT_INDEX:
  ------------------
  |  |   47|  11.5k|#define SET_MAX_LT_INDEX            4
  ------------------
  |  Branch (1282:17): [True: 11.5k, False: 21.5k]
  ------------------
 1283|  11.5k|                {
 1284|  11.5k|                    UWORD8 uc_numLT = ps_dpb_mgr->u1_num_lt_ref_bufs;
 1285|  11.5k|                    u4_lt_idx = ps_mmc_params->u4_max_lt_idx_plus1; //Get Max_long_term_index_plus1
 1286|  11.5k|                    if(u4_lt_idx <= ps_dpb_mgr->u1_max_lt_frame_idx
  ------------------
  |  Branch (1286:24): [True: 5.49k, False: 6.08k]
  ------------------
 1287|  5.49k|                                    && uc_numLT > 0)
  ------------------
  |  Branch (1287:40): [True: 3.03k, False: 2.46k]
  ------------------
 1288|  3.03k|                    {
 1289|  3.03k|                        struct dpb_info_t *ps_nxtDPB;
 1290|       |                        //Set all LT buffers with index >= u4_lt_idx to nonreference
 1291|  3.03k|                        ps_nxtDPB = ps_dpb_mgr->ps_dpb_ht_head;
 1292|  3.03k|                        ps_next_dpb = ps_nxtDPB->ps_prev_long;
 1293|  3.03k|                        if(ps_nxtDPB->u1_lt_idx >= u4_lt_idx)
  ------------------
  |  Branch (1293:28): [True: 844, False: 2.18k]
  ------------------
 1294|    844|                        {
 1295|    844|                            i = 0;
 1296|    844|                            ps_dpb_mgr->ps_dpb_ht_head = NULL;
 1297|    844|                        }
 1298|  2.18k|                        else
 1299|  2.18k|                        {
 1300|  2.89k|                            for(i = 1; i < uc_numLT; i++)
  ------------------
  |  Branch (1300:40): [True: 1.53k, False: 1.36k]
  ------------------
 1301|  1.53k|                            {
 1302|  1.53k|                                if(ps_next_dpb->u1_lt_idx >= u4_lt_idx)
  ------------------
  |  Branch (1302:36): [True: 827, False: 706]
  ------------------
 1303|    827|                                    break;
 1304|    706|                                ps_nxtDPB = ps_next_dpb;
 1305|    706|                                ps_next_dpb = ps_next_dpb->ps_prev_long;
 1306|    706|                            }
 1307|  2.18k|                            ps_nxtDPB->ps_prev_long = NULL; //Terminate the link of the closest LTIndex that is <=Max
 1308|  2.18k|                        }
 1309|  3.03k|                        ps_dpb_mgr->u1_num_lt_ref_bufs = i;
 1310|  3.03k|                        if(i == 0)
  ------------------
  |  Branch (1310:28): [True: 844, False: 2.18k]
  ------------------
 1311|    844|                            ps_next_dpb = ps_nxtDPB;
 1312|       |
 1313|  4.80k|                        for(; i < uc_numLT; i++)
  ------------------
  |  Branch (1313:31): [True: 1.77k, False: 3.03k]
  ------------------
 1314|  1.77k|                        {
 1315|  1.77k|                            ps_nxtDPB = ps_next_dpb;
 1316|  1.77k|                            ps_nxtDPB->u1_lt_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  1.77k|#define MAX_REF_BUFS    32
  ------------------
 1317|  1.77k|                            ps_nxtDPB->u1_used_as_ref = UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.77k|#define UNUSED_FOR_REF 0
  ------------------
 1318|  1.77k|                            ps_nxtDPB->s_top_field.u1_reference_info =
 1319|  1.77k|                                            UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.77k|#define UNUSED_FOR_REF 0
  ------------------
 1320|  1.77k|                            ps_nxtDPB->s_bot_field.u1_reference_info =
 1321|  1.77k|                                            UNUSED_FOR_REF;
  ------------------
  |  |  595|  1.77k|#define UNUSED_FOR_REF 0
  ------------------
 1322|       |
 1323|  1.77k|                            ps_nxtDPB->ps_pic_buf = NULL;
 1324|       |                            //Release buffer
 1325|  1.77k|                            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
 1326|  1.77k|                                                        ps_nxtDPB->u1_buf_id);
 1327|  1.77k|                            ps_next_dpb = ps_nxtDPB->ps_prev_long;
 1328|  1.77k|                            ps_nxtDPB->ps_prev_long = NULL;
 1329|  1.77k|                        }
 1330|  3.03k|                    }
 1331|  11.5k|                    if(u4_lt_idx == 0)
  ------------------
  |  Branch (1331:24): [True: 1.05k, False: 10.5k]
  ------------------
 1332|  1.05k|                    {
 1333|  1.05k|                        ps_dpb_mgr->u1_max_lt_frame_idx = NO_LONG_TERM_INDICIES;
  ------------------
  |  |   53|  1.05k|#define NO_LONG_TERM_INDICIES      255
  ------------------
 1334|  1.05k|                    }
 1335|  10.5k|                    else
 1336|  10.5k|                    {
 1337|  10.5k|                        ps_dpb_mgr->u1_max_lt_frame_idx = u4_lt_idx - 1;
 1338|  10.5k|                    }
 1339|       |
 1340|  11.5k|                    break;
 1341|  1.10k|                }
 1342|  4.23k|                case SET_LT_INDEX:
  ------------------
  |  |   49|  4.23k|#define SET_LT_INDEX                6
  ------------------
  |  Branch (1342:17): [True: 4.23k, False: 28.9k]
  ------------------
 1343|  4.23k|                {
 1344|  4.23k|                    u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
 1345|  4.23k|                    if((ps_dpb_mgr->u1_max_lt_frame_idx == NO_LONG_TERM_INDICIES) ||
  ------------------
  |  |   53|  4.23k|#define NO_LONG_TERM_INDICIES      255
  ------------------
  |  Branch (1345:24): [True: 129, False: 4.10k]
  ------------------
 1346|  4.10k|                        (u4_lt_idx > ps_dpb_mgr->u1_max_lt_frame_idx))
  ------------------
  |  Branch (1346:25): [True: 114, False: 3.98k]
  ------------------
 1347|    243|                    {
 1348|    243|                        return ERROR_DBP_MANAGER_T;
 1349|    243|                    }
 1350|  3.98k|                    ret = ih264d_insert_st_node(ps_dpb_mgr, ps_pic_buf, u1_buf_id,
 1351|  3.98k|                                          u4_cur_pic_num);
 1352|  3.98k|                    if(ret != OK)
  ------------------
  |  |  114|  3.98k|#define OK        0
  ------------------
  |  Branch (1352:24): [True: 73, False: 3.91k]
  ------------------
 1353|     73|                        return ret;
 1354|       |
 1355|  3.91k|                    if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
  ------------------
  |  Branch (1355:24): [True: 3.91k, False: 0]
  ------------------
 1356|       |
 1357|  3.91k|                    {
 1358|  3.91k|                        ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
 1359|  3.91k|                                                               u4_cur_pic_num,
 1360|  3.91k|                                                               u4_lt_idx,
 1361|  3.91k|                                                               u1_fld_pic_flag);
 1362|  3.91k|                        if(ret != OK)
  ------------------
  |  |  114|  3.91k|#define OK        0
  ------------------
  |  Branch (1362:28): [True: 0, False: 3.91k]
  ------------------
 1363|      0|                            return ret;
 1364|  3.91k|                    }
 1365|      0|                    else
 1366|      0|                    {
 1367|      0|                        return ERROR_DBP_MANAGER_T;
 1368|      0|                    }
 1369|       |
 1370|  3.91k|                    u1_marked_lt = 1;
 1371|  3.91k|                    break;
 1372|  3.91k|                }
 1373|       |
 1374|  11.5k|                default:
  ------------------
  |  Branch (1374:17): [True: 11.5k, False: 21.6k]
  ------------------
 1375|  11.5k|                    break;
 1376|  33.1k|            }
 1377|  31.7k|            if(u4_mmco == RESET_REF_PICTURES || u4_mmco == RESET_ALL_PICTURES)
  ------------------
  |  |   48|  63.4k|#define RESET_REF_PICTURES          5
  ------------------
                          if(u4_mmco == RESET_REF_PICTURES || u4_mmco == RESET_ALL_PICTURES)
  ------------------
  |  |   51|  29.1k|#define RESET_ALL_PICTURES          8
  ------------------
  |  Branch (1377:16): [True: 2.61k, False: 29.1k]
  |  Branch (1377:49): [True: 307, False: 28.8k]
  ------------------
 1378|  2.91k|            {
 1379|  2.91k|                ih264d_reset_ref_bufs(ps_dpb_mgr);
 1380|  2.91k|                u4_cur_pic_num = 0;
 1381|  2.91k|            }
 1382|  31.7k|        }
 1383|  4.82k|    }
 1384|  9.67k|    if(!u1_marked_lt && u1_insert_st_pic)
  ------------------
  |  Branch (1384:8): [True: 7.86k, False: 1.80k]
  |  Branch (1384:25): [True: 7.86k, False: 0]
  ------------------
 1385|  7.86k|    {
 1386|  7.86k|        ret = ih264d_insert_st_node(ps_dpb_mgr, ps_pic_buf, u1_buf_id,
 1387|  7.86k|                              u4_cur_pic_num);
 1388|  7.86k|        if(ret != OK)
  ------------------
  |  |  114|  7.86k|#define OK        0
  ------------------
  |  Branch (1388:12): [True: 92, False: 7.77k]
  ------------------
 1389|     92|            return ret;
 1390|  7.86k|    }
 1391|  9.58k|    return OK;
  ------------------
  |  |  114|  9.58k|#define OK        0
  ------------------
 1392|  9.67k|}
ih264d_release_pics_in_dpb:
 1417|      3|{
 1418|      3|    WORD8 i;
 1419|      3|    dec_struct_t *ps_dec = (dec_struct_t *)pv_dec;
 1420|       |
 1421|     40|    for(i = 0; i < u1_disp_bufs; i++)
  ------------------
  |  Branch (1421:16): [True: 37, False: 3]
  ------------------
 1422|     37|    {
 1423|     37|        ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
 1424|     37|                              i,
 1425|     37|                              BUF_MGR_REF);
  ------------------
  |  |   50|     37|#define BUF_MGR_REF          (1 << 2)
  ------------------
 1426|     37|        ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
 1427|     37|                              ps_dec->as_buf_id_info_map[i].mv_buf_id,
 1428|     37|                              BUF_MGR_REF);
  ------------------
  |  |   50|     37|#define BUF_MGR_REF          (1 << 2)
  ------------------
 1429|     37|    }
 1430|      3|}
ih264d_delete_gap_frm_sliding:
 1460|  1.74k|{
 1461|  1.74k|    WORD8 i1_gap_idx, i, j, j_min;
 1462|  1.74k|    WORD32 *pi4_gaps_start_frm_num, *pi4_gaps_end_frm_num, i4_gap_frame_num;
 1463|  1.74k|    WORD32 i4_start_frm_num, i4_end_frm_num;
 1464|  1.74k|    WORD32 i4_max_frm_num;
 1465|  1.74k|    WORD32 i4_frm_num, i4_gap_frm_num_min;
 1466|       |
 1467|       |    /* find the least frame num from gaps and current DPB node    */
 1468|       |    /* Delete the least one                                       */
 1469|  1.74k|    *pu1_del_node = 1;
 1470|  1.74k|    if(0 == ps_dpb_mgr->u1_num_gaps)
  ------------------
  |  Branch (1470:8): [True: 1.74k, False: 0]
  ------------------
 1471|  1.74k|        return OK;
  ------------------
  |  |  114|  1.74k|#define OK        0
  ------------------
 1472|      0|    pi4_gaps_start_frm_num = ps_dpb_mgr->ai4_gaps_start_frm_num;
 1473|      0|    pi4_gaps_end_frm_num = ps_dpb_mgr->ai4_gaps_end_frm_num;
 1474|      0|    i4_gap_frame_num = INVALID_FRAME_NUM;
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
 1475|      0|    i4_max_frm_num = ps_dpb_mgr->i4_max_frm_num;
 1476|       |
 1477|      0|    i1_gap_idx = -1;
 1478|      0|    if(INVALID_FRAME_NUM != i4_frame_num)
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  |  Branch (1478:8): [True: 0, False: 0]
  ------------------
 1479|      0|    {
 1480|      0|        i4_gap_frame_num = i4_frame_num;
 1481|      0|        for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1481:20): [True: 0, False: 0]
  ------------------
 1482|      0|        {
 1483|      0|            i4_start_frm_num = pi4_gaps_start_frm_num[i];
 1484|      0|            if(INVALID_FRAME_NUM != i4_start_frm_num)
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  |  Branch (1484:16): [True: 0, False: 0]
  ------------------
 1485|      0|            {
 1486|      0|                i4_end_frm_num = pi4_gaps_end_frm_num[i];
 1487|      0|                if(i4_end_frm_num < i4_max_frm_num)
  ------------------
  |  Branch (1487:20): [True: 0, False: 0]
  ------------------
 1488|      0|                {
 1489|      0|                    if(i4_start_frm_num <= i4_gap_frame_num)
  ------------------
  |  Branch (1489:24): [True: 0, False: 0]
  ------------------
 1490|      0|                    {
 1491|      0|                        i4_gap_frame_num = i4_start_frm_num;
 1492|      0|                        i1_gap_idx = i;
 1493|      0|                    }
 1494|      0|                }
 1495|      0|                else
 1496|      0|                {
 1497|      0|                    if(((i4_start_frm_num <= i4_gap_frame_num)
  ------------------
  |  Branch (1497:25): [True: 0, False: 0]
  ------------------
 1498|      0|                                    && (i4_gap_frame_num <= i4_max_frm_num))
  ------------------
  |  Branch (1498:40): [True: 0, False: 0]
  ------------------
 1499|      0|                                    || ((i4_start_frm_num >= i4_gap_frame_num)
  ------------------
  |  Branch (1499:41): [True: 0, False: 0]
  ------------------
 1500|      0|                                                    && ((i4_gap_frame_num
  ------------------
  |  Branch (1500:56): [True: 0, False: 0]
  ------------------
 1501|      0|                                                                    + i4_max_frm_num)
 1502|      0|                                                                    >= i4_end_frm_num)))
 1503|      0|                    {
 1504|      0|                        i4_gap_frame_num = i4_start_frm_num;
 1505|      0|                        i1_gap_idx = i;
 1506|      0|                    }
 1507|      0|                }
 1508|      0|            }
 1509|      0|        }
 1510|      0|    }
 1511|      0|    else
 1512|      0|    {
 1513|       |        /* no valid short term buffers, delete one gap from the least start */
 1514|       |        /* of gap sequence                                                  */
 1515|      0|        i4_gap_frame_num = pi4_gaps_start_frm_num[0];
 1516|      0|        i1_gap_idx = 0;
 1517|      0|        for(i = 1; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1517:20): [True: 0, False: 0]
  ------------------
 1518|      0|        {
 1519|      0|            if(INVALID_FRAME_NUM != pi4_gaps_start_frm_num[i])
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  |  Branch (1519:16): [True: 0, False: 0]
  ------------------
 1520|      0|            {
 1521|      0|                if(pi4_gaps_start_frm_num[i] < i4_gap_frame_num)
  ------------------
  |  Branch (1521:20): [True: 0, False: 0]
  ------------------
 1522|      0|                {
 1523|      0|                    i4_gap_frame_num = pi4_gaps_start_frm_num[i];
 1524|      0|                    i1_gap_idx = i;
 1525|      0|                }
 1526|      0|            }
 1527|      0|        }
 1528|      0|        if(INVALID_FRAME_NUM == i4_gap_frame_num)
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  |  Branch (1528:12): [True: 0, False: 0]
  ------------------
 1529|      0|        {
 1530|      0|            UWORD32 i4_error_code;
 1531|      0|            i4_error_code = ERROR_DBP_MANAGER_T;
 1532|      0|            return i4_error_code;
 1533|      0|        }
 1534|      0|    }
 1535|       |
 1536|      0|    if(-1 != i1_gap_idx)
  ------------------
  |  Branch (1536:8): [True: 0, False: 0]
  ------------------
 1537|      0|    {
 1538|       |        /* find least frame_num in the poc_map, which is in this range */
 1539|      0|        i4_start_frm_num = pi4_gaps_start_frm_num[i1_gap_idx];
 1540|      0|        if(i4_start_frm_num < 0)
  ------------------
  |  Branch (1540:12): [True: 0, False: 0]
  ------------------
 1541|      0|            i4_start_frm_num += i4_max_frm_num;
 1542|      0|        i4_end_frm_num = pi4_gaps_end_frm_num[i1_gap_idx];
 1543|      0|        if(i4_end_frm_num < 0)
  ------------------
  |  Branch (1543:12): [True: 0, False: 0]
  ------------------
 1544|      0|            i4_end_frm_num += i4_max_frm_num;
 1545|       |
 1546|      0|        i4_gap_frm_num_min = 0xfffffff;
 1547|      0|        j_min = MAX_FRAMES;
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
 1548|      0|        for(j = 0; j < MAX_FRAMES; j++)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1548:20): [True: 0, False: 0]
  ------------------
 1549|      0|        {
 1550|      0|            i4_frm_num = ps_dpb_mgr->ai4_poc_buf_id_map[j][2];
 1551|      0|            if((i4_start_frm_num <= i4_frm_num)
  ------------------
  |  Branch (1551:16): [True: 0, False: 0]
  ------------------
 1552|      0|                            && (i4_end_frm_num >= i4_frm_num))
  ------------------
  |  Branch (1552:32): [True: 0, False: 0]
  ------------------
 1553|      0|            {
 1554|      0|                if(i4_frm_num < i4_gap_frm_num_min)
  ------------------
  |  Branch (1554:20): [True: 0, False: 0]
  ------------------
 1555|      0|                {
 1556|      0|                    j_min = j;
 1557|      0|                    i4_gap_frm_num_min = i4_frm_num;
 1558|      0|                }
 1559|      0|            }
 1560|      0|        }
 1561|       |
 1562|      0|        if(j_min != MAX_FRAMES)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1562:12): [True: 0, False: 0]
  ------------------
 1563|      0|        {
 1564|       |
 1565|      0|            ps_dpb_mgr->ai4_poc_buf_id_map[j_min][0] = -1;
 1566|      0|            ps_dpb_mgr->ai4_poc_buf_id_map[j_min][1] = 0x7fffffff;
 1567|      0|            ps_dpb_mgr->ai4_poc_buf_id_map[j_min][2] = GAP_FRAME_NUM;
  ------------------
  |  |   80|      0|#define GAP_FRAME_NUM           0x1fffffff
  ------------------
 1568|      0|            ps_dpb_mgr->i1_gaps_deleted++;
 1569|       |
 1570|      0|            ps_dpb_mgr->ai1_gaps_per_seq[i1_gap_idx]--;
 1571|      0|            ps_dpb_mgr->u1_num_gaps--;
 1572|      0|            *pu1_del_node = 0;
 1573|      0|            if(0 == ps_dpb_mgr->ai1_gaps_per_seq[i1_gap_idx])
  ------------------
  |  Branch (1573:16): [True: 0, False: 0]
  ------------------
 1574|      0|            {
 1575|      0|                ps_dpb_mgr->ai4_gaps_start_frm_num[i1_gap_idx] =
 1576|      0|                INVALID_FRAME_NUM;
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
 1577|      0|                ps_dpb_mgr->ai4_gaps_end_frm_num[i1_gap_idx] = 0;
 1578|      0|            }
 1579|      0|        }
 1580|      0|    }
 1581|       |
 1582|      0|    return OK;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
 1583|      0|}
ih264d_delete_gap_frm_mmco:
 1614|  1.57k|{
 1615|  1.57k|    WORD8 i, j;
 1616|  1.57k|    WORD32 *pi4_start, *pi4_end;
 1617|  1.57k|    WORD32 i4_start_frm_num, i4_end_frm_num, i4_max_frm_num;
 1618|       |
 1619|       |    /* find the least frame num from gaps and current DPB node    */
 1620|       |    /* Delete the gaps                                            */
 1621|  1.57k|    *pu1_del_node = 1;
 1622|  1.57k|    pi4_start = ps_dpb_mgr->ai4_gaps_start_frm_num;
 1623|  1.57k|    pi4_end = ps_dpb_mgr->ai4_gaps_end_frm_num;
 1624|  1.57k|    i4_max_frm_num = ps_dpb_mgr->i4_max_frm_num;
 1625|       |
 1626|  1.57k|    if(0 == ps_dpb_mgr->u1_num_gaps)
  ------------------
  |  Branch (1626:8): [True: 1.57k, False: 0]
  ------------------
 1627|  1.57k|        return OK;
  ------------------
  |  |  114|  1.57k|#define OK        0
  ------------------
 1628|       |
 1629|      0|    if(i4_frame_num < 0)
  ------------------
  |  Branch (1629:8): [True: 0, False: 0]
  ------------------
 1630|      0|        i4_frame_num += i4_max_frm_num;
 1631|      0|    for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1631:16): [True: 0, False: 0]
  ------------------
 1632|      0|    {
 1633|      0|        i4_start_frm_num = pi4_start[i];
 1634|      0|        if(i4_start_frm_num < 0)
  ------------------
  |  Branch (1634:12): [True: 0, False: 0]
  ------------------
 1635|      0|            i4_start_frm_num += i4_max_frm_num;
 1636|      0|        if(INVALID_FRAME_NUM != i4_start_frm_num)
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  |  Branch (1636:12): [True: 0, False: 0]
  ------------------
 1637|      0|        {
 1638|      0|            i4_end_frm_num = pi4_end[i];
 1639|      0|            if(i4_end_frm_num < 0)
  ------------------
  |  Branch (1639:16): [True: 0, False: 0]
  ------------------
 1640|      0|                i4_end_frm_num += i4_max_frm_num;
 1641|       |
 1642|      0|            if((i4_frame_num >= i4_start_frm_num)
  ------------------
  |  Branch (1642:16): [True: 0, False: 0]
  ------------------
 1643|      0|                            && (i4_frame_num <= i4_end_frm_num))
  ------------------
  |  Branch (1643:32): [True: 0, False: 0]
  ------------------
 1644|      0|            {
 1645|      0|                break;
 1646|      0|            }
 1647|      0|            else
 1648|      0|            {
 1649|      0|                if(((i4_frame_num + i4_max_frm_num) >= i4_start_frm_num)
  ------------------
  |  Branch (1649:20): [True: 0, False: 0]
  ------------------
 1650|      0|                                && ((i4_frame_num + i4_max_frm_num)
  ------------------
  |  Branch (1650:36): [True: 0, False: 0]
  ------------------
 1651|      0|                                                <= i4_end_frm_num))
 1652|      0|                {
 1653|      0|                    UWORD32 i4_error_code;
 1654|      0|                    i4_error_code = ERROR_DBP_MANAGER_T;
 1655|      0|                    return i4_error_code;
 1656|      0|                }
 1657|      0|            }
 1658|      0|        }
 1659|      0|    }
 1660|       |
 1661|       |    /* find frame_num index, in the poc_map which needs to be deleted */
 1662|      0|    for(j = 0; j < MAX_FRAMES; j++)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1662:16): [True: 0, False: 0]
  ------------------
 1663|      0|    {
 1664|      0|        if(i4_frame_num == ps_dpb_mgr->ai4_poc_buf_id_map[j][2])
  ------------------
  |  Branch (1664:12): [True: 0, False: 0]
  ------------------
 1665|      0|            break;
 1666|      0|    }
 1667|       |
 1668|      0|    if(MAX_FRAMES != i)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1668:8): [True: 0, False: 0]
  ------------------
 1669|      0|    {
 1670|      0|        if(j == MAX_FRAMES)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1670:12): [True: 0, False: 0]
  ------------------
 1671|      0|        {
 1672|      0|            UWORD32 i4_error_code;
 1673|      0|            i4_error_code = ERROR_DBP_MANAGER_T;
 1674|      0|            return i4_error_code;
 1675|      0|        }
 1676|       |
 1677|      0|        ps_dpb_mgr->ai4_poc_buf_id_map[j][0] = -1;
 1678|      0|        ps_dpb_mgr->ai4_poc_buf_id_map[j][1] = 0x7fffffff;
 1679|      0|        ps_dpb_mgr->ai4_poc_buf_id_map[j][2] = GAP_FRAME_NUM;
  ------------------
  |  |   80|      0|#define GAP_FRAME_NUM           0x1fffffff
  ------------------
 1680|      0|        ps_dpb_mgr->i1_gaps_deleted++;
 1681|       |
 1682|      0|        ps_dpb_mgr->ai1_gaps_per_seq[i]--;
 1683|      0|        ps_dpb_mgr->u1_num_gaps--;
 1684|      0|        *pu1_del_node = 0;
 1685|      0|        if(0 == ps_dpb_mgr->ai1_gaps_per_seq[i])
  ------------------
  |  Branch (1685:12): [True: 0, False: 0]
  ------------------
 1686|      0|        {
 1687|      0|            ps_dpb_mgr->ai4_gaps_start_frm_num[i] = INVALID_FRAME_NUM;
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
 1688|      0|            ps_dpb_mgr->ai4_gaps_end_frm_num[i] = 0;
 1689|      0|        }
 1690|      0|    }
 1691|      0|    else
 1692|      0|    {
 1693|      0|        UWORD32 i4_error_code;
 1694|      0|        i4_error_code = ERROR_DBP_MANAGER_T;
 1695|      0|        return i4_error_code;
 1696|      0|    }
 1697|       |
 1698|      0|    return OK;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
 1699|      0|}
ih264d_delete_nonref_nondisplay_pics:
 2006|   233k|{
 2007|   233k|    WORD8 i;
 2008|   233k|    WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
 2009|       |
 2010|       |    /* remove all gaps marked as unused for ref */
 2011|   233k|    for(i = 0; (i < MAX_FRAMES) && ps_dpb_mgr->i1_gaps_deleted; i++)
  ------------------
  |  |  600|   233k|#define MAX_FRAMES              16
  ------------------
  |  Branch (2011:16): [True: 233k, False: 0]
  |  Branch (2011:36): [True: 0, False: 233k]
  ------------------
 2012|      0|    {
 2013|      0|        if(GAP_FRAME_NUM == i4_poc_buf_id_map[i][2])
  ------------------
  |  |   80|      0|#define GAP_FRAME_NUM           0x1fffffff
  ------------------
  |  Branch (2013:12): [True: 0, False: 0]
  ------------------
 2014|      0|        {
 2015|      0|            ps_dpb_mgr->i1_gaps_deleted--;
 2016|      0|            ps_dpb_mgr->i1_poc_buf_id_entries--;
 2017|      0|            i4_poc_buf_id_map[i][0] = -1;
 2018|      0|            i4_poc_buf_id_map[i][1] = 0x7fffffff;
 2019|      0|            i4_poc_buf_id_map[i][2] = 0;
 2020|      0|        }
 2021|      0|    }
 2022|   233k|}
ih264d_insert_pic_in_display_list:
 2050|   126k|{
 2051|   126k|    WORD8 i;
 2052|   126k|    WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
 2053|       |
 2054|   218k|    for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|   218k|#define MAX_FRAMES              16
  ------------------
  |  Branch (2054:16): [True: 218k, False: 0]
  ------------------
 2055|   218k|    {
 2056|       |        /* Find an empty slot */
 2057|   218k|        if(i4_poc_buf_id_map[i][0] == -1)
  ------------------
  |  Branch (2057:12): [True: 126k, False: 91.8k]
  ------------------
 2058|   126k|        {
 2059|   126k|            if(GAP_FRAME_NUM == i4_poc_buf_id_map[i][2])
  ------------------
  |  |   80|   126k|#define GAP_FRAME_NUM           0x1fffffff
  ------------------
  |  Branch (2059:16): [True: 0, False: 126k]
  ------------------
 2060|      0|                ps_dpb_mgr->i1_gaps_deleted--;
 2061|   126k|            else
 2062|   126k|                ps_dpb_mgr->i1_poc_buf_id_entries++;
 2063|       |
 2064|   126k|            i4_poc_buf_id_map[i][0] = u1_buf_id;
 2065|   126k|            i4_poc_buf_id_map[i][1] = i4_display_poc;
 2066|   126k|            i4_poc_buf_id_map[i][2] = u4_frame_num;
 2067|       |
 2068|   126k|            break;
 2069|   126k|        }
 2070|   218k|    }
 2071|       |
 2072|   126k|    if(MAX_FRAMES == i)
  ------------------
  |  |  600|   126k|#define MAX_FRAMES              16
  ------------------
  |  Branch (2072:8): [True: 0, False: 126k]
  ------------------
 2073|      0|    {
 2074|       |
 2075|      0|        UWORD32 i4_error_code;
 2076|      0|        i4_error_code = ERROR_GAPS_IN_FRM_NUM;
 2077|      0|        return i4_error_code;
 2078|      0|    }
 2079|   126k|    return OK;
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
 2080|   126k|}

ih264d_fmt_conv_420sp_to_420sp:
  383|  40.6k|{
  384|  40.6k|    UWORD8 *pu1_src, *pu1_dst;
  385|  40.6k|    WORD32 num_rows, num_cols, src_strd, dst_strd;
  386|  40.6k|    WORD32 i;
  387|       |
  388|       |    /* copy luma */
  389|  40.6k|    pu1_src = (UWORD8 *)pu1_y_src;
  390|  40.6k|    pu1_dst = (UWORD8 *)pu1_y_dst;
  391|       |
  392|  40.6k|    num_rows = ht;
  393|  40.6k|    num_cols = wd;
  394|       |
  395|  40.6k|    src_strd = src_y_strd;
  396|  40.6k|    dst_strd = dst_y_strd;
  397|       |
  398|  1.92M|    for(i = 0; i < num_rows; i++)
  ------------------
  |  Branch (398:16): [True: 1.88M, False: 40.6k]
  ------------------
  399|  1.88M|    {
  400|  1.88M|        memcpy(pu1_dst, pu1_src, num_cols);
  401|  1.88M|        pu1_dst += dst_strd;
  402|  1.88M|        pu1_src += src_strd;
  403|  1.88M|    }
  404|       |
  405|       |    /* copy U and V */
  406|  40.6k|    pu1_src = (UWORD8 *)pu1_uv_src;
  407|  40.6k|    pu1_dst = (UWORD8 *)pu1_uv_dst;
  408|       |
  409|  40.6k|    num_rows = ht >> 1;
  410|  40.6k|    num_cols = wd;
  411|       |
  412|  40.6k|    src_strd = src_uv_strd;
  413|  40.6k|    dst_strd = dst_uv_strd;
  414|       |
  415|   984k|    for(i = 0; i < num_rows; i++)
  ------------------
  |  Branch (415:16): [True: 943k, False: 40.6k]
  ------------------
  416|   943k|    {
  417|   943k|        memcpy(pu1_dst, pu1_src, num_cols);
  418|   943k|        pu1_dst += dst_strd;
  419|   943k|        pu1_src += src_strd;
  420|   943k|    }
  421|  40.6k|    return;
  422|  40.6k|}
ih264d_fmt_conv_420sp_to_420sp_swap_uv:
  480|  10.4k|{
  481|  10.4k|    UWORD8 *pu1_src, *pu1_dst;
  482|  10.4k|    WORD32 num_rows, num_cols, src_strd, dst_strd;
  483|  10.4k|    WORD32 i;
  484|       |
  485|       |    /* copy luma */
  486|  10.4k|    pu1_src = (UWORD8 *)pu1_y_src;
  487|  10.4k|    pu1_dst = (UWORD8 *)pu1_y_dst;
  488|       |
  489|  10.4k|    num_rows = ht;
  490|  10.4k|    num_cols = wd;
  491|       |
  492|  10.4k|    src_strd = src_y_strd;
  493|  10.4k|    dst_strd = dst_y_strd;
  494|       |
  495|  1.25M|    for(i = 0; i < num_rows; i++)
  ------------------
  |  Branch (495:16): [True: 1.24M, False: 10.4k]
  ------------------
  496|  1.24M|    {
  497|  1.24M|        memcpy(pu1_dst, pu1_src, num_cols);
  498|  1.24M|        pu1_dst += dst_strd;
  499|  1.24M|        pu1_src += src_strd;
  500|  1.24M|    }
  501|       |
  502|       |    /* copy U and V */
  503|  10.4k|    pu1_src = (UWORD8 *)pu1_uv_src;
  504|  10.4k|    pu1_dst = (UWORD8 *)pu1_uv_dst;
  505|       |
  506|  10.4k|    num_rows = ht >> 1;
  507|  10.4k|    num_cols = wd;
  508|       |
  509|  10.4k|    src_strd = src_uv_strd;
  510|  10.4k|    dst_strd = dst_uv_strd;
  511|       |
  512|   632k|    for(i = 0; i < num_rows; i++)
  ------------------
  |  Branch (512:16): [True: 621k, False: 10.4k]
  ------------------
  513|   621k|    {
  514|   621k|        WORD32 j;
  515|  74.8M|        for(j = 0; j < num_cols; j += 2)
  ------------------
  |  Branch (515:20): [True: 74.2M, False: 621k]
  ------------------
  516|  74.2M|        {
  517|  74.2M|            pu1_dst[j + 0] = pu1_src[j + 1];
  518|  74.2M|            pu1_dst[j + 1] = pu1_src[j + 0];
  519|  74.2M|        }
  520|   621k|        pu1_dst += dst_strd;
  521|   621k|        pu1_src += src_strd;
  522|   621k|    }
  523|  10.4k|    return;
  524|  10.4k|}
ih264d_fmt_conv_420sp_to_420p:
  591|  47.1k|{
  592|  47.1k|    UWORD8 *pu1_src, *pu1_dst;
  593|  47.1k|    UWORD8 *pu1_u_src, *pu1_v_src;
  594|  47.1k|    WORD32 num_rows, num_cols, src_strd, dst_strd;
  595|  47.1k|    WORD32 i, j;
  596|       |
  597|  47.1k|    if(0 == disable_luma_copy)
  ------------------
  |  Branch (597:8): [True: 47.1k, False: 0]
  ------------------
  598|  47.1k|    {
  599|       |        /* copy luma */
  600|  47.1k|        pu1_src = (UWORD8 *)pu1_y_src;
  601|  47.1k|        pu1_dst = (UWORD8 *)pu1_y_dst;
  602|       |
  603|  47.1k|        num_rows = ht;
  604|  47.1k|        num_cols = wd;
  605|       |
  606|  47.1k|        src_strd = src_y_strd;
  607|  47.1k|        dst_strd = dst_y_strd;
  608|       |
  609|  3.36M|        for(i = 0; i < num_rows; i++)
  ------------------
  |  Branch (609:20): [True: 3.31M, False: 47.1k]
  ------------------
  610|  3.31M|        {
  611|  3.31M|            memcpy(pu1_dst, pu1_src, num_cols);
  612|  3.31M|            pu1_dst += dst_strd;
  613|  3.31M|            pu1_src += src_strd;
  614|  3.31M|        }
  615|  47.1k|    }
  616|       |    /* de-interleave U and V and copy to destination */
  617|  47.1k|    if(is_u_first)
  ------------------
  |  Branch (617:8): [True: 47.1k, False: 0]
  ------------------
  618|  47.1k|    {
  619|  47.1k|        pu1_u_src = (UWORD8 *)pu1_uv_src;
  620|  47.1k|        pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
  621|  47.1k|    }
  622|      0|    else
  623|      0|    {
  624|      0|        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
  625|      0|        pu1_v_src = (UWORD8 *)pu1_uv_src;
  626|      0|    }
  627|       |
  628|  47.1k|    num_rows = ht >> 1;
  629|  47.1k|    num_cols = wd >> 1;
  630|       |
  631|  47.1k|    src_strd = src_uv_strd;
  632|  47.1k|    dst_strd = dst_uv_strd;
  633|       |
  634|  1.70M|    for(i = 0; i < num_rows; i++)
  ------------------
  |  Branch (634:16): [True: 1.65M, False: 47.1k]
  ------------------
  635|  1.65M|    {
  636|  76.0M|        for(j = 0; j < num_cols; j++)
  ------------------
  |  Branch (636:20): [True: 74.3M, False: 1.65M]
  ------------------
  637|  74.3M|        {
  638|  74.3M|            pu1_u_dst[j] = pu1_u_src[j * 2];
  639|  74.3M|            pu1_v_dst[j] = pu1_v_src[j * 2];
  640|  74.3M|        }
  641|       |
  642|  1.65M|        pu1_u_dst += dst_strd;
  643|  1.65M|        pu1_v_dst += dst_strd;
  644|  1.65M|        pu1_u_src += src_strd;
  645|  1.65M|        pu1_v_src += src_strd;
  646|  1.65M|    }
  647|  47.1k|    return;
  648|  47.1k|}
ih264d_format_convert:
  672|  98.2k|{
  673|  98.2k|    UWORD32 convert_uv_only = 0;
  674|  98.2k|    iv_yuv_buf_t *ps_op_frm;
  675|  98.2k|    UWORD8 *pu1_y_src, *pu1_uv_src;
  676|  98.2k|    UWORD32 start_uv = u4_start_y >> 1;
  677|       |
  678|  98.2k|    if(1 == pv_disp_op->u4_error_code)
  ------------------
  |  Branch (678:8): [True: 1, False: 98.2k]
  ------------------
  679|      1|        return;
  680|       |
  681|  98.2k|    ps_op_frm = &(ps_dec->s_disp_frame_info);
  682|       |
  683|       |    /* Requires u4_start_y and u4_num_rows_y to be even */
  684|  98.2k|    if(u4_start_y & 1)
  ------------------
  |  Branch (684:8): [True: 0, False: 98.2k]
  ------------------
  685|      0|    {
  686|      0|        return;
  687|      0|    }
  688|       |
  689|  98.2k|    if((1 == ps_dec->u4_share_disp_buf) &&
  ------------------
  |  Branch (689:8): [True: 0, False: 98.2k]
  ------------------
  690|      0|       (pv_disp_op->e_output_format == IV_YUV_420SP_UV))
  ------------------
  |  Branch (690:8): [True: 0, False: 0]
  ------------------
  691|      0|    {
  692|      0|        return;
  693|      0|    }
  694|       |
  695|  98.2k|    pu1_y_src = (UWORD8 *)ps_op_frm->pv_y_buf;
  696|  98.2k|    pu1_y_src += u4_start_y * ps_op_frm->u4_y_strd,
  697|       |
  698|  98.2k|    pu1_uv_src = (UWORD8 *)ps_op_frm->pv_u_buf;
  699|  98.2k|    pu1_uv_src += start_uv * ps_op_frm->u4_u_strd;
  700|       |
  701|  98.2k|    if(pv_disp_op->e_output_format == IV_YUV_420P)
  ------------------
  |  Branch (701:8): [True: 47.1k, False: 51.1k]
  ------------------
  702|  47.1k|    {
  703|  47.1k|        UWORD8 *pu1_y_dst, *pu1_u_dst, *pu1_v_dst;
  704|  47.1k|        IV_COLOR_FORMAT_T e_output_format = pv_disp_op->e_output_format;
  705|       |
  706|  47.1k|        if(0 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (706:12): [True: 47.1k, False: 0]
  ------------------
  707|  47.1k|        {
  708|  47.1k|            convert_uv_only = 0;
  709|  47.1k|        }
  710|      0|        else
  711|      0|        {
  712|      0|            convert_uv_only = 1;
  713|      0|        }
  714|       |
  715|  47.1k|        pu1_y_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf;
  716|  47.1k|        pu1_y_dst += u4_start_y * pv_disp_op->s_disp_frm_buf.u4_y_strd;
  717|       |
  718|  47.1k|        pu1_u_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_u_buf;
  719|  47.1k|        pu1_u_dst += start_uv * pv_disp_op->s_disp_frm_buf.u4_u_strd;
  720|       |
  721|  47.1k|        pu1_v_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_v_buf;
  722|  47.1k|        pu1_v_dst += start_uv * pv_disp_op->s_disp_frm_buf.u4_v_strd;
  723|       |
  724|  47.1k|        ih264d_fmt_conv_420sp_to_420p(pu1_y_src,
  725|  47.1k|                                      pu1_uv_src,
  726|  47.1k|                                      pu1_y_dst,
  727|  47.1k|                                      pu1_u_dst,
  728|  47.1k|                                      pu1_v_dst,
  729|  47.1k|                                      ps_op_frm->u4_y_wd,
  730|  47.1k|                                      u4_num_rows_y,
  731|  47.1k|                                      ps_op_frm->u4_y_strd,
  732|  47.1k|                                      ps_op_frm->u4_u_strd,
  733|  47.1k|                                      pv_disp_op->s_disp_frm_buf.u4_y_strd,
  734|  47.1k|                                      pv_disp_op->s_disp_frm_buf.u4_u_strd,
  735|  47.1k|                                      1,
  736|  47.1k|                                      convert_uv_only);
  737|       |
  738|  47.1k|    }
  739|  51.1k|    else if((pv_disp_op->e_output_format == IV_YUV_420SP_UV) ||
  ------------------
  |  Branch (739:13): [True: 40.6k, False: 10.4k]
  ------------------
  740|  10.4k|            (pv_disp_op->e_output_format == IV_YUV_420SP_VU))
  ------------------
  |  Branch (740:13): [True: 10.4k, False: 0]
  ------------------
  741|  51.1k|    {
  742|  51.1k|        UWORD8* pu1_y_dst, *pu1_uv_dst;
  743|       |
  744|  51.1k|        pu1_y_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf;
  745|  51.1k|        pu1_y_dst +=  u4_start_y * pv_disp_op->s_disp_frm_buf.u4_y_strd;
  746|       |
  747|  51.1k|        pu1_uv_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_u_buf;
  748|  51.1k|        pu1_uv_dst += start_uv * pv_disp_op->s_disp_frm_buf.u4_u_strd;
  749|       |
  750|  51.1k|        if(pv_disp_op->e_output_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (750:12): [True: 40.6k, False: 10.4k]
  ------------------
  751|  40.6k|        {
  752|  40.6k|            ih264d_fmt_conv_420sp_to_420sp(pu1_y_src,
  753|  40.6k|                                           pu1_uv_src,
  754|  40.6k|                                           pu1_y_dst,
  755|  40.6k|                                           pu1_uv_dst,
  756|  40.6k|                                           ps_op_frm->u4_y_wd,
  757|  40.6k|                                           u4_num_rows_y,
  758|  40.6k|                                           ps_op_frm->u4_y_strd,
  759|  40.6k|                                           ps_op_frm->u4_u_strd,
  760|  40.6k|                                           pv_disp_op->s_disp_frm_buf.u4_y_strd,
  761|  40.6k|                                           pv_disp_op->s_disp_frm_buf.u4_u_strd);
  762|  40.6k|        }
  763|  10.4k|        else
  764|  10.4k|        {
  765|  10.4k|            ih264d_fmt_conv_420sp_to_420sp_swap_uv(pu1_y_src,
  766|  10.4k|                                                   pu1_uv_src,
  767|  10.4k|                                                   pu1_y_dst,
  768|  10.4k|                                                   pu1_uv_dst,
  769|  10.4k|                                                   ps_op_frm->u4_y_wd,
  770|  10.4k|                                                   u4_num_rows_y,
  771|  10.4k|                                                   ps_op_frm->u4_y_strd,
  772|  10.4k|                                                   ps_op_frm->u4_u_strd,
  773|  10.4k|                                                   pv_disp_op->s_disp_frm_buf.u4_y_strd,
  774|  10.4k|                                                   pv_disp_op->s_disp_frm_buf.u4_u_strd);
  775|  10.4k|        }
  776|  51.1k|    }
  777|      0|    else if(pv_disp_op->e_output_format == IV_RGB_565)
  ------------------
  |  Branch (777:13): [True: 0, False: 0]
  ------------------
  778|      0|    {
  779|      0|        UWORD16 *pu2_rgb_dst;
  780|       |
  781|      0|        pu2_rgb_dst = (UWORD16 *)pv_disp_op->s_disp_frm_buf.pv_y_buf;
  782|      0|        pu2_rgb_dst += u4_start_y * pv_disp_op->s_disp_frm_buf.u4_y_strd;
  783|       |
  784|      0|        ih264d_fmt_conv_420sp_to_rgb565(pu1_y_src,
  785|      0|                                        pu1_uv_src,
  786|      0|                                        pu2_rgb_dst,
  787|      0|                                        ps_op_frm->u4_y_wd,
  788|      0|                                        u4_num_rows_y,
  789|      0|                                        ps_op_frm->u4_y_strd,
  790|      0|                                        ps_op_frm->u4_u_strd,
  791|      0|                                        pv_disp_op->s_disp_frm_buf.u4_y_strd,
  792|      0|                                        1);
  793|      0|    }
  794|       |
  795|  98.2k|    if((u4_start_y + u4_num_rows_y) >= ps_dec->s_disp_frame_info.u4_y_ht)
  ------------------
  |  Branch (795:8): [True: 24.0k, False: 74.2k]
  ------------------
  796|  24.0k|    {
  797|       |
  798|  24.0k|        INSERT_LOGO(pv_disp_op->s_disp_frm_buf.pv_y_buf,
  799|  24.0k|                        pv_disp_op->s_disp_frm_buf.pv_u_buf,
  800|  24.0k|                        pv_disp_op->s_disp_frm_buf.pv_v_buf,
  801|  24.0k|                        pv_disp_op->s_disp_frm_buf.u4_y_strd,
  802|  24.0k|                        ps_dec->u2_disp_width,
  803|  24.0k|                        ps_dec->u2_disp_height,
  804|  24.0k|                        pv_disp_op->e_output_format,
  805|  24.0k|                        ps_op_frm->u4_y_wd,
  806|  24.0k|                        ps_op_frm->u4_y_ht);
  807|  24.0k|    }
  808|       |
  809|  98.2k|    return;
  810|  98.2k|}

ih264d_init_function_ptr_generic:
   82|   176k|{
   83|       |
   84|   176k|    WORD32 i = 0;
   85|       |
   86|       |    /* Init function pointers for intra pred leaf level functions luma
   87|       |     * Intra 16x16 */
   88|   176k|    ps_codec->apf_intra_pred_luma_16x16[0] =
   89|   176k|                    ih264_intra_pred_luma_16x16_mode_vert;
   90|   176k|    ps_codec->apf_intra_pred_luma_16x16[1] =
   91|   176k|                    ih264_intra_pred_luma_16x16_mode_horz;
   92|   176k|    ps_codec->apf_intra_pred_luma_16x16[2] =
   93|   176k|                    ih264_intra_pred_luma_16x16_mode_dc;
   94|   176k|    ps_codec->apf_intra_pred_luma_16x16[3] =
   95|   176k|                    ih264_intra_pred_luma_16x16_mode_plane;
   96|       |
   97|       |    /* Init function pointers for intra pred leaf level functions luma
   98|       |     * Intra 4x4 */
   99|   176k|    ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert;
  100|   176k|    ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz;
  101|   176k|    ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc;
  102|   176k|    ps_codec->apf_intra_pred_luma_4x4[3] =
  103|   176k|                    ih264_intra_pred_luma_4x4_mode_diag_dl;
  104|   176k|    ps_codec->apf_intra_pred_luma_4x4[4] =
  105|   176k|                    ih264_intra_pred_luma_4x4_mode_diag_dr;
  106|   176k|    ps_codec->apf_intra_pred_luma_4x4[5] =
  107|   176k|                    ih264_intra_pred_luma_4x4_mode_vert_r;
  108|   176k|    ps_codec->apf_intra_pred_luma_4x4[6] =
  109|   176k|                    ih264_intra_pred_luma_4x4_mode_horz_d;
  110|   176k|    ps_codec->apf_intra_pred_luma_4x4[7] =
  111|   176k|                    ih264_intra_pred_luma_4x4_mode_vert_l;
  112|   176k|    ps_codec->apf_intra_pred_luma_4x4[8] =
  113|   176k|                    ih264_intra_pred_luma_4x4_mode_horz_u;
  114|       |
  115|       |    /* Init function pointers for intra pred leaf level functions luma
  116|       |     * Intra 8x8 */
  117|   176k|    ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert;
  118|   176k|    ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz;
  119|   176k|    ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc;
  120|   176k|    ps_codec->apf_intra_pred_luma_8x8[3] =
  121|   176k|                    ih264_intra_pred_luma_8x8_mode_diag_dl;
  122|   176k|    ps_codec->apf_intra_pred_luma_8x8[4] =
  123|   176k|                    ih264_intra_pred_luma_8x8_mode_diag_dr;
  124|   176k|    ps_codec->apf_intra_pred_luma_8x8[5] =
  125|   176k|                    ih264_intra_pred_luma_8x8_mode_vert_r;
  126|   176k|    ps_codec->apf_intra_pred_luma_8x8[6] =
  127|   176k|                    ih264_intra_pred_luma_8x8_mode_horz_d;
  128|   176k|    ps_codec->apf_intra_pred_luma_8x8[7] =
  129|   176k|                    ih264_intra_pred_luma_8x8_mode_vert_l;
  130|   176k|    ps_codec->apf_intra_pred_luma_8x8[8] =
  131|   176k|                    ih264_intra_pred_luma_8x8_mode_horz_u;
  132|       |
  133|   176k|    ps_codec->pf_intra_pred_ref_filtering =
  134|   176k|                    ih264_intra_pred_luma_8x8_mode_ref_filtering;
  135|       |
  136|       |    /* Init function pointers for intra pred leaf level functions chroma
  137|       |     * Intra 8x8 */
  138|   176k|    ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert;
  139|   176k|    ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz;
  140|   176k|    ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc;
  141|   176k|    ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane;
  142|       |
  143|   176k|    ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma;
  144|   176k|    ps_codec->pf_default_weighted_pred_chroma =
  145|   176k|                    ih264_default_weighted_pred_chroma;
  146|   176k|    ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma;
  147|   176k|    ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma;
  148|   176k|    ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma;
  149|   176k|    ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma;
  150|       |
  151|       |    /* Padding Functions */
  152|   176k|    ps_codec->pf_pad_top = ih264_pad_top;
  153|   176k|    ps_codec->pf_pad_bottom = ih264_pad_bottom;
  154|   176k|    ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
  155|   176k|    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
  156|   176k|    ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
  157|   176k|    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;
  158|       |
  159|   176k|    ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4;
  160|   176k|    ps_codec->pf_iquant_itrans_recon_luma_4x4_dc =
  161|   176k|                    ih264_iquant_itrans_recon_4x4_dc;
  162|   176k|    ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8;
  163|   176k|    ps_codec->pf_iquant_itrans_recon_luma_8x8_dc =
  164|   176k|                    ih264_iquant_itrans_recon_8x8_dc;
  165|   176k|    ps_codec->pf_iquant_itrans_recon_chroma_4x4 =
  166|   176k|                    ih264_iquant_itrans_recon_chroma_4x4;
  167|   176k|    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc =
  168|   176k|                    ih264_iquant_itrans_recon_chroma_4x4_dc;
  169|   176k|    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4;
  170|       |
  171|       |    /* Init fn ptr luma deblocking */
  172|   176k|    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
  173|   176k|    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
  174|   176k|    ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff;
  175|   176k|    ps_codec->pf_deblk_luma_vert_bslt4_mbaff =
  176|   176k|                    ih264_deblk_luma_vert_bslt4_mbaff;
  177|       |
  178|   176k|    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
  179|   176k|    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;
  180|       |
  181|       |    /* Init fn ptr chroma deblocking */
  182|   176k|    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
  183|   176k|    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
  184|   176k|    ps_codec->pf_deblk_chroma_vert_bs4_mbaff =
  185|   176k|                    ih264_deblk_chroma_vert_bs4_mbaff;
  186|   176k|    ps_codec->pf_deblk_chroma_vert_bslt4_mbaff =
  187|   176k|                    ih264_deblk_chroma_vert_bslt4_mbaff;
  188|       |
  189|   176k|    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
  190|   176k|    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
  191|       |
  192|       |    /* Inter pred leaf level functions */
  193|   176k|    ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy;
  194|   176k|    ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel;
  195|   176k|    ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz;
  196|   176k|    ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel;
  197|   176k|    ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel;
  198|   176k|    ps_codec->apf_inter_pred_luma[5] =
  199|   176k|                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
  200|   176k|    ps_codec->apf_inter_pred_luma[6] =
  201|   176k|                    ih264_inter_pred_luma_horz_hpel_vert_qpel;
  202|   176k|    ps_codec->apf_inter_pred_luma[7] =
  203|   176k|                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
  204|   176k|    ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert;
  205|   176k|    ps_codec->apf_inter_pred_luma[9] =
  206|   176k|                    ih264_inter_pred_luma_horz_qpel_vert_hpel;
  207|   176k|    ps_codec->apf_inter_pred_luma[10] =
  208|   176k|                    ih264_inter_pred_luma_horz_hpel_vert_hpel;
  209|   176k|    ps_codec->apf_inter_pred_luma[11] =
  210|   176k|                    ih264_inter_pred_luma_horz_qpel_vert_hpel;
  211|   176k|    ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel;
  212|   176k|    ps_codec->apf_inter_pred_luma[13] =
  213|   176k|                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
  214|   176k|    ps_codec->apf_inter_pred_luma[14] =
  215|   176k|                    ih264_inter_pred_luma_horz_hpel_vert_qpel;
  216|   176k|    ps_codec->apf_inter_pred_luma[15] =
  217|   176k|                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
  218|       |
  219|   176k|    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma;
  220|       |
  221|   176k|    return;
  222|   176k|}

ih264d_fill_pred_info:
  113|  14.9M|{
  114|  14.9M|    WORD32 insert_bits;
  115|       |
  116|  14.9M|    ps_pred_pkd->i2_mv[0] = pi2_mv[0];
  117|  14.9M|    ps_pred_pkd->i2_mv[1] = pi2_mv[1];
  118|       |
  119|  14.9M|    insert_bits = sub_mb_num & 3; /*sub mb x*/
  120|  14.9M|    ps_pred_pkd->i1_size_pos_info = insert_bits;
  121|  14.9M|    insert_bits = sub_mb_num >> 2;/*sub mb y*/
  122|  14.9M|    ps_pred_pkd->i1_size_pos_info |= insert_bits << 2;
  123|  14.9M|    insert_bits = part_width >> 1;
  124|  14.9M|    ps_pred_pkd->i1_size_pos_info |= insert_bits << 4;
  125|  14.9M|    insert_bits = part_height >> 1;
  126|  14.9M|    ps_pred_pkd->i1_size_pos_info |= insert_bits << 6;
  127|       |
  128|  14.9M|    ps_pred_pkd->i1_ref_idx_info = i1_ref_idx;
  129|  14.9M|    ps_pred_pkd->i1_ref_idx_info |= (pred_dir << 6);
  130|  14.9M|    ps_pred_pkd->i1_buf_id = i1_buf_id;
  131|  14.9M|    ps_pred_pkd->pu4_wt_offst = pu4_wt_offset;
  132|  14.9M|    ps_pred_pkd->u1_pic_type = u1_pic_type;
  133|       |
  134|       |
  135|  14.9M|}
ih264d_form_mb_part_info_bp:
  178|  5.34M|{
  179|       |    /* The reference buffer pointer */
  180|  5.34M|    WORD32 i2_frm_x, i2_frm_y;
  181|  5.34M|    WORD32 i2_tmp_mv_x, i2_tmp_mv_y;
  182|  5.34M|    WORD32 i2_rec_x, i2_rec_y;
  183|       |
  184|  5.34M|    WORD32 u2_pic_ht;
  185|  5.34M|    WORD32 u2_frm_wd;
  186|  5.34M|    WORD32 u2_rec_wd;
  187|  5.34M|    UWORD8 u1_sub_x = 0,u1_sub_y=0 ;
  188|  5.34M|    UWORD8  u1_part_wd = 0,u1_part_ht = 0;
  189|  5.34M|    WORD16 i2_mv_x,i2_mv_y;
  190|       |
  191|       |    /********************************************/
  192|       |    /* i1_mc_wd       width reqd for mcomp      */
  193|       |    /* u1_dma_ht      height reqd for mcomp     */
  194|       |    /* u1_dma_wd      width aligned to 4 bytes  */
  195|       |    /* u1_dx          fractional part of width  */
  196|       |    /* u1_dx          fractional part of height */
  197|       |    /********************************************/
  198|  5.34M|    UWORD32 i1_mc_wd;
  199|       |
  200|  5.34M|    WORD32 u1_dma_ht;
  201|       |
  202|  5.34M|    UWORD32 u1_dma_wd;
  203|  5.34M|    UWORD32 u1_dx;
  204|  5.34M|    UWORD32 u1_dy;
  205|  5.34M|    pred_info_t * ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx;
  206|  5.34M|    dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
  207|  5.34M|    tfr_ctxt_t *ps_frame_buf;
  208|  5.34M|    struct pic_buffer_t *ps_ref_frm;
  209|  5.34M|    UWORD8 u1_scale_ref,u1_mbaff,u1_field;
  210|  5.34M|    pic_buffer_t  **pps_ref_frame;
  211|  5.34M|    WORD8 i1_size_pos_info,i1_buf_id;
  212|       |
  213|  5.34M|    PROFILE_DISABLE_MB_PART_INFO()
  ------------------
  |  |  131|  5.34M|#define PROFILE_DISABLE_MB_PART_INFO() ;
  ------------------
  214|       |
  215|  5.34M|     UNUSED(ps_cur_mb_info);
  ------------------
  |  |   45|  5.34M|#define UNUSED(x) ((void)(x))
  ------------------
  216|  5.34M|     i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
  217|  5.34M|     GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
  ------------------
  |  |  165|  5.34M|#define GET_XPOS_PRED(u1_out,pkd_info)                        \
  |  |  166|  5.34M|{                                                               \
  |  |  167|  5.34M|    WORD32 bit_field;                                           \
  |  |  168|  5.34M|    bit_field = pkd_info & 0x3;                                 \
  |  |  169|  5.34M|    u1_out = bit_field;                                       \
  |  |  170|  5.34M|}
  ------------------
  218|  5.34M|     GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
  ------------------
  |  |  173|  5.34M|#define GET_YPOS_PRED(u1_out,pkd_info)                        \
  |  |  174|  5.34M|{                                                               \
  |  |  175|  5.34M|    WORD32 bit_field;                                           \
  |  |  176|  5.34M|    bit_field = pkd_info >> 2;                                  \
  |  |  177|  5.34M|    u1_out = bit_field & 0x3;                                  \
  |  |  178|  5.34M|}
  ------------------
  219|  5.34M|     GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
  ------------------
  |  |  182|  5.34M|#define GET_WIDTH_PRED(u1_out,pkd_info)                        \
  |  |  183|  5.34M|{                                                               \
  |  |  184|  5.34M|    WORD32 bit_field;                                           \
  |  |  185|  5.34M|    bit_field = pkd_info >> 4;                                  \
  |  |  186|  5.34M|    bit_field = (bit_field & 0x3) << 1 ;                        \
  |  |  187|  5.34M|    u1_out = (bit_field == 0)?1:bit_field;                       \
  |  |  ------------------
  |  |  |  Branch (187:14): [True: 23.5k, False: 5.32M]
  |  |  ------------------
  |  |  188|  5.34M|    }
  ------------------
  220|  5.34M|     GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
  ------------------
  |  |  190|  5.34M|#define GET_HEIGHT_PRED(u1_out,pkd_info)                        \
  |  |  191|  5.34M|{                                                               \
  |  |  192|  5.34M|    WORD32 bit_field;                                           \
  |  |  193|  5.34M|    bit_field = pkd_info >> 6;                                  \
  |  |  194|  5.34M|    bit_field = (bit_field & 0x3) << 1 ;                        \
  |  |  195|  5.34M|    u1_out = (bit_field == 0)?1:bit_field;                      \
  |  |  ------------------
  |  |  |  Branch (195:14): [True: 17.0k, False: 5.33M]
  |  |  ------------------
  |  |  196|  5.34M|}
  ------------------
  221|  5.34M|     i2_mv_x = ps_pred_pkd->i2_mv[0];
  222|  5.34M|     i2_mv_y = ps_pred_pkd->i2_mv[1];
  223|  5.34M|     i1_buf_id = ps_pred_pkd->i1_buf_id;
  224|       |
  225|       |
  226|  5.34M|     ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id];
  227|       |
  228|       |
  229|  5.34M|    {
  230|  5.34M|        ps_frame_buf = &ps_dec->s_tran_addrecon;
  231|  5.34M|    }
  232|       |
  233|       |
  234|       |    /* Transfer Setup Y */
  235|  5.34M|    {
  236|  5.34M|        UWORD8 *pu1_pred, *pu1_rec;
  237|       |
  238|       |        /* calculating rounded motion vectors and fractional components */
  239|  5.34M|        i2_tmp_mv_x = i2_mv_x;
  240|  5.34M|        i2_tmp_mv_y = i2_mv_y;
  241|  5.34M|        u1_dx = i2_tmp_mv_x & 0x3;
  242|  5.34M|        u1_dy = i2_tmp_mv_y & 0x3;
  243|  5.34M|        i2_tmp_mv_x >>= 2;
  244|  5.34M|        i2_tmp_mv_y >>= 2;
  245|  5.34M|        i1_mc_wd = u1_part_wd << 2;
  246|  5.34M|        u1_dma_ht = u1_part_ht << 2;
  247|  5.34M|        if(u1_dx)
  ------------------
  |  Branch (247:12): [True: 123k, False: 5.22M]
  ------------------
  248|   123k|        {
  249|   123k|            i2_tmp_mv_x -= 2;
  250|   123k|            i1_mc_wd += 5;
  251|   123k|        }
  252|  5.34M|        if(u1_dy)
  ------------------
  |  Branch (252:12): [True: 112k, False: 5.23M]
  ------------------
  253|   112k|        {
  254|   112k|            i2_tmp_mv_y -= 2;
  255|   112k|            u1_dma_ht += 5;
  256|   112k|        }
  257|       |
  258|       |        /********************************************************************/
  259|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  260|       |        /* edge of the reference frame, and subsequent clipping             */
  261|       |        /********************************************************************/
  262|  5.34M|        u2_pic_ht = ps_dec->u2_pic_ht;
  263|  5.34M|        u2_frm_wd = ps_dec->u2_frm_wd_y;
  264|  5.34M|        i2_rec_x = u1_sub_x << 2;
  265|  5.34M|        i2_rec_y = u1_sub_y << 2;
  266|       |
  267|  5.34M|        i2_frm_x = (u2_mb_x << 4) + i2_rec_x + i2_tmp_mv_x;
  268|  5.34M|        i2_frm_y = (u2_mb_y << 4) + i2_rec_y + i2_tmp_mv_y;
  269|       |
  270|  5.34M|        i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
  ------------------
  |  |   77|  5.34M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 1.01k, False: 5.34M]
  |  |  |  Branch (77:54): [True: 5.48k, False: 5.34M]
  |  |  ------------------
  ------------------
  271|  5.34M|                         i2_frm_x);
  272|  5.34M|        i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
  ------------------
  |  |   77|  5.34M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 1.03k, False: 5.34M]
  |  |  |  Branch (77:54): [True: 714, False: 5.34M]
  |  |  ------------------
  ------------------
  273|       |
  274|  5.34M|        pu1_pred = ps_ref_frm->pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
  275|       |
  276|  5.34M|        u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
  277|       |
  278|       |        /********************************************************************/
  279|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  280|       |        /* edge of the recon buffer                                         */
  281|       |        /********************************************************************/
  282|  5.34M|        u2_rec_wd = MB_SIZE;
  ------------------
  |  |  554|  5.34M|#define MB_SIZE             16
  ------------------
  283|  5.34M|        {
  284|  5.34M|            u2_rec_wd = ps_dec->u2_frm_wd_y;
  285|  5.34M|            i2_rec_x += (mb_index << 4);
  286|  5.34M|            pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
  287|  5.34M|                            + i2_rec_x;
  288|  5.34M|        }
  289|       |
  290|       |        /* filling the pred and dma structures for Y */
  291|  5.34M|        u2_frm_wd = ps_dec->u2_frm_wd_y;
  292|       |
  293|  5.34M|        ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
  294|  5.34M|        ps_pred->i1_dma_ht = u1_dma_ht;
  295|  5.34M|        ps_pred->i1_mc_wd = i1_mc_wd;
  296|  5.34M|        ps_pred->u2_frm_wd = u2_frm_wd;
  297|  5.34M|        ps_pred->pu1_rec_y_u = pu1_rec;
  298|  5.34M|        ps_pred->u2_dst_stride = u2_rec_wd;
  299|       |
  300|  5.34M|        ps_pred->i1_mb_partwidth = u1_part_wd << 2;
  301|  5.34M|        ps_pred->i1_mb_partheight = u1_part_ht << 2;
  302|  5.34M|        ps_pred->u1_dydx = (u1_dy << 2) + u1_dx;
  303|       |
  304|  5.34M|        ps_pred->pu1_y_ref = pu1_pred;
  305|       |
  306|  5.34M|    }
  307|       |
  308|       |    /* Increment ps_pred index */
  309|  5.34M|    ps_pred++;
  310|       |
  311|       |    /* Transfer Setup U & V */
  312|  5.34M|    {
  313|  5.34M|        WORD32 i4_ref_offset, i4_rec_offset;
  314|  5.34M|        UWORD8 *pu1_pred_u, *pu1_pred_v;
  315|       |
  316|       |
  317|       |        /* calculating rounded motion vectors and fractional components */
  318|  5.34M|        i2_tmp_mv_x = i2_mv_x;
  319|  5.34M|        i2_tmp_mv_y = i2_mv_y;
  320|       |
  321|       |        /************************************************************************/
  322|       |        /* Table 8-9: Derivation of the vertical component of the chroma vector */
  323|       |        /* in field coding mode                                                 */
  324|       |        /************************************************************************/
  325|       |
  326|       |        /* Eighth sample of the chroma MV */
  327|  5.34M|        u1_dx = i2_tmp_mv_x & 0x7;
  328|  5.34M|        u1_dy = i2_tmp_mv_y & 0x7;
  329|       |
  330|       |        /********************************************************************/
  331|       |        /* Calculating the full pel MV for chroma which is 1/2 of the Luma  */
  332|       |        /* MV in full pel units                                             */
  333|       |        /********************************************************************/
  334|  5.34M|        i2_mv_x = i2_tmp_mv_x;
  335|  5.34M|        i2_mv_y = i2_tmp_mv_y;
  336|  5.34M|        i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
  ------------------
  |  |   67|  5.34M|#define SIGN_POW2_DIV(x, y) (((x) < 0) ? (-((-(x)) >> (y))) : ((x) >> (y)))
  |  |  ------------------
  |  |  |  Branch (67:30): [True: 50.0k, False: 5.29M]
  |  |  ------------------
  ------------------
  337|  5.34M|        i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
  ------------------
  |  |   67|  5.34M|#define SIGN_POW2_DIV(x, y) (((x) < 0) ? (-((-(x)) >> (y))) : ((x) >> (y)))
  |  |  ------------------
  |  |  |  Branch (67:30): [True: 46.8k, False: 5.30M]
  |  |  ------------------
  ------------------
  338|  5.34M|        i1_mc_wd = u1_part_wd << 1;
  339|  5.34M|        u1_dma_ht = u1_part_ht << 1;
  340|  5.34M|        if(u1_dx)
  ------------------
  |  Branch (340:12): [True: 132k, False: 5.21M]
  ------------------
  341|   132k|        {
  342|   132k|            i2_tmp_mv_x -= (i2_mv_x < 0);
  343|   132k|            i1_mc_wd++;
  344|   132k|        }
  345|  5.34M|        if(u1_dy != 0)
  ------------------
  |  Branch (345:12): [True: 124k, False: 5.22M]
  ------------------
  346|   124k|        {
  347|   124k|            i2_tmp_mv_y -= (i2_mv_y < 0);
  348|   124k|            u1_dma_ht++;
  349|   124k|        }
  350|       |
  351|       |        /********************************************************************/
  352|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  353|       |        /* edge of the reference frame, and subsequent clipping             */
  354|       |        /********************************************************************/
  355|  5.34M|        u2_pic_ht >>= 1;
  356|  5.34M|        u2_frm_wd = ps_dec->u2_frm_wd_uv;
  357|  5.34M|        i2_rec_x = u1_sub_x << 1;
  358|  5.34M|        i2_rec_y = u1_sub_y << 1;
  359|       |
  360|  5.34M|        i2_frm_x = (u2_mb_x << 3) + i2_rec_x + i2_tmp_mv_x;
  361|  5.34M|        i2_frm_y = (u2_mb_y << 3) + i2_rec_y + i2_tmp_mv_y;
  362|       |
  363|  5.34M|        i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
  ------------------
  |  |   77|  5.34M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 1.04k, False: 5.34M]
  |  |  |  Branch (77:54): [True: 5.73k, False: 5.34M]
  |  |  ------------------
  ------------------
  364|  5.34M|                         ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
  365|  5.34M|        i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
  ------------------
  |  |   77|  5.34M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 1.06k, False: 5.34M]
  |  |  |  Branch (77:54): [True: 714, False: 5.34M]
  |  |  ------------------
  ------------------
  366|       |
  367|  5.34M|        i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
  ------------------
  |  |  119|  5.34M|#define YUV420SP_FACTOR 2
  ------------------
  368|  5.34M|        u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
  369|       |
  370|       |        /********************************************************************/
  371|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  372|       |        /* edge of the recon buffer                                         */
  373|       |        /********************************************************************/
  374|       |        /* CHANGED CODE */
  375|  5.34M|        u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
  ------------------
  |  |  555|  5.34M|#define BLK8x8SIZE          8
  ------------------
                      u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
  ------------------
  |  |  119|  5.34M|#define YUV420SP_FACTOR 2
  ------------------
  376|  5.34M|        i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
  ------------------
  |  |  119|  5.34M|#define YUV420SP_FACTOR 2
  ------------------
  377|       |
  378|  5.34M|        {
  379|  5.34M|            u2_rec_wd = ps_dec->u2_frm_wd_uv;
  380|  5.34M|            i2_rec_x += (mb_index << 3);
  381|  5.34M|            i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
  ------------------
  |  |  119|  5.34M|#define YUV420SP_FACTOR 2
  ------------------
  382|  5.34M|            ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
  383|  5.34M|            ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
  384|  5.34M|                            + i4_rec_offset;
  385|  5.34M|        }
  386|       |
  387|       |        /* CHANGED CODE */
  388|       |
  389|       |        /* filling the common pred structures for U */
  390|  5.34M|        u2_frm_wd = ps_dec->u2_frm_wd_uv;
  391|       |
  392|  5.34M|        ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
  393|  5.34M|        ps_pred->i1_dma_ht = u1_dma_ht;
  394|  5.34M|        ps_pred->i1_mc_wd = i1_mc_wd;
  395|       |
  396|  5.34M|        ps_pred->u2_frm_wd = u2_frm_wd;
  397|  5.34M|        ps_pred->u2_dst_stride = u2_rec_wd;
  398|       |
  399|  5.34M|        ps_pred->i1_mb_partwidth = u1_part_wd << 1;
  400|  5.34M|        ps_pred->i1_mb_partheight = u1_part_ht << 1;
  401|  5.34M|        ps_pred->u1_dydx = (u1_dy << 3) + u1_dx;
  402|       |
  403|  5.34M|        pu1_pred_u = ps_ref_frm->pu1_buf2 + i4_ref_offset;
  404|  5.34M|        pu1_pred_v = ps_ref_frm->pu1_buf3 + i4_ref_offset;
  405|       |
  406|       |        /* Copy U & V partitions */
  407|  5.34M|        ps_pred->pu1_u_ref = pu1_pred_u;
  408|       |
  409|       |        /* Increment the reference buffer Index */
  410|  5.34M|        ps_pred->pu1_v_ref = pu1_pred_v;
  411|  5.34M|    }
  412|       |
  413|       |    /* Increment ps_pred index */
  414|  5.34M|    ps_dec->u4_pred_info_idx += 2;
  415|       |
  416|  5.34M|    return OK;
  ------------------
  |  |  114|  5.34M|#define OK        0
  ------------------
  417|       |
  418|  5.34M|}
ih264d_form_mb_part_info_mp:
  455|  8.30M|{
  456|       |    /* The reference buffer pointer */
  457|  8.30M|    UWORD8 *pu1_ref_buf;
  458|  8.30M|    WORD16 i2_frm_x, i2_frm_y, i2_tmp_mv_x, i2_tmp_mv_y, i2_pod_ht;
  459|  8.30M|    WORD16 i2_rec_x, i2_rec_y;
  460|  8.30M|    UWORD16 u2_pic_ht, u2_frm_wd, u2_rec_wd;
  461|  8.30M|    UWORD8 u1_wght_pred_type, u1_wted_bipred_idc;
  462|  8.30M|    UWORD16 u2_tot_ref_scratch_size;
  463|  8.30M|    UWORD8 u1_sub_x = 0;
  464|  8.30M|    UWORD8 u1_sub_y = 0;
  465|  8.30M|    UWORD8 u1_is_bi_dir = 0;
  466|       |
  467|       |    /********************************************/
  468|       |    /* i1_mc_wd       width reqd for mcomp      */
  469|       |    /* u1_dma_ht      height reqd for mcomp     */
  470|       |    /* u1_dma_wd      width aligned to 4 bytes  */
  471|       |    /* u1_dx          fractional part of width  */
  472|       |    /* u1_dx          fractional part of height */
  473|       |    /********************************************/
  474|  8.30M|    UWORD8 i1_mc_wd, u1_dma_ht, u1_dma_wd, u1_dx, u1_dy;
  475|  8.30M|    pred_info_t * ps_pred ;
  476|  8.30M|    dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
  477|  8.30M|    const UWORD8 u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type;
  478|  8.30M|    UWORD8 u1_pod_bot, u1_pod_top;
  479|       |
  480|       |    /* load the pictype for pod u4_flag & chroma motion vector derivation */
  481|  8.30M|    UWORD8 u1_ref_pic_type ;
  482|       |
  483|       |    /* set default value to flags specifying field nature of picture & mb */
  484|  8.30M|    UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
  485|  8.30M|    UWORD32 u1_mb_bot = 0, u1_pic_bot = 0, u1_mb_or_pic_bot;
  486|  8.30M|    tfr_ctxt_t *ps_frame_buf;
  487|       |    /* calculate flags specifying field nature of picture & mb */
  488|  8.30M|    const UWORD32 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
  489|  8.30M|    WORD8 i1_pred;
  490|  8.30M|    WORD8 i1_size_pos_info,i1_buf_id,i1_ref_idx;
  491|  8.30M|    UWORD8 u1_part_wd,u1_part_ht;
  492|  8.30M|    WORD16 i2_mv_x,i2_mv_y;
  493|  8.30M|    struct pic_buffer_t *ps_ref_frm;
  494|  8.30M|    UWORD32 *pu4_wt_offset;
  495|  8.30M|    UWORD8 *pu1_buf1,*pu1_buf2,*pu1_buf3;
  496|       |
  497|       |
  498|  8.30M|    PROFILE_DISABLE_MB_PART_INFO()
  ------------------
  |  |  131|  8.30M|#define PROFILE_DISABLE_MB_PART_INFO() ;
  ------------------
  499|       |
  500|  8.30M|    ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx;
  501|       |
  502|       |
  503|  8.30M|     i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
  504|  8.30M|     GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
  ------------------
  |  |  165|  8.30M|#define GET_XPOS_PRED(u1_out,pkd_info)                        \
  |  |  166|  8.30M|{                                                               \
  |  |  167|  8.30M|    WORD32 bit_field;                                           \
  |  |  168|  8.30M|    bit_field = pkd_info & 0x3;                                 \
  |  |  169|  8.30M|    u1_out = bit_field;                                       \
  |  |  170|  8.30M|}
  ------------------
  505|  8.30M|     GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
  ------------------
  |  |  173|  8.30M|#define GET_YPOS_PRED(u1_out,pkd_info)                        \
  |  |  174|  8.30M|{                                                               \
  |  |  175|  8.30M|    WORD32 bit_field;                                           \
  |  |  176|  8.30M|    bit_field = pkd_info >> 2;                                  \
  |  |  177|  8.30M|    u1_out = bit_field & 0x3;                                  \
  |  |  178|  8.30M|}
  ------------------
  506|  8.30M|     GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
  ------------------
  |  |  182|  8.30M|#define GET_WIDTH_PRED(u1_out,pkd_info)                        \
  |  |  183|  8.30M|{                                                               \
  |  |  184|  8.30M|    WORD32 bit_field;                                           \
  |  |  185|  8.30M|    bit_field = pkd_info >> 4;                                  \
  |  |  186|  8.30M|    bit_field = (bit_field & 0x3) << 1 ;                        \
  |  |  187|  8.30M|    u1_out = (bit_field == 0)?1:bit_field;                       \
  |  |  ------------------
  |  |  |  Branch (187:14): [True: 88.6k, False: 8.21M]
  |  |  ------------------
  |  |  188|  8.30M|    }
  ------------------
  507|  8.30M|     GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
  ------------------
  |  |  190|  8.30M|#define GET_HEIGHT_PRED(u1_out,pkd_info)                        \
  |  |  191|  8.30M|{                                                               \
  |  |  192|  8.30M|    WORD32 bit_field;                                           \
  |  |  193|  8.30M|    bit_field = pkd_info >> 6;                                  \
  |  |  194|  8.30M|    bit_field = (bit_field & 0x3) << 1 ;                        \
  |  |  195|  8.30M|    u1_out = (bit_field == 0)?1:bit_field;                      \
  |  |  ------------------
  |  |  |  Branch (195:14): [True: 61.0k, False: 8.24M]
  |  |  ------------------
  |  |  196|  8.30M|}
  ------------------
  508|  8.30M|     i2_mv_x = ps_pred_pkd->i2_mv[0];
  509|  8.30M|     i2_mv_y = ps_pred_pkd->i2_mv[1];
  510|  8.30M|     i1_ref_idx = ps_pred_pkd->i1_ref_idx_info & 0x3f;
  511|  8.30M|     i1_buf_id = ps_pred_pkd->i1_buf_id;
  512|  8.30M|     ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id];
  513|       |
  514|  8.30M|     i1_pred = (ps_pred_pkd->i1_ref_idx_info & 0xC0) >> 6;
  515|  8.30M|     u1_is_bi_dir = (i1_pred == BI_PRED);
  ------------------
  |  |  485|  8.30M|#define BI_PRED   3
  ------------------
  516|       |
  517|       |
  518|  8.30M|    u1_ref_pic_type = ps_pred_pkd->u1_pic_type & PIC_MASK;
  ------------------
  |  |  359|  8.30M|#define PIC_MASK        0x03
  ------------------
  519|       |
  520|  8.30M|    pu1_buf1  = ps_ref_frm->pu1_buf1;
  521|  8.30M|    pu1_buf2  = ps_ref_frm->pu1_buf2;
  522|  8.30M|    pu1_buf3  = ps_ref_frm->pu1_buf3;
  523|       |
  524|  8.30M|    if(u1_ref_pic_type == BOT_FLD)
  ------------------
  |  |  354|  8.30M|#define BOT_FLD         0x02
  ------------------
  |  Branch (524:8): [True: 0, False: 8.30M]
  ------------------
  525|      0|    {
  526|      0|        pu1_buf1 += ps_ref_frm->u2_frm_wd_y;
  527|      0|        pu1_buf2 += ps_ref_frm->u2_frm_wd_uv;
  528|      0|        pu1_buf3 += ps_ref_frm->u2_frm_wd_uv;
  529|       |
  530|      0|    }
  531|       |
  532|       |
  533|       |
  534|  8.30M|    if(ps_dec->ps_cur_pps->u1_wted_pred_flag)
  ------------------
  |  Branch (534:8): [True: 6.54M, False: 1.75M]
  ------------------
  535|  6.54M|    {
  536|  6.54M|            pu4_wt_offset = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
  537|  6.54M|                            * X3(i1_ref_idx)];
  ------------------
  |  |   92|  6.54M|#define X3(a)   (((a) << 1) + (a))
  ------------------
  538|  6.54M|    }
  539|       |
  540|       |
  541|  8.30M|    pu4_wt_offset = ps_pred_pkd->pu4_wt_offst;
  542|       |
  543|       |
  544|       |    /* Pointer to the frame buffer */
  545|  8.30M|    {
  546|  8.30M|        ps_frame_buf = &ps_dec->s_tran_addrecon;
  547|       |        /* CHANGED CODE */
  548|  8.30M|    }
  549|       |
  550|  8.30M|    if(!u1_pic_fld)
  ------------------
  |  Branch (550:8): [True: 8.30M, False: 0]
  ------------------
  551|  8.30M|    {
  552|  8.30M|        u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
  553|  8.30M|        u1_mb_bot = 1 - ps_cur_mb_info->u1_topmb;
  554|  8.30M|    }
  555|      0|    else
  556|      0|        u1_pic_bot = ps_cur_slice->u1_bottom_field_flag;
  557|       |
  558|       |    /****************************************************************/
  559|       |    /* calculating the flags the tell whether to use frame-padding  */
  560|       |    /* or use software pad-on-demand                                */
  561|       |    /****************************************************************/
  562|  8.30M|    u1_mb_or_pic_bot = u1_mb_bot | u1_pic_bot;
  563|  8.30M|    u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
  564|  8.30M|    u1_pod_bot = u1_mb_or_pic_fld && (u1_ref_pic_type == TOP_FLD);
  ------------------
  |  |  353|      0|#define TOP_FLD         0x01
  ------------------
  |  Branch (564:18): [True: 0, False: 8.30M]
  |  Branch (564:38): [True: 0, False: 0]
  ------------------
  565|  8.30M|    u1_pod_top = u1_mb_or_pic_fld && (u1_ref_pic_type == BOT_FLD);
  ------------------
  |  |  354|      0|#define BOT_FLD         0x02
  ------------------
  |  Branch (565:18): [True: 0, False: 8.30M]
  |  Branch (565:38): [True: 0, False: 0]
  ------------------
  566|       |
  567|       |    /* Weighted Pred additions */
  568|  8.30M|    u1_wted_bipred_idc = ps_dec->ps_cur_pps->u1_wted_bipred_idc;
  569|       |
  570|  8.30M|    if((u1_slice_type == P_SLICE) || (u1_slice_type == SP_SLICE))
  ------------------
  |  |  368|  8.30M|#define P_SLICE  0
  ------------------
                  if((u1_slice_type == P_SLICE) || (u1_slice_type == SP_SLICE))
  ------------------
  |  |  371|  3.51M|#define SP_SLICE 3
  ------------------
  |  Branch (570:8): [True: 4.78M, False: 3.51M]
  |  Branch (570:38): [True: 0, False: 3.51M]
  ------------------
  571|  4.78M|    {
  572|       |        /* P Slice only */
  573|  4.78M|        u1_wght_pred_type = ps_dec->ps_cur_pps->u1_wted_pred_flag;
  574|       |
  575|  4.78M|    }
  576|  3.51M|    else
  577|  3.51M|    {
  578|       |        /* B Slice only */
  579|  3.51M|        u1_wght_pred_type = 1 + u1_is_bi_dir;
  580|  3.51M|        if(u1_wted_bipred_idc == 0)
  ------------------
  |  Branch (580:12): [True: 1.15M, False: 2.35M]
  ------------------
  581|  1.15M|            u1_wght_pred_type = 0;
  582|  3.51M|        if((u1_wted_bipred_idc == 2) && (!u1_is_bi_dir))
  ------------------
  |  Branch (582:12): [True: 1.54M, False: 1.97M]
  |  Branch (582:41): [True: 692k, False: 851k]
  ------------------
  583|   692k|            u1_wght_pred_type = 0;
  584|  3.51M|    }
  585|       |    /* load the scratch reference buffer index */
  586|  8.30M|    pu1_ref_buf = ps_dec->pu1_ref_buff + ps_dec->u4_dma_buf_idx;
  587|  8.30M|    u2_tot_ref_scratch_size = 0;
  588|       |
  589|       |
  590|       |    /* Transfer Setup Y */
  591|  8.30M|    {
  592|  8.30M|        UWORD8 *pu1_pred, *pu1_rec;
  593|       |        /* calculating rounded motion vectors and fractional components */
  594|  8.30M|        i2_tmp_mv_x = i2_mv_x;
  595|  8.30M|        i2_tmp_mv_y = i2_mv_y;
  596|       |
  597|  8.30M|        u1_dx = i2_tmp_mv_x & 0x3;
  598|  8.30M|        u1_dy = i2_tmp_mv_y & 0x3;
  599|  8.30M|        i2_tmp_mv_x >>= 2;
  600|  8.30M|        i2_tmp_mv_y >>= 2;
  601|  8.30M|        i1_mc_wd = u1_part_wd << 2;
  602|  8.30M|        u1_dma_ht = u1_part_ht << 2;
  603|  8.30M|        if(u1_dx)
  ------------------
  |  Branch (603:12): [True: 506k, False: 7.79M]
  ------------------
  604|   506k|        {
  605|   506k|            i2_tmp_mv_x -= 2;
  606|   506k|            i1_mc_wd += 5;
  607|   506k|        }
  608|  8.30M|        if(u1_dy)
  ------------------
  |  Branch (608:12): [True: 492k, False: 7.81M]
  ------------------
  609|   492k|        {
  610|   492k|            i2_tmp_mv_y -= 2;
  611|   492k|            u1_dma_ht += 5;
  612|   492k|        }
  613|       |
  614|       |        /********************************************************************/
  615|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  616|       |        /* edge of the reference frame, and subsequent clipping             */
  617|       |        /********************************************************************/
  618|  8.30M|        u2_pic_ht = ps_dec->u2_pic_ht >> u1_pic_fld;
  619|  8.30M|        u2_frm_wd = ps_dec->u2_frm_wd_y << u1_pic_fld;
  620|  8.30M|        i2_frm_x = (u2_mb_x << 4) + (u1_sub_x << 2) + i2_tmp_mv_x;
  621|  8.30M|        i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 4)
  ------------------
  |  Branch (621:33): [True: 0, False: 8.30M]
  |  Branch (621:46): [True: 0, False: 0]
  ------------------
  622|  8.30M|                        + (((u1_sub_y << 2) + i2_tmp_mv_y) << u1_mb_fld);
  623|       |
  624|  8.30M|        i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
  ------------------
  |  |   77|  8.30M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 3.99k, False: 8.30M]
  |  |  |  Branch (77:54): [True: 80.8k, False: 8.22M]
  |  |  ------------------
  ------------------
  625|  8.30M|                         i2_frm_x);
  626|  8.30M|        i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
  ------------------
  |  |   77|  8.30M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 5.01k, False: 8.30M]
  |  |  |  Branch (77:54): [True: 17.5k, False: 8.28M]
  |  |  ------------------
  ------------------
  627|  8.30M|                         (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
  628|       |
  629|  8.30M|        pu1_pred = pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
  630|  8.30M|        u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
  631|       |        /********************************************************************/
  632|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  633|       |        /* edge of the recon buffer                                         */
  634|       |        /********************************************************************/
  635|       |        /* CHANGED CODE */
  636|  8.30M|        u2_rec_wd = MB_SIZE;
  ------------------
  |  |  554|  8.30M|#define MB_SIZE             16
  ------------------
  637|  8.30M|        i2_rec_x = u1_sub_x << 2;
  638|  8.30M|        i2_rec_y = u1_sub_y << 2;
  639|  8.30M|        {
  640|  8.30M|            u2_rec_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
  641|  8.30M|            i2_rec_x += (mb_index << 4);
  642|  8.30M|            pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
  643|  8.30M|                            + i2_rec_x;
  644|  8.30M|            if(u1_mb_bot)
  ------------------
  |  Branch (644:16): [True: 0, False: 8.30M]
  ------------------
  645|      0|                pu1_rec += ps_dec->u2_frm_wd_y << ((u1_mb_fld) ? 0 : 4);
  ------------------
  |  Branch (645:52): [True: 0, False: 0]
  ------------------
  646|  8.30M|        }
  647|       |
  648|       |        /* CHANGED CODE */
  649|       |
  650|       |        /* filling the pred and dma structures for Y */
  651|  8.30M|        u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
  652|       |
  653|  8.30M|        ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
  654|  8.30M|        ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
  655|  8.30M|        ps_pred->u2_frm_wd = u2_frm_wd;
  656|  8.30M|        ps_pred->i1_dma_ht = u1_dma_ht;
  657|  8.30M|        ps_pred->i1_mc_wd = i1_mc_wd;
  658|  8.30M|        ps_pred->pu1_rec_y_u = pu1_rec;
  659|  8.30M|        ps_pred->u2_dst_stride = u2_rec_wd;
  660|       |
  661|  8.30M|        ps_pred->i1_mb_partwidth = u1_part_wd << 2;
  662|  8.30M|        ps_pred->i1_mb_partheight = u1_part_ht << 2;
  663|  8.30M|        ps_pred->u1_dydx = (u1_dy << 2) + u1_dx;
  664|  8.30M|        ps_pred->u1_is_bi_direct = u1_is_bi_dir;
  665|  8.30M|        ps_pred->u1_pi1_wt_ofst_rec_v = (UWORD8 *)pu4_wt_offset;
  666|  8.30M|        ps_pred->u1_wght_pred_type = u1_wght_pred_type;
  667|  8.30M|        ps_pred->i1_pod_ht = 0;
  668|       |
  669|       |        /* Increment the Reference buffer Indices */
  670|  8.30M|        pu1_ref_buf += u1_dma_wd * u1_dma_ht;
  671|  8.30M|        u2_tot_ref_scratch_size += u1_dma_wd * u1_dma_ht;
  672|       |
  673|       |        /* unrestricted field motion comp for top region outside frame */
  674|  8.30M|        i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
  675|  8.30M|        if((i2_pod_ht > 0) && u1_pod_top)
  ------------------
  |  Branch (675:12): [True: 27.7k, False: 8.27M]
  |  Branch (675:31): [True: 0, False: 27.7k]
  ------------------
  676|      0|        {
  677|      0|            ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht);
  678|      0|            u1_dma_ht -= i2_pod_ht;
  679|      0|            pu1_pred += i2_pod_ht * u2_frm_wd;
  680|      0|        }
  681|       |        /* unrestricted field motion comp for bottom region outside frame */
  682|  8.30M|        else if(u1_pod_bot)
  ------------------
  |  Branch (682:17): [True: 0, False: 8.30M]
  ------------------
  683|      0|        {
  684|      0|            i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
  685|      0|            if(i2_pod_ht > 0)
  ------------------
  |  Branch (685:16): [True: 0, False: 0]
  ------------------
  686|      0|            {
  687|      0|                u1_dma_ht -= i2_pod_ht;
  688|      0|                ps_pred->i1_pod_ht = (WORD8)i2_pod_ht;
  689|      0|            }
  690|      0|        }
  691|       |
  692|       |        /* Copy Y partition */
  693|       |
  694|       |        /*
  695|       |         * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
  696|       |         * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
  697|       |         */
  698|  8.30M|        if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (698:12): [True: 0, False: 8.30M]
  ------------------
  699|      0|        {
  700|      0|            ps_pred->pu1_pred = pu1_pred;
  701|      0|            ps_pred->u1_dma_ht_y = u1_dma_ht;
  702|      0|            ps_pred->u1_dma_wd_y = u1_dma_wd;
  703|      0|        }
  704|  8.30M|        ps_pred->pu1_y_ref = pu1_pred;
  705|  8.30M|    }
  706|       |
  707|       |
  708|       |
  709|       |    /* Increment ps_pred index */
  710|  8.30M|    ps_pred++;
  711|       |
  712|       |    /* Transfer Setup U & V */
  713|  8.30M|    {
  714|  8.30M|        WORD32 i4_ref_offset, i4_rec_offset;
  715|  8.30M|        UWORD8 *pu1_pred_u, *pu1_pred_v, u1_tmp_dma_ht;
  716|       |        /* CHANGED CODE */
  717|  8.30M|        UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4);
  718|       |        /* CHANGED CODE */
  719|       |
  720|       |        /* calculating rounded motion vectors and fractional components */
  721|  8.30M|        i2_tmp_mv_x = i2_mv_x;
  722|  8.30M|        i2_tmp_mv_y = i2_mv_y;
  723|       |
  724|       |        /************************************************************************/
  725|       |        /* Table 8-9: Derivation of the vertical component of the chroma vector */
  726|       |        /* in field coding mode                                                 */
  727|       |        /************************************************************************/
  728|  8.30M|        if(u1_pod_bot && u1_mb_or_pic_bot)
  ------------------
  |  Branch (728:12): [True: 0, False: 8.30M]
  |  Branch (728:26): [True: 0, False: 0]
  ------------------
  729|      0|            i2_tmp_mv_y += 2;
  730|  8.30M|        if(u1_pod_top && !u1_mb_or_pic_bot)
  ------------------
  |  Branch (730:12): [True: 0, False: 8.30M]
  |  Branch (730:26): [True: 0, False: 0]
  ------------------
  731|      0|            i2_tmp_mv_y -= 2;
  732|       |
  733|       |        /* Eighth sample of the chroma MV */
  734|  8.30M|        u1_dx = i2_tmp_mv_x & 0x7;
  735|  8.30M|        u1_dy = i2_tmp_mv_y & 0x7;
  736|       |
  737|       |        /********************************************************************/
  738|       |        /* Calculating the full pel MV for chroma which is 1/2 of the Luma  */
  739|       |        /* MV in full pel units                                             */
  740|       |        /********************************************************************/
  741|  8.30M|        i2_mv_x = i2_tmp_mv_x;
  742|  8.30M|        i2_mv_y = i2_tmp_mv_y;
  743|  8.30M|        i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
  ------------------
  |  |   67|  8.30M|#define SIGN_POW2_DIV(x, y) (((x) < 0) ? (-((-(x)) >> (y))) : ((x) >> (y)))
  |  |  ------------------
  |  |  |  Branch (67:30): [True: 249k, False: 8.05M]
  |  |  ------------------
  ------------------
  744|  8.30M|        i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
  ------------------
  |  |   67|  8.30M|#define SIGN_POW2_DIV(x, y) (((x) < 0) ? (-((-(x)) >> (y))) : ((x) >> (y)))
  |  |  ------------------
  |  |  |  Branch (67:30): [True: 244k, False: 8.06M]
  |  |  ------------------
  ------------------
  745|  8.30M|        i1_mc_wd = u1_part_wd << 1;
  746|  8.30M|        u1_dma_ht = u1_part_ht << 1;
  747|  8.30M|        if(u1_dx)
  ------------------
  |  Branch (747:12): [True: 577k, False: 7.72M]
  ------------------
  748|   577k|        {
  749|   577k|            if(i2_mv_x < 0)
  ------------------
  |  Branch (749:16): [True: 243k, False: 333k]
  ------------------
  750|   243k|                i2_tmp_mv_x -= 1;
  751|   577k|            i1_mc_wd++;
  752|   577k|        }
  753|  8.30M|        if(u1_dy != 0)
  ------------------
  |  Branch (753:12): [True: 544k, False: 7.76M]
  ------------------
  754|   544k|        {
  755|   544k|            if(i2_mv_y < 0)
  ------------------
  |  Branch (755:16): [True: 233k, False: 310k]
  ------------------
  756|   233k|                i2_tmp_mv_y -= 1;
  757|   544k|            u1_dma_ht++;
  758|   544k|        }
  759|       |
  760|       |        /********************************************************************/
  761|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  762|       |        /* edge of the reference frame, and subsequent clipping             */
  763|       |        /********************************************************************/
  764|  8.30M|        u2_pic_ht >>= 1;
  765|  8.30M|        u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_pic_fld;
  766|  8.30M|        i2_frm_x = (u2_mb_x << 3) + (u1_sub_x << 1) + i2_tmp_mv_x;
  767|  8.30M|        i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 3)
  ------------------
  |  Branch (767:33): [True: 0, False: 8.30M]
  |  Branch (767:46): [True: 0, False: 0]
  ------------------
  768|  8.30M|                        + (((u1_sub_y << 1) + i2_tmp_mv_y) << u1_mb_fld);
  769|       |
  770|  8.30M|        i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
  ------------------
  |  |   77|  8.30M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 4.14k, False: 8.30M]
  |  |  |  Branch (77:54): [True: 82.2k, False: 8.21M]
  |  |  ------------------
  ------------------
  771|  8.30M|                         ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
  772|  8.30M|        i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
  ------------------
  |  |   77|  8.30M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 5.07k, False: 8.30M]
  |  |  |  Branch (77:54): [True: 17.5k, False: 8.28M]
  |  |  ------------------
  ------------------
  773|  8.30M|                         (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
  774|       |
  775|  8.30M|        i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
  ------------------
  |  |  119|  8.30M|#define YUV420SP_FACTOR 2
  ------------------
  776|  8.30M|        u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
  777|       |
  778|       |        /********************************************************************/
  779|       |        /* Calulating the horizontal and the vertical u4_ofst from top left  */
  780|       |        /* edge of the recon buffer                                         */
  781|       |        /********************************************************************/
  782|       |        /* CHANGED CODE */
  783|  8.30M|        u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
  ------------------
  |  |  555|  8.30M|#define BLK8x8SIZE          8
  ------------------
                      u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
  ------------------
  |  |  119|  8.30M|#define YUV420SP_FACTOR 2
  ------------------
  784|  8.30M|        i2_rec_x = u1_sub_x << 1;
  785|  8.30M|        i2_rec_y = u1_sub_y << 1;
  786|  8.30M|        i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
  ------------------
  |  |  119|  8.30M|#define YUV420SP_FACTOR 2
  ------------------
  787|  8.30M|        {
  788|  8.30M|            u2_rec_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
  789|       |
  790|  8.30M|            i2_rec_x += (mb_index << 3);
  791|  8.30M|            i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
  ------------------
  |  |  119|  8.30M|#define YUV420SP_FACTOR 2
  ------------------
  792|  8.30M|            if(u1_mb_bot)
  ------------------
  |  Branch (792:16): [True: 0, False: 8.30M]
  ------------------
  793|      0|                i4_rec_offset += ps_dec->u2_frm_wd_uv << ((u1_mb_fld) ? 0 : 3);
  ------------------
  |  Branch (793:59): [True: 0, False: 0]
  ------------------
  794|  8.30M|            ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
  795|  8.30M|            ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
  796|  8.30M|                            + i4_rec_offset;
  797|       |
  798|  8.30M|        }
  799|       |
  800|       |        /* CHANGED CODE */
  801|       |
  802|       |        /* filling the common pred structures for U */
  803|  8.30M|        u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
  804|  8.30M|        u1_tmp_dma_ht = u1_dma_ht;
  805|  8.30M|        ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
  806|  8.30M|        ps_pred->u2_frm_wd = u2_frm_wd;
  807|  8.30M|        ps_pred->i1_dma_ht = u1_dma_ht;
  808|  8.30M|        ps_pred->i1_mc_wd = i1_mc_wd;
  809|  8.30M|        ps_pred->u2_dst_stride = u2_rec_wd;
  810|       |
  811|  8.30M|        ps_pred->i1_mb_partwidth = u1_part_wd << 1;
  812|  8.30M|        ps_pred->i1_mb_partheight = u1_part_ht << 1;
  813|  8.30M|        ps_pred->u1_dydx = (u1_dy << 3) + u1_dx;
  814|  8.30M|        ps_pred->u1_is_bi_direct = u1_is_bi_dir;
  815|  8.30M|        ps_pred->u1_wght_pred_type = u1_wght_pred_type;
  816|  8.30M|        ps_pred->i1_pod_ht = 0;
  817|       |
  818|  8.30M|        ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
  819|       |
  820|       |        /* unrestricted field motion comp for top region outside frame */
  821|  8.30M|        i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
  822|  8.30M|        if((i2_pod_ht > 0) && u1_pod_top)
  ------------------
  |  Branch (822:12): [True: 17.8k, False: 8.28M]
  |  Branch (822:31): [True: 0, False: 17.8k]
  ------------------
  823|      0|        {
  824|      0|            i4_ref_offset += i2_pod_ht * u2_frm_wd;
  825|      0|            u1_dma_ht -= i2_pod_ht;
  826|      0|            ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht);
  827|      0|        }
  828|       |        /* unrestricted field motion comp for bottom region outside frame */
  829|  8.30M|        else if(u1_pod_bot)
  ------------------
  |  Branch (829:17): [True: 0, False: 8.30M]
  ------------------
  830|      0|        {
  831|      0|            i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
  832|      0|            if(i2_pod_ht > 0)
  ------------------
  |  Branch (832:16): [True: 0, False: 0]
  ------------------
  833|      0|            {
  834|      0|                u1_dma_ht -= i2_pod_ht;
  835|      0|                ps_pred->i1_pod_ht = (WORD8)i2_pod_ht;
  836|      0|            }
  837|      0|        }
  838|       |
  839|  8.30M|        pu1_pred_u = pu1_buf2 + i4_ref_offset;
  840|  8.30M|        pu1_pred_v = pu1_buf3 + i4_ref_offset;
  841|       |
  842|       |        /* Copy U & V partitions */
  843|  8.30M|        if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (843:12): [True: 0, False: 8.30M]
  ------------------
  844|      0|        {
  845|      0|            ps_pred->pu1_pred_u = pu1_pred_u;
  846|      0|            ps_pred->u1_dma_ht_uv = u1_dma_ht;
  847|      0|            ps_pred->u1_dma_wd_uv = u1_dma_wd;
  848|       |
  849|      0|        }
  850|  8.30M|        ps_pred->pu1_u_ref = pu1_pred_u;
  851|       |
  852|       |        /* Increment the reference buffer Index */
  853|  8.30M|        u2_tot_ref_scratch_size += (u1_dma_wd * u1_tmp_dma_ht) << 1;
  854|       |
  855|  8.30M|        if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (855:12): [True: 0, False: 8.30M]
  ------------------
  856|      0|        {
  857|      0|            ps_pred->pu1_pred_v = pu1_pred_v;
  858|      0|            ps_pred->u1_dma_ht_uv = u1_dma_ht;
  859|      0|            ps_pred->u1_dma_wd_uv = u1_dma_wd;
  860|      0|        }
  861|       |
  862|  8.30M|        ps_pred->pu1_v_ref = pu1_pred_v;
  863|  8.30M|    }
  864|       |
  865|       |    /* Increment ps_pred index */
  866|  8.30M|    ps_dec->u4_pred_info_idx += 2;
  867|       |
  868|       |
  869|       |    /* Increment the reference buffer Index */
  870|  8.30M|    ps_dec->u4_dma_buf_idx += u2_tot_ref_scratch_size;
  871|       |
  872|  8.30M|    if(ps_dec->u4_dma_buf_idx > MAX_REF_BUF_SIZE)
  ------------------
  |  |   68|  8.30M|#define MAX_REF_BUF_SIZE       (3776*2*2)
  ------------------
  |  Branch (872:8): [True: 0, False: 8.30M]
  ------------------
  873|      0|        return ERROR_NUM_MV;
  874|       |
  875|  8.30M|    return OK;
  ------------------
  |  |  114|  8.30M|#define OK        0
  ------------------
  876|       |
  877|       |
  878|       |
  879|  8.30M|}
ih264d_motion_compensate_bp:
  904|  5.24M|{
  905|  5.24M|    pred_info_t *ps_pred ;
  906|  5.24M|    UWORD8 *puc_ref, *pu1_dest_y;
  907|  5.24M|    UWORD8 *pu1_dest_u;
  908|  5.24M|    UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
  909|       |
  910|  5.24M|    UWORD32 u4_wd_y, u4_ht_y, u4_wd_uv;
  911|  5.24M|    UWORD32 u4_ht_uv;
  912|  5.24M|    UWORD8 *puc_pred0 = (UWORD8 *)(ps_dec->pi2_pred1);
  913|       |
  914|       |
  915|  5.24M|    PROFILE_DISABLE_INTER_PRED()
  ------------------
  |  |  119|  5.24M|#define PROFILE_DISABLE_INTER_PRED() ;
  ------------------
  916|  5.24M|    UNUSED(ps_cur_mb_info);
  ------------------
  |  |   45|  5.24M|#define UNUSED(x) ((void)(x))
  ------------------
  917|  5.24M|    ps_pred = ps_dec->ps_pred ;
  918|       |
  919|  10.5M|    for(u2_num_pels = 0; u2_num_pels < 256;)
  ------------------
  |  Branch (919:26): [True: 5.34M, False: 5.24M]
  ------------------
  920|  5.34M|    {
  921|  5.34M|        UWORD32 uc_dx, uc_dy;
  922|       |        /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
  923|       |         the MB partition are zero then it would be better to copy the
  924|       |         predictor valus directly to the current frame buffer */
  925|       |        /*
  926|       |         * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
  927|       |         * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
  928|       |         */
  929|       |
  930|  5.34M|        u2_ref_wd_y = ps_pred->u2_frm_wd;
  931|  5.34M|        puc_ref = ps_pred->pu1_y_ref;
  932|  5.34M|        if(ps_pred->u1_dydx & 0x3)
  ------------------
  |  Branch (932:12): [True: 123k, False: 5.22M]
  ------------------
  933|   123k|            puc_ref += 2;
  934|  5.34M|        if(ps_pred->u1_dydx >> 2)
  ------------------
  |  Branch (934:12): [True: 112k, False: 5.23M]
  ------------------
  935|   112k|            puc_ref += 2 * u2_ref_wd_y;
  936|       |
  937|  5.34M|        u4_wd_y = ps_pred->i1_mb_partwidth;
  938|  5.34M|        u4_ht_y = ps_pred->i1_mb_partheight;
  939|  5.34M|        uc_dx = ps_pred->u1_dydx;
  940|  5.34M|        uc_dy = uc_dx >> 2;
  941|  5.34M|        uc_dx &= 0x3;
  942|       |
  943|  5.34M|        pu1_dest_y = ps_pred->pu1_rec_y_u;
  944|  5.34M|        u2_dst_wd = ps_pred->u2_dst_stride;
  945|       |
  946|  5.34M|        ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y,
  947|  5.34M|                                                      u2_ref_wd_y,
  948|  5.34M|                                                      u2_dst_wd,
  949|  5.34M|                                                      u4_ht_y,
  950|  5.34M|                                                      u4_wd_y, puc_pred0,
  951|  5.34M|                                                      ps_pred->u1_dydx);
  952|       |
  953|  5.34M|        ps_pred++;
  954|       |
  955|       |        /* Interpolate samples for the chroma components */
  956|  5.34M|        {
  957|  5.34M|            UWORD8 *pu1_ref_u;
  958|       |
  959|  5.34M|            u2_ref_wd_uv = ps_pred->u2_frm_wd;
  960|  5.34M|            pu1_ref_u = ps_pred->pu1_u_ref;
  961|       |
  962|  5.34M|            u4_wd_uv = ps_pred->i1_mb_partwidth;
  963|  5.34M|            u4_ht_uv = ps_pred->i1_mb_partheight;
  964|  5.34M|            uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
  965|  5.34M|            uc_dy = uc_dx >> 3;
  966|  5.34M|            uc_dx &= 0x7;
  967|       |
  968|  5.34M|            pu1_dest_u = ps_pred->pu1_rec_y_u;
  969|  5.34M|            u2_dst_wd = ps_pred->u2_dst_stride;
  970|       |
  971|  5.34M|            ps_pred++;
  972|  5.34M|            ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u, u2_ref_wd_uv,
  973|  5.34M|                                         u2_dst_wd, uc_dx, uc_dy,
  974|  5.34M|                                         u4_ht_uv, u4_wd_uv);
  975|       |
  976|  5.34M|        }
  977|       |
  978|  5.34M|        u2_num_pels += (UWORD8)u4_wd_y * (UWORD8)u4_ht_y;
  979|       |
  980|  5.34M|    }
  981|  5.24M|}
ih264d_motion_compensate_mp:
 1006|  7.05M|{
 1007|  7.05M|    pred_info_t *ps_pred ;
 1008|  7.05M|    pred_info_t *ps_pred_y_forw, *ps_pred_y_back, *ps_pred_cr_forw;
 1009|  7.05M|    UWORD8 *puc_ref, *pu1_dest_y, *puc_pred0, *puc_pred1;
 1010|  7.05M|    UWORD8 *pu1_dest_u, *pu1_dest_v;
 1011|  7.05M|    WORD16 *pi16_intm;
 1012|  7.05M|    UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
 1013|  7.05M|    UWORD32 u2_dest_wd_y, u2_dest_wd_uv;
 1014|  7.05M|    UWORD32 u2_row_buf_wd_y = 0;
 1015|  7.05M|    UWORD32 u2_row_buf_wd_uv = 0;
 1016|  7.05M|    UWORD32 u2_log2Y_crwd;
 1017|  7.05M|    UWORD32 u4_wd_y, u4_ht_y, u1_dir, u4_wd_uv;
 1018|  7.05M|    UWORD32 u4_ht_uv;
 1019|  7.05M|    UWORD8 *pu1_temp_mc_buffer = ps_dec->pu1_temp_mc_buffer;
 1020|  7.05M|    WORD32 i2_pod_ht;
 1021|  7.05M|    UWORD32 u2_pic_ht, u2_frm_wd, u2_rec_wd;
 1022|  7.05M|    UWORD32 u1_pod_bot, u1_pod_top;
 1023|  7.05M|    UWORD8 *pu1_pred, *pu1_dma_dst;
 1024|  7.05M|    UWORD32 u1_dma_wd, u1_dma_ht;
 1025|       |
 1026|  7.05M|    dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
 1027|       |
 1028|       |    /* set default value to flags specifying field nature of picture & mb */
 1029|  7.05M|    UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
 1030|  7.05M|    UWORD32 u1_mb_or_pic_bot;
 1031|       |    /* calculate flags specifying field nature of picture & mb */
 1032|  7.05M|    const UWORD8 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
 1033|       |
 1034|  7.05M|    PROFILE_DISABLE_INTER_PRED()
  ------------------
  |  |  119|  7.05M|#define PROFILE_DISABLE_INTER_PRED() ;
  ------------------
 1035|  7.05M|    ps_pred = ps_dec->ps_pred ;
 1036|       |    /* Initialize both ps_pred_y_forw, ps_pred_cr_forw and ps_pred_y_back
 1037|       |     * to avoid static analysis warnings */
 1038|  7.05M|    ps_pred_y_forw = ps_pred;
 1039|  7.05M|    ps_pred_y_back = ps_pred;
 1040|  7.05M|    ps_pred_cr_forw = ps_pred;
 1041|       |
 1042|  7.05M|    u2_log2Y_crwd = ps_dec->ps_decode_cur_slice->u2_log2Y_crwd;
 1043|       |
 1044|  7.05M|    if(!u1_pic_fld)
  ------------------
  |  Branch (1044:8): [True: 7.05M, False: 0]
  ------------------
 1045|  7.05M|    {
 1046|  7.05M|        u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
 1047|  7.05M|    }
 1048|       |
 1049|  7.05M|    u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
 1050|       |
 1051|  7.05M|    pi16_intm = ps_dec->pi2_pred1;
 1052|  7.05M|    puc_pred0 = (UWORD8 *)pi16_intm;
 1053|  7.05M|    puc_pred1 = puc_pred0 + PRED_BUFFER_WIDTH * PRED_BUFFER_HEIGHT * sizeof(WORD16);
  ------------------
  |  |   55|  7.05M|#define PRED_BUFFER_WIDTH   24*2
  ------------------
                  puc_pred1 = puc_pred0 + PRED_BUFFER_WIDTH * PRED_BUFFER_HEIGHT * sizeof(WORD16);
  ------------------
  |  |   56|  7.05M|#define PRED_BUFFER_HEIGHT  24*2
  ------------------
 1054|       |
 1055|  14.4M|    for(u2_num_pels = 0; u2_num_pels < 256;)
  ------------------
  |  Branch (1055:26): [True: 7.39M, False: 7.05M]
  ------------------
 1056|  7.39M|    {
 1057|  7.39M|        UWORD8 uc_dx, uc_dy;
 1058|  7.39M|        const UWORD8 u1_is_bi_direct = ps_pred->u1_is_bi_direct;
 1059|  15.6M|        for(u1_dir = 0; u1_dir <= u1_is_bi_direct; u1_dir++)
  ------------------
  |  Branch (1059:25): [True: 8.29M, False: 7.39M]
  ------------------
 1060|  8.29M|        {
 1061|       |            /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
 1062|       |             the MB partition are zero then it would be better to copy the
 1063|       |             predictor valus directly to the current frame buffer */
 1064|       |            /*
 1065|       |             * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
 1066|       |             * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
 1067|       |             */
 1068|       |
 1069|  8.29M|            if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (1069:16): [True: 0, False: 8.29M]
  ------------------
 1070|      0|            {
 1071|      0|                u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
 1072|      0|                puc_ref = ps_pred->pu1_dma_dest_addr;
 1073|      0|            }
 1074|  8.29M|            else
 1075|  8.29M|            {
 1076|  8.29M|                u2_ref_wd_y = ps_pred->u2_frm_wd;
 1077|  8.29M|                puc_ref = ps_pred->pu1_y_ref;
 1078|       |
 1079|  8.29M|            }
 1080|       |
 1081|  8.29M|            if(ps_pred->u1_dydx & 0x3)
  ------------------
  |  Branch (1081:16): [True: 506k, False: 7.79M]
  ------------------
 1082|   506k|                puc_ref += 2;
 1083|  8.29M|            if(ps_pred->u1_dydx >> 2)
  ------------------
  |  Branch (1083:16): [True: 492k, False: 7.80M]
  ------------------
 1084|   492k|                puc_ref += 2 * u2_ref_wd_y;
 1085|  8.29M|            u4_wd_y = ps_pred->i1_mb_partwidth;
 1086|  8.29M|            u4_ht_y = ps_pred->i1_mb_partheight;
 1087|       |
 1088|  8.29M|            uc_dx = ps_pred->u1_dydx;
 1089|  8.29M|            uc_dy = uc_dx >> 2;
 1090|  8.29M|            uc_dx &= 0x3;
 1091|  8.29M|            if(u1_dir == 0)
  ------------------
  |  Branch (1091:16): [True: 7.39M, False: 901k]
  ------------------
 1092|  7.39M|            {
 1093|  7.39M|                pu1_dest_y = ps_pred->pu1_rec_y_u;
 1094|  7.39M|                u2_row_buf_wd_y = ps_pred->u2_dst_stride;
 1095|  7.39M|                u2_dst_wd = ps_pred->u2_dst_stride;
 1096|  7.39M|                u2_dest_wd_y = u2_dst_wd;
 1097|  7.39M|                ps_pred_y_forw = ps_pred;
 1098|  7.39M|            }
 1099|   901k|            else
 1100|   901k|            {
 1101|   901k|                pu1_dest_y = pu1_temp_mc_buffer;
 1102|   901k|                u2_dst_wd = MB_SIZE;
  ------------------
  |  |  554|   901k|#define MB_SIZE             16
  ------------------
 1103|   901k|                u2_dest_wd_y = u2_dst_wd;
 1104|   901k|                ps_pred_y_back = ps_pred;
 1105|   901k|                ps_pred_y_back->pu1_rec_y_u = pu1_dest_y;
 1106|   901k|            }
 1107|       |
 1108|       |            /* padding on demand (POD) for y done here */
 1109|       |
 1110|  8.29M|            if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (1110:16): [True: 0, False: 8.29M]
  ------------------
 1111|      0|            {
 1112|      0|                pu1_pred = ps_pred->pu1_pred;
 1113|      0|                pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
 1114|      0|                u1_dma_wd = ps_pred->u1_dma_wd_y;
 1115|      0|                u1_dma_ht = ps_pred->u1_dma_ht_y;
 1116|      0|                u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
 1117|      0|                if(ps_pred->i1_pod_ht < 0)
  ------------------
  |  Branch (1117:20): [True: 0, False: 0]
  ------------------
 1118|      0|                {
 1119|      0|                    pu1_dma_dst = pu1_dma_dst - (ps_pred->i1_pod_ht * ps_pred->u2_u1_ref_buf_wd);
 1120|      0|                }
 1121|      0|                ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, u1_dma_wd,
 1122|      0|                                 u1_dma_ht);
 1123|      0|                ih264d_pad_on_demand(ps_pred, LUM_BLK);
  ------------------
  |  |  286|      0|#define LUM_BLK              0
  ------------------
 1124|      0|            }
 1125|  8.29M|            ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y,
 1126|  8.29M|                                                          u2_ref_wd_y,
 1127|  8.29M|                                                          u2_dst_wd,
 1128|  8.29M|                                                          u4_ht_y,
 1129|  8.29M|                                                          u4_wd_y,
 1130|  8.29M|                                                          puc_pred0,
 1131|  8.29M|                                                          ps_pred->u1_dydx);
 1132|  8.29M|            ps_pred++;
 1133|       |
 1134|       |            /* Interpolate samples for the chroma components */
 1135|  8.29M|            {
 1136|  8.29M|                UWORD8 *pu1_ref_u;
 1137|  8.29M|                UWORD32 u1_dma_ht;
 1138|       |
 1139|       |                /* padding on demand (POD) for U and V done here */
 1140|  8.29M|                u1_dma_ht = ps_pred->i1_dma_ht;
 1141|       |
 1142|  8.29M|                if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (1142:20): [True: 0, False: 8.29M]
  ------------------
 1143|      0|                {
 1144|      0|                    pu1_pred = ps_pred->pu1_pred_u;
 1145|      0|                    pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
 1146|      0|                    u1_dma_ht = ps_pred->u1_dma_ht_uv;
 1147|      0|                    u1_dma_wd = ps_pred->u1_dma_wd_uv * YUV420SP_FACTOR;
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1148|      0|                    u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
 1149|      0|                    if(ps_pred->i1_pod_ht < 0)
  ------------------
  |  Branch (1149:24): [True: 0, False: 0]
  ------------------
 1150|      0|                    {
 1151|       |                        /*Top POD*/
 1152|      0|                        pu1_dma_dst -= (ps_pred->i1_pod_ht
 1153|      0|                                        * ps_pred->u2_u1_ref_buf_wd
 1154|      0|                                        * YUV420SP_FACTOR);
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1155|      0|                    }
 1156|       |
 1157|      0|                    ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
 1158|      0|                                     u1_dma_wd, u1_dma_ht);
 1159|       |
 1160|      0|                    pu1_dma_dst += (ps_pred->i1_dma_ht
 1161|      0|                                    * ps_pred->u2_u1_ref_buf_wd);
 1162|      0|                    pu1_pred = ps_pred->pu1_pred_v;
 1163|       |
 1164|      0|                    ih264d_pad_on_demand(ps_pred, CHROM_BLK);
  ------------------
  |  |  287|      0|#define CHROM_BLK            1
  ------------------
 1165|      0|                }
 1166|       |
 1167|  8.29M|                if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (1167:20): [True: 0, False: 8.29M]
  ------------------
 1168|      0|                {
 1169|      0|                    pu1_ref_u = ps_pred->pu1_dma_dest_addr;
 1170|       |
 1171|      0|                    u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd
 1172|      0|                                    * YUV420SP_FACTOR;
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1173|      0|                }
 1174|  8.29M|                else
 1175|  8.29M|                {
 1176|  8.29M|                    u2_ref_wd_uv = ps_pred->u2_frm_wd;
 1177|  8.29M|                    pu1_ref_u = ps_pred->pu1_u_ref;
 1178|       |
 1179|  8.29M|                }
 1180|       |
 1181|  8.29M|                u4_wd_uv = ps_pred->i1_mb_partwidth;
 1182|  8.29M|                u4_ht_uv = ps_pred->i1_mb_partheight;
 1183|  8.29M|                uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
 1184|  8.29M|                uc_dy = uc_dx >> 3;
 1185|  8.29M|                uc_dx &= 0x7;
 1186|  8.29M|                if(u1_dir == 0)
  ------------------
  |  Branch (1186:20): [True: 7.39M, False: 901k]
  ------------------
 1187|  7.39M|                {
 1188|  7.39M|                    pu1_dest_u = ps_pred->pu1_rec_y_u;
 1189|       |
 1190|  7.39M|                    pu1_dest_v = ps_pred->u1_pi1_wt_ofst_rec_v;
 1191|  7.39M|                    u2_row_buf_wd_uv = ps_pred->u2_dst_stride;
 1192|  7.39M|                    u2_dst_wd = ps_pred->u2_dst_stride;
 1193|  7.39M|                    u2_dest_wd_uv = u2_dst_wd;
 1194|  7.39M|                    ps_pred_cr_forw = ps_pred;
 1195|  7.39M|                }
 1196|   901k|                else
 1197|   901k|                {
 1198|   901k|                    pu1_dest_u = puc_pred0;
 1199|       |
 1200|   901k|                    pu1_dest_v = puc_pred1;
 1201|   901k|                    u2_dest_wd_uv = BUFFER_WIDTH;
  ------------------
  |  |   42|   901k|#define BUFFER_WIDTH        16
  ------------------
 1202|   901k|                    u2_dst_wd = BUFFER_WIDTH;
  ------------------
  |  |   42|   901k|#define BUFFER_WIDTH        16
  ------------------
 1203|   901k|                    ps_pred->pu1_rec_y_u = pu1_dest_u;
 1204|   901k|                    ps_pred->u1_pi1_wt_ofst_rec_v = pu1_dest_v;
 1205|   901k|                }
 1206|       |
 1207|  8.29M|                ps_pred++;
 1208|  8.29M|                ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u,
 1209|  8.29M|                                             u2_ref_wd_uv, u2_dst_wd,
 1210|  8.29M|                                             uc_dx, uc_dy, u4_ht_uv,
 1211|  8.29M|                                             u4_wd_uv);
 1212|       |
 1213|  8.29M|                if(ps_cur_mb_info->u1_Mux == 1)
  ------------------
  |  Branch (1213:20): [True: 6.90k, False: 8.29M]
  ------------------
 1214|  6.90k|                {
 1215|       |                    /******************************************************************/
 1216|       |                    /* padding on demand (POD) for U and V done here                  */
 1217|       |                    /* ps_pred now points to the Y entry of the 0,0 component         */
 1218|       |                    /* Y need not be checked for POD because Y lies within            */
 1219|       |                    /* the picture((0,0) mv for Y doesnot get changed. But (0,0) for  */
 1220|       |                    /* U and V can need POD beacause of cross-field mv adjustments    */
 1221|       |                    /* (Table 8-9 of standard)                                        */
 1222|       |                    /******************************************************************/
 1223|  6.90k|                    if((ps_pred + 1)->i1_pod_ht)
  ------------------
  |  Branch (1223:24): [True: 0, False: 6.90k]
  ------------------
 1224|      0|                    {
 1225|      0|                        pu1_pred = (ps_pred + 1)->pu1_pred_u;
 1226|      0|                        pu1_dma_dst = (ps_pred + 1)->pu1_dma_dest_addr;
 1227|      0|                        u1_dma_ht = (ps_pred + 1)->u1_dma_ht_uv;
 1228|      0|                        u1_dma_wd = (ps_pred + 1)->u1_dma_wd_uv
 1229|      0|                                        * YUV420SP_FACTOR;
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1230|      0|                        u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
 1231|      0|                        if((ps_pred + 1)->i1_pod_ht < 0)
  ------------------
  |  Branch (1231:28): [True: 0, False: 0]
  ------------------
 1232|      0|                        {
 1233|       |                            /*Top POD*/
 1234|      0|                            pu1_dma_dst -= ((ps_pred + 1)->i1_pod_ht
 1235|      0|                                            * (ps_pred + 1)->u2_u1_ref_buf_wd
 1236|      0|                                            * YUV420SP_FACTOR);
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1237|      0|                        }
 1238|      0|                        ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
 1239|      0|                                         u1_dma_wd, u1_dma_ht);
 1240|      0|                        pu1_dma_dst += ((ps_pred + 1)->i1_dma_ht
 1241|      0|                                        * (ps_pred + 1)->u2_u1_ref_buf_wd); //(u1_dma_ht * u1_dma_wd);//
 1242|      0|                        pu1_pred = (ps_pred + 1)->pu1_pred_v;
 1243|      0|                        ih264d_pad_on_demand(ps_pred + 1, CHROM_BLK);
  ------------------
  |  |  287|      0|#define CHROM_BLK            1
  ------------------
 1244|       |
 1245|      0|                    }
 1246|       |
 1247|  6.90k|                    ih264d_multiplex_ref_data(ps_dec, ps_pred, pu1_dest_y,
 1248|  6.90k|                                              pu1_dest_u, ps_cur_mb_info,
 1249|  6.90k|                                              u2_dest_wd_y, u2_dest_wd_uv,
 1250|  6.90k|                                              u1_dir);
 1251|  6.90k|                    ps_pred += 2;
 1252|  6.90k|                }
 1253|  8.29M|            }
 1254|  8.29M|        }
 1255|  7.39M|        if(u1_dir != 0)
  ------------------
  |  Branch (1255:12): [True: 7.39M, False: 0]
  ------------------
 1256|  7.39M|            u2_ref_wd_y = MB_SIZE;
  ------------------
  |  |  554|  7.39M|#define MB_SIZE             16
  ------------------
 1257|       |
 1258|  7.39M|        u2_num_pels += u4_wd_y * u4_ht_y;
 1259|       |        /* if BI_DIRECT, average the two pred's, and put in ..PredBuffer[0] */
 1260|  7.39M|        if((u1_is_bi_direct != 0) || (ps_pred_y_forw->u1_wght_pred_type != 0))
  ------------------
  |  Branch (1260:12): [True: 901k, False: 6.49M]
  |  Branch (1260:38): [True: 5.29M, False: 1.20M]
  ------------------
 1261|  6.19M|        {
 1262|       |
 1263|  6.19M|            switch(ps_pred_y_forw->u1_wght_pred_type)
  ------------------
  |  Branch (1263:20): [True: 6.19M, False: 0]
  ------------------
 1264|  6.19M|            {
 1265|   321k|                case 0:
  ------------------
  |  Branch (1265:17): [True: 321k, False: 5.87M]
  ------------------
 1266|   321k|                    ps_dec->pf_default_weighted_pred_luma(
 1267|   321k|                                    ps_pred_y_forw->pu1_rec_y_u, pu1_dest_y,
 1268|   321k|                                    ps_pred_y_forw->pu1_rec_y_u,
 1269|   321k|                                    u2_row_buf_wd_y, u2_ref_wd_y,
 1270|   321k|                                    u2_row_buf_wd_y, u4_ht_uv * 2,
 1271|   321k|                                    u4_wd_uv * 2);
 1272|       |
 1273|   321k|                    ps_dec->pf_default_weighted_pred_chroma(
 1274|   321k|                                    ps_pred_cr_forw->pu1_rec_y_u, pu1_dest_u,
 1275|   321k|                                    ps_pred_cr_forw->pu1_rec_y_u,
 1276|   321k|                                    u2_row_buf_wd_uv, u2_dst_wd,
 1277|   321k|                                    u2_row_buf_wd_uv, u4_ht_uv,
 1278|   321k|                                    u4_wd_uv);
 1279|       |
 1280|   321k|                    break;
 1281|  5.29M|                case 1:
  ------------------
  |  Branch (1281:17): [True: 5.29M, False: 901k]
  ------------------
 1282|  5.29M|                {
 1283|  5.29M|                    UWORD32 *pu4_weight_ofst =
 1284|  5.29M|                                    (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v;
 1285|  5.29M|                    UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
 1286|  5.29M|                    UWORD32 u4_wt_ofst_y =
 1287|  5.29M|                                    (UWORD32)(pu4_weight_ofst[0]);
 1288|  5.29M|                    WORD32 weight = (WORD16)(u4_wt_ofst_y & 0xffff);
 1289|  5.29M|                    WORD32 ofst = (WORD8)(u4_wt_ofst_y >> 16);
 1290|       |
 1291|  5.29M|                    ps_dec->pf_weighted_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
 1292|  5.29M|                                                  ps_pred_y_forw->pu1_rec_y_u,
 1293|  5.29M|                                                  u2_row_buf_wd_y,
 1294|  5.29M|                                                  u2_row_buf_wd_y,
 1295|  5.29M|                                                  (u2_log2Y_crwd & 0x0ff),
 1296|  5.29M|                                                  weight, ofst, u4_ht_y,
 1297|  5.29M|                                                  u4_wd_y);
 1298|       |
 1299|  5.29M|                    u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
 1300|  5.29M|                    u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
 1301|  5.29M|                    weight = ((u4_wt_ofst_v & 0xffff) << 16)
 1302|  5.29M|                                    | (u4_wt_ofst_u & 0xffff);
 1303|  5.29M|                    ofst = ((u4_wt_ofst_v >> 16) << 8)
 1304|  5.29M|                                    | ((u4_wt_ofst_u >> 16) & 0xFF);
 1305|       |
 1306|  5.29M|                    ps_dec->pf_weighted_pred_chroma(
 1307|  5.29M|                                    ps_pred_cr_forw->pu1_rec_y_u,
 1308|  5.29M|                                    ps_pred_cr_forw->pu1_rec_y_u,
 1309|  5.29M|                                    u2_row_buf_wd_uv, u2_row_buf_wd_uv,
 1310|  5.29M|                                    (u2_log2Y_crwd >> 8), weight, ofst,
 1311|  5.29M|                                    u4_ht_y >> 1, u4_wd_y >> 1);
 1312|  5.29M|                }
 1313|       |
 1314|  5.29M|                    break;
 1315|   579k|                case 2:
  ------------------
  |  Branch (1315:17): [True: 579k, False: 5.61M]
  ------------------
 1316|   579k|                {
 1317|   579k|                    UWORD32 *pu4_weight_ofst =
 1318|   579k|                                    (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v;
 1319|   579k|                    UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
 1320|   579k|                    UWORD32 u4_wt_ofst_y;
 1321|   579k|                    WORD32 weight1, weight2;
 1322|   579k|                    WORD32 ofst1, ofst2;
 1323|       |
 1324|   579k|                    u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[0]);
 1325|       |
 1326|   579k|                    weight1 = (WORD16)(u4_wt_ofst_y & 0xffff);
 1327|   579k|                    ofst1 = (WORD8)(u4_wt_ofst_y >> 16);
 1328|       |
 1329|   579k|                    u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[1]);
 1330|   579k|                    weight2 = (WORD16)(u4_wt_ofst_y & 0xffff);
 1331|   579k|                    ofst2 = (WORD8)(u4_wt_ofst_y >> 16);
 1332|       |
 1333|   579k|                    ps_dec->pf_weighted_bi_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
 1334|   579k|                                                     ps_pred_y_back->pu1_rec_y_u,
 1335|   579k|                                                     ps_pred_y_forw->pu1_rec_y_u,
 1336|   579k|                                                     u2_row_buf_wd_y,
 1337|   579k|                                                     u2_ref_wd_y,
 1338|   579k|                                                     u2_row_buf_wd_y,
 1339|   579k|                                                     (u2_log2Y_crwd & 0x0ff),
 1340|   579k|                                                     weight1, weight2, ofst1,
 1341|   579k|                                                     ofst2, u4_ht_y,
 1342|   579k|                                                     u4_wd_y);
 1343|       |
 1344|   579k|                    u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
 1345|   579k|                    u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
 1346|   579k|                    weight1 = ((u4_wt_ofst_v & 0xffff) << 16)
 1347|   579k|                                    | (u4_wt_ofst_u & 0xffff);
 1348|   579k|                    ofst1 = ((u4_wt_ofst_v >> 16) << 8)
 1349|   579k|                                    | ((u4_wt_ofst_u >> 16) & 0xFF);
 1350|       |
 1351|   579k|                    u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[3]);
 1352|   579k|                    u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[5]);
 1353|   579k|                    weight2 = ((u4_wt_ofst_v & 0xffff) << 16)
 1354|   579k|                                    | (u4_wt_ofst_u & 0xffff);
 1355|   579k|                    ofst2 = ((u4_wt_ofst_v >> 16) << 8)
 1356|   579k|                                    | ((u4_wt_ofst_u >> 16) & 0xFF);
 1357|       |
 1358|   579k|                    ps_dec->pf_weighted_bi_pred_chroma(
 1359|   579k|                                    (ps_pred_y_forw + 1)->pu1_rec_y_u,
 1360|   579k|                                    (ps_pred_y_back + 1)->pu1_rec_y_u,
 1361|   579k|                                    (ps_pred_y_forw + 1)->pu1_rec_y_u,
 1362|   579k|                                    u2_row_buf_wd_uv, u2_dst_wd,
 1363|   579k|                                    u2_row_buf_wd_uv, (u2_log2Y_crwd >> 8),
 1364|   579k|                                    weight1, weight2, ofst1, ofst2,
 1365|   579k|                                    u4_ht_y >> 1, u4_wd_y >> 1);
 1366|   579k|                }
 1367|       |
 1368|   579k|                    break;
 1369|  6.19M|            }
 1370|       |
 1371|  6.19M|        }
 1372|  7.39M|    }
 1373|  7.05M|}
ih264d_multiplex_ref_data:
 1397|  6.90k|{
 1398|  6.90k|    UWORD16 u2_mask = ps_cur_mb_info->u2_mask[u1_dir];
 1399|  6.90k|    UWORD8 *pu1_ref_y, *pu1_ref_u;
 1400|  6.90k|    UWORD8 uc_cond, i, j, u1_dydx;
 1401|  6.90k|    UWORD16 u2_ref_wd_y, u2_ref_wd_uv;
 1402|       |
 1403|  6.90k|    PROFILE_DISABLE_INTER_PRED()
  ------------------
  |  |  119|  6.90k|#define PROFILE_DISABLE_INTER_PRED() ;
  ------------------
 1404|       |
 1405|  6.90k|    if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (1405:8): [True: 0, False: 6.90k]
  ------------------
 1406|      0|    {
 1407|      0|        pu1_ref_y = ps_pred->pu1_dma_dest_addr;
 1408|       |
 1409|      0|        u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
 1410|      0|    }
 1411|  6.90k|    else
 1412|  6.90k|    {
 1413|  6.90k|        pu1_ref_y = ps_pred->pu1_y_ref;
 1414|  6.90k|        u2_ref_wd_y = ps_pred->u2_frm_wd;
 1415|  6.90k|    }
 1416|       |
 1417|  6.90k|    ps_pred++;
 1418|  6.90k|    if(ps_pred->i1_pod_ht)
  ------------------
  |  Branch (1418:8): [True: 0, False: 6.90k]
  ------------------
 1419|      0|    {
 1420|      0|        pu1_ref_u = ps_pred->pu1_dma_dest_addr;
 1421|      0|        u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd * YUV420SP_FACTOR;
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1422|       |
 1423|      0|    }
 1424|  6.90k|    else
 1425|  6.90k|    {
 1426|  6.90k|        pu1_ref_u = ps_pred->pu1_u_ref;
 1427|  6.90k|        u2_ref_wd_uv = ps_pred->u2_frm_wd;
 1428|       |
 1429|  6.90k|    }
 1430|       |
 1431|  6.90k|    u1_dydx = ps_pred->u1_dydx;
 1432|       |
 1433|  6.90k|    {
 1434|  6.90k|        UWORD8 uc_dx, uc_dy;
 1435|  6.90k|        UWORD8 *pu1_scratch_u;
 1436|       |
 1437|  6.90k|        uc_dx = u1_dydx & 0x3;
 1438|  6.90k|        uc_dy = u1_dydx >> 3;
 1439|  6.90k|        if(u1_dydx != 0)
  ------------------
  |  Branch (1439:12): [True: 0, False: 6.90k]
  ------------------
 1440|      0|        {
 1441|      0|            pred_info_t * ps_prv_pred = ps_pred - 2;
 1442|      0|            pu1_scratch_u = ps_prv_pred->pu1_dma_dest_addr;
 1443|      0|            ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_scratch_u,
 1444|      0|                                         u2_ref_wd_uv, 16, uc_dx, uc_dy, 8,
 1445|      0|                                         8);
 1446|       |
 1447|       |            /* Modify ref pointer and refWidth to point to scratch    */
 1448|       |            /* buffer to be used below in ih264d_copy_multiplex_data functions */
 1449|       |            /* CHANGED CODE */
 1450|      0|            pu1_ref_u = pu1_scratch_u;
 1451|      0|            u2_ref_wd_uv = 8 * YUV420SP_FACTOR;
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1452|      0|        }
 1453|  6.90k|    }
 1454|  6.90k|    {
 1455|  34.5k|        for(i = 0; i < 4; i++)
  ------------------
  |  Branch (1455:20): [True: 27.6k, False: 6.90k]
  ------------------
 1456|  27.6k|        {
 1457|   138k|            for(j = 0; j < 4; j++)
  ------------------
  |  Branch (1457:24): [True: 110k, False: 27.6k]
  ------------------
 1458|   110k|            {
 1459|   110k|                uc_cond = u2_mask & 1;
 1460|   110k|                u2_mask >>= 1;
 1461|   110k|                if(uc_cond)
  ------------------
  |  Branch (1461:20): [True: 19.5k, False: 90.9k]
  ------------------
 1462|  19.5k|                {
 1463|  19.5k|                    *(UWORD32 *)(pu1_dest_y + u2_dest_wd_y) =
 1464|  19.5k|                                    *(UWORD32 *)(pu1_ref_y + u2_ref_wd_y);
 1465|  19.5k|                    *(UWORD32 *)(pu1_dest_y + 2 * u2_dest_wd_y) =
 1466|  19.5k|                                    *(UWORD32 *)(pu1_ref_y + 2 * u2_ref_wd_y);
 1467|  19.5k|                    *(UWORD32 *)(pu1_dest_y + 3 * u2_dest_wd_y) =
 1468|  19.5k|                                    *(UWORD32 *)(pu1_ref_y + 3 * u2_ref_wd_y);
 1469|  19.5k|                    {
 1470|  19.5k|                        UWORD32 *dst, *src;
 1471|  19.5k|                        dst = (UWORD32 *)pu1_dest_y;
 1472|  19.5k|                        src = (UWORD32 *)pu1_ref_y;
 1473|  19.5k|                        *dst = *src;
 1474|  19.5k|                        dst++;
 1475|  19.5k|                        src++;
 1476|  19.5k|                        pu1_dest_y = (UWORD8 *)dst;
 1477|  19.5k|                        pu1_ref_y = (UWORD8 *)src;
 1478|  19.5k|                    }
 1479|  19.5k|                    *(UWORD32 *)(pu1_dest_u + u2_dest_wd_uv) =
 1480|  19.5k|                                    *(UWORD32 *)(pu1_ref_u + u2_ref_wd_uv);
 1481|  19.5k|                    {
 1482|  19.5k|                        UWORD32 *dst, *src;
 1483|  19.5k|                        dst = (UWORD32 *)pu1_dest_u;
 1484|  19.5k|                        src = (UWORD32 *)pu1_ref_u;
 1485|  19.5k|                        *dst = *src;
 1486|  19.5k|                        dst++;
 1487|  19.5k|                        src++;
 1488|  19.5k|                        pu1_dest_u = (UWORD8 *)dst;
 1489|  19.5k|                        pu1_ref_u = (UWORD8 *)src;
 1490|  19.5k|                    }
 1491|       |
 1492|  19.5k|                }
 1493|  90.9k|                else
 1494|  90.9k|                {
 1495|  90.9k|                    pu1_dest_y += 4;
 1496|  90.9k|                    pu1_ref_y += 4;
 1497|  90.9k|                    pu1_dest_u += 2 * YUV420SP_FACTOR;
  ------------------
  |  |  119|  90.9k|#define YUV420SP_FACTOR 2
  ------------------
 1498|  90.9k|                    pu1_ref_u += 2 * YUV420SP_FACTOR;
  ------------------
  |  |  119|  90.9k|#define YUV420SP_FACTOR 2
  ------------------
 1499|  90.9k|                }
 1500|   110k|            }
 1501|  27.6k|            pu1_ref_y += 4 * (u2_ref_wd_y - 4);
 1502|  27.6k|            pu1_ref_u += 2 * (u2_ref_wd_uv - 4 * YUV420SP_FACTOR);
  ------------------
  |  |  119|  27.6k|#define YUV420SP_FACTOR 2
  ------------------
 1503|  27.6k|            pu1_dest_y += 4 * (u2_dest_wd_y - 4);
 1504|  27.6k|            pu1_dest_u += 2 * (u2_dest_wd_uv - 4 * YUV420SP_FACTOR);
  ------------------
  |  |  119|  27.6k|#define YUV420SP_FACTOR 2
  ------------------
 1505|  27.6k|        }
 1506|  6.90k|    }
 1507|  6.90k|}

ih264d_transfer_mb_group_data:
 1396|  2.27M|{
 1397|  2.27M|    dec_mb_info_t *ps_cur_mb_info = ps_dec->ps_nmb_info;
 1398|  2.27M|    tfr_ctxt_t *ps_trns_addr = &ps_dec->s_tran_addrecon;
 1399|  2.27M|    UWORD16 u2_mb_y;
 1400|  2.27M|    UWORD32 y_offset;
 1401|  2.27M|    UWORD32 u4_frame_stride;
 1402|  2.27M|    mb_neigbour_params_t *ps_temp;
 1403|  2.27M|    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 1404|  2.27M|    UNUSED(u4_end_of_row_next);
  ------------------
  |  |   45|  2.27M|#define UNUSED(x) ((void)(x))
  ------------------
 1405|       |
 1406|  2.27M|    ps_trns_addr->pu1_dest_y += ps_trns_addr->u4_inc_y[u4_end_of_row];
 1407|  2.27M|    ps_trns_addr->pu1_dest_u += ps_trns_addr->u4_inc_uv[u4_end_of_row];
 1408|  2.27M|    ps_trns_addr->pu1_dest_v += ps_trns_addr->u4_inc_uv[u4_end_of_row];
 1409|       |
 1410|       |    /* Swap top and current pointers */
 1411|  2.27M|    if(u4_end_of_row)
  ------------------
  |  Branch (1411:8): [True: 2.22M, False: 42.6k]
  ------------------
 1412|  2.22M|    {
 1413|       |
 1414|  2.22M|        if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1414:12): [True: 1.08M, False: 1.14M]
  ------------------
 1415|  1.08M|        {
 1416|  1.08M|            u2_mb_y = ps_dec->i2_dec_thread_mb_y;
 1417|  1.08M|        }
 1418|  1.14M|        else
 1419|  1.14M|        {
 1420|  1.14M|            ps_temp = ps_dec->ps_cur_mb_row;
 1421|  1.14M|            ps_dec->ps_cur_mb_row = ps_dec->ps_top_mb_row;
 1422|  1.14M|            ps_dec->ps_top_mb_row = ps_temp;
 1423|       |
 1424|  1.14M|            u2_mb_y = ps_dec->u2_mby + (1 + u1_mbaff);
 1425|  1.14M|        }
 1426|       |
 1427|  2.22M|        u4_frame_stride = ps_dec->u2_frm_wd_y
 1428|  2.22M|                        << ps_dec->ps_cur_slice->u1_field_pic_flag;
 1429|  2.22M|        y_offset = (u2_mb_y * u4_frame_stride) << 4;
 1430|  2.22M|        ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + y_offset;
 1431|       |
 1432|  2.22M|        u4_frame_stride = ps_dec->u2_frm_wd_uv
 1433|  2.22M|                        << ps_dec->ps_cur_slice->u1_field_pic_flag;
 1434|  2.22M|        y_offset = (u2_mb_y * u4_frame_stride) << 3;
 1435|  2.22M|        ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + y_offset;
 1436|  2.22M|        ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + y_offset;
 1437|       |
 1438|  2.22M|        ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
 1439|  2.22M|        ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
 1440|  2.22M|        ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
 1441|  2.22M|    }
 1442|       |
 1443|       |    /*
 1444|       |     * The Slice boundary is also a valid condition to transfer. So recalculate
 1445|       |     * the Left increment, in case the number of MBs is lesser than the
 1446|       |     * N MB value. u4_num_mbs will be equal to N of N MB if the entire N Mb is
 1447|       |     * decoded.
 1448|       |     */
 1449|  2.27M|    ps_dec->s_tran_addrecon.u2_mv_left_inc = ((WORD16)(u4_num_mbs >> u1_mbaff) - 1)
 1450|  2.27M|                    << (4 + u1_mbaff);
 1451|  2.27M|    ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (WORD16)(u4_num_mbs << 2) - 1
 1452|  2.27M|                    - (u1_mbaff << 2);
 1453|       |
 1454|  2.27M|    if(ps_dec->u1_separate_parse == 0)
  ------------------
  |  Branch (1454:8): [True: 1.16M, False: 1.10M]
  ------------------
 1455|  1.16M|    {
 1456|       |        /* reassign left MV and cur MV pointers */
 1457|  1.16M|        ps_dec->ps_mv_left = ps_dec->ps_mv_cur
 1458|  1.16M|                        + ps_dec->s_tran_addrecon.u2_mv_left_inc;
 1459|       |
 1460|  1.16M|        ps_dec->ps_mv_cur += (u4_num_mbs << 4);
 1461|  1.16M|    }
 1462|       |
 1463|       |    /* Increment deblock parameters pointer in external memory */
 1464|       |
 1465|  2.27M|    if(ps_dec->u1_separate_parse == 0)
  ------------------
  |  Branch (1465:8): [True: 1.16M, False: 1.10M]
  ------------------
 1466|  1.16M|    {
 1467|  1.16M|        ps_dec->ps_deblk_mbn += u4_num_mbs;
 1468|  1.16M|    }
 1469|       |
 1470|  2.27M|}

ih264d_get_motion_vector_predictor:
   80|  2.04M|{
   81|  2.04M|    WORD8 c_temp;
   82|  2.04M|    UWORD8 uc_B2 = (u1_B << 1);
   83|       |
   84|       |    /* If only one of the candidate blocks has a reference frame equal to
   85|       |     the current block then use the same block as the final predictor */
   86|  2.04M|    c_temp =
   87|  2.04M|                    (ps_mv_pred[LEFT]->i1_ref_frame[u1_B] == u1_ref_idx)
  ------------------
  |  |  512|  2.04M|#define LEFT  0
  ------------------
   88|  2.04M|                                    | ((ps_mv_pred[TOP]->i1_ref_frame[u1_B]
  ------------------
  |  |  514|  2.04M|#define TOP   1
  ------------------
   89|  2.04M|                                                    == u1_ref_idx) << 1)
   90|  2.04M|                                    | ((ps_mv_pred[TOP_R]->i1_ref_frame[u1_B]
  ------------------
  |  |  516|  2.04M|#define TOP_R 2
  ------------------
   91|  2.04M|                                                    == u1_ref_idx) << 2);
   92|  2.04M|    c_temp = pu1_mv_pred_condition[c_temp];
   93|       |
   94|  2.04M|    if(c_temp != -1)
  ------------------
  |  Branch (94:8): [True: 89.5k, False: 1.95M]
  ------------------
   95|  89.5k|    {
   96|       |        /* Case when only one of the candidate blocks has the same
   97|       |         reference frame as the current block */
   98|  89.5k|        ps_result->i2_mv[uc_B2 + 0] = ps_mv_pred[c_temp]->i2_mv[uc_B2 + 0];
   99|  89.5k|        ps_result->i2_mv[uc_B2 + 1] = ps_mv_pred[c_temp]->i2_mv[uc_B2 + 1];
  100|  89.5k|    }
  101|  1.95M|    else
  102|  1.95M|    {
  103|  1.95M|        WORD32 D0, D1;
  104|  1.95M|        D0 = MIN(ps_mv_pred[0]->i2_mv[uc_B2 + 0],
  ------------------
  |  |   61|  1.95M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 119k, False: 1.83M]
  |  |  ------------------
  ------------------
  105|  1.95M|                 ps_mv_pred[1]->i2_mv[uc_B2 + 0]);
  106|  1.95M|        D1 = MAX(ps_mv_pred[0]->i2_mv[uc_B2 + 0],
  ------------------
  |  |   60|  1.95M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 169k, False: 1.78M]
  |  |  ------------------
  ------------------
  107|  1.95M|                 ps_mv_pred[1]->i2_mv[uc_B2 + 0]);
  108|  1.95M|        D1 = MIN(D1, ps_mv_pred[2]->i2_mv[uc_B2 + 0]);
  ------------------
  |  |   61|  1.95M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 102k, False: 1.85M]
  |  |  ------------------
  ------------------
  109|  1.95M|        ps_result->i2_mv[uc_B2 + 0] = (WORD16)(MAX(D0, D1));
  ------------------
  |  |   60|  1.95M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 75.2k, False: 1.87M]
  |  |  ------------------
  ------------------
  110|       |
  111|  1.95M|        D0 = MIN(ps_mv_pred[0]->i2_mv[uc_B2 + 1],
  ------------------
  |  |   61|  1.95M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 96.1k, False: 1.85M]
  |  |  ------------------
  ------------------
  112|  1.95M|                 ps_mv_pred[1]->i2_mv[uc_B2 + 1]);
  113|  1.95M|        D1 = MAX(ps_mv_pred[0]->i2_mv[uc_B2 + 1],
  ------------------
  |  |   60|  1.95M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 154k, False: 1.79M]
  |  |  ------------------
  ------------------
  114|  1.95M|                 ps_mv_pred[1]->i2_mv[uc_B2 + 1]);
  115|  1.95M|        D1 = MIN(D1, ps_mv_pred[2]->i2_mv[uc_B2 + 1]);
  ------------------
  |  |   61|  1.95M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 80.3k, False: 1.87M]
  |  |  ------------------
  ------------------
  116|  1.95M|        ps_result->i2_mv[uc_B2 + 1] = (WORD16)(MAX(D0, D1));
  ------------------
  |  |   60|  1.95M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 65.0k, False: 1.88M]
  |  |  ------------------
  ------------------
  117|       |
  118|  1.95M|    }
  119|  2.04M|}
ih264d_non_mbaff_mv_pred:
  342|  13.3M|{
  343|  13.3M|    UWORD16 u2_b_in = 0, u2_c_in = 0, u2_d_in = 0;
  344|  13.3M|    UWORD32 u4_sub_mb_x = (u4_sub_mb_num & 3), uc_sub_mb_y = (u4_sub_mb_num >> 2);
  345|       |
  346|       |    /* Checking if the subMBs exist, calculating their motion vectors to be
  347|       |     used as predictors and the reference frames of those subMBs */
  348|       |
  349|  13.3M|    ps_mv_pred[LEFT] = &ps_dec->s_default_mv_pred;
  ------------------
  |  |  512|  13.3M|#define LEFT  0
  ------------------
  350|  13.3M|    ps_mv_pred[TOP] = &(ps_dec->s_default_mv_pred);
  ------------------
  |  |  514|  13.3M|#define TOP   1
  ------------------
  351|  13.3M|    ps_mv_pred[TOP_R] = &(ps_dec->s_default_mv_pred);
  ------------------
  |  |  516|  13.3M|#define TOP_R 2
  ------------------
  352|       |    /* Check if the left subMb is available */
  353|       |
  354|  13.3M|    if(u4_sub_mb_x)
  ------------------
  |  Branch (354:8): [True: 245k, False: 13.1M]
  ------------------
  355|   245k|    {
  356|   245k|        ps_mv_pred[LEFT] = (ps_mv_nmb - 1);
  ------------------
  |  |  512|   245k|#define LEFT  0
  ------------------
  357|   245k|    }
  358|  13.1M|    else
  359|  13.1M|    {
  360|  13.1M|        if(ps_cur_mb_info->u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK)
  ------------------
  |  |   53|  13.1M|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  |  Branch (360:12): [True: 10.9M, False: 2.15M]
  ------------------
  361|  10.9M|        {
  362|  10.9M|            ps_mv_pred[LEFT] = (ps_mv_nmb - 13);
  ------------------
  |  |  512|  10.9M|#define LEFT  0
  ------------------
  363|  10.9M|        }
  364|  13.1M|    }
  365|       |
  366|       |    /* Check if the top subMB is available */
  367|  13.3M|    if(uc_sub_mb_y)
  ------------------
  |  Branch (367:8): [True: 261k, False: 13.1M]
  ------------------
  368|   261k|    {
  369|   261k|        u2_b_in = 1;
  370|   261k|        ps_mv_ntop = ps_mv_nmb - 4;
  371|   261k|        ps_mv_pred[TOP] = ps_mv_ntop;
  ------------------
  |  |  514|   261k|#define TOP   1
  ------------------
  372|       |
  373|   261k|    }
  374|  13.1M|    else
  375|  13.1M|    {
  376|  13.1M|        u2_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   55|  13.1M|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  377|  13.1M|        if(u2_b_in)
  ------------------
  |  Branch (377:12): [True: 12.2M, False: 894k]
  ------------------
  378|  12.2M|        {
  379|  12.2M|            ps_mv_pred[TOP] = ps_mv_ntop;
  ------------------
  |  |  514|  12.2M|#define TOP   1
  ------------------
  380|  12.2M|        }
  381|  13.1M|    }
  382|       |
  383|       |    /* Check if the top right subMb is available. The top right subMb is
  384|       |     defined as the top right subMb at the top right corner of the MB
  385|       |     partition. The top right subMb index starting from the top left
  386|       |     corner of the MB partition is given by
  387|       |     TopRightSubMbIndx = TopLeftSubMbIndx + (WidthOfMbPartition - 6) / 2
  388|       |     */
  389|  13.3M|    u2_c_in = CHECKBIT(ps_cur_mb_info->u2_top_right_avail_mask,
  ------------------
  |  |   54|  13.3M|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
  390|  13.3M|                        (u4_sub_mb_num + uc_mb_part_width - 1));
  391|  13.3M|    if(u2_c_in)
  ------------------
  |  Branch (391:8): [True: 10.3M, False: 2.98M]
  ------------------
  392|  10.3M|    {
  393|  10.3M|        ps_mv_pred[TOP_R] = (ps_mv_ntop + uc_mb_part_width);
  ------------------
  |  |  516|  10.3M|#define TOP_R 2
  ------------------
  394|       |
  395|  10.3M|        if(uc_sub_mb_y == 0)
  ------------------
  |  Branch (395:12): [True: 10.2M, False: 98.6k]
  ------------------
  396|  10.2M|        {
  397|       |            /* CHANGED CODE */
  398|  10.2M|            if((u4_sub_mb_x + uc_mb_part_width) > 3)
  ------------------
  |  Branch (398:16): [True: 10.1M, False: 116k]
  ------------------
  399|  10.1M|                ps_mv_pred[TOP_R] += 12;
  ------------------
  |  |  516|  10.1M|#define TOP_R 2
  ------------------
  400|  10.2M|        }
  401|  10.3M|    }
  402|  2.98M|    else
  403|  2.98M|    {
  404|  2.98M|        u2_d_in = CHECKBIT(ps_cur_mb_info->u2_top_left_avail_mask, u4_sub_mb_num);
  ------------------
  |  |   54|  2.98M|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
  405|       |        /* Check if the top left subMB is available */
  406|  2.98M|        if(u2_d_in)
  ------------------
  |  Branch (406:12): [True: 2.09M, False: 895k]
  ------------------
  407|  2.09M|        {
  408|       |            /* CHANGED CODE */
  409|  2.09M|            ps_mv_pred[TOP_R] = (ps_mv_ntop - 1);
  ------------------
  |  |  516|  2.09M|#define TOP_R 2
  ------------------
  410|  2.09M|            if(u4_sub_mb_x == 0)
  ------------------
  |  Branch (410:16): [True: 1.98M, False: 104k]
  ------------------
  411|  1.98M|            {
  412|  1.98M|                if(uc_sub_mb_y)
  ------------------
  |  Branch (412:20): [True: 61.4k, False: 1.92M]
  ------------------
  413|  61.4k|                {
  414|  61.4k|                    ps_mv_pred[TOP_R] = (ps_mv_nmb - 17);
  ------------------
  |  |  516|  61.4k|#define TOP_R 2
  ------------------
  415|  61.4k|                }
  416|  1.92M|                else
  417|  1.92M|                {
  418|       |                    /* CHANGED CODE */
  419|  1.92M|                    ps_mv_pred[TOP_R] -= 12;
  ------------------
  |  |  516|  1.92M|#define TOP_R 2
  ------------------
  420|  1.92M|                }
  421|  1.98M|            }
  422|  2.09M|        }
  423|   895k|        else if(u2_b_in == 0)
  ------------------
  |  Branch (423:17): [True: 858k, False: 37.6k]
  ------------------
  424|   858k|        {
  425|       |            /* If the subMBs B, C and D are all unavailable then their MVs
  426|       |             and reference pictures are taken to be equal to those of A */
  427|   858k|            ps_mv_pred[TOP] = ps_mv_pred[LEFT];
  ------------------
  |  |  514|   858k|#define TOP   1
  ------------------
                          ps_mv_pred[TOP] = ps_mv_pred[LEFT];
  ------------------
  |  |  512|   858k|#define LEFT  0
  ------------------
  428|   858k|            ps_mv_pred[TOP_R] = ps_mv_pred[LEFT];
  ------------------
  |  |  516|   858k|#define TOP_R 2
  ------------------
                          ps_mv_pred[TOP_R] = ps_mv_pred[LEFT];
  ------------------
  |  |  512|   858k|#define LEFT  0
  ------------------
  429|   858k|        }
  430|  2.98M|    }
  431|  13.3M|}
ih264d_mvpred_nonmbaffB:
  468|  3.47M|{
  469|  3.47M|    UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3;
  470|  3.47M|    mv_pred_t *ps_mv_pred[3];
  471|  3.47M|    UWORD8 uc_B2, uc_lx, u1_ref_idx;
  472|  3.47M|    UWORD8 u1_direct_zero_pred_flag = 0;
  473|       |
  474|  3.47M|    ih264d_non_mbaff_mv_pred(ps_mv_pred, u4_sub_mb_num, ps_mv_nmb, ps_mv_ntop,
  475|  3.47M|                             ps_dec, uc_mb_part_width, ps_cur_mb_info);
  476|       |
  477|  6.99M|    for(uc_lx = u1_lx_start; uc_lx < u1_lxend; uc_lx++)
  ------------------
  |  Branch (477:30): [True: 3.52M, False: 3.47M]
  ------------------
  478|  3.52M|    {
  479|  3.52M|        u1_ref_idx = ps_mv_final_pred->i1_ref_frame[uc_lx];
  480|  3.52M|        uc_B2 = (uc_lx << 1);
  481|  3.52M|        switch(u1_mb_mc_mode)
  482|  3.52M|        {
  483|   111k|            case PRED_16x8:
  ------------------
  |  |  451|   111k|#define PRED_16x8   1
  ------------------
  |  Branch (483:13): [True: 111k, False: 3.41M]
  ------------------
  484|       |                /* Directional prediction for a 16x8 MB partition */
  485|   111k|                if(u4_sub_mb_num == 0)
  ------------------
  |  Branch (485:20): [True: 44.8k, False: 66.4k]
  ------------------
  486|  44.8k|                {
  487|       |                    /* Calculating the MV pred for the top 16x8 block */
  488|  44.8k|                    if(ps_mv_pred[TOP]->i1_ref_frame[uc_lx] == u1_ref_idx)
  ------------------
  |  |  514|  44.8k|#define TOP   1
  ------------------
  |  Branch (488:24): [True: 22.6k, False: 22.1k]
  ------------------
  489|  22.6k|                    {
  490|       |                        /* If the reference frame used by the top subMB is same as the
  491|       |                         reference frame used by the current block then MV predictor to
  492|       |                         be used for the current block is same as the MV of the top
  493|       |                         subMB */
  494|  22.6k|                        ps_mv_final_pred->i2_mv[uc_B2 + 0] =
  495|  22.6k|                                        ps_mv_pred[TOP]->i2_mv[uc_B2 + 0];
  ------------------
  |  |  514|  22.6k|#define TOP   1
  ------------------
  496|  22.6k|                        ps_mv_final_pred->i2_mv[uc_B2 + 1] =
  497|  22.6k|                                        ps_mv_pred[TOP]->i2_mv[uc_B2 + 1];
  ------------------
  |  |  514|  22.6k|#define TOP   1
  ------------------
  498|  22.6k|                    }
  499|  22.1k|                    else
  500|  22.1k|                    {
  501|       |                        /* The MV predictor is calculated according to the process
  502|       |                         defined in 8.4.1.2.1 */
  503|  22.1k|                        ih264d_get_motion_vector_predictor(
  504|  22.1k|                                        ps_mv_final_pred,
  505|  22.1k|                                        ps_mv_pred,
  506|  22.1k|                                        u1_ref_idx,
  507|  22.1k|                                        uc_lx,
  508|  22.1k|                                        (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  509|  22.1k|                    }
  510|  44.8k|                }
  511|  66.4k|                else
  512|  66.4k|                {
  513|  66.4k|                    if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx)
  ------------------
  |  |  512|  66.4k|#define LEFT  0
  ------------------
  |  Branch (513:24): [True: 32.4k, False: 34.0k]
  ------------------
  514|  32.4k|                    {
  515|       |                        /* If the reference frame used by the left subMB is same as the
  516|       |                         reference frame used by the current block then MV predictor to
  517|       |                         be used for the current block is same as the MV of the left
  518|       |                         subMB */
  519|  32.4k|                        ps_mv_final_pred->i2_mv[uc_B2 + 0] =
  520|  32.4k|                                        ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0];
  ------------------
  |  |  512|  32.4k|#define LEFT  0
  ------------------
  521|  32.4k|                        ps_mv_final_pred->i2_mv[uc_B2 + 1] =
  522|  32.4k|                                        ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1];
  ------------------
  |  |  512|  32.4k|#define LEFT  0
  ------------------
  523|  32.4k|                    }
  524|  34.0k|                    else
  525|  34.0k|                    {
  526|       |                        /* The MV predictor is calculated according to the process
  527|       |                         defined in 8.4.1.2.1 */
  528|  34.0k|                        ih264d_get_motion_vector_predictor(
  529|  34.0k|                                        ps_mv_final_pred,
  530|  34.0k|                                        ps_mv_pred,
  531|  34.0k|                                        u1_ref_idx,
  532|  34.0k|                                        uc_lx,
  533|  34.0k|                                        (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  534|  34.0k|                    }
  535|  66.4k|                }
  536|   111k|                break;
  537|  51.5k|            case PRED_8x16:
  ------------------
  |  |  452|  51.5k|#define PRED_8x16   2
  ------------------
  |  Branch (537:13): [True: 51.5k, False: 3.47M]
  ------------------
  538|       |                /* Directional prediction for a 8x16 MB partition */
  539|  51.5k|                if(u4_sub_mb_num == 0)
  ------------------
  |  Branch (539:20): [True: 26.0k, False: 25.4k]
  ------------------
  540|  26.0k|                {
  541|  26.0k|                    if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx)
  ------------------
  |  |  512|  26.0k|#define LEFT  0
  ------------------
  |  Branch (541:24): [True: 13.9k, False: 12.0k]
  ------------------
  542|  13.9k|                    {
  543|       |                        /* If the reference frame used by the left subMB is same as the
  544|       |                         reference frame used by the current block then MV predictor to
  545|       |                         be used for the current block is same as the MV of the left
  546|       |                         subMB */
  547|  13.9k|                        ps_mv_final_pred->i2_mv[uc_B2 + 0] =
  548|  13.9k|                                        ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0];
  ------------------
  |  |  512|  13.9k|#define LEFT  0
  ------------------
  549|  13.9k|                        ps_mv_final_pred->i2_mv[uc_B2 + 1] =
  550|  13.9k|                                        ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1];
  ------------------
  |  |  512|  13.9k|#define LEFT  0
  ------------------
  551|  13.9k|                    }
  552|  12.0k|                    else
  553|  12.0k|                    {
  554|       |                        /* The MV predictor is calculated according to the process
  555|       |                         defined in 8.4.1.2.1 */
  556|  12.0k|                        ih264d_get_motion_vector_predictor(
  557|  12.0k|                                        ps_mv_final_pred,
  558|  12.0k|                                        ps_mv_pred,
  559|  12.0k|                                        u1_ref_idx,
  560|  12.0k|                                        uc_lx,
  561|  12.0k|                                        (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  562|  12.0k|                    }
  563|  26.0k|                }
  564|  25.4k|                else
  565|  25.4k|                {
  566|  25.4k|                    if(ps_mv_pred[TOP_R]->i1_ref_frame[uc_lx] == u1_ref_idx)
  ------------------
  |  |  516|  25.4k|#define TOP_R 2
  ------------------
  |  Branch (566:24): [True: 15.2k, False: 10.2k]
  ------------------
  567|  15.2k|                    {
  568|       |                        /* If the reference frame used by the top right subMB is same as
  569|       |                         the reference frame used by the current block then MV
  570|       |                         predictor to be used for the current block is same as the MV
  571|       |                         of the top right subMB */
  572|  15.2k|                        ps_mv_final_pred->i2_mv[uc_B2 + 0] =
  573|  15.2k|                                        ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 0];
  ------------------
  |  |  516|  15.2k|#define TOP_R 2
  ------------------
  574|  15.2k|                        ps_mv_final_pred->i2_mv[uc_B2 + 1] =
  575|  15.2k|                                        ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 1];
  ------------------
  |  |  516|  15.2k|#define TOP_R 2
  ------------------
  576|  15.2k|                    }
  577|  10.2k|                    else
  578|  10.2k|                    {
  579|       |                        /* The MV predictor is calculated according to the process
  580|       |                         defined in 8.4.1.2.1 */
  581|  10.2k|                        ih264d_get_motion_vector_predictor(
  582|  10.2k|                                        ps_mv_final_pred,
  583|  10.2k|                                        ps_mv_pred,
  584|  10.2k|                                        u1_ref_idx,
  585|  10.2k|                                        uc_lx,
  586|  10.2k|                                        (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  587|  10.2k|                    }
  588|  25.4k|                }
  589|  51.5k|                break;
  590|   681k|            case B_DIRECT_SPATIAL:
  ------------------
  |  |  489|   681k|#define B_DIRECT_SPATIAL  26
  ------------------
  |  Branch (590:13): [True: 681k, False: 2.84M]
  ------------------
  591|       |                /* Spatial direct prediction */
  592|       |                /* For each list, the reference index is the minimum of the
  593|       |                 left, top and top-right neighbours' reference indices, compared
  594|       |                 as unsigned values (so a negative index loses to any valid
  595|       |                 one). If the result is negative for both lists, both indices
  596|       |                 are set to 0 and the direct zero prediction flag is raised.
  597|       |                 The MV for each list is then predicted as in 8.4.1.2.1 */
  598|   681k|                uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0];
  ------------------
  |  |  512|   681k|#define LEFT  0
  ------------------
  599|   681k|                uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0];
  ------------------
  |  |  514|   681k|#define TOP   1
  ------------------
  600|   681k|                uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0];
  ------------------
  |  |  516|   681k|#define TOP_R 2
  ------------------
  601|       |
  602|   681k|                ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1,
  ------------------
  |  |   61|  1.36M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 4.36k, False: 677k]
  |  |  |  Branch (61:24): [True: 7.94k, False: 673k]
  |  |  |  Branch (61:32): [True: 7.92k, False: 669k]
  |  |  ------------------
  ------------------
  603|   681k|                                                      MIN(uc_temp2, uc_temp3));
  604|       |
  605|   681k|                uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1];
  ------------------
  |  |  512|   681k|#define LEFT  0
  ------------------
  606|   681k|                uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1];
  ------------------
  |  |  514|   681k|#define TOP   1
  ------------------
  607|   681k|                uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1];
  ------------------
  |  |  516|   681k|#define TOP_R 2
  ------------------
  608|       |
  609|   681k|                ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1,
  ------------------
  |  |   61|  1.36M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 3.85k, False: 677k]
  |  |  |  Branch (61:24): [True: 7.01k, False: 674k]
  |  |  |  Branch (61:32): [True: 7.00k, False: 670k]
  |  |  ------------------
  ------------------
  610|   681k|                                                      MIN(uc_temp2, uc_temp3));
  611|       |
  612|   681k|                if((ps_mv_final_pred->i1_ref_frame[0] < 0)
  ------------------
  |  Branch (612:20): [True: 55.4k, False: 626k]
  ------------------
  613|  55.4k|                                && (ps_mv_final_pred->i1_ref_frame[1] < 0))
  ------------------
  |  Branch (613:36): [True: 14.0k, False: 41.3k]
  ------------------
  614|  14.0k|                {
  615|  14.0k|                    u1_direct_zero_pred_flag = 1;
  616|  14.0k|                    ps_mv_final_pred->i1_ref_frame[0] = 0;
  617|  14.0k|                    ps_mv_final_pred->i1_ref_frame[1] = 0;
  618|  14.0k|                }
  619|   681k|                ih264d_get_motion_vector_predictor(
  620|   681k|                                ps_mv_final_pred, ps_mv_pred,
  621|   681k|                                ps_mv_final_pred->i1_ref_frame[0], 0,
  622|   681k|                                (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  623|       |
  624|   681k|                ih264d_get_motion_vector_predictor(
  625|   681k|                                ps_mv_final_pred, ps_mv_pred,
  626|   681k|                                ps_mv_final_pred->i1_ref_frame[1], 1,
  627|   681k|                                (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  628|       |
  629|   681k|                break;
  630|  2.41M|            case MB_SKIP:
  ------------------
  |  |  456|  2.41M|#define MB_SKIP     255
  ------------------
  |  Branch (630:13): [True: 2.41M, False: 1.11M]
  ------------------
  631|       |                /* Case when the MB has been skipped */
  632|       |                /* If either of left or the top subMB is not present
  633|       |                 OR
  634|       |                 If both the MV components of either the left or the top subMB are
  635|       |                 zero and their reference frame pointer pointing to 0
  636|       |                 then MV for the skipped MB is zero
  637|       |                 else the Median of the mv_pred_t is used */
  638|  2.41M|                u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
  639|  2.41M|                LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   53|  2.41M|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  640|  2.41M|                u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
  641|  2.41M|                TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   55|  2.41M|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  642|  2.41M|                if(((u1_a_in * u1_b_in) == 0)
  ------------------
  |  Branch (642:20): [True: 394k, False: 2.01M]
  ------------------
  643|  2.01M|                                || ((ps_mv_pred[LEFT]->i2_mv[0]
  ------------------
  |  |  512|  2.01M|#define LEFT  0
  ------------------
  |  Branch (643:36): [True: 2.01M, False: 598]
  ------------------
  644|  2.01M|                                                | ps_mv_pred[LEFT]->i2_mv[1]
  ------------------
  |  |  512|  2.01M|#define LEFT  0
  ------------------
  645|  2.01M|                                                | ps_mv_pred[LEFT]->i1_ref_frame[0])
  ------------------
  |  |  512|  2.01M|#define LEFT  0
  ------------------
  646|  2.01M|                                                == 0)
  647|    598|                                || ((ps_mv_pred[TOP]->i2_mv[0]
  ------------------
  |  |  514|    598|#define TOP   1
  ------------------
  |  Branch (647:36): [True: 11, False: 587]
  ------------------
  648|    598|                                                | ps_mv_pred[TOP]->i2_mv[1]
  ------------------
  |  |  514|    598|#define TOP   1
  ------------------
  649|    598|                                                | ps_mv_pred[TOP]->i1_ref_frame[0])
  ------------------
  |  |  514|    598|#define TOP   1
  ------------------
  650|    598|                                                == 0))
  651|  2.41M|                {
  652|  2.41M|                    ps_mv_final_pred->i2_mv[0] = 0;
  653|  2.41M|                    ps_mv_final_pred->i2_mv[1] = 0;
  654|  2.41M|                    break;
  655|  2.41M|                }
  656|       |                /* If the condition above is not true calculate the MV predictor
  657|       |                 according to the process defined in sec 8.4.1.2.1 */
  658|   271k|            default:
  ------------------
  |  Branch (658:13): [True: 271k, False: 3.25M]
  ------------------
  659|   271k|                ih264d_get_motion_vector_predictor(
  660|   271k|                                ps_mv_final_pred, ps_mv_pred, u1_ref_idx, uc_lx,
  661|   271k|                                (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  662|   271k|                break;
  663|  3.52M|        }
  664|  3.52M|    }
  665|  3.47M|    return (u1_direct_zero_pred_flag);
  666|  3.47M|}
ih264d_mvpred_nonmbaff:
  703|  9.90M|{
  704|  9.90M|    UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3;
  705|  9.90M|    mv_pred_t *ps_mv_pred[3];
  706|  9.90M|    UWORD8 u1_ref_idx;
  707|  9.90M|    UWORD8 u1_direct_zero_pred_flag = 0;
  708|  9.90M|    UNUSED(u1_lx_start);
  ------------------
  |  |   45|  9.90M|#define UNUSED(x) ((void)(x))
  ------------------
  709|  9.90M|    UNUSED(u1_lxend);
  ------------------
  |  |   45|  9.90M|#define UNUSED(x) ((void)(x))
  ------------------
  710|  9.90M|    ih264d_non_mbaff_mv_pred(ps_mv_pred, u4_sub_mb_num, ps_mv_nmb, ps_mv_ntop,
  711|  9.90M|                             ps_dec, uc_mb_part_width, ps_cur_mb_info);
  712|       |
  713|  9.90M|    u1_ref_idx = ps_mv_final_pred->i1_ref_frame[0];
  714|       |
  715|  9.90M|    switch(u1_mb_mc_mode)
  716|  9.90M|    {
  717|  53.6k|        case PRED_16x8:
  ------------------
  |  |  451|  53.6k|#define PRED_16x8   1
  ------------------
  |  Branch (717:9): [True: 53.6k, False: 9.85M]
  ------------------
  718|       |            /* Directional prediction for a 16x8 MB partition */
  719|  53.6k|            if(u4_sub_mb_num == 0)
  ------------------
  |  Branch (719:16): [True: 26.8k, False: 26.8k]
  ------------------
  720|  26.8k|            {
  721|       |                /* Calculating the MV pred for the top 16x8 block */
  722|  26.8k|                if(ps_mv_pred[TOP]->i1_ref_frame[0] == u1_ref_idx)
  ------------------
  |  |  514|  26.8k|#define TOP   1
  ------------------
  |  Branch (722:20): [True: 14.1k, False: 12.6k]
  ------------------
  723|  14.1k|                {
  724|       |                    /* If the reference frame used by the top subMB is same as the
  725|       |                     reference frame used by the current block then MV predictor to
  726|       |                     be used for the current block is same as the MV of the top
  727|       |                     subMB */
  728|       |
  729|  14.1k|                    ps_mv_final_pred->i2_mv[0] = ps_mv_pred[TOP]->i2_mv[0];
  ------------------
  |  |  514|  14.1k|#define TOP   1
  ------------------
  730|  14.1k|                    ps_mv_final_pred->i2_mv[1] = ps_mv_pred[TOP]->i2_mv[1];
  ------------------
  |  |  514|  14.1k|#define TOP   1
  ------------------
  731|  14.1k|                }
  732|  12.6k|                else
  733|  12.6k|                {
  734|       |                    /* The MV predictor is calculated according to the process
  735|       |                     defined in 8.4.1.2.1 */
  736|  12.6k|                    ih264d_get_motion_vector_predictor(
  737|  12.6k|                                    ps_mv_final_pred,
  738|  12.6k|                                    ps_mv_pred,
  739|  12.6k|                                    u1_ref_idx,
  740|  12.6k|                                    0,
  741|  12.6k|                                    (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  742|  12.6k|                }
  743|  26.8k|            }
  744|  26.8k|            else
  745|  26.8k|            {
  746|  26.8k|                if(ps_mv_pred[LEFT]->i1_ref_frame[0] == u1_ref_idx)
  ------------------
  |  |  512|  26.8k|#define LEFT  0
  ------------------
  |  Branch (746:20): [True: 10.1k, False: 16.6k]
  ------------------
  747|  10.1k|                {
  748|       |                    /* If the reference frame used by the left subMB is same as the
  749|       |                     reference frame used by the current block then MV predictor to
  750|       |                     be used for the current block is same as the MV of the left
  751|       |                     subMB */
  752|       |
  753|  10.1k|                    ps_mv_final_pred->i2_mv[0] = ps_mv_pred[LEFT]->i2_mv[0];
  ------------------
  |  |  512|  10.1k|#define LEFT  0
  ------------------
  754|  10.1k|                    ps_mv_final_pred->i2_mv[1] = ps_mv_pred[LEFT]->i2_mv[1];
  ------------------
  |  |  512|  10.1k|#define LEFT  0
  ------------------
  755|  10.1k|                }
  756|  16.6k|                else
  757|  16.6k|                {
  758|       |                    /* The MV predictor is calculated according to the process
  759|       |                     defined in 8.4.1.2.1 */
  760|  16.6k|                    ih264d_get_motion_vector_predictor(
  761|  16.6k|                                    ps_mv_final_pred,
  762|  16.6k|                                    ps_mv_pred,
  763|  16.6k|                                    u1_ref_idx,
  764|  16.6k|                                    0,
  765|  16.6k|                                    (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  766|  16.6k|                }
  767|  26.8k|            }
  768|  53.6k|            break;
  769|  49.2k|        case PRED_8x16:
  ------------------
  |  |  452|  49.2k|#define PRED_8x16   2
  ------------------
  |  Branch (769:9): [True: 49.2k, False: 9.86M]
  ------------------
  770|       |            /* Directional prediction for a 8x16 MB partition */
  771|  49.2k|            if(u4_sub_mb_num == 0)
  ------------------
  |  Branch (771:16): [True: 25.7k, False: 23.4k]
  ------------------
  772|  25.7k|            {
  773|  25.7k|                if(ps_mv_pred[LEFT]->i1_ref_frame[0] == u1_ref_idx)
  ------------------
  |  |  512|  25.7k|#define LEFT  0
  ------------------
  |  Branch (773:20): [True: 7.52k, False: 18.2k]
  ------------------
  774|  7.52k|                {
  775|       |                    /* If the reference frame used by the left subMB is same as the
  776|       |                     reference frame used by the current block then MV predictor to
  777|       |                     be used for the current block is same as the MV of the left
  778|       |                     subMB */
  779|       |
  780|  7.52k|                    ps_mv_final_pred->i2_mv[0] = ps_mv_pred[LEFT]->i2_mv[0];
  ------------------
  |  |  512|  7.52k|#define LEFT  0
  ------------------
  781|  7.52k|                    ps_mv_final_pred->i2_mv[1] = ps_mv_pred[LEFT]->i2_mv[1];
  ------------------
  |  |  512|  7.52k|#define LEFT  0
  ------------------
  782|  7.52k|                }
  783|  18.2k|                else
  784|  18.2k|                {
  785|       |                    /* The MV predictor is calculated according to the process
  786|       |                     defined in 8.4.1.2.1 */
  787|  18.2k|                    ih264d_get_motion_vector_predictor(
  788|  18.2k|                                    ps_mv_final_pred,
  789|  18.2k|                                    ps_mv_pred,
  790|  18.2k|                                    u1_ref_idx,
  791|  18.2k|                                    0,
  792|  18.2k|                                    (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  793|  18.2k|                }
  794|  25.7k|            }
  795|  23.4k|            else
  796|  23.4k|            {
  797|  23.4k|                if(ps_mv_pred[TOP_R]->i1_ref_frame[0] == u1_ref_idx)
  ------------------
  |  |  516|  23.4k|#define TOP_R 2
  ------------------
  |  Branch (797:20): [True: 15.4k, False: 8.02k]
  ------------------
  798|  15.4k|                {
  799|       |                    /* If the reference frame used by the top right subMB is same as
  800|       |                     the reference frame used by the current block then MV
  801|       |                     predictor to be used for the current block is same as the MV
  802|       |                     of the top right subMB */
  803|       |
  804|  15.4k|                    ps_mv_final_pred->i2_mv[0] = ps_mv_pred[TOP_R]->i2_mv[0];
  ------------------
  |  |  516|  15.4k|#define TOP_R 2
  ------------------
  805|  15.4k|                    ps_mv_final_pred->i2_mv[1] = ps_mv_pred[TOP_R]->i2_mv[1];
  ------------------
  |  |  516|  15.4k|#define TOP_R 2
  ------------------
  806|  15.4k|                }
  807|  8.02k|                else
  808|  8.02k|                {
  809|       |                    /* The MV predictor is calculated according to the process
  810|       |                     defined in 8.4.1.2.1 */
  811|  8.02k|                    ih264d_get_motion_vector_predictor(
  812|  8.02k|                                    ps_mv_final_pred,
  813|  8.02k|                                    ps_mv_pred,
  814|  8.02k|                                    u1_ref_idx,
  815|  8.02k|                                    0,
  816|  8.02k|                                    (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  817|  8.02k|                }
  818|  23.4k|            }
  819|  49.2k|            break;
  820|      0|        case B_DIRECT_SPATIAL:
  ------------------
  |  |  489|      0|#define B_DIRECT_SPATIAL  26
  ------------------
  |  Branch (820:9): [True: 0, False: 9.90M]
  ------------------
  821|       |            /* Case when the MB has been skipped */
  822|       |            /* If either of left or the top subMB is not present
  823|       |             OR
  824|       |             If both the MV components of either the left or the top subMB are
  825|       |             zero and their reference frame pointer pointing to 0
  826|       |             then MV for the skipped MB is zero
  827|       |             else the Median of the mv_pred_t is used */
  828|      0|            uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0];
  ------------------
  |  |  512|      0|#define LEFT  0
  ------------------
  829|      0|            uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0];
  ------------------
  |  |  514|      0|#define TOP   1
  ------------------
  830|      0|            uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0];
  ------------------
  |  |  516|      0|#define TOP_R 2
  ------------------
  831|       |
  832|      0|            ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1,
  ------------------
  |  |   61|      0|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 0]
  |  |  |  Branch (61:24): [True: 0, False: 0]
  |  |  |  Branch (61:32): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  833|      0|                                                  MIN(uc_temp2, uc_temp3));
  834|       |
  835|      0|            uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1];
  ------------------
  |  |  512|      0|#define LEFT  0
  ------------------
  836|      0|            uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1];
  ------------------
  |  |  514|      0|#define TOP   1
  ------------------
  837|      0|            uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1];
  ------------------
  |  |  516|      0|#define TOP_R 2
  ------------------
  838|       |
  839|      0|            ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1,
  ------------------
  |  |   61|      0|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 0]
  |  |  |  Branch (61:24): [True: 0, False: 0]
  |  |  |  Branch (61:32): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  840|      0|                                                  MIN(uc_temp2, uc_temp3));
  841|       |
  842|      0|            if((ps_mv_final_pred->i1_ref_frame[0] < 0)
  ------------------
  |  Branch (842:16): [True: 0, False: 0]
  ------------------
  843|      0|                            && (ps_mv_final_pred->i1_ref_frame[1] < 0))
  ------------------
  |  Branch (843:32): [True: 0, False: 0]
  ------------------
  844|      0|            {
  845|      0|                u1_direct_zero_pred_flag = 1;
  846|      0|                ps_mv_final_pred->i1_ref_frame[0] = 0;
  847|      0|                ps_mv_final_pred->i1_ref_frame[1] = 0;
  848|      0|            }
  849|      0|            ih264d_get_motion_vector_predictor(
  850|      0|                            ps_mv_final_pred, ps_mv_pred,
  851|      0|                            ps_mv_final_pred->i1_ref_frame[0], 0,
  852|      0|                            (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  853|       |
  854|      0|            ih264d_get_motion_vector_predictor(
  855|      0|                            ps_mv_final_pred, ps_mv_pred,
  856|      0|                            ps_mv_final_pred->i1_ref_frame[1], 1,
  857|      0|                            (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  858|       |
  859|      0|            break;
  860|  9.57M|        case MB_SKIP:
  ------------------
  |  |  456|  9.57M|#define MB_SKIP     255
  ------------------
  |  Branch (860:9): [True: 9.57M, False: 337k]
  ------------------
  861|       |            /* Case when the MB has been skipped */
  862|       |            /* If either of left or the top subMB is not present
  863|       |             OR
  864|       |             If both the MV components of either the left or the top subMB are
  865|       |             zero and their reference frame pointer pointing to 0
  866|       |             then MV for the skipped MB is zero
  867|       |             else the Median of the mv_pred_t is used */
  868|  9.57M|            u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
  869|  9.57M|            LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   53|  9.57M|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  870|  9.57M|            u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
  871|  9.57M|            TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   55|  9.57M|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  872|  9.57M|            if(((u1_a_in * u1_b_in) == 0)
  ------------------
  |  Branch (872:16): [True: 2.15M, False: 7.41M]
  ------------------
  873|  7.41M|                            || ((ps_mv_pred[LEFT]->i2_mv[0]
  ------------------
  |  |  512|  7.41M|#define LEFT  0
  ------------------
  |  Branch (873:32): [True: 7.37M, False: 46.5k]
  ------------------
  874|  7.41M|                                            | ps_mv_pred[LEFT]->i2_mv[1]
  ------------------
  |  |  512|  7.41M|#define LEFT  0
  ------------------
  875|  7.41M|                                            | ps_mv_pred[LEFT]->i1_ref_frame[0])
  ------------------
  |  |  512|  7.41M|#define LEFT  0
  ------------------
  876|  7.41M|                                            == 0)
  877|  46.5k|                            || ((ps_mv_pred[TOP]->i2_mv[0]
  ------------------
  |  |  514|  46.5k|#define TOP   1
  ------------------
  |  Branch (877:32): [True: 7.66k, False: 38.8k]
  ------------------
  878|  46.5k|                                            | ps_mv_pred[TOP]->i2_mv[1]
  ------------------
  |  |  514|  46.5k|#define TOP   1
  ------------------
  879|  46.5k|                                            | ps_mv_pred[TOP]->i1_ref_frame[0])
  ------------------
  |  |  514|  46.5k|#define TOP   1
  ------------------
  880|  46.5k|                                            == 0))
  881|  9.53M|            {
  882|       |
  883|  9.53M|                ps_mv_final_pred->i2_mv[0] = 0;
  884|  9.53M|                ps_mv_final_pred->i2_mv[1] = 0;
  885|  9.53M|                break;
  886|  9.53M|            }
  887|       |            /* If the condition above is not true calculate the MV predictor
  888|       |             according to the process defined in sec 8.4.1.2.1 */
  889|   273k|        default:
  ------------------
  |  Branch (889:9): [True: 234k, False: 9.67M]
  ------------------
  890|   273k|            ih264d_get_motion_vector_predictor(
  891|   273k|                            ps_mv_final_pred, ps_mv_pred, u1_ref_idx, 0,
  892|   273k|                            (const UWORD8 *)gau1_ih264d_mv_pred_condition);
  893|   273k|            break;
  894|  9.90M|    }
  895|       |
  896|  9.90M|    return (u1_direct_zero_pred_flag);
  897|  9.90M|}
ih264d_rep_mv_colz:
 1175|  14.1M|{
 1176|       |    /* Replicates one motion-vector entry (*ps_mv_pred_src) and one col-zero flag value (u1_colz) over a region of u1_wd entries by u1_ht rows. NOTE(review): the parameter declarations are not part of this listing. */
 1177|  14.1M|    UWORD8 k, m;
 1178|  14.1M|    UWORD8 *pu1_colz = ps_dec->pu1_col_zero_flag + ps_dec->i4_submb_ofst
 1179|  14.1M|                    + u4_sub_mb_num; /* first flag to write: flag base + i4_submb_ofst + u4_sub_mb_num */
 1180|       |    /* k counts rows, m counts entries within a row; consecutive rows are SUB_BLK_WIDTH entries apart in both destinations. */
 1181|  69.5M|    for(k = 0; k < u1_ht; k++)
  ------------------
  |  Branch (1181:16): [True: 55.3M, False: 14.1M]
  ------------------
 1182|  55.3M|    {
 1183|   273M|        for(m = 0; m < u1_wd; m++)
  ------------------
  |  Branch (1183:20): [True: 218M, False: 55.3M]
  ------------------
 1184|   218M|        {
 1185|   218M|            *(ps_mv_pred_dst + m) = *(ps_mv_pred_src); /* source pointer is never advanced: the same entry is copied to every position */
 1186|   218M|            *(pu1_colz + m) = u1_colz; /* same flag value for every position */
 1187|       |
 1188|   218M|        }
 1189|  55.3M|        pu1_colz += SUB_BLK_WIDTH; /* advance the flag cursor to the next row */
  ------------------
  |  |  560|  55.3M|#define SUB_BLK_WIDTH                 4
  ------------------
 1190|  55.3M|        ps_mv_pred_dst += SUB_BLK_WIDTH; /* advance the motion-vector cursor to the next row */
  ------------------
  |  |  560|  55.3M|#define SUB_BLK_WIDTH                 4
  ------------------
 1191|  55.3M|    }
 1192|  14.1M|}

ih264d_parse_bmb_non_direct_cavlc:
   79|  24.4k|{
   80|  24.4k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
   81|  24.4k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
   82|  24.4k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
   83|  24.4k|    UWORD8 * pu1_sub_mb_pred_modes = (UWORD8 *)(gau1_ih264d_submb_pred_modes) + 4;
   84|  24.4k|    const UWORD8 (*pu1_mb_pred_modes)[32] =
   85|  24.4k|                    (const UWORD8 (*)[32])gau1_ih264d_mb_pred_modes;
   86|  24.4k|    const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
   87|  24.4k|    const UWORD8 * pu1_sub_mb_mc_mode = (const UWORD8 *)(gau1_ih264d_submb_mc_mode)
   88|  24.4k|                    + 4;
   89|       |
   90|  24.4k|    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
   91|  24.4k|                    + u4_num_mbsNby2;
   92|  24.4k|    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
   93|  24.4k|    WORD8 (*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] = ps_parse_mb_data->i1_ref_idx;
   94|  24.4k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
   95|  24.4k|    UWORD8 u1_mb_mc_mode, u1_num_mb_part, u1_sub_mb = !(u1_mb_type ^ B_8x8);
  ------------------
  |  |  480|  24.4k|#define B_8x8    22
  ------------------
   96|  24.4k|    UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0;
   97|  24.4k|    WORD32 ret;
   98|       |
   99|  24.4k|    if(u1_sub_mb)
  ------------------
  |  Branch (99:8): [True: 1.57k, False: 22.8k]
  ------------------
  100|  1.57k|    {
  101|  1.57k|        UWORD8 uc_i;
  102|  1.57k|        u1_mb_mc_mode = 0;
  103|  1.57k|        u1_num_mb_part = 4;
  104|       |        /* Reading the subMB type */
  105|  7.70k|        for(uc_i = 0; uc_i < 4; uc_i++)
  ------------------
  |  Branch (105:23): [True: 6.26k, False: 1.44k]
  ------------------
  106|  6.26k|        {
  107|       |
  108|  6.26k|            UWORD32 ui_sub_mb_mode;
  109|       |
  110|       |//Inlined ih264d_uev
  111|  6.26k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  112|  6.26k|            UWORD32 u4_word, u4_ldz;
  113|       |
  114|       |            /***************************************************************/
  115|       |            /* Find leading zeros in next 32 bits                          */
  116|       |            /***************************************************************/
  117|  6.26k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  6.26k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  6.26k|{                                                                           \
  |  |  152|  6.26k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  6.26k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  6.26k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  6.26k|                                                                            \
  |  |  156|  6.26k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  6.26k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 5.95k, False: 302]
  |  |  ------------------
  |  |  158|  6.26k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  5.95k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  6.26k|}
  ------------------
  118|  6.26k|            u4_ldz = CLZ(u4_word);
  119|       |            /* Flush the ps_bitstrm */
  120|  6.26k|            u4_bitstream_offset += (u4_ldz + 1);
  121|       |            /* Read the suffix from the ps_bitstrm */
  122|  6.26k|            u4_word = 0;
  123|  6.26k|            if(u4_ldz)
  ------------------
  |  Branch (123:16): [True: 2.80k, False: 3.45k]
  ------------------
  124|  2.80k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  2.80k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.80k|{                                                                           \
  |  |  122|  2.80k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.80k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.80k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.80k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.80k|                                                                            \
  |  |  127|  2.80k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.60k, False: 206]
  |  |  ------------------
  |  |  128|  2.80k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.60k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.80k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.80k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.80k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.80k|}                                                                           \
  ------------------
  125|  6.26k|                        u4_ldz);
  126|  6.26k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  127|  6.26k|            ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1);
  128|       |//Inlined ih264d_uev
  129|       |
  130|  6.26k|            if(ui_sub_mb_mode > 12)
  ------------------
  |  Branch (130:16): [True: 131, False: 6.12k]
  ------------------
  131|    131|                return ERROR_SUB_MB_TYPE;
  132|  6.12k|            else
  133|  6.12k|            {
  134|  6.12k|                UWORD8 u1_subMbPredMode = pu1_sub_mb_pred_modes[ui_sub_mb_mode];
  135|  6.12k|                u4_mb_mc_mode = (u4_mb_mc_mode << 8)
  136|  6.12k|                                | pu1_sub_mb_mc_mode[ui_sub_mb_mode];
  137|  6.12k|                u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredMode;
  138|  6.12k|                pi1_ref_idx[0][uc_i] = ((u1_subMbPredMode & PRED_L0) - 1) >> 1;
  ------------------
  |  |  483|  6.12k|#define PRED_L0   1
  ------------------
  139|  6.12k|                pi1_ref_idx[1][uc_i] = ((u1_subMbPredMode & PRED_L1) - 1) >> 1;
  ------------------
  |  |  484|  6.12k|#define PRED_L1   2
  ------------------
  140|  6.12k|                COPYTHECONTEXT("sub_mb_type", u1_subMbPredMode);
  141|  6.12k|            }
  142|       |            /* Storing collocated Mb and SubMb mode information */
  143|  6.12k|            *pu1_col_info++ = ((PRED_8x8) << 6)
  ------------------
  |  |  453|  6.12k|#define PRED_8x8    3
  ------------------
  144|  6.12k|                            | ((pu1_sub_mb_mc_mode[ui_sub_mb_mode] << 4));
  145|  6.12k|            if(ui_sub_mb_mode != B_DIRECT_8x8)
  ------------------
  |  |  465|  6.12k|#define B_DIRECT_8x8    0
  ------------------
  |  Branch (145:16): [True: 2.67k, False: 3.45k]
  ------------------
  146|  2.67k|            {
  147|  2.67k|                if(ui_sub_mb_mode > B_BI_8x8)
  ------------------
  |  |  468|  2.67k|#define B_BI_8x8        3
  ------------------
  |  Branch (147:20): [True: 1.21k, False: 1.46k]
  ------------------
  148|  1.21k|                {
  149|  1.21k|                    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
  150|  1.21k|                }
  151|  2.67k|            }
  152|  3.45k|            else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
  ------------------
  |  Branch (152:21): [True: 2.95k, False: 494]
  ------------------
  153|  2.95k|            {
  154|  2.95k|                ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
  155|  2.95k|            }
  156|  6.12k|        }
  157|  1.57k|    }
  158|  22.8k|    else
  159|  22.8k|    {
  160|  22.8k|        UWORD8 u1_mb_pred_mode_idx = 5 + u1_mb_type;
  161|  22.8k|        UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mb_pred_mode_idx];
  162|  22.8k|        UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mb_pred_mode_idx];
  163|  22.8k|        u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
  164|  22.8k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];
  165|       |
  166|  22.8k|        pi1_ref_idx[0][0] = ((u1_mb_pred_mode_part0 & PRED_L0) - 1) >> 1;
  ------------------
  |  |  483|  22.8k|#define PRED_L0   1
  ------------------
  167|  22.8k|        pi1_ref_idx[1][0] = ((u1_mb_pred_mode_part0 & PRED_L1) - 1) >> 1;
  ------------------
  |  |  484|  22.8k|#define PRED_L1   2
  ------------------
  168|  22.8k|        pi1_ref_idx[0][1] = ((u1_mb_pred_mode_part1 & PRED_L0) - 1) >> 1;
  ------------------
  |  |  483|  22.8k|#define PRED_L0   1
  ------------------
  169|  22.8k|        pi1_ref_idx[1][1] = ((u1_mb_pred_mode_part1 & PRED_L1) - 1) >> 1;
  ------------------
  |  |  484|  22.8k|#define PRED_L1   2
  ------------------
  170|       |
  171|  22.8k|        u4_mb_pred_mode = (u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1;
  172|  22.8k|        u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
  173|  22.8k|        u4_mb_mc_mode <<= 16;
  174|  22.8k|        u4_mb_pred_mode <<= 16;
  175|       |
  176|       |        /* Storing collocated Mb and SubMb mode information */
  177|  22.8k|        *pu1_col_info++ = (u1_mb_mc_mode << 6);
  178|  22.8k|        if(u1_mb_mc_mode)
  ------------------
  |  Branch (178:12): [True: 12.1k, False: 10.7k]
  ------------------
  179|  12.1k|            *pu1_col_info++ = (u1_mb_mc_mode << 6);
  180|  22.8k|    }
  181|       |
  182|  24.3k|    {
  183|  24.3k|        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  184|  24.3k|        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  185|  24.3k|        UWORD8 *pu1_num_ref_idx_lx_active =
  186|  24.3k|                        ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
  187|  24.3k|        const UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
  188|  24.3k|        UWORD8 u4_num_ref_idx_lx_active;
  189|       |
  190|  24.3k|        u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[0]
  191|  24.3k|                        << u1_mbaff_field) - 1;
  192|       |
  193|  24.3k|        if(u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (193:12): [True: 11.0k, False: 13.2k]
  ------------------
  194|  11.0k|        {
  195|  11.0k|            if(1 == u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (195:16): [True: 1.89k, False: 9.19k]
  ------------------
  196|  1.89k|                ih264d_parse_bmb_ref_index_cavlc_range1(
  197|  1.89k|                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx[0],
  198|  1.89k|                                u4_num_ref_idx_lx_active);
  199|  9.19k|            else
  200|  9.19k|            {
  201|  9.19k|                ret = ih264d_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm,
  202|  9.19k|                                                 pi1_ref_idx[0],
  203|  9.19k|                                                 u4_num_ref_idx_lx_active);
  204|  9.19k|                if(ret != OK)
  ------------------
  |  |  114|  9.19k|#define OK        0
  ------------------
  |  Branch (204:20): [True: 356, False: 8.83k]
  ------------------
  205|    356|                    return ret;
  206|  9.19k|            }
  207|  11.0k|        }
  208|       |
  209|  23.9k|        u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[1]
  210|  23.9k|                        << u1_mbaff_field) - 1;
  211|       |
  212|  23.9k|        if(u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (212:12): [True: 12.2k, False: 11.7k]
  ------------------
  213|  12.2k|        {
  214|  12.2k|            if(1 == u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (214:16): [True: 9.35k, False: 2.90k]
  ------------------
  215|  9.35k|                ih264d_parse_bmb_ref_index_cavlc_range1(
  216|  9.35k|                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx[1],
  217|  9.35k|                                u4_num_ref_idx_lx_active);
  218|  2.90k|            else
  219|  2.90k|            {
  220|  2.90k|                ret = ih264d_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm,
  221|  2.90k|                                                 pi1_ref_idx[1],
  222|  2.90k|                                                 u4_num_ref_idx_lx_active);
  223|  2.90k|                if(ret != OK)
  ------------------
  |  |  114|  2.90k|#define OK        0
  ------------------
  |  Branch (223:20): [True: 180, False: 2.72k]
  ------------------
  224|    180|                    return ret;
  225|  2.90k|            }
  226|  12.2k|        }
  227|  23.9k|    }
  228|       |
  229|       |    /* Read MotionVectors */
  230|  23.8k|    {
  231|  23.8k|        const UWORD8 * pu1_top_left_sub_mb_indx;
  232|       |
  233|  23.8k|        const UWORD8 * pu1_sub_mb_indx_mod =
  234|  23.8k|                        (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
  235|  23.8k|                                        + (u1_sub_mb * 6);
  236|  23.8k|        const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
  237|  23.8k|        const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
  238|  23.8k|        const UWORD8 * pu1_num_sub_mb_part =
  239|  23.8k|                        (const UWORD8 *)gau1_ih264d_num_submb_part;
  240|  23.8k|        const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
  241|  23.8k|        const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
  242|  23.8k|        UWORD8 u1_p_idx = 0, u1_num_submb_part, uc_lx;
  243|  23.8k|        parse_part_params_t * ps_part;
  244|  23.8k|        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
  245|  23.8k|        UWORD8 u1_mb_part_wd, u1_mb_part_ht;
  246|       |
  247|       |        /* Initialisations */
  248|  23.8k|        ps_part = ps_dec->ps_part;
  249|       |        /* Default Initialization for Non subMb Case Mode */
  250|  23.8k|        u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
  251|  23.8k|        u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
  252|  23.8k|        u1_num_submb_part = 1;
  253|       |
  254|       |        /* Decoding the MV for the subMB */
  255|  71.4k|        for(uc_lx = 0; uc_lx < 2; uc_lx++)
  ------------------
  |  Branch (255:24): [True: 47.6k, False: 23.8k]
  ------------------
  256|  47.6k|        {
  257|  47.6k|            UWORD8 u1_sub_mb_num = 0, u1_pred_mode, uc_i;
  258|  47.6k|            UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
  259|  47.6k|            UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
  260|  47.6k|            UWORD16 u2_sub_mb_num = 0x028A; // for sub mb case
  261|  47.6k|            UWORD8 u1_b2 = uc_lx << 1;
  262|  47.6k|            u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  484|  23.8k|#define PRED_L1   2
  ------------------
                          u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  483|  71.4k|#define PRED_L0   1
  ------------------
  |  Branch (262:28): [True: 23.8k, False: 23.8k]
  ------------------
  263|  47.6k|            pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
  264|       |
  265|   126k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (265:27): [True: 79.2k, False: 47.6k]
  ------------------
  266|  79.2k|            {
  267|  79.2k|                UWORD8 u1_mb_mc_mode, uc_j;
  268|  79.2k|                UWORD8 i1_pred = u4_mb_pred_mode_tmp >> 24;
  269|  79.2k|                u1_mb_mc_mode = u4_mb_mc_mode_tmp >> 24;
  270|  79.2k|                u4_mb_pred_mode_tmp <<= 8;
  271|  79.2k|                u4_mb_mc_mode_tmp <<= 8;
  272|       |                /* subMb prediction mode */
  273|  79.2k|                if(u1_sub_mb)
  ------------------
  |  Branch (273:20): [True: 10.1k, False: 69.0k]
  ------------------
  274|  10.1k|                {
  275|       |
  276|  10.1k|                    u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode];
  277|  10.1k|                    u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode];
  278|  10.1k|                    u1_sub_mb_num = u2_sub_mb_num >> 12;
  279|  10.1k|                    u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode];
  280|  10.1k|                    pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod
  281|  10.1k|                                    + (u1_mb_mc_mode << 1);
  282|  10.1k|                    u2_sub_mb_num <<= 4;
  283|  10.1k|                }
  284|   160k|                for(uc_j = 0; uc_j < u1_num_submb_part;
  ------------------
  |  Branch (284:31): [True: 81.0k, False: 79.2k]
  ------------------
  285|  81.0k|                                uc_j++, pu1_top_left_sub_mb_indx++)
  286|  81.0k|                {
  287|  81.0k|                    mv_pred_t * ps_mv;
  288|  81.0k|                    u1_sub_mb_num = u1_sub_mb_num + *pu1_top_left_sub_mb_indx;
  289|  81.0k|                    ps_mv = ps_mv_start + u1_sub_mb_num;
  290|       |
  291|       |                    /* Storing Info for partitions, writing only once */
  292|  81.0k|                    if(uc_lx)
  ------------------
  |  Branch (292:24): [True: 40.5k, False: 40.5k]
  ------------------
  293|  40.5k|                    {
  294|  40.5k|                        ps_part->u1_is_direct = (!i1_pred);
  295|  40.5k|                        ps_part->u1_pred_mode = i1_pred;
  296|  40.5k|                        ps_part->u1_sub_mb_num = u1_sub_mb_num;
  297|  40.5k|                        ps_part->u1_partheight = u1_mb_part_ht;
  298|  40.5k|                        ps_part->u1_partwidth = u1_mb_part_wd;
  299|       |                        /* Increment partition Index */
  300|  40.5k|                        u1_p_idx++;
  301|  40.5k|                        ps_part++;
  302|  40.5k|                    }
  303|       |
  304|  81.0k|                    if(i1_pred & u1_pred_mode)
  ------------------
  |  Branch (304:24): [True: 39.6k, False: 41.3k]
  ------------------
  305|  39.6k|                    {
  306|  39.6k|                        WORD16 i2_mvx, i2_mvy;
  307|       |
  308|       |//inlining ih264d_sev
  309|  39.6k|                        {
  310|  39.6k|                            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  311|  39.6k|                            UWORD32 u4_word, u4_ldz, u4_abs_val;
  312|       |
  313|       |                            /***************************************************************/
  314|       |                            /* Find leading zeros in next 32 bits                          */
  315|       |                            /***************************************************************/
  316|  39.6k|                            NEXTBITS_32(u4_word, u4_bitstream_offset,
  ------------------
  |  |  150|  39.6k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  39.6k|{                                                                           \
  |  |  152|  39.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  39.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  39.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  39.6k|                                                                            \
  |  |  156|  39.6k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  39.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 37.7k, False: 1.93k]
  |  |  ------------------
  |  |  158|  39.6k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  37.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  39.6k|}
  ------------------
  317|  39.6k|                                        pu4_bitstrm_buf);
  318|  39.6k|                            u4_ldz = CLZ(u4_word);
  319|       |
  320|       |                            /* Flush the ps_bitstrm */
  321|  39.6k|                            u4_bitstream_offset += (u4_ldz + 1);
  322|       |
  323|       |                            /* Read the suffix from the ps_bitstrm */
  324|  39.6k|                            u4_word = 0;
  325|  39.6k|                            if(u4_ldz)
  ------------------
  |  Branch (325:32): [True: 12.9k, False: 26.6k]
  ------------------
  326|  12.9k|                                GETBITS(u4_word, u4_bitstream_offset,
  ------------------
  |  |  120|  12.9k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  12.9k|{                                                                           \
  |  |  122|  12.9k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  12.9k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  12.9k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  12.9k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  12.9k|                                                                            \
  |  |  127|  12.9k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 12.2k, False: 720]
  |  |  ------------------
  |  |  128|  12.9k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  12.2k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  12.9k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  12.9k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  12.9k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  12.9k|}                                                                           \
  ------------------
  327|  39.6k|                                        pu4_bitstrm_buf, u4_ldz);
  328|       |
  329|  39.6k|                            *pu4_bitstrm_ofst = u4_bitstream_offset;
  330|  39.6k|                            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  331|       |
  332|  39.6k|                            if(u4_word & 0x1)
  ------------------
  |  Branch (332:32): [True: 6.91k, False: 32.7k]
  ------------------
  333|  6.91k|                                i2_mvx = (-(WORD32)u4_abs_val);
  334|  32.7k|                            else
  335|  32.7k|                                i2_mvx = (u4_abs_val);
  336|  39.6k|                        }
  337|       |//inlinined ih264d_sev
  338|       |
  339|       |//inlining ih264d_sev
  340|  39.6k|                        {
  341|  39.6k|                            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  342|  39.6k|                            UWORD32 u4_word, u4_ldz, u4_abs_val;
  343|       |
  344|       |                            /***************************************************************/
  345|       |                            /* Find leading zeros in next 32 bits                          */
  346|       |                            /***************************************************************/
  347|  39.6k|                            NEXTBITS_32(u4_word, u4_bitstream_offset,
  ------------------
  |  |  150|  39.6k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  39.6k|{                                                                           \
  |  |  152|  39.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  39.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  39.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  39.6k|                                                                            \
  |  |  156|  39.6k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  39.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 38.3k, False: 1.29k]
  |  |  ------------------
  |  |  158|  39.6k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  38.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  39.6k|}
  ------------------
  348|  39.6k|                                        pu4_bitstrm_buf);
  349|  39.6k|                            u4_ldz = CLZ(u4_word);
  350|       |
  351|       |                            /* Flush the ps_bitstrm */
  352|  39.6k|                            u4_bitstream_offset += (u4_ldz + 1);
  353|       |
  354|       |                            /* Read the suffix from the ps_bitstrm */
  355|  39.6k|                            u4_word = 0;
  356|  39.6k|                            if(u4_ldz)
  ------------------
  |  Branch (356:32): [True: 18.3k, False: 21.3k]
  ------------------
  357|  18.3k|                                GETBITS(u4_word, u4_bitstream_offset,
  ------------------
  |  |  120|  18.3k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  18.3k|{                                                                           \
  |  |  122|  18.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  18.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  18.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  18.3k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  18.3k|                                                                            \
  |  |  127|  18.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 17.5k, False: 727]
  |  |  ------------------
  |  |  128|  18.3k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  17.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  18.3k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  18.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  18.3k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  18.3k|}                                                                           \
  ------------------
  358|  39.6k|                                        pu4_bitstrm_buf, u4_ldz);
  359|       |
  360|  39.6k|                            *pu4_bitstrm_ofst = u4_bitstream_offset;
  361|  39.6k|                            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  362|       |
  363|  39.6k|                            if(u4_word & 0x1)
  ------------------
  |  Branch (363:32): [True: 6.53k, False: 33.1k]
  ------------------
  364|  6.53k|                                i2_mvy = (-(WORD32)u4_abs_val);
  365|  33.1k|                            else
  366|  33.1k|                                i2_mvy = (u4_abs_val);
  367|  39.6k|                        }
  368|       |//inlinined ih264d_sev
  369|       |
  370|       |                        /* Storing Mv residuals */
  371|  39.6k|                        ps_mv->i2_mv[u1_b2] = i2_mvx;
  372|  39.6k|                        ps_mv->i2_mv[u1_b2 + 1] = i2_mvy;
  373|  39.6k|                    }
  374|  81.0k|                }
  375|  79.2k|            }
  376|  47.6k|        }
  377|       |        /* write back to the scratch partition info */
  378|  23.8k|        ps_dec->ps_part = ps_part;
  379|  23.8k|        ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;
  ------------------
  |  Branch (379:41): [True: 1.27k, False: 22.5k]
  ------------------
  380|       |
  381|  23.8k|    }
  382|  23.8k|    return OK;
  ------------------
  |  |  114|  23.8k|#define OK        0
  ------------------
  383|  23.9k|}
ih264d_parse_bmb_non_direct_cabac:
  402|   118k|{
  403|       |    /* Loads from ps_dec */
  404|   118k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
  405|   118k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  406|   118k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
  407|   118k|    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
  408|   118k|                    + u4_num_mbsNby2;
  409|       |
  410|       |    /* table pointer loads */
  411|   118k|    const UWORD8 * pu1_sub_mb_pred_modes = (UWORD8 *)(gau1_ih264d_submb_pred_modes)
  412|   118k|                    + 4;
  413|   118k|    const UWORD8 (*pu1_mb_pred_modes)[32] =
  414|   118k|                    (const UWORD8 (*)[32])gau1_ih264d_mb_pred_modes;
  415|   118k|    const UWORD8 *pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
  416|   118k|    const UWORD8 *pu1_sub_mb_mc_mode = (UWORD8 *)(gau1_ih264d_submb_mc_mode) + 4;
  417|       |
  418|   118k|    const UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  419|   118k|    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
  420|   118k|    WORD8 *pi1_ref_idx_l0 = &ps_parse_mb_data->i1_ref_idx[0][0];
  421|   118k|    WORD8 *pi1_ref_idx_l1 = &ps_parse_mb_data->i1_ref_idx[1][0];
  422|   118k|    UWORD8 u1_dec_ref_l0, u1_dec_ref_l1;
  423|       |
  424|   118k|    UWORD8 u1_num_mb_part, u1_mb_mc_mode, u1_sub_mb, u1_mbpred_mode = 5
  425|   118k|                    + u1_mb_type;
  426|   118k|    UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0;
  427|   118k|    WORD32 ret;
  428|       |
  429|   118k|    p_curr_ctxt->u1_mb_type = CAB_NON_BD16x16;
  ------------------
  |  |  397|   118k|#define CAB_NON_BD16x16   0x05 /* 0000 0101 */
  ------------------
  430|   118k|    u1_sub_mb = !(u1_mb_type ^ B_8x8);
  ------------------
  |  |  480|   118k|#define B_8x8    22
  ------------------
  431|       |
  432|   118k|    {
  433|   118k|        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  434|   118k|        UWORD8 *pu1_num_ref_idx_lx_active =
  435|   118k|                        ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
  436|   118k|        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  437|   118k|        UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
  438|   118k|        u1_dec_ref_l0 = (pu1_num_ref_idx_lx_active[0] << u1_mbaff_field) - 1;
  439|   118k|        u1_dec_ref_l1 = (pu1_num_ref_idx_lx_active[1] << u1_mbaff_field) - 1;
  440|   118k|    }
  441|       |
  442|   118k|    if(u1_sub_mb)
  ------------------
  |  Branch (442:8): [True: 44.0k, False: 74.2k]
  ------------------
  443|  44.0k|    {
  444|  44.0k|        const UWORD8 u1_colz = ((PRED_8x8) << 6);
  ------------------
  |  |  453|  44.0k|#define PRED_8x8    3
  ------------------
  445|  44.0k|        UWORD8 uc_i;
  446|  44.0k|        u1_mb_mc_mode = 0;
  447|  44.0k|        u1_num_mb_part = 4;
  448|       |        /* Reading the subMB type */
  449|   220k|        for(uc_i = 0; uc_i < 4; uc_i++)
  ------------------
  |  Branch (449:23): [True: 176k, False: 44.0k]
  ------------------
  450|   176k|        {
  451|   176k|            UWORD8 u1_sub_mb_mode, u1_subMbPredModes;
  452|   176k|            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
  453|   176k|                            1, ps_cab_env, ps_bitstrm,
  454|   176k|                            ps_dec->p_sub_mb_type_t);
  455|       |
  456|   176k|            if(u1_sub_mb_mode > 12)
  ------------------
  |  Branch (456:16): [True: 0, False: 176k]
  ------------------
  457|      0|                return ERROR_SUB_MB_TYPE;
  458|       |
  459|   176k|            u1_subMbPredModes = pu1_sub_mb_pred_modes[u1_sub_mb_mode];
  460|   176k|            u4_mb_mc_mode = (u4_mb_mc_mode << 8) | pu1_sub_mb_mc_mode[u1_sub_mb_mode];
  461|   176k|            u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredModes;
  462|   176k|            *pi1_ref_idx_l0++ =
  463|   176k|                            (u1_subMbPredModes & PRED_L0) ? u1_dec_ref_l0 : -1;
  ------------------
  |  |  483|   176k|#define PRED_L0   1
  ------------------
  |  Branch (463:29): [True: 153k, False: 22.6k]
  ------------------
  464|   176k|            *pi1_ref_idx_l1++ =
  465|   176k|                            (u1_subMbPredModes & PRED_L1) ? u1_dec_ref_l1 : -1;
  ------------------
  |  |  484|   176k|#define PRED_L1   2
  ------------------
  |  Branch (465:29): [True: 36.4k, False: 139k]
  ------------------
  466|   176k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
  467|       |            /* Storing collocated Mb and SubMb mode information */
  468|   176k|            *pu1_col_info++ =
  469|   176k|                            (u1_colz | (pu1_sub_mb_mc_mode[u1_sub_mb_mode] << 4));
  470|   176k|            if(u1_sub_mb_mode != B_DIRECT_8x8)
  ------------------
  |  |  465|   176k|#define B_DIRECT_8x8    0
  ------------------
  |  Branch (470:16): [True: 165k, False: 11.1k]
  ------------------
  471|   165k|            {
  472|   165k|                if(u1_sub_mb_mode > B_BI_8x8)
  ------------------
  |  |  468|   165k|#define B_BI_8x8        3
  ------------------
  |  Branch (472:20): [True: 10.5k, False: 154k]
  ------------------
  473|  10.5k|                {
  474|  10.5k|                    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
  475|  10.5k|                }
  476|   165k|            }
  477|  11.1k|            else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
  ------------------
  |  Branch (477:21): [True: 10.0k, False: 1.08k]
  ------------------
  478|  10.0k|            {
  479|  10.0k|                ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
  480|  10.0k|            }
  481|   176k|        }
  482|  44.0k|        pi1_ref_idx_l0 -= 4;
  483|  44.0k|        pi1_ref_idx_l1 -= 4;
  484|  44.0k|    }
  485|  74.2k|    else
  486|  74.2k|    {
  487|  74.2k|        UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mbpred_mode];
  488|  74.2k|        UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mbpred_mode];
  489|  74.2k|        u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
  490|  74.2k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];
  491|       |        /* Storing collocated Mb and SubMb mode information */
  492|  74.2k|        *pu1_col_info++ = (u1_mb_mc_mode << 6);
  493|  74.2k|        if(u1_mb_mc_mode)
  ------------------
  |  Branch (493:12): [True: 46.9k, False: 27.3k]
  ------------------
  494|  46.9k|            *pu1_col_info++ = (u1_mb_mc_mode << 6);
  495|  74.2k|        u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
  496|  74.2k|        u4_mb_mc_mode <<= 16;
  497|  74.2k|        u4_mb_pred_mode = ((u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1) << 16;
  498|       |
  499|  74.2k|        *pi1_ref_idx_l0++ = (u1_mb_pred_mode_part0 & PRED_L0) ? u1_dec_ref_l0 : -1;
  ------------------
  |  |  483|  74.2k|#define PRED_L0   1
  ------------------
  |  Branch (499:29): [True: 44.6k, False: 29.6k]
  ------------------
  500|  74.2k|        *pi1_ref_idx_l0-- = (u1_mb_pred_mode_part1 & PRED_L0) ? u1_dec_ref_l0 : -1;
  ------------------
  |  |  483|  74.2k|#define PRED_L0   1
  ------------------
  |  Branch (500:29): [True: 65.4k, False: 8.78k]
  ------------------
  501|  74.2k|        *pi1_ref_idx_l1++ = (u1_mb_pred_mode_part0 & PRED_L1) ? u1_dec_ref_l1 : -1;
  ------------------
  |  |  484|  74.2k|#define PRED_L1   2
  ------------------
  |  Branch (501:29): [True: 40.6k, False: 33.6k]
  ------------------
  502|  74.2k|        *pi1_ref_idx_l1-- = (u1_mb_pred_mode_part1 & PRED_L1) ? u1_dec_ref_l1 : -1;
  ------------------
  |  |  484|  74.2k|#define PRED_L1   2
  ------------------
  |  Branch (502:29): [True: 66.0k, False: 8.21k]
  ------------------
  503|  74.2k|    }
  504|   118k|    {
  505|   118k|        WORD8 *pi1_lft_cxt = ps_dec->pi1_left_ref_idx_ctxt_inc;
  506|   118k|        WORD8 *pi1_top_cxt = p_curr_ctxt->i1_ref_idx;
  507|       |
  508|   118k|        ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0, u1_dec_ref_l0,
  509|   118k|                                   u1_mb_mc_mode, pi1_ref_idx_l0, pi1_lft_cxt,
  510|   118k|                                   pi1_top_cxt, ps_cab_env, ps_bitstrm,
  511|   118k|                                   ps_dec->p_ref_idx_t);
  512|   118k|        if(ret != OK)
  ------------------
  |  |  114|   118k|#define OK        0
  ------------------
  |  Branch (512:12): [True: 200, False: 118k]
  ------------------
  513|    200|            return ret;
  514|       |
  515|   118k|        ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 2, u1_dec_ref_l1,
  516|   118k|                                   u1_mb_mc_mode, pi1_ref_idx_l1, pi1_lft_cxt,
  517|   118k|                                   pi1_top_cxt, ps_cab_env, ps_bitstrm,
  518|   118k|                                   ps_dec->p_ref_idx_t);
  519|   118k|        if(ret != OK)
  ------------------
  |  |  114|   118k|#define OK        0
  ------------------
  |  Branch (519:12): [True: 192, False: 117k]
  ------------------
  520|    192|            return ret;
  521|   118k|    }
  522|       |    /* Read MotionVectors */
  523|   117k|    {
  524|   117k|        const UWORD8 *pu1_top_left_sub_mb_indx;
  525|   117k|        UWORD8 uc_j, uc_lx;
  526|   117k|        UWORD8 u1_mb_part_wd, u1_mb_part_ht;
  527|       |
  528|   117k|        const UWORD8 *pu1_sub_mb_indx_mod =
  529|   117k|                        (const UWORD8 *)gau1_ih264d_submb_indx_mod
  530|   117k|                                        + (u1_sub_mb * 6);
  531|   117k|        const UWORD8 *pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
  532|   117k|        const UWORD8 *pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
  533|   117k|        const UWORD8 *pu1_num_sub_mb_part =
  534|   117k|                        (const UWORD8 *)gau1_ih264d_num_submb_part;
  535|   117k|        const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
  536|   117k|        const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
  537|       |
  538|   117k|        UWORD8 u1_p_idx = 0;
  539|   117k|        UWORD8 u1_num_submb_part;
  540|   117k|        parse_part_params_t *ps_part;
  541|       |        /* Initialisations */
  542|   117k|        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
  543|   117k|        ps_part = ps_dec->ps_part;
  544|       |
  545|       |        /* Default initialization for non subMb case */
  546|   117k|        u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
  547|   117k|        u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
  548|   117k|        u1_num_submb_part = 1;
  549|       |
  550|       |        /* Decoding the MV for the subMB */
  551|   353k|        for(uc_lx = 0; uc_lx < 2; uc_lx++)
  ------------------
  |  Branch (551:24): [True: 235k, False: 117k]
  ------------------
  552|   235k|        {
  553|   235k|            UWORD32 u4_sub_mb_num = 0;
  554|   235k|            UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
  555|   235k|            UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
  556|   235k|            UWORD8 u1_mb_mc_mode_1, u1_pred_mode, uc_i;
  557|   235k|            UWORD16 u2_sub_mb_num = 0x028A;
  558|   235k|            UWORD8 u1_b2 = uc_lx << 1;
  559|   235k|            u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  484|   117k|#define PRED_L1   2
  ------------------
                          u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  483|   353k|#define PRED_L0   1
  ------------------
  |  Branch (559:28): [True: 117k, False: 117k]
  ------------------
  560|       |            /* Default for Cabac */
  561|   235k|            pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
  562|   829k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (562:27): [True: 593k, False: 235k]
  ------------------
  563|   593k|            {
  564|       |
  565|   593k|                WORD8 i1_pred = (UWORD8)(u4_mb_pred_mode_tmp >> 24);
  566|   593k|                u1_mb_mc_mode_1 = (UWORD8)(u4_mb_mc_mode_tmp >> 24);
  567|   593k|                u4_mb_pred_mode_tmp <<= 8;
  568|   593k|                u4_mb_mc_mode_tmp <<= 8;
  569|       |
  570|       |                /* subMb prediction mode */
  571|   593k|                if(u1_sub_mb)
  ------------------
  |  Branch (571:20): [True: 352k, False: 241k]
  ------------------
  572|   352k|                {
  573|   352k|                    u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode_1];
  574|   352k|                    u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode_1];
  575|   352k|                    u4_sub_mb_num = u2_sub_mb_num >> 12;
  576|   352k|                    pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode_1 << 1);
  577|   352k|                    u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode_1];
  578|   352k|                    u2_sub_mb_num = u2_sub_mb_num << 4;
  579|   352k|                }
  580|       |
  581|  1.21M|                for(uc_j = 0; uc_j < u1_num_submb_part;
  ------------------
  |  Branch (581:31): [True: 619k, False: 593k]
  ------------------
  582|   619k|                                uc_j++, pu1_top_left_sub_mb_indx++)
  583|   619k|                {
  584|   619k|                    mv_pred_t *ps_mv;
  585|   619k|                    u4_sub_mb_num = u4_sub_mb_num + *pu1_top_left_sub_mb_indx;
  586|   619k|                    ps_mv = ps_mv_start + u4_sub_mb_num;
  587|       |
  588|       |                    /* Storing Info for partitions, writing only once */
  589|   619k|                    if(uc_lx)
  ------------------
  |  Branch (589:24): [True: 309k, False: 309k]
  ------------------
  590|   309k|                    {
  591|   309k|                        ps_part->u1_is_direct = (!i1_pred);
  592|   309k|                        ps_part->u1_pred_mode = i1_pred;
  593|   309k|                        ps_part->u1_sub_mb_num = u4_sub_mb_num;
  594|   309k|                        ps_part->u1_partheight = u1_mb_part_ht;
  595|   309k|                        ps_part->u1_partwidth = u1_mb_part_wd;
  596|       |
  597|       |                        /* Increment partition Index */
  598|   309k|                        u1_p_idx++;
  599|   309k|                        ps_part++;
  600|   309k|                    }
  601|       |
  602|   619k|                    ih264d_get_mvd_cabac(u4_sub_mb_num, u1_b2, u1_mb_part_wd,
  603|   619k|                                         u1_mb_part_ht,
  604|   619k|                                         (UWORD8)(i1_pred & u1_pred_mode), ps_dec,
  605|   619k|                                         ps_mv);
  606|   619k|                }
  607|   593k|            }
  608|   235k|        }
  609|       |        /* write back to the scratch partition info */
  610|       |
  611|   117k|        ps_dec->ps_part = ps_part;
  612|   117k|        ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;
  ------------------
  |  Branch (612:41): [True: 44.0k, False: 73.9k]
  ------------------
  613|       |
  614|   117k|    }
  615|       |
  616|   117k|    return OK;
  ------------------
  |  |  114|   117k|#define OK        0
  ------------------
  617|   118k|}
ih264d_parse_bmb_cabac:
  634|   122k|{
  635|   122k|    UWORD8 u1_cbp;
  636|   122k|    deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u4_mb_num;
  637|   122k|    const UWORD8 *puc_mb_mc_mode = (const UWORD8 *)gau1_ih264d_mb_mc_mode;
  638|   122k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  639|   122k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
  640|       |
  641|   122k|    WORD32 ret;
  642|   122k|    UWORD8 u1_Bdirect_tranform_read = 1;
  643|   122k|    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1;
  644|       |
  645|   122k|    ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type];
  646|       |
  647|   122k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  648|       |
  649|   122k|    ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
  ------------------
  |  |  384|   122k|#define D_B_SLICE         4
  ------------------
  650|   122k|    if(u1_mb_type != B_DIRECT)
  ------------------
  |  |  482|   122k|#define B_DIRECT  0
  ------------------
  |  Branch (650:8): [True: 118k, False: 4.27k]
  ------------------
  651|   118k|    {
  652|   118k|        ret = ih264d_parse_bmb_non_direct_cabac(ps_dec, ps_cur_mb_info, u4_mb_num,
  653|   118k|                                          u4_num_mbsNby2);
  654|   118k|        if(ret != OK)
  ------------------
  |  |  114|   118k|#define OK        0
  ------------------
  |  Branch (654:12): [True: 392, False: 117k]
  ------------------
  655|    392|            return ret;
  656|   118k|    }
  657|  4.27k|    else
  658|  4.27k|    {
  659|       |
  660|       |        /************ STORING PARTITION INFO ***********/
  661|  4.27k|        parse_part_params_t * ps_part_info;
  662|  4.27k|        ps_part_info = ps_dec->ps_part;
  663|  4.27k|        ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  4.27k|#define PART_DIRECT_16x16              2
  ------------------
  664|  4.27k|        ps_part_info->u1_sub_mb_num = 0;
  665|  4.27k|        ps_dec->ps_part++;
  666|  4.27k|        p_curr_ctxt->u1_mb_type = CAB_BD16x16;
  ------------------
  |  |  396|  4.27k|#define CAB_BD16x16       0x04 /* 0000 0100 */
  ------------------
  667|       |
  668|  4.27k|        MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|  4.27k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  4.27k|{                                                               \
  |  |  654|  4.27k|    memset(pu4_start,value,16);                                 \
  |  |  655|  4.27k|}
  ------------------
  669|  4.27k|        memset(ps_dec->pi1_left_ref_idx_ctxt_inc, 0, 4);
  670|  4.27k|        MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
  ------------------
  |  |  652|  4.27k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  4.27k|{                                                               \
  |  |  654|  4.27k|    memset(pu4_start,value,16);                                 \
  |  |  655|  4.27k|}
  ------------------
  671|  4.27k|        memset(p_curr_ctxt->i1_ref_idx, 0, 4);
  672|       |
  673|       |        /* check whether transform8x8 u4_flag to be read or not */
  674|  4.27k|        u1_Bdirect_tranform_read =
  675|  4.27k|                        ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
  676|  4.27k|    }
  677|       |
  678|       |    /* Read the Coded block pattern */
  679|   122k|    u1_cbp = (WORD8)ih264d_parse_ctx_cbp_cabac(ps_dec);
  680|   122k|    p_curr_ctxt->u1_cbp = u1_cbp;
  681|   122k|    ps_cur_mb_info->u1_cbp = u1_cbp;
  682|       |
  683|   122k|    if(u1_cbp > 47)
  ------------------
  |  Branch (683:8): [True: 0, False: 122k]
  ------------------
  684|      0|        return ERROR_CBP;
  685|       |
  686|   122k|    COPYTHECONTEXT("coded_block_pattern", u1_cbp);
  687|       |
  688|   122k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
  689|   122k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  690|       |
  691|   122k|    if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & (0xf))
  ------------------
  |  Branch (691:8): [True: 5.32k, False: 116k]
  |  Branch (691:60): [True: 2.22k, False: 3.09k]
  ------------------
  692|  2.22k|                    && (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag)
  ------------------
  |  Branch (692:24): [True: 1.85k, False: 368]
  ------------------
  693|  1.85k|                    && (u1_Bdirect_tranform_read))
  ------------------
  |  Branch (693:24): [True: 1.76k, False: 96]
  ------------------
  694|  1.76k|    {
  695|  1.76k|        ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
  696|  1.76k|                        ps_dec, ps_cur_mb_info);
  697|  1.76k|        COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  698|       |
  699|  1.76k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  700|  1.76k|        p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
  701|  1.76k|    }
  702|   120k|    else
  703|   120k|    {
  704|   120k|        p_curr_ctxt->u1_transform8x8_ctxt = 0;
  705|   120k|    }
  706|       |
  707|   122k|    p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
  708|   122k|    p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
  709|   122k|    ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
  710|       |
  711|       |    /* Read mb_qp_delta */
  712|   122k|    if(u1_cbp)
  ------------------
  |  Branch (712:8): [True: 24.3k, False: 97.8k]
  ------------------
  713|  24.3k|    {
  714|  24.3k|        WORD8 c_temp;
  715|  24.3k|        ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
  716|  24.3k|        if(ret != OK)
  ------------------
  |  |  114|  24.3k|#define OK        0
  ------------------
  |  Branch (716:12): [True: 133, False: 24.2k]
  ------------------
  717|    133|            return ret;
  718|  24.2k|        COPYTHECONTEXT("mb_qp_delta", c_temp);
  719|  24.2k|        if(c_temp)
  ------------------
  |  Branch (719:12): [True: 1.93k, False: 22.3k]
  ------------------
  720|  1.93k|        {
  721|  1.93k|            ret = ih264d_update_qp(ps_dec, c_temp);
  722|  1.93k|            if(ret != OK)
  ------------------
  |  |  114|  1.93k|#define OK        0
  ------------------
  |  Branch (722:16): [True: 0, False: 1.93k]
  ------------------
  723|      0|                return ret;
  724|  1.93k|        }
  725|  24.2k|    }
  726|  97.8k|    else
  727|  97.8k|        ps_dec->i1_prev_mb_qp_delta = 0;
  728|       |
  729|   122k|    ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0);
  730|   122k|    if(EXCEED_OFFSET(ps_dec->ps_bitstrm))
  ------------------
  |  |   93|   122k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 1.55k, False: 120k]
  |  |  ------------------
  ------------------
  731|  1.55k|        return ERROR_EOB_TERMINATE_T;
  732|   120k|    return OK;
  ------------------
  |  |  114|   120k|#define OK        0
  ------------------
  733|   122k|}
ih264d_parse_bmb_cavlc:
  749|  64.0k|{
  750|  64.0k|    UWORD32 u4_cbp;
  751|  64.0k|    deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u4_mb_num;
  752|  64.0k|    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
  753|  64.0k|    UWORD32 * pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  754|  64.0k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  755|  64.0k|    const UWORD8 *puc_mb_mc_mode = (const UWORD8 *)gau1_ih264d_mb_mc_mode;
  756|  64.0k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  757|       |
  758|  64.0k|    WORD32 ret;
  759|  64.0k|    UWORD8 u1_Bdirect_tranform_read = 1;
  760|  64.0k|    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1;
  761|  64.0k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
  762|  64.0k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  763|       |
  764|  64.0k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  765|       |
  766|  64.0k|    ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type];
  767|       |
  768|  64.0k|    ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
  ------------------
  |  |  384|  64.0k|#define D_B_SLICE         4
  ------------------
  769|  64.0k|    if(u1_mb_type != B_DIRECT)
  ------------------
  |  |  482|  64.0k|#define B_DIRECT  0
  ------------------
  |  Branch (769:8): [True: 24.4k, False: 39.5k]
  ------------------
  770|  24.4k|    {
  771|  24.4k|        ret = ih264d_parse_bmb_non_direct_cavlc(ps_dec, ps_cur_mb_info, u4_mb_num,
  772|  24.4k|                                          u4_num_mbsNby2);
  773|  24.4k|        if(ret != OK)
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  |  Branch (773:12): [True: 667, False: 23.8k]
  ------------------
  774|    667|            return ret;
  775|  24.4k|    }
  776|  39.5k|    else
  777|  39.5k|    {
  778|       |        /************ STORING PARTITION INFO ***********/
  779|  39.5k|        parse_part_params_t * ps_part_info;
  780|  39.5k|        ps_part_info = ps_dec->ps_part;
  781|  39.5k|        ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  39.5k|#define PART_DIRECT_16x16              2
  ------------------
  782|  39.5k|        ps_part_info->u1_sub_mb_num = 0;
  783|  39.5k|        ps_dec->ps_part++;
  784|       |        /* check whether transform8x8 u4_flag to be read or not */
  785|  39.5k|        u1_Bdirect_tranform_read =
  786|  39.5k|                        ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
  787|  39.5k|    }
  788|       |
  789|       |    /* Read the Coded block pattern */
  790|  63.3k|    {
  791|  63.3k|        const UWORD8 * puc_CbpInter = gau1_ih264d_cbp_inter;
  792|       |//Inlined ih264d_uev
  793|  63.3k|        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  794|  63.3k|        UWORD32 u4_word, u4_ldz;
  795|       |
  796|       |        /***************************************************************/
  797|       |        /* Find leading zeros in next 32 bits                          */
  798|       |        /***************************************************************/
  799|  63.3k|        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  63.3k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  63.3k|{                                                                           \
  |  |  152|  63.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  63.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  63.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  63.3k|                                                                            \
  |  |  156|  63.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  63.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 61.5k, False: 1.82k]
  |  |  ------------------
  |  |  158|  63.3k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  61.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  63.3k|}
  ------------------
  800|  63.3k|        u4_ldz = CLZ(u4_word);
  801|       |        /* Flush the ps_bitstrm */
  802|  63.3k|        u4_bitstream_offset += (u4_ldz + 1);
  803|       |        /* Read the suffix from the ps_bitstrm */
  804|  63.3k|        u4_word = 0;
  805|  63.3k|        if(u4_ldz)
  ------------------
  |  Branch (805:12): [True: 15.1k, False: 48.2k]
  ------------------
  806|  15.1k|            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  15.1k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  15.1k|{                                                                           \
  |  |  122|  15.1k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  15.1k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  15.1k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  15.1k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  15.1k|                                                                            \
  |  |  127|  15.1k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 14.4k, False: 709]
  |  |  ------------------
  |  |  128|  15.1k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  14.4k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  15.1k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  15.1k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  15.1k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  15.1k|}                                                                           \
  ------------------
  807|  63.3k|        *pu4_bitstrm_ofst = u4_bitstream_offset;
  808|  63.3k|        u4_cbp = ((1 << u4_ldz) + u4_word - 1);
  809|       |//Inlined ih264d_uev
  810|  63.3k|        if(u4_cbp > 47)
  ------------------
  |  Branch (810:12): [True: 1.14k, False: 62.2k]
  ------------------
  811|  1.14k|            return ERROR_CBP;
  812|  62.2k|        u4_cbp = puc_CbpInter[u4_cbp];
  813|       |
  814|  62.2k|        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & (0xf))
  ------------------
  |  Branch (814:12): [True: 4.22k, False: 57.9k]
  |  Branch (814:64): [True: 1.83k, False: 2.39k]
  ------------------
  815|  1.83k|                        && (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag)
  ------------------
  |  Branch (815:28): [True: 1.63k, False: 203]
  ------------------
  816|  1.63k|                        && (u1_Bdirect_tranform_read))
  ------------------
  |  Branch (816:28): [True: 1.02k, False: 603]
  ------------------
  817|  1.02k|        {
  818|  1.02k|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
  819|  1.02k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  820|  1.02k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  821|  1.02k|        }
  822|       |
  823|  62.2k|    }
  824|       |
  825|      0|    COPYTHECONTEXT("coded_block_pattern", u4_cbp);
  826|  62.2k|    ps_cur_mb_info->u1_cbp = u4_cbp;
  827|       |
  828|       |    /* Read mb_qp_delta */
  829|  62.2k|    if(u4_cbp)
  ------------------
  |  Branch (829:8): [True: 14.0k, False: 48.2k]
  ------------------
  830|  14.0k|    {
  831|  14.0k|        WORD32 i_temp;
  832|       |//inlining ih264d_sev
  833|       |
  834|  14.0k|        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  835|  14.0k|        UWORD32 u4_word, u4_ldz, u4_abs_val;
  836|       |
  837|       |        /***************************************************************/
  838|       |        /* Find leading zeros in next 32 bits                          */
  839|       |        /***************************************************************/
  840|  14.0k|        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  14.0k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  14.0k|{                                                                           \
  |  |  152|  14.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  14.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  14.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  14.0k|                                                                            \
  |  |  156|  14.0k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  14.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 13.5k, False: 460]
  |  |  ------------------
  |  |  158|  14.0k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  13.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  14.0k|}
  ------------------
  841|  14.0k|        u4_ldz = CLZ(u4_word);
  842|       |
  843|       |        /* Flush the ps_bitstrm */
  844|  14.0k|        u4_bitstream_offset += (u4_ldz + 1);
  845|       |
  846|       |        /* Read the suffix from the ps_bitstrm */
  847|  14.0k|        u4_word = 0;
  848|  14.0k|        if(u4_ldz)
  ------------------
  |  Branch (848:12): [True: 6.46k, False: 7.54k]
  ------------------
  849|  6.46k|            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  6.46k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  6.46k|{                                                                           \
  |  |  122|  6.46k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  6.46k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  6.46k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  6.46k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  6.46k|                                                                            \
  |  |  127|  6.46k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 5.94k, False: 522]
  |  |  ------------------
  |  |  128|  6.46k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  5.94k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  6.46k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  6.46k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  6.46k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  6.46k|}                                                                           \
  ------------------
  850|       |
  851|  14.0k|        *pu4_bitstrm_ofst = u4_bitstream_offset;
  852|  14.0k|        u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  853|       |
  854|  14.0k|        if(u4_word & 0x1)
  ------------------
  |  Branch (854:12): [True: 3.94k, False: 10.0k]
  ------------------
  855|  3.94k|            i_temp = (-(WORD32)u4_abs_val);
  856|  10.0k|        else
  857|  10.0k|            i_temp = (u4_abs_val);
  858|       |
  859|  14.0k|        if(i_temp < -26 || i_temp > 25)
  ------------------
  |  Branch (859:12): [True: 156, False: 13.8k]
  |  Branch (859:28): [True: 319, False: 13.5k]
  ------------------
  860|    475|            return ERROR_INV_RANGE_QP_T;
  861|       |//inlinined ih264d_sev
  862|  13.5k|        COPYTHECONTEXT("mb_qp_delta", i_temp);
  863|  13.5k|        if(i_temp)
  ------------------
  |  Branch (863:12): [True: 5.99k, False: 7.54k]
  ------------------
  864|  5.99k|        {
  865|  5.99k|            ret = ih264d_update_qp(ps_dec, (WORD8)i_temp);
  866|  5.99k|            if(ret != OK)
  ------------------
  |  |  114|  5.99k|#define OK        0
  ------------------
  |  Branch (866:16): [True: 0, False: 5.99k]
  ------------------
  867|      0|                return ret;
  868|  5.99k|        }
  869|       |
  870|  13.5k|        ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
  871|  13.5k|        if(ret != OK)
  ------------------
  |  |  114|  13.5k|#define OK        0
  ------------------
  |  Branch (871:12): [True: 1.20k, False: 12.3k]
  ------------------
  872|  1.20k|            return ret;
  873|  12.3k|        if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  12.3k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 629, False: 11.7k]
  |  |  ------------------
  ------------------
  874|    629|            return ERROR_EOB_TERMINATE_T;
  875|  12.3k|    }
  876|  48.2k|    else
  877|  48.2k|    {
  878|  48.2k|        ps_dec->i1_prev_mb_qp_delta = 0;
  879|  48.2k|        ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|  48.2k|#define CAVLC  0
  ------------------
  880|  48.2k|    }
  881|       |
  882|  59.9k|    return OK;
  ------------------
  |  |  114|  59.9k|#define OK        0
  ------------------
  883|  62.2k|}
ih264d_mv_pred_ref_tfr_nby2_bmb:
  888|   130k|{
  889|   130k|    parse_pmbarams_t * ps_mb_part_info;
  890|   130k|    parse_part_params_t * ps_part;
  891|   130k|    mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start;
  892|   130k|    pic_buffer_t * ps_ref_frame;
  893|   130k|    UWORD8 u1_direct_mode_width;
  894|   130k|    UWORD32 i;
  895|   130k|    UWORD8 j;
  896|   130k|    dec_mb_info_t * ps_cur_mb_info;
  897|   130k|    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  898|   130k|    UWORD8 u1_field;
  899|   130k|    WORD32 ret = 0;
  900|       |
  901|   130k|    ps_dec->i4_submb_ofst -= (WORD32)(u4_num_mbs - u4_mb_idx) << 4;
  902|   130k|    ps_mb_part_info = ps_dec->ps_parse_mb_data;
  903|   130k|    ps_part = ps_dec->ps_parse_part_params;
  904|       |
  905|       |    /* N/2 Mb MvPred and Transfer Setup Loop */
  906|  1.14M|    for(i = u4_mb_idx; i < u4_num_mbs; i++, ps_mb_part_info++)
  ------------------
  |  Branch (906:24): [True: 1.01M, False: 130k]
  ------------------
  907|  1.01M|    {
  908|  1.01M|        UWORD8 u1_colz = 0;
  909|  1.01M|        ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
  ------------------
  |  |  562|  1.01M|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  1.01M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  1.01M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
  910|       |        /* Restore the slice scratch MbX and MbY context */
  911|  1.01M|        ps_cur_mb_info = ps_dec->ps_nmb_info + i;
  912|       |
  913|       |
  914|  1.01M|        u1_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  915|       |
  916|  1.01M|        ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4);
  917|  1.01M|        ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
  918|  1.01M|        ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
  919|  1.01M|        ps_dec->u1_currB_type = 0;
  920|  1.01M|        ps_dec->u2_mv_2mb[i & 0x1] = 0;
  921|       |
  922|       |        /* Look for MV Prediction and Reference Transfer in Non-I Mbs */
  923|  1.01M|        if(!ps_mb_part_info->u4_isI_mb)
  ------------------
  |  Branch (923:12): [True: 1.01M, False: 778]
  ------------------
  924|  1.01M|        {
  925|  1.01M|            UWORD8 u1_blk_no;
  926|  1.01M|            WORD16 i1_ref_idx, i1_ref_idx1;
  927|  1.01M|            UWORD8 u1_pred_mode;
  928|  1.01M|            UWORD8 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num;
  929|  1.01M|            UWORD8 u1_lx, u1_lx_start, u1_lxend, u1_tmp_lx;
  930|  1.01M|            UWORD8 u1_num_part, u1_num_ref, u1_wd, u1_ht;
  931|  1.01M|            UWORD32 *pu4_wt_offst;
  932|  1.01M|            UWORD8 u1_scale_ref, u4_bot_mb;
  933|  1.01M|            deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i;
  934|  1.01M|            WORD8 (*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] =
  935|  1.01M|                            ps_mb_part_info->i1_ref_idx;
  936|  1.01M|            WORD8 *pi1_ref_idx0 = pi1_ref_idx[0],
  937|  1.01M|                            *pi1_ref_idx1 = pi1_ref_idx[1];
  938|  1.01M|            UWORD32 **ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst;
  939|       |
  940|       |            /* MB Level initialisations */
  941|  1.01M|            ps_dec->u4_num_pmbair = i >> u1_mbaff;
  942|  1.01M|            ps_dec->u4_mb_idx_mv = i;
  943|       |
  944|       |            /* CHANGED CODE */
  945|  1.01M|            ps_mv_ntop_start = ps_mv_nmb_start
  946|  1.01M|                            - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
  947|       |
  948|  1.01M|            u1_num_part = ps_mb_part_info->u1_num_part;
  949|  1.01M|            ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1;
  950|  1.01M|            u1_direct_mode_width = (1 == ps_mb_part_info->u1_num_part) ? 16 : 8;
  ------------------
  |  Branch (950:36): [True: 917k, False: 101k]
  ------------------
  951|       |
  952|       |
  953|  1.01M|            ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx;
  954|  1.01M|            ps_cur_mb_info->u1_num_pred_parts = 0;
  955|       |
  956|       |            /****************************************************/
  957|       |            /* weighted u4_ofst pointer calculations, this loop  */
  958|       |            /* runs maximum 4 times, even in direct cases       */
  959|       |            /****************************************************/
  960|  1.01M|            u1_scale_ref = u1_mbaff & ps_cur_mb_info->u1_mb_field_decodingflag;
  961|  1.01M|            u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb;
  962|  1.01M|            if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
  ------------------
  |  Branch (962:16): [True: 636k, False: 381k]
  ------------------
  963|   636k|            {
  964|   636k|                u1_num_ref = MIN(u1_num_part, 4);
  ------------------
  |  |   61|   636k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 632k, False: 4.46k]
  |  |  ------------------
  ------------------
  965|   636k|                if(PART_DIRECT_16x16 != ps_part->u1_is_direct)
  ------------------
  |  |  572|   636k|#define PART_DIRECT_16x16              2
  ------------------
  |  Branch (965:20): [True: 32.2k, False: 604k]
  ------------------
  966|  32.2k|                {
  967|  88.1k|                    for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++)
  ------------------
  |  Branch (967:40): [True: 55.9k, False: 32.2k]
  ------------------
  968|  55.9k|                    {
  969|  55.9k|                        i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
  ------------------
  |  |   60|  55.9k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 4.74k, False: 51.2k]
  |  |  ------------------
  ------------------
  970|  55.9k|                        if(u1_scale_ref)
  ------------------
  |  Branch (970:28): [True: 0, False: 55.9k]
  ------------------
  971|      0|                            i1_ref_idx >>= 1;
  972|  55.9k|                        i1_ref_idx *=
  973|  55.9k|                                        ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
  974|  55.9k|                        if(u1_scale_ref)
  ------------------
  |  Branch (974:28): [True: 0, False: 55.9k]
  ------------------
  975|      0|                            i1_ref_idx +=
  976|      0|                                            (MAX(pi1_ref_idx1[u1_blk_no], 0)
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  977|      0|                                                            >> 1);
  978|  55.9k|                        else
  979|  55.9k|                            i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
  ------------------
  |  |   60|  55.9k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 5.54k, False: 50.4k]
  |  |  ------------------
  ------------------
  980|  55.9k|                        pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
  981|  55.9k|                                        * X3(i1_ref_idx)];
  ------------------
  |  |   92|  55.9k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  982|       |
  983|  55.9k|                        if(pi1_ref_idx0[u1_blk_no] < 0)
  ------------------
  |  Branch (983:28): [True: 22.4k, False: 33.4k]
  ------------------
  984|  22.4k|                            pu4_wt_offst += 1;
  985|       |
  986|  55.9k|                        ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
  987|  55.9k|                        if(u1_scale_ref
  ------------------
  |  Branch (987:28): [True: 0, False: 55.9k]
  ------------------
  988|      0|                                        && (ps_dec->ps_cur_pps->u1_wted_bipred_idc
  ------------------
  |  Branch (988:44): [True: 0, False: 0]
  ------------------
  989|      0|                                                        == 2))
  990|      0|                        {
  991|      0|                            i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  992|      0|                            i1_ref_idx *=
  993|      0|                                            (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
  994|      0|                                                            << 1);
  995|      0|                            i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  996|      0|                            if(u4_bot_mb)
  ------------------
  |  Branch (996:32): [True: 0, False: 0]
  ------------------
  997|      0|                            {
  998|      0|                                i1_ref_idx +=
  999|      0|                                                (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
 1000|      0|                                                                << 1)
 1001|      0|                                                                * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
 1002|      0|                                                                                << 1);
 1003|      0|                            }
 1004|      0|                            pu4_wt_offst = (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2
 1005|      0|                                            * X3(i1_ref_idx)];
  ------------------
  |  |   92|      0|#define X3(a)   (((a) << 1) + (a))
  ------------------
 1006|      0|                            ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
 1007|      0|                        }
 1008|  55.9k|                    }
 1009|  32.2k|                }
 1010|   636k|            }
 1011|       |
 1012|       |            /**************************************************/
 1013|       |            /* Loop on Partitions                             */
 1014|       |            /* direct mode is reflected as a single partition */
 1015|       |            /**************************************************/
 1016|  2.23M|            for(j = 0; j < u1_num_part; j++, ps_part++)
  ------------------
  |  Branch (1016:24): [True: 1.22M, False: 1.01M]
  ------------------
 1017|  1.22M|            {
 1018|  1.22M|                u1_sub_mb_num = ps_part->u1_sub_mb_num;
 1019|  1.22M|                ps_dec->u1_sub_mb_num = u1_sub_mb_num;
 1020|       |
 1021|  1.22M|                if(PART_NOT_DIRECT != ps_part->u1_is_direct)
  ------------------
  |  |  570|  1.22M|#define PART_NOT_DIRECT                0
  ------------------
  |  Branch (1021:20): [True: 895k, False: 326k]
  ------------------
 1022|   895k|                {
 1023|       |                    /**************************************************/
 1024|       |                    /* Direct Mode, Call DecodeSpatial/TemporalDirect */
 1025|       |                    /* only (those will in turn call FormMbPartInfo)  */
 1026|       |                    /**************************************************/
 1027|   895k|                    ret = ps_dec->ps_cur_slice->pf_decodeDirect(ps_dec,
 1028|   895k|                                                                u1_direct_mode_width,
 1029|   895k|                                                                ps_cur_mb_info, i);
 1030|   895k|                    if(ret != OK)
  ------------------
  |  |  114|   895k|#define OK        0
  ------------------
  |  Branch (1030:24): [True: 0, False: 895k]
  ------------------
 1031|      0|                        return ret;
 1032|   895k|                    ps_cur_deblk_mb->u1_mb_type |= (ps_dec->u1_currB_type << 1);
 1033|       |
 1034|   895k|                }
 1035|   326k|                else
 1036|   326k|                {
 1037|   326k|                    mv_pred_t s_mvPred;
 1038|       |                    /**************************************************/
 1039|       |                    /* Non Direct Mode, Call Motion Vector Predictor  */
 1040|       |                    /* and FormMbpartInfo                             */
 1041|       |                    /**************************************************/
 1042|   326k|                    u1_sub_mb_x = u1_sub_mb_num & 0x03;
 1043|   326k|                    u1_sub_mb_y = u1_sub_mb_num >> 2;
 1044|   326k|                    u1_blk_no =
 1045|   326k|                                    (u1_num_part < 4) ?
  ------------------
  |  Branch (1045:37): [True: 149k, False: 176k]
  ------------------
 1046|   149k|                                                    j :
 1047|   326k|                                                    (((u1_sub_mb_y >> 1) << 1)
 1048|   176k|                                                                    + (u1_sub_mb_x
 1049|   176k|                                                                                    >> 1));
 1050|       |
 1051|   326k|                    ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x;
 1052|   326k|                    ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num;
 1053|       |
 1054|   326k|                    u1_pred_mode = ps_part->u1_pred_mode;
 1055|   326k|                    u1_wd = ps_part->u1_partwidth;
 1056|   326k|                    u1_ht = ps_part->u1_partheight;
 1057|       |
 1058|   326k|                    u1_lx_start = 0;
 1059|   326k|                    u1_lxend = 2;
 1060|   326k|                    if( PRED_L0 == u1_pred_mode)
  ------------------
  |  |  483|   326k|#define PRED_L0   1
  ------------------
  |  Branch (1060:25): [True: 191k, False: 134k]
  ------------------
 1061|   191k|                    {
 1062|   191k|                        s_mvPred.i2_mv[2] = 0;
 1063|   191k|                        s_mvPred.i2_mv[3] = 0;
 1064|   191k|                        u1_lxend = 1;
 1065|   191k|                    }
 1066|   326k|                    if( PRED_L1 == u1_pred_mode)
  ------------------
  |  |  484|   326k|#define PRED_L1   2
  ------------------
  |  Branch (1066:25): [True: 66.4k, False: 259k]
  ------------------
 1067|  66.4k|                    {
 1068|  66.4k|                        s_mvPred.i2_mv[0] = 0;
 1069|  66.4k|                        s_mvPred.i2_mv[1] = 0;
 1070|  66.4k|                        u1_lx_start = 1;
 1071|  66.4k|                    }
 1072|       |
 1073|       |                    /* Populate the colpic info and reference frames */
 1074|   326k|                    s_mvPred.i1_ref_frame[0] = pi1_ref_idx0[u1_blk_no];
 1075|   326k|                    s_mvPred.i1_ref_frame[1] = pi1_ref_idx1[u1_blk_no];
 1076|       |
 1077|   326k|                    ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop,
 1078|   326k|                                      &s_mvPred, u1_sub_mb_num, u1_wd,
 1079|   326k|                                      u1_lx_start, u1_lxend,
 1080|   326k|                                      ps_cur_mb_info->u1_mb_mc_mode);
 1081|       |
 1082|       |                    /**********************************************************/
 1083|       |                    /* Loop on number of predictors, 1 Each for Forw Backw    */
 1084|       |                    /* Loop 2 times for BiDirect mode                         */
 1085|       |                    /**********************************************************/
 1086|   720k|                    for(u1_lx = u1_lx_start; u1_lx < u1_lxend; u1_lx++)
  ------------------
  |  Branch (1086:46): [True: 394k, False: 326k]
  ------------------
 1087|   394k|                    {
 1088|   394k|                        WORD16 i2_mv_x, i2_mv_y;
 1089|       |
 1090|       |                        /********************************************************/
 1091|       |                        /* Predict Mv                                           */
 1092|       |                        /* Add Mv Residuals and store back                      */
 1093|       |                        /********************************************************/
 1094|   394k|                        i1_ref_idx = s_mvPred.i1_ref_frame[u1_lx];
 1095|   394k|                        u1_tmp_lx = (u1_lx << 1);
 1096|       |
 1097|   394k|                        i2_mv_x = ps_mv_nmb->i2_mv[u1_tmp_lx];
 1098|   394k|                        i2_mv_y = ps_mv_nmb->i2_mv[u1_tmp_lx + 1];
 1099|       |
 1100|   394k|                        i2_mv_x += s_mvPred.i2_mv[u1_tmp_lx];
 1101|   394k|                        i2_mv_y += s_mvPred.i2_mv[u1_tmp_lx + 1];
 1102|   394k|                        s_mvPred.i2_mv[u1_tmp_lx] = i2_mv_x;
 1103|   394k|                        s_mvPred.i2_mv[u1_tmp_lx + 1] = i2_mv_y;
 1104|       |
 1105|       |                        /********************************************************/
 1106|       |                        /* Transfer setup call                                  */
 1107|       |                        /* convert RefIdx if it is MbAff                        */
 1108|       |                        /* Pass Weight Offset and refFrame                      */
 1109|       |                        /********************************************************/
 1110|   394k|                        i1_ref_idx1 = i1_ref_idx >> u1_scale_ref;
 1111|   394k|                        if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb))
  ------------------
  |  Branch (1111:28): [True: 0, False: 394k]
  |  Branch (1111:44): [True: 0, False: 0]
  ------------------
 1112|      0|                            i1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1113|   394k|                        ps_ref_frame =
 1114|   394k|                                        ps_dec->ps_ref_pic_buf_lx[u1_lx][i1_ref_idx1];
 1115|       |
 1116|       |                        /* Storing Colocated-Zero u4_flag */
 1117|   394k|                        if(u1_lx == u1_lx_start)
  ------------------
  |  Branch (1117:28): [True: 326k, False: 67.8k]
  ------------------
 1118|   326k|                        {
 1119|       |                            /* Fill colocated info in MvPred structure */
 1120|   326k|                            s_mvPred.u1_col_ref_pic_idx =
 1121|   326k|                                            ps_ref_frame->u1_mv_buf_id;
 1122|   326k|                            s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
 1123|       |
 1124|       |                            /* Calculating colocated zero information */
 1125|   326k|                            u1_colz =
 1126|   326k|                                            (u1_field << 1)
 1127|   326k|                                                            | ((i1_ref_idx == 0)
  ------------------
  |  Branch (1127:64): [True: 283k, False: 42.8k]
  ------------------
 1128|   283k|                                                                            && (ABS(i2_mv_x)
  ------------------
  |  |  100|   283k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 31.9k, False: 251k]
  |  |  ------------------
  ------------------
  |  Branch (1128:80): [True: 101k, False: 181k]
  ------------------
 1129|   283k|                                                                                            <= 1)
 1130|   101k|                                                                            && (ABS(i2_mv_y)
  ------------------
  |  |  100|   101k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 17.7k, False: 83.6k]
  |  |  ------------------
  ------------------
  |  Branch (1130:80): [True: 80.3k, False: 21.1k]
  ------------------
 1131|   101k|                                                                                            <= 1));
 1132|   326k|                            u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no];
 1133|   326k|                        }
 1134|       |
 1135|   394k|                        pu4_wt_offst = ppu4_wt_ofst[u1_blk_no];
 1136|   394k|                        {
 1137|   394k|                            pred_info_pkd_t *ps_pred_pkd;
 1138|   394k|                           WORD16 i2_mv[2];
 1139|       |
 1140|   394k|                           i2_mv[0] = i2_mv_x;
 1141|   394k|                           i2_mv[1] = i2_mv_y;
 1142|       |
 1143|   394k|                           ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
 1144|   394k|                        ih264d_fill_pred_info(i2_mv,u1_wd,u1_ht,u1_sub_mb_num,u1_pred_mode,
 1145|   394k|                                        ps_pred_pkd,ps_ref_frame->u1_pic_buf_id,i1_ref_idx,pu4_wt_offst,
 1146|   394k|                                        ps_ref_frame->u1_pic_type);
 1147|   394k|                        ps_dec->u4_pred_info_pkd_idx++;
 1148|   394k|                        ps_cur_mb_info->u1_num_pred_parts++;
 1149|       |
 1150|       |
 1151|   394k|                        }
 1152|       |
 1153|   394k|                    }
 1154|   326k|                    ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb,
 1155|   326k|                                       u1_sub_mb_num, u1_colz, u1_ht,
 1156|   326k|                                       u1_wd);
 1157|   326k|                }
 1158|  1.22M|            }
 1159|       |
 1160|  1.01M|        }
 1161|    778|        else
 1162|    778|        {
 1163|       |            /* Set zero values in case of Intra Mbs */
 1164|    778|            mv_pred_t s_mvPred =
 1165|    778|                {
 1166|    778|                    { 0, 0, 0, 0 },
 1167|    778|                      { -1, -1 }, 0, 0};
 1168|       |            /* Storing colocated zero information */
 1169|    778|            ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
 1170|    778|                               (UWORD8)(u1_field << 1), 4, 4);
 1171|    778|        }
 1172|       |
 1173|       |        /*if num _cores is set to 3 ,compute bs will be done in another thread*/
 1174|  1.01M|        if(ps_dec->u4_num_cores < 3)
  ------------------
  |  Branch (1174:12): [True: 1.01M, False: 0]
  ------------------
 1175|  1.01M|        {
 1176|  1.01M|            if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1176:16): [True: 1.01M, False: 0]
  ------------------
 1177|  1.01M|                ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
 1178|  1.01M|                                     (UWORD16)(i >> u1_mbaff));
 1179|  1.01M|        }
 1180|  1.01M|    }
 1181|   130k|    return OK;
  ------------------
  |  |  114|   130k|#define OK        0
  ------------------
 1182|   130k|}
ih264d_get_implicit_weights:
 1196|  16.5k|{
 1197|  16.5k|    UWORD32 *pu4_iwt_ofst;
 1198|  16.5k|    UWORD8 i, j;
 1199|  16.5k|    struct pic_buffer_t *ps_pic_buff0, *ps_pic_buff1;
 1200|  16.5k|    WORD16 i2_dist_scale_factor;
 1201|  16.5k|    WORD16 i2_tb, i2_td, i2_tx;
 1202|  16.5k|    WORD64 i8_tb, i8_td;
 1203|  16.5k|    WORD32 i4_poc0, i4_poc1;
 1204|  16.5k|    UWORD32 ui_temp0, ui_temp1;
 1205|  16.5k|    UWORD8 uc_num_ref_idx_l0_active, uc_num_ref_idx_l1_active;
 1206|       |
 1207|  16.5k|    pu4_iwt_ofst = ps_dec->pu4_wts_ofsts_mat;
 1208|  16.5k|    uc_num_ref_idx_l0_active =
 1209|  16.5k|                    ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
 1210|  16.5k|    uc_num_ref_idx_l1_active =
 1211|  16.5k|                    ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
 1212|       |
 1213|  56.2k|    for(i = 0; i < uc_num_ref_idx_l0_active; i++)
  ------------------
  |  Branch (1213:16): [True: 39.6k, False: 16.5k]
  ------------------
 1214|  39.6k|    {
 1215|  39.6k|        ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][i];
 1216|  39.6k|        i4_poc0 = ps_pic_buff0->i4_avg_poc;
 1217|   136k|        for(j = 0; j < uc_num_ref_idx_l1_active; j++)
  ------------------
  |  Branch (1217:20): [True: 97.1k, False: 39.6k]
  ------------------
 1218|  97.1k|        {
 1219|  97.1k|            ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][j];
 1220|  97.1k|            i4_poc1 = ps_pic_buff1->i4_avg_poc;
 1221|       |
 1222|  97.1k|            if(i4_poc1 != i4_poc0)
  ------------------
  |  Branch (1222:16): [True: 13.2k, False: 83.8k]
  ------------------
 1223|  13.2k|            {
 1224|  13.2k|                i8_tb = (WORD64)ps_dec->ps_cur_pic->i4_poc - i4_poc0;
 1225|  13.2k|                i2_tb = CLIP_S8(i8_tb);
  ------------------
  |  |   59|  13.2k|#define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  13.2k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.40k, False: 8.82k]
  |  |  |  |  |  Branch (77:54): [True: 3.88k, False: 4.93k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1226|  13.2k|                i8_td = (WORD64)i4_poc1 - i4_poc0;
 1227|  13.2k|                i2_td = CLIP_S8(i8_td);
  ------------------
  |  |   59|  13.2k|#define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  13.2k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.43k, False: 9.79k]
  |  |  |  |  |  Branch (77:54): [True: 4.79k, False: 5.00k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1228|  13.2k|                i2_tx = (16384 + ABS(SIGN_POW2_DIV(i2_td, 1))) / i2_td;
  ------------------
  |  |  100|  26.4k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 5.07k, False: 8.15k]
  |  |  |  Branch (100:27): [True: 5.80k, False: 7.42k]
  |  |  |  Branch (100:39): [True: 5.07k, False: 0]
  |  |  |  Branch (100:46): [True: 729, False: 7.42k]
  |  |  ------------------
  ------------------
 1229|  13.2k|                i2_dist_scale_factor = CLIP_S11(
  ------------------
  |  |   65|  13.2k|#define CLIP_S11(x) CLIP3(-1024, 1023, (x))
  |  |  ------------------
  |  |  |  |   77|  13.2k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 598, False: 12.6k]
  |  |  |  |  |  Branch (77:54): [True: 803, False: 11.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1230|  13.2k|                                            (((i2_tb * i2_tx) + 32) >> 6));
 1231|       |
 1232|  13.2k|                if(/*((u4_poc1 - u4_poc0) == 0) ||*/
 1233|  13.2k|                (!(ps_pic_buff1->u1_is_short && ps_pic_buff0->u1_is_short))
  ------------------
  |  Branch (1233:20): [True: 10.7k, False: 2.47k]
  |  Branch (1233:49): [True: 9.72k, False: 1.02k]
  ------------------
 1234|  9.72k|                                || ((i2_dist_scale_factor >> 2) < -64)
  ------------------
  |  Branch (1234:36): [True: 1.80k, False: 7.92k]
  ------------------
 1235|  7.92k|                                || ((i2_dist_scale_factor >> 2) > 128))
  ------------------
  |  Branch (1235:36): [True: 678, False: 7.24k]
  ------------------
 1236|  5.98k|                {
 1237|       |                    /* same for forward and backward, wt=32 and Offset = 0 */
 1238|  5.98k|                    ui_temp0 = 0x00000020;
 1239|  5.98k|                    ui_temp1 = 0x00000020;
 1240|  5.98k|                }
 1241|  7.24k|                else
 1242|  7.24k|                {
 1243|  7.24k|                    ui_temp0 = 64 - (i2_dist_scale_factor >> 2);
 1244|  7.24k|                    ui_temp1 = (i2_dist_scale_factor >> 2);
 1245|  7.24k|                }
 1246|  13.2k|            }
 1247|  83.8k|            else
 1248|  83.8k|            {
 1249|  83.8k|                ui_temp0 = 0x00000020;
 1250|  83.8k|                ui_temp1 = 0x00000020;
 1251|  83.8k|            }
 1252|  97.1k|            pu4_iwt_ofst[0] = pu4_iwt_ofst[2] = pu4_iwt_ofst[4] = ui_temp0;
 1253|  97.1k|            pu4_iwt_ofst[1] = pu4_iwt_ofst[3] = pu4_iwt_ofst[5] = ui_temp1;
 1254|  97.1k|            pu4_iwt_ofst += 6;
 1255|  97.1k|        }
 1256|  39.6k|    }
 1257|  16.5k|    if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1257:8): [True: 0, False: 16.5k]
  ------------------
 1258|      0|    {
 1259|      0|        UWORD8 k;
 1260|      0|        WORD32 i4_cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
 1261|      0|        UWORD32* pu4_wt_mat = ps_dec->pu4_mbaff_wt_mat;
 1262|       |        /* Form the Implicit Weighted prediction matrix for field MBs also */
 1263|      0|        for(k = 0; k < 2; k++)
  ------------------
  |  Branch (1263:20): [True: 0, False: 0]
  ------------------
 1264|      0|        {
 1265|      0|            for(i = 0; i < (uc_num_ref_idx_l0_active << 1); i++)
  ------------------
  |  Branch (1265:24): [True: 0, False: 0]
  ------------------
 1266|      0|            {
 1267|      0|                UWORD16 u2_l0_idx;
 1268|       |
 1269|       |                /*u2_l0_idx = (i >= uc_num_ref_idx_l0_active)
 1270|       |                 ?(MAX_REF_BUFS + i - uc_num_ref_idx_l0_active) : (i) ;*/
 1271|       |
 1272|      0|                u2_l0_idx = i >> 1;
 1273|      0|                if((i & 0x01) != k)
  ------------------
  |  Branch (1273:20): [True: 0, False: 0]
  ------------------
 1274|      0|                {
 1275|      0|                    u2_l0_idx += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1276|      0|                }
 1277|      0|                ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u2_l0_idx];
 1278|      0|                i4_poc0 = ps_pic_buff0->i4_poc;
 1279|      0|                for(j = 0; j < (uc_num_ref_idx_l1_active << 1); j++)
  ------------------
  |  Branch (1279:28): [True: 0, False: 0]
  ------------------
 1280|      0|                {
 1281|      0|                    UWORD16 u2_l1_idx;
 1282|       |                    /*u2_l1_idx = (j >= uc_num_ref_idx_l1_active)
 1283|       |                     ? (MAX_REF_BUFS + j - uc_num_ref_idx_l1_active ) : (j) ;*/
 1284|       |
 1285|      0|                    u2_l1_idx = j >> 1;
 1286|      0|                    if((j & 0x01) != k)
  ------------------
  |  Branch (1286:24): [True: 0, False: 0]
  ------------------
 1287|      0|                    {
 1288|      0|                        u2_l1_idx += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1289|      0|                    }
 1290|      0|                    ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][u2_l1_idx];
 1291|      0|                    i4_poc1 = ps_pic_buff1->i4_poc;
 1292|      0|                    if(i4_poc1 != i4_poc0)
  ------------------
  |  Branch (1292:24): [True: 0, False: 0]
  ------------------
 1293|      0|                    {
 1294|      0|                        i8_tb = (WORD64)i4_cur_poc - i4_poc0;
 1295|      0|                        i2_tb = CLIP_S8(i8_tb);
  ------------------
  |  |   59|      0|#define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|      0|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 0]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 0]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1296|      0|                        i8_td = (WORD64)i4_poc1 - i4_poc0;
 1297|      0|                        i2_td = CLIP_S8(i8_td);
  ------------------
  |  |   59|      0|#define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|      0|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 0]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 0]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1298|      0|                        i2_tx = (16384 + ABS(SIGN_POW2_DIV(i2_td, 1)))
  ------------------
  |  |  100|      0|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 0, False: 0]
  |  |  |  Branch (100:27): [True: 0, False: 0]
  |  |  |  Branch (100:39): [True: 0, False: 0]
  |  |  |  Branch (100:46): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1299|      0|                                        / i2_td;
 1300|      0|                        i2_dist_scale_factor = CLIP_S11(
  ------------------
  |  |   65|      0|#define CLIP_S11(x) CLIP3(-1024, 1023, (x))
  |  |  ------------------
  |  |  |  |   77|      0|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 0]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 0]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1301|      0|                                                    (((i2_tb * i2_tx) + 32) >> 6));
 1302|       |
 1303|      0|                        if(/*((u4_poc1 - u4_poc0) == 0) ||*/
 1304|      0|                        (!(ps_pic_buff1->u1_is_short && ps_pic_buff0->u1_is_short))
  ------------------
  |  Branch (1304:28): [True: 0, False: 0]
  |  Branch (1304:57): [True: 0, False: 0]
  ------------------
 1305|      0|                                        || ((i2_dist_scale_factor >> 2) < -64)
  ------------------
  |  Branch (1305:44): [True: 0, False: 0]
  ------------------
 1306|      0|                                        || ((i2_dist_scale_factor >> 2) > 128))
  ------------------
  |  Branch (1306:44): [True: 0, False: 0]
  ------------------
 1307|      0|                        {
 1308|       |                            /* same for forward and backward, wt=32 and Offset = 0 */
 1309|      0|                            ui_temp0 = 0x00000020;
 1310|      0|                            ui_temp1 = 0x00000020;
 1311|      0|                        }
 1312|      0|                        else
 1313|      0|                        {
 1314|      0|                            ui_temp0 = 64 - (i2_dist_scale_factor >> 2);
 1315|      0|                            ui_temp1 = (i2_dist_scale_factor >> 2);
 1316|      0|                        }
 1317|      0|                    }
 1318|      0|                    else
 1319|      0|                    {
 1320|      0|                        ui_temp0 = 0x00000020;
 1321|      0|                        ui_temp1 = 0x00000020;
 1322|      0|                    }
 1323|       |                    /* Store in the weight matrix */
 1324|      0|                    *pu4_wt_mat++ = ui_temp0;
 1325|      0|                    *pu4_wt_mat++ = ui_temp1;
 1326|      0|                    *pu4_wt_mat++ = ui_temp0;
 1327|      0|                    *pu4_wt_mat++ = ui_temp1;
 1328|      0|                    *pu4_wt_mat++ = ui_temp0;
 1329|      0|                    *pu4_wt_mat++ = ui_temp1;
 1330|       |
 1331|      0|                }
 1332|      0|            }
 1333|      0|            i4_cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
 1334|      0|        }
 1335|      0|    }
 1336|  16.5k|}

ih264d_read_coeff4x4_cabac:
   69|  1.09M|{
   70|       |
   71|  1.09M|    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
   72|  1.09M|    UWORD32 u4_coded_flag;
   73|  1.09M|    UWORD32 u4_offset, *pu4_buffer;
   74|  1.09M|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
   75|  1.09M|    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
   76|  1.09M|    WORD16 *pi2_coeff_data;
   77|  1.09M|    WORD32 num_sig_coeffs = 0;
   78|       |
   79|       |    /*loading from strcuctures*/
   80|       |
   81|  1.09M|    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
   82|  1.09M|    ps_tu_4x4->u2_sig_coeff_map = 0;
   83|  1.09M|    pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
   84|       |
   85|  1.09M|    u4_offset = ps_bitstrm->u4_ofst;
   86|  1.09M|    pu4_buffer = ps_bitstrm->pu4_buffer;
   87|       |
   88|  1.09M|    u4_code_int_range = ps_cab_env->u4_code_int_range;
   89|  1.09M|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
   90|       |
   91|  1.09M|    {
   92|       |
   93|       |        /*inilined DecodeDecision_onebin begins*/
   94|       |
   95|  1.09M|        {
   96|       |
   97|  1.09M|            UWORD32 u4_qnt_int_range, u4_int_range_lps;
   98|  1.09M|            UWORD32 u4_symbol, u1_mps_state;
   99|       |
  100|  1.09M|            UWORD32 table_lookup;
  101|  1.09M|            const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
  102|  1.09M|            UWORD32 u4_clz;
  103|       |
  104|  1.09M|            u1_mps_state = (ps_ctxt_coded->u1_mps_state);
  105|  1.09M|            u4_clz = CLZ(u4_code_int_range);
  106|  1.09M|            u4_qnt_int_range = u4_code_int_range << u4_clz;
  107|  1.09M|            u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  108|  1.09M|            table_lookup =
  109|  1.09M|                            pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
  110|  1.09M|            u4_int_range_lps = table_lookup & 0xff;
  111|  1.09M|            u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  112|  1.09M|            u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  113|  1.09M|            u4_symbol = ((u1_mps_state >> 6) & 0x1);
  114|  1.09M|            u1_mps_state = (table_lookup >> 8) & 0x7F;
  115|       |
  116|  1.09M|            CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
  ------------------
  |  |  184|  1.09M|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|  1.09M|{                                                                                         \
  |  |  186|  1.09M|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 168k, False: 926k]
  |  |  ------------------
  |  |  187|  1.09M|  {                                                                                         \
  |  |  188|   168k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|   168k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|   168k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|   168k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|   168k|  }                                                                                         \
  |  |  193|  1.09M|}
  ------------------
  117|  1.09M|                         u4_int_range_lps, u1_mps_state, table_lookup)
  118|       |
  119|  1.09M|            if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
  ------------------
  |  |  113|  1.09M|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  ------------------
  |  Branch (119:16): [True: 8.62k, False: 1.08M]
  ------------------
  120|  8.62k|            {
  121|       |
  122|  8.62k|                RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  170|  8.62k|  {                                                                                         \
  |  |  171|  8.62k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  8.62k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  8.62k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  8.62k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  8.62k|{                                                                           \
  |  |  |  |  139|  8.62k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  8.62k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  8.62k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  8.62k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  8.62k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 8.39k, False: 228]
  |  |  |  |  ------------------
  |  |  |  |  144|  8.62k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  8.39k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  8.62k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  8.62k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  8.62k|}
  |  |  ------------------
  |  |  174|  8.62k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  8.62k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  8.62k|{                                                                           \
  |  |  |  |  195|  8.62k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  8.62k|}
  |  |  ------------------
  |  |  175|  8.62k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  8.62k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  8.62k|  }
  ------------------
  123|  8.62k|                                    u4_offset, pu4_buffer)
  124|  8.62k|            }
  125|       |
  126|  1.09M|            ps_ctxt_coded->u1_mps_state = u1_mps_state;
  127|  1.09M|            u4_coded_flag = u4_symbol;
  128|       |
  129|       |            /*inilined DecodeDecision_onebin ends*/
  130|       |
  131|  1.09M|        }
  132|       |
  133|  1.09M|    }
  134|       |
  135|  1.09M|    if(u4_coded_flag)
  ------------------
  |  Branch (135:8): [True: 486k, False: 608k]
  ------------------
  136|   486k|    {
  137|       |
  138|   486k|        {
  139|   486k|            bin_ctxt_model_t *p_binCtxt_last, *p_binCtxt_last_org;
  140|   486k|            UWORD32 uc_last_coeff_idx;
  141|   486k|            UWORD32 uc_bin;
  142|   486k|            UWORD32 i;
  143|   486k|            WORD32 first_coeff_offset = 0;
  144|       |
  145|   486k|            if((u4_ctxcat == CHROMA_AC_CTXCAT) || (u4_ctxcat == LUMA_AC_CTXCAT))
  ------------------
  |  |   75|   486k|#define CHROMA_AC_CTXCAT  4
  ------------------
                          if((u4_ctxcat == CHROMA_AC_CTXCAT) || (u4_ctxcat == LUMA_AC_CTXCAT))
  ------------------
  |  |   72|   444k|#define LUMA_AC_CTXCAT    1
  ------------------
  |  Branch (145:16): [True: 42.4k, False: 444k]
  |  Branch (145:51): [True: 10.1k, False: 434k]
  ------------------
  146|  52.6k|            {
  147|  52.6k|                first_coeff_offset = 1;
  148|  52.6k|            }
  149|       |
  150|   486k|            i = 0;
  151|   486k|            if(u4_ctxcat == CHROMA_DC_CTXCAT)
  ------------------
  |  |   74|   486k|#define CHROMA_DC_CTXCAT  3
  ------------------
  |  Branch (151:16): [True: 48.8k, False: 437k]
  ------------------
  152|  48.8k|            {
  153|  48.8k|                uc_last_coeff_idx = 3;
  154|  48.8k|            }
  155|   437k|            else
  156|   437k|            {
  157|   437k|                UWORD32 u4_start;
  158|   437k|                u4_start = (u4_ctxcat & 1) + (u4_ctxcat >> 2);
  159|   437k|                uc_last_coeff_idx = 15 - u4_start;
  160|   437k|            }
  161|   486k|            p_binCtxt_last_org = ps_ctxt_sig_coeff
  162|   486k|                            + LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT;
  ------------------
  |  |   84|   486k|#define LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT 61
  ------------------
  163|       |
  164|   486k|            do
  165|  3.87M|            {
  166|       |
  167|       |                /*inilined DecodeDecision_onebin begins*/
  168|  3.87M|                {
  169|       |
  170|  3.87M|                    UWORD32 u4_qnt_int_range, u4_int_range_lps;
  171|  3.87M|                    UWORD32 u4_symbol, u1_mps_state;
  172|  3.87M|                    UWORD32 table_lookup;
  173|  3.87M|                    const UWORD32 *pu4_table =
  174|  3.87M|                                    (const UWORD32 *)ps_cab_env->cabac_table;
  175|  3.87M|                    UWORD32 u4_clz;
  176|       |
  177|  3.87M|                    u1_mps_state = (ps_ctxt_sig_coeff->u1_mps_state);
  178|       |
  179|  3.87M|                    u4_clz = CLZ(u4_code_int_range);
  180|       |
  181|  3.87M|                    u4_qnt_int_range = u4_code_int_range << u4_clz;
  182|  3.87M|                    u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  183|       |
  184|  3.87M|                    table_lookup = pu4_table[(u1_mps_state << 2)
  185|  3.87M|                                    + u4_qnt_int_range];
  186|       |
  187|  3.87M|                    u4_int_range_lps = table_lookup & 0xff;
  188|       |
  189|  3.87M|                    u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  190|  3.87M|                    u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  191|  3.87M|                    u4_symbol = ((u1_mps_state >> 6) & 0x1);
  192|  3.87M|                    u1_mps_state = (table_lookup >> 8) & 0x7F;
  193|       |
  194|  3.87M|                    CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  184|  3.87M|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|  3.87M|{                                                                                         \
  |  |  186|  3.87M|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 508k, False: 3.36M]
  |  |  ------------------
  |  |  187|  3.87M|  {                                                                                         \
  |  |  188|   508k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|   508k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|   508k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|   508k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|   508k|  }                                                                                         \
  |  |  193|  3.87M|}
  ------------------
  195|  3.87M|                                 u4_symbol, u4_int_range_lps, u1_mps_state,
  196|  3.87M|                                 table_lookup)
  197|       |
  198|  3.87M|                    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_14)
  ------------------
  |  |  115|  3.87M|#define ONE_RIGHT_SHIFTED_BY_14 1<<14
  ------------------
  |  Branch (198:24): [True: 167k, False: 3.70M]
  ------------------
  199|   167k|                    {
  200|       |
  201|   167k|                        UWORD32 read_bits, u4_clz;
  202|   167k|                        u4_clz = CLZ(u4_code_int_range);
  203|   167k|                        NEXTBITS(read_bits, (u4_offset + 23), pu4_buffer,
  ------------------
  |  |  137|   167k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|   167k|{                                                                           \
  |  |  139|   167k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|   167k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|   167k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|   167k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|   167k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 162k, False: 5.13k]
  |  |  ------------------
  |  |  144|   167k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|   162k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|   167k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|   167k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|   167k|}
  ------------------
  204|   167k|                                 u4_clz)
  205|   167k|                        FLUSHBITS(u4_offset, (u4_clz))
  ------------------
  |  |  193|   167k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|   167k|{                                                                           \
  |  |  195|   167k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|   167k|}
  ------------------
  206|   167k|                        u4_code_int_range = u4_code_int_range << u4_clz;
  207|   167k|                        u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz)
  208|   167k|                                        | read_bits;
  209|   167k|                    }
  210|       |
  211|  3.87M|                    INC_BIN_COUNT(
  212|  3.87M|                                    ps_cab_env)
  213|       |
  214|  3.87M|                    ps_ctxt_sig_coeff->u1_mps_state = u1_mps_state;
  215|  3.87M|                    uc_bin = u4_symbol;
  216|       |
  217|  3.87M|                }
  218|       |                /*incrementing pointer to point to the context of the next bin*/
  219|  3.87M|                ps_ctxt_sig_coeff++;
  220|       |
  221|       |                /*inilined DecodeDecision_onebin ends*/
  222|       |
  223|  3.87M|                if(uc_bin)
  ------------------
  |  Branch (223:20): [True: 1.31M, False: 2.56M]
  ------------------
  224|  1.31M|                {
  225|  1.31M|                    num_sig_coeffs++;
  226|  1.31M|                    SET_BIT(ps_tu_4x4->u2_sig_coeff_map, (i + first_coeff_offset));
  ------------------
  |  |  106|  1.31M|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  227|       |
  228|  1.31M|                    p_binCtxt_last = p_binCtxt_last_org + i;
  229|       |
  230|       |                    /*inilined DecodeDecision_onebin begins*/
  231|       |
  232|  1.31M|                    {
  233|       |
  234|  1.31M|                        UWORD32 u4_qnt_int_range, u4_int_range_lps;
  235|  1.31M|                        UWORD32 u4_symbol, u1_mps_state;
  236|  1.31M|                        UWORD32 table_lookup;
  237|  1.31M|                        const UWORD32 *pu4_table =
  238|  1.31M|                                        (const UWORD32 *)ps_cab_env->cabac_table;
  239|  1.31M|                        UWORD32 u4_clz;
  240|       |
  241|  1.31M|                        u1_mps_state = (p_binCtxt_last->u1_mps_state);
  242|       |
  243|  1.31M|                        u4_clz = CLZ(u4_code_int_range);
  244|  1.31M|                        u4_qnt_int_range = u4_code_int_range << u4_clz;
  245|  1.31M|                        u4_qnt_int_range = (u4_qnt_int_range >> 29)
  246|  1.31M|                                        & 0x3;
  247|       |
  248|  1.31M|                        table_lookup = pu4_table[(u1_mps_state << 2)
  249|  1.31M|                                        + u4_qnt_int_range];
  250|  1.31M|                        u4_int_range_lps = table_lookup & 0xff;
  251|       |
  252|  1.31M|                        u4_int_range_lps = u4_int_range_lps
  253|  1.31M|                                        << (23 - u4_clz);
  254|       |
  255|  1.31M|                        u4_code_int_range = u4_code_int_range
  256|  1.31M|                                        - u4_int_range_lps;
  257|  1.31M|                        u4_symbol = ((u1_mps_state >> 6) & 0x1);
  258|  1.31M|                        u1_mps_state = (table_lookup >> 8) & 0x7F;
  259|       |
  260|  1.31M|                        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  184|  1.31M|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|  1.31M|{                                                                                         \
  |  |  186|  1.31M|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 164k, False: 1.14M]
  |  |  ------------------
  |  |  187|  1.31M|  {                                                                                         \
  |  |  188|   164k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|   164k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|   164k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|   164k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|   164k|  }                                                                                         \
  |  |  193|  1.31M|}
  ------------------
  261|  1.31M|                                     u4_symbol, u4_int_range_lps,
  262|  1.31M|                                     u1_mps_state, table_lookup)
  263|       |
  264|  1.31M|                        INC_BIN_COUNT(ps_cab_env)
  265|       |
  266|  1.31M|                        p_binCtxt_last->u1_mps_state = u1_mps_state;
  267|  1.31M|                        uc_bin = u4_symbol;
  268|       |
  269|  1.31M|                    }
  270|       |
  271|       |                    /*inilined DecodeDecision_onebin ends*/
  272|  1.31M|                    if(uc_bin == 1)
  ------------------
  |  Branch (272:24): [True: 462k, False: 852k]
  ------------------
  273|   462k|                        goto label_read_levels;
  274|       |
  275|  1.31M|                }
  276|       |
  277|  3.41M|                i = i + 1;
  278|       |
  279|  3.41M|            }
  280|  3.41M|            while(i < uc_last_coeff_idx);
  ------------------
  |  Branch (280:19): [True: 3.38M, False: 24.7k]
  ------------------
  281|       |
  282|  24.7k|            num_sig_coeffs++;
  283|  24.7k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, (i + first_coeff_offset));
  ------------------
  |  |  106|  24.7k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  284|       |
  285|   486k|            label_read_levels: ;
  286|       |
  287|   486k|        }
  288|       |
  289|       |        /// VALUE of No of Coeff in BLOCK = i + 1 for second case else i;
  290|       |
  291|       |        /* Decode coeff_abs_level_minus1 and coeff_sign_flag */
  292|      0|        {
  293|       |
  294|   486k|            WORD32 i2_abs_lvl;
  295|   486k|            UWORD32 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0;
  296|       |
  297|   486k|            UWORD32 u4_ctx_inc;
  298|   486k|            UWORD32 ui_prefix;
  299|   486k|        bin_ctxt_model_t *p_ctxt_abs_level;
  300|       |
  301|       |
  302|   486k|        p_ctxt_abs_level = ps_dec->p_coeff_abs_level_minus1_t[u4_ctxcat];
  303|   486k|        u4_ctx_inc = ((0x51));
  304|       |
  305|       |        /*****************************************************/
  306|       |        /* Main Loop runs for no. of Significant coefficient */
  307|       |        /*****************************************************/
  308|       |
  309|       |
  310|   486k|        do
  311|  1.33M|            {
  312|       |
  313|  1.33M|                {
  314|  1.33M|                    INC_SYM_COUNT(&(ps_dec.s_cab_dec_env));
  315|       |
  316|       |                    /*****************************************************/
  317|       |                    /* inilining a modified ih264d_decode_bins_unary     */
  318|       |                    /*****************************************************/
  319|       |
  320|  1.33M|                    {
  321|  1.33M|                        UWORD32 u4_value;
  322|  1.33M|                        UWORD32 u4_symbol;
  323|  1.33M|                        bin_ctxt_model_t *ps_bin_ctxt;
  324|  1.33M|                        UWORD32 u4_ctx_Inc;
  325|       |
  326|  1.33M|                        u4_value = 0;
  327|       |
  328|  1.33M|                        u4_ctx_Inc = u4_ctx_inc & 0xf;
  329|  1.33M|                        ps_bin_ctxt = p_ctxt_abs_level + u4_ctx_Inc;
  330|       |
  331|  1.33M|                        do
  332|  2.43M|                        {
  333|       |
  334|  2.43M|                            {
  335|       |
  336|  2.43M|                                UWORD32 u4_qnt_int_range,
  337|  2.43M|                                                u4_int_range_lps;
  338|  2.43M|                                UWORD32 u1_mps_state;
  339|  2.43M|                                UWORD32 table_lookup;
  340|  2.43M|                                const UWORD32 *pu4_table =
  341|  2.43M|                                                (const UWORD32 *)ps_cab_env->cabac_table;
  342|  2.43M|                                UWORD32 u4_clz;
  343|       |
  344|  2.43M|                                u1_mps_state = (ps_bin_ctxt->u1_mps_state);
  345|  2.43M|                                u4_clz = CLZ(u4_code_int_range);
  346|  2.43M|                                u4_qnt_int_range = u4_code_int_range
  347|  2.43M|                                                << u4_clz;
  348|  2.43M|                                u4_qnt_int_range = (u4_qnt_int_range
  349|  2.43M|                                                >> 29) & 0x3;
  350|  2.43M|                                table_lookup = pu4_table[(u1_mps_state << 2)
  351|  2.43M|                                                + u4_qnt_int_range];
  352|  2.43M|                                u4_int_range_lps = table_lookup & 0xff;
  353|       |
  354|  2.43M|                                u4_int_range_lps = u4_int_range_lps
  355|  2.43M|                                                << (23 - u4_clz);
  356|  2.43M|                                u4_code_int_range = u4_code_int_range
  357|  2.43M|                                                - u4_int_range_lps;
  358|  2.43M|                                u4_symbol = ((u1_mps_state >> 6) & 0x1);
  359|  2.43M|                                u1_mps_state = (table_lookup >> 8) & 0x7F;
  360|       |
  361|  2.43M|                                CHECK_IF_LPS(u4_code_int_range,
  ------------------
  |  |  184|  2.43M|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|  2.43M|{                                                                                         \
  |  |  186|  2.43M|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 288k, False: 2.15M]
  |  |  ------------------
  |  |  187|  2.43M|  {                                                                                         \
  |  |  188|   288k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|   288k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|   288k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|   288k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|   288k|  }                                                                                         \
  |  |  193|  2.43M|}
  ------------------
  362|  2.43M|                                             u4_code_int_val_ofst, u4_symbol,
  363|  2.43M|                                             u4_int_range_lps, u1_mps_state,
  364|  2.43M|                                             table_lookup)
  365|       |
  366|  2.43M|                                if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
  ------------------
  |  |  114|  2.43M|#define ONE_RIGHT_SHIFTED_BY_9    1<<9
  ------------------
  |  Branch (366:36): [True: 41.4k, False: 2.39M]
  ------------------
  367|  41.4k|                                {
  368|       |
  369|  41.4k|                                    RENORM_RANGE_OFFSET(u4_code_int_range,
  ------------------
  |  |  170|  41.4k|  {                                                                                         \
  |  |  171|  41.4k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  41.4k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  41.4k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  41.4k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  41.4k|{                                                                           \
  |  |  |  |  139|  41.4k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  41.4k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  41.4k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  41.4k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  41.4k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 39.8k, False: 1.60k]
  |  |  |  |  ------------------
  |  |  |  |  144|  41.4k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  39.8k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  41.4k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  41.4k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  41.4k|}
  |  |  ------------------
  |  |  174|  41.4k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  41.4k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  41.4k|{                                                                           \
  |  |  |  |  195|  41.4k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  41.4k|}
  |  |  ------------------
  |  |  175|  41.4k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  41.4k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  41.4k|  }
  ------------------
  370|  41.4k|                                                        u4_code_int_val_ofst,
  371|  41.4k|                                                        u4_offset, pu4_buffer)
  372|  41.4k|                                }
  373|       |
  374|  2.43M|                                INC_BIN_COUNT(ps_cab_env);
  375|       |
  376|  2.43M|                                ps_bin_ctxt->u1_mps_state = u1_mps_state;
  377|  2.43M|                            }
  378|       |
  379|  2.43M|                            INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
  380|       |
  381|  2.43M|                            u4_value++;
  382|  2.43M|                            ps_bin_ctxt = p_ctxt_abs_level + (u4_ctx_inc >> 4);
  383|       |
  384|  2.43M|                        }
  385|  2.43M|                        while(u4_symbol && (u4_value < UCOFF_LEVEL));
  ------------------
  |  |   41|  1.15M|#define UCOFF_LEVEL  14
  ------------------
  |  Branch (385:31): [True: 1.15M, False: 1.28M]
  |  Branch (385:44): [True: 1.10M, False: 53.1k]
  ------------------
  386|       |
  387|  1.33M|                        ui_prefix = u4_value - 1 + u4_symbol;
  388|       |
  389|  1.33M|                    }
  390|       |
  391|  1.33M|                    if(ui_prefix == UCOFF_LEVEL)
  ------------------
  |  |   41|  1.33M|#define UCOFF_LEVEL  14
  ------------------
  |  Branch (391:24): [True: 53.1k, False: 1.28M]
  ------------------
  392|  53.1k|                    {
  393|  53.1k|                        UWORD32 ui16_sufS = 0;
  394|  53.1k|                        UWORD32 u1_max_bins;
  395|  53.1k|                        UWORD32 u4_value;
  396|       |
  397|  53.1k|                        i2_abs_lvl = UCOFF_LEVEL;
  ------------------
  |  |   41|  53.1k|#define UCOFF_LEVEL  14
  ------------------
  398|       |                        /*inlining ih264d_decode_bypass_bins_unary begins*/
  399|       |
  400|  53.1k|                        {
  401|  53.1k|                            UWORD32 uc_bin;
  402|  53.1k|                            UWORD32 bits_to_flush;
  403|       |
  404|       |
  405|  53.1k|                            bits_to_flush = 0;
  406|       |                            /*renormalize to ensure there 23 bits more in the u4_code_int_val_ofst*/
  407|  53.1k|                            {
  408|  53.1k|                                UWORD32 u4_clz, read_bits;
  409|       |
  410|  53.1k|                                u4_clz = CLZ(u4_code_int_range);
  411|  53.1k|                                FLUSHBITS(u4_offset, u4_clz)
  ------------------
  |  |  193|  53.1k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  53.1k|{                                                                           \
  |  |  195|  53.1k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  53.1k|}
  ------------------
  412|  53.1k|                                NEXTBITS(read_bits, u4_offset, pu4_buffer, CABAC_BITS_TO_READ)
  ------------------
  |  |  137|  53.1k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  53.1k|{                                                                           \
  |  |  139|  53.1k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  53.1k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  53.1k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  53.1k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  53.1k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 51.3k, False: 1.82k]
  |  |  ------------------
  |  |  144|  53.1k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  51.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  53.1k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  53.1k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  53.1k|}
  ------------------
  413|  53.1k|                                u4_code_int_range = u4_code_int_range << u4_clz;
  414|  53.1k|                                u4_code_int_val_ofst = (u4_code_int_val_ofst
  415|  53.1k|                                                << u4_clz) | read_bits;
  416|       |
  417|  53.1k|                            }
  418|       |
  419|  53.1k|                            do
  420|  70.1k|                            {
  421|  70.1k|                                bits_to_flush++;
  422|       |
  423|  70.1k|                                u4_code_int_range = u4_code_int_range >> 1;
  424|       |
  425|  70.1k|                                if(u4_code_int_val_ofst >= u4_code_int_range)
  ------------------
  |  Branch (425:36): [True: 17.0k, False: 53.1k]
  ------------------
  426|  17.0k|                                {
  427|       |                                    /* S=1 */
  428|  17.0k|                                    uc_bin = 1;
  429|  17.0k|                                    u4_code_int_val_ofst -= u4_code_int_range;
  430|  17.0k|                                }
  431|  53.1k|                                else
  432|  53.1k|                                {
  433|       |                                    /* S=0 */
  434|  53.1k|                                    uc_bin = 0;
  435|  53.1k|                                }
  436|       |
  437|  70.1k|                                INC_BIN_COUNT(
  438|  70.1k|                                                ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
  439|       |
  440|  70.1k|                            }
  441|  70.1k|                            while(uc_bin && (bits_to_flush < CABAC_BITS_TO_READ));
  ------------------
  |  |  618|  17.0k|#define CABAC_BITS_TO_READ 23
  ------------------
  |  Branch (441:35): [True: 17.0k, False: 53.1k]
  |  Branch (441:45): [True: 16.9k, False: 33]
  ------------------
  442|       |
  443|  53.1k|                            u4_value = (bits_to_flush - 1);
  444|       |
  445|  53.1k|                        }
  446|       |                        /*inlining ih264d_decode_bypass_bins_unary ends*/
  447|       |
  448|  53.1k|                        ui16_sufS = (1 << u4_value);
  449|  53.1k|                        u1_max_bins = u4_value;
  450|       |
  451|  53.1k|                        if(u4_value > 0)
  ------------------
  |  Branch (451:28): [True: 7.84k, False: 45.2k]
  ------------------
  452|  7.84k|                        {
  453|       |
  454|       |                            /*inline bypassbins_flc begins*/
  455|       |
  456|  7.84k|                            if(u4_value > 10)
  ------------------
  |  Branch (456:32): [True: 181, False: 7.66k]
  ------------------
  457|    181|                            {
  458|    181|                                UWORD32 u4_clz, read_bits;
  459|       |
  460|    181|                                u4_clz = CLZ(u4_code_int_range);
  461|    181|                                FLUSHBITS(u4_offset, u4_clz)
  ------------------
  |  |  193|    181|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|    181|{                                                                           \
  |  |  195|    181|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|    181|}
  ------------------
  462|    181|                                NEXTBITS(read_bits, u4_offset, pu4_buffer, CABAC_BITS_TO_READ)
  ------------------
  |  |  137|    181|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|    181|{                                                                           \
  |  |  139|    181|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|    181|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|    181|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|    181|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|    181|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 112, False: 69]
  |  |  ------------------
  |  |  144|    181|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|    112|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|    181|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|    181|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|    181|}
  ------------------
  463|    181|                                u4_code_int_range = u4_code_int_range << u4_clz;
  464|    181|                                u4_code_int_val_ofst = (u4_code_int_val_ofst
  465|    181|                                                << u4_clz) | read_bits;
  466|    181|                            }
  467|       |
  468|  7.84k|                            {
  469|  7.84k|                                UWORD32 ui_bins;
  470|  7.84k|                                UWORD32 uc_bin;
  471|  7.84k|                                UWORD32 bits_to_flush;
  472|       |
  473|  7.84k|                                ui_bins = 0;
  474|  7.84k|                                bits_to_flush = 0;
  475|       |
  476|  7.84k|                                do
  477|  16.9k|                                {
  478|  16.9k|                                    bits_to_flush++;
  479|       |
  480|  16.9k|                                    u4_code_int_range = u4_code_int_range >> 1;
  481|       |
  482|  16.9k|                                    if(u4_code_int_val_ofst
  ------------------
  |  Branch (482:40): [True: 8.63k, False: 8.34k]
  ------------------
  483|  16.9k|                                                    >= u4_code_int_range)
  484|  8.63k|                                    {
  485|       |                                        /* S=1 */
  486|  8.63k|                                        uc_bin = 1;
  487|  8.63k|                                        u4_code_int_val_ofst -=
  488|  8.63k|                                                        u4_code_int_range;
  489|  8.63k|                                    }
  490|  8.34k|                                    else
  491|  8.34k|                                    {
  492|       |                                        /* S=0 */
  493|  8.34k|                                        uc_bin = 0;
  494|  8.34k|                                    }
  495|       |
  496|  16.9k|                                    INC_BIN_COUNT(
  497|  16.9k|                                                    ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
  498|       |
  499|  16.9k|                                    ui_bins = ((ui_bins << 1) | uc_bin);
  500|       |
  501|  16.9k|                                }
  502|  16.9k|                                while(bits_to_flush < u1_max_bins);
  ------------------
  |  Branch (502:39): [True: 9.13k, False: 7.84k]
  ------------------
  503|       |
  504|  7.84k|                                u4_value = ui_bins;
  505|  7.84k|                            }
  506|       |
  507|       |                            /*inline bypassbins_flc ends*/
  508|       |
  509|  7.84k|                        }
  510|       |
  511|       |                        //Value of K
  512|  53.1k|                        ui16_sufS += u4_value;
  513|  53.1k|                        i2_abs_lvl += ui16_sufS;
  514|       |
  515|  53.1k|                    }
  516|  1.28M|                    else
  517|  1.28M|                        i2_abs_lvl = 1 + ui_prefix;
  518|       |
  519|  1.33M|                    if(i2_abs_lvl > 1)
  ------------------
  |  Branch (519:24): [True: 227k, False: 1.11M]
  ------------------
  520|   227k|                    {
  521|   227k|                        u1_abs_level_gt1++;
  522|   227k|                    }
  523|  1.33M|                    if(!u1_abs_level_gt1)
  ------------------
  |  Branch (523:24): [True: 1.03M, False: 298k]
  ------------------
  524|  1.03M|                    {
  525|  1.03M|                        u1_abs_level_equal1++;
  526|  1.03M|                        u4_ctx_inc = (5 << 4) + MIN(u1_abs_level_equal1, 4);
  ------------------
  |  |   61|  1.03M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 696k, False: 343k]
  |  |  ------------------
  ------------------
  527|  1.03M|                    }
  528|   298k|                    else
  529|   298k|                        u4_ctx_inc = (5 + MIN(u1_abs_level_gt1, 4)) << 4;
  ------------------
  |  |   61|   298k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 234k, False: 64.8k]
  |  |  ------------------
  ------------------
  530|       |
  531|       |                    /*u4_ctx_inc = g_table_temp[u1_abs_level_gt1][u1_abs_level_equal1];*/
  532|       |
  533|       |                    /* encode coeff_sign_flag[i] */
  534|       |
  535|  1.33M|                    {
  536|  1.33M|                        u4_code_int_range = u4_code_int_range >> 1;
  537|       |
  538|  1.33M|                        if(u4_code_int_val_ofst >= (u4_code_int_range))
  ------------------
  |  Branch (538:28): [True: 407k, False: 930k]
  ------------------
  539|   407k|                        {
  540|       |                            /* S=1 */
  541|   407k|                            u4_code_int_val_ofst -= u4_code_int_range;
  542|   407k|                            i2_abs_lvl = (-i2_abs_lvl);
  543|   407k|                        }
  544|       |
  545|  1.33M|                    }
  546|  1.33M|                    num_sig_coeffs--;
  547|  1.33M|                    *pi2_coeff_data++ = i2_abs_lvl;
  548|  1.33M|                }
  549|  1.33M|            }
  550|  1.33M|            while(num_sig_coeffs > 0);
  ------------------
  |  Branch (550:19): [True: 852k, False: 486k]
  ------------------
  551|   486k|        }
  552|   486k|    }
  553|       |
  554|  1.09M|    if(u4_coded_flag)
  ------------------
  |  Branch (554:8): [True: 486k, False: 608k]
  ------------------
  555|   486k|    {
  556|   486k|        WORD32 offset;
  557|   486k|        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
  558|   486k|        offset = ALIGN4(offset);
  ------------------
  |  |   52|   486k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
  559|   486k|        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
  560|   486k|    }
  561|       |
  562|       |
  563|       |    /*updating structures*/
  564|  1.09M|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  565|  1.09M|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  566|  1.09M|    ps_bitstrm->u4_ofst = u4_offset;
  567|  1.09M|    return (u4_coded_flag);
  568|  1.09M|}
ih264d_read_coeff8x8_cabac:
  585|   118k|{
  586|   118k|    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
  587|   118k|    UWORD32 u4_offset, *pu4_buffer;
  588|   118k|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
  589|       |
  590|       |    /* High profile related declarations */
  591|   118k|    UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
  592|   118k|    const UWORD8 *pu1_lastcoeff_context_inc =
  593|   118k|                    (UWORD8 *)gau1_ih264d_lastcoeff_context_inc;
  594|   118k|    const UWORD8 *pu1_sigcoeff_context_inc;
  595|   118k|    bin_ctxt_model_t *ps_ctxt_sig_coeff;
  596|   118k|    WORD32 num_sig_coeffs = 0;
  597|   118k|    tu_blk8x8_coeff_data_t *ps_tu_8x8;
  598|   118k|    WORD16 *pi2_coeff_data;
  599|       |
  600|       |    /*loading from strcuctures*/
  601|       |
  602|   118k|    ps_tu_8x8 = (tu_blk8x8_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
  603|   118k|    ps_tu_8x8->au4_sig_coeff_map[0] = 0;
  604|   118k|    ps_tu_8x8->au4_sig_coeff_map[1] = 0;
  605|   118k|    pi2_coeff_data = &ps_tu_8x8->ai2_level[0];
  606|       |
  607|       |
  608|   118k|    if(!u1_field_coding_flag)
  ------------------
  |  Branch (608:8): [True: 118k, False: 0]
  ------------------
  609|   118k|    {
  610|   118k|        pu1_sigcoeff_context_inc =
  611|   118k|                        (UWORD8 *)gau1_ih264d_sigcoeff_context_inc_frame;
  612|       |
  613|       |        /*******************************************************************/
  614|       |        /* last coefficient context is derived from significant coeff u4_flag */
  615|       |        /* only significant coefficient matrix need to be initialized      */
  616|       |        /*******************************************************************/
  617|   118k|        ps_ctxt_sig_coeff = ps_dec->s_high_profile.ps_sigcoeff_8x8_frame;
  618|   118k|    }
  619|      0|    else
  620|      0|    {
  621|      0|        pu1_sigcoeff_context_inc =
  622|      0|                        (UWORD8 *)gau1_ih264d_sigcoeff_context_inc_field;
  623|       |
  624|       |        /*******************************************************************/
  625|       |        /* last coefficient context is derived from significant coeff u4_flag */
  626|       |        /* only significant coefficient matrix need to be initialized      */
  627|       |        /*******************************************************************/
  628|      0|        ps_ctxt_sig_coeff = ps_dec->s_high_profile.ps_sigcoeff_8x8_field;
  629|      0|    }
  630|       |
  631|       |    /*loading from strcuctures*/
  632|       |
  633|   118k|    u4_offset = ps_bitstrm->u4_ofst;
  634|   118k|    pu4_buffer = ps_bitstrm->pu4_buffer;
  635|       |
  636|   118k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  637|   118k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  638|       |
  639|   118k|    {
  640|   118k|        {
  641|   118k|            bin_ctxt_model_t *p_binCtxt_last, *p_binCtxt_last_org,
  642|   118k|                            *p_ctxt_sig_coeff_org;
  643|   118k|            UWORD32 uc_last_coeff_idx;
  644|   118k|            UWORD32 uc_bin;
  645|   118k|            UWORD32 i;
  646|       |
  647|   118k|            i = 0;
  648|       |
  649|   118k|            uc_last_coeff_idx = 63;
  650|       |
  651|   118k|            p_binCtxt_last_org = ps_ctxt_sig_coeff
  652|   118k|                            + LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT_8X8;
  ------------------
  |  |   85|   118k|#define LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT_8X8 15
  ------------------
  653|       |
  654|   118k|            p_ctxt_sig_coeff_org = ps_ctxt_sig_coeff;
  655|       |
  656|   118k|            do
  657|  3.71M|            {
  658|       |                /*inilined DecodeDecision_onebin begins*/
  659|  3.71M|                {
  660|  3.71M|                    UWORD32 u4_qnt_int_range, u4_int_range_lps;
  661|  3.71M|                    UWORD32 u4_symbol, u1_mps_state;
  662|  3.71M|                    UWORD32 table_lookup;
  663|  3.71M|                    const UWORD32 *pu4_table =
  664|  3.71M|                                    (const UWORD32 *)ps_cab_env->cabac_table;
  665|  3.71M|                    UWORD32 u4_clz;
  666|       |
  667|  3.71M|                    u1_mps_state = (ps_ctxt_sig_coeff->u1_mps_state);
  668|       |
  669|  3.71M|                    u4_clz = CLZ(u4_code_int_range);
  670|       |
  671|  3.71M|                    u4_qnt_int_range = u4_code_int_range << u4_clz;
  672|  3.71M|                    u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  673|       |
  674|  3.71M|                    table_lookup = pu4_table[(u1_mps_state << 2)
  675|  3.71M|                                    + u4_qnt_int_range];
  676|       |
  677|  3.71M|                    u4_int_range_lps = table_lookup & 0xff;
  678|       |
  679|  3.71M|                    u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  680|  3.71M|                    u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  681|  3.71M|                    u4_symbol = ((u1_mps_state >> 6) & 0x1);
  682|  3.71M|                    u1_mps_state = (table_lookup >> 8) & 0x7F;
  683|       |
  684|  3.71M|                    CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  184|  3.71M|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|  3.71M|{                                                                                         \
  |  |  186|  3.71M|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 118k, False: 3.59M]
  |  |  ------------------
  |  |  187|  3.71M|  {                                                                                         \
  |  |  188|   118k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|   118k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|   118k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|   118k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|   118k|  }                                                                                         \
  |  |  193|  3.71M|}
  ------------------
  685|  3.71M|                                 u4_symbol, u4_int_range_lps, u1_mps_state,
  686|  3.71M|                                 table_lookup)
  687|       |
  688|  3.71M|                    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_14)
  ------------------
  |  |  115|  3.71M|#define ONE_RIGHT_SHIFTED_BY_14 1<<14
  ------------------
  |  Branch (688:24): [True: 32.7k, False: 3.68M]
  ------------------
  689|  32.7k|                    {
  690|  32.7k|                        UWORD32 read_bits, u4_clz;
  691|  32.7k|                        u4_clz = CLZ(u4_code_int_range);
  692|  32.7k|                        NEXTBITS(read_bits, (u4_offset + 23), pu4_buffer,
  ------------------
  |  |  137|  32.7k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  32.7k|{                                                                           \
  |  |  139|  32.7k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  32.7k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  32.7k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  32.7k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  32.7k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 31.6k, False: 1.04k]
  |  |  ------------------
  |  |  144|  32.7k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  31.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  32.7k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  32.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  32.7k|}
  ------------------
  693|  32.7k|                                 u4_clz)
  694|  32.7k|                        FLUSHBITS(u4_offset, (u4_clz))
  ------------------
  |  |  193|  32.7k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  32.7k|{                                                                           \
  |  |  195|  32.7k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  32.7k|}
  ------------------
  695|  32.7k|                        u4_code_int_range = u4_code_int_range << u4_clz;
  696|  32.7k|                        u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz)
  697|  32.7k|                                        | read_bits;
  698|  32.7k|                    }
  699|       |
  700|  3.71M|                    ps_ctxt_sig_coeff->u1_mps_state = u1_mps_state;
  701|  3.71M|                    uc_bin = u4_symbol;
  702|  3.71M|                }
  703|       |                /*incrementing pointer to point to the context of the next bin*/
  704|  3.71M|                ps_ctxt_sig_coeff = p_ctxt_sig_coeff_org
  705|  3.71M|                                + pu1_sigcoeff_context_inc[i + 1];
  706|       |
  707|       |                /*inilined DecodeDecision_onebin ends*/
  708|  3.71M|                if(uc_bin)
  ------------------
  |  Branch (708:20): [True: 304k, False: 3.40M]
  ------------------
  709|   304k|                {
  710|   304k|                    num_sig_coeffs++;
  711|   304k|                    SET_BIT(ps_tu_8x8->au4_sig_coeff_map[i>31], (i > 31 ? i - 32:i));
  ------------------
  |  |  106|   608k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  |  |  ------------------
  |  |  |  Branch (106:43): [True: 24.6k, False: 279k]
  |  |  ------------------
  ------------------
  712|       |
  713|   304k|                    p_binCtxt_last = p_binCtxt_last_org
  714|   304k|                                    + pu1_lastcoeff_context_inc[i];
  715|       |
  716|       |                    /*inilined DecodeDecision_onebin begins*/
  717|       |
  718|   304k|                    {
  719|   304k|                        UWORD32 u4_qnt_int_range, u4_int_range_lps;
  720|   304k|                        UWORD32 u4_symbol, u1_mps_state;
  721|   304k|                        UWORD32 table_lookup;
  722|   304k|                        const UWORD32 *pu4_table =
  723|   304k|                                        (const UWORD32 *)ps_cab_env->cabac_table;
  724|   304k|                        UWORD32 u4_clz;
  725|       |
  726|   304k|                        u1_mps_state = (p_binCtxt_last->u1_mps_state);
  727|       |
  728|   304k|                        u4_clz = CLZ(u4_code_int_range);
  729|   304k|                        u4_qnt_int_range = u4_code_int_range << u4_clz;
  730|   304k|                        u4_qnt_int_range = (u4_qnt_int_range >> 29)
  731|   304k|                                        & 0x3;
  732|       |
  733|   304k|                        table_lookup = pu4_table[(u1_mps_state << 2)
  734|   304k|                                        + u4_qnt_int_range];
  735|   304k|                        u4_int_range_lps = table_lookup & 0xff;
  736|       |
  737|   304k|                        u4_int_range_lps = u4_int_range_lps
  738|   304k|                                        << (23 - u4_clz);
  739|       |
  740|   304k|                        u4_code_int_range = u4_code_int_range
  741|   304k|                                        - u4_int_range_lps;
  742|   304k|                        u4_symbol = ((u1_mps_state >> 6) & 0x1);
  743|   304k|                        u1_mps_state = (table_lookup >> 8) & 0x7F;
  744|       |
  745|   304k|                        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  184|   304k|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|   304k|{                                                                                         \
  |  |  186|   304k|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 31.3k, False: 273k]
  |  |  ------------------
  |  |  187|   304k|  {                                                                                         \
  |  |  188|  31.3k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|  31.3k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|  31.3k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|  31.3k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|  31.3k|  }                                                                                         \
  |  |  193|   304k|}
  ------------------
  746|   304k|                                     u4_symbol, u4_int_range_lps,
  747|   304k|                                     u1_mps_state, table_lookup)
  748|       |
  749|   304k|                        p_binCtxt_last->u1_mps_state = u1_mps_state;
  750|   304k|                        uc_bin = u4_symbol;
  751|   304k|                    }
  752|       |
  753|       |                    /*inilined DecodeDecision_onebin ends*/
  754|   304k|                    if(uc_bin == 1)
  ------------------
  |  Branch (754:24): [True: 74.3k, False: 230k]
  ------------------
  755|  74.3k|                        goto label_read_levels;
  756|       |
  757|   304k|                }
  758|       |
  759|  3.63M|                i = i + 1;
  760|       |
  761|  3.63M|            }
  762|  3.63M|            while(i < uc_last_coeff_idx);
  ------------------
  |  Branch (762:19): [True: 3.59M, False: 44.6k]
  ------------------
  763|       |
  764|  44.6k|            num_sig_coeffs++;
  765|  44.6k|            SET_BIT(ps_tu_8x8->au4_sig_coeff_map[i>31], (i > 31 ? i - 32:i));
  ------------------
  |  |  106|  89.3k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  |  |  ------------------
  |  |  |  Branch (106:43): [True: 44.6k, False: 0]
  |  |  ------------------
  ------------------
  766|       |
  767|   118k|            label_read_levels: ;
  768|   118k|        }
  769|       |
  770|       |        /// VALUE of No of Coeff in BLOCK = i + 1 for second case else i;
  771|       |
  772|       |        /* Decode coeff_abs_level_minus1 and coeff_sign_flag */
  773|      0|        {
  774|   118k|            WORD32 i2_abs_lvl;
  775|   118k|            UWORD32 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0;
  776|       |
  777|   118k|            UWORD32 u4_ctx_inc;
  778|   118k|            UWORD32 ui_prefix;
  779|   118k|            bin_ctxt_model_t *p_ctxt_abs_level;
  780|       |
  781|   118k|            p_ctxt_abs_level =
  782|   118k|                            ps_dec->p_coeff_abs_level_minus1_t[LUMA_8X8_CTXCAT];
  ------------------
  |  |   76|   118k|#define LUMA_8X8_CTXCAT   5
  ------------------
  783|   118k|            u4_ctx_inc = ((0x51));
  784|       |
  785|       |            /*****************************************************/
  786|       |            /* Main Loop runs for no. of Significant coefficient */
  787|       |            /*****************************************************/
  788|   118k|            do
  789|   349k|            {
  790|   349k|                {
  791|       |
  792|       |                    /*****************************************************/
  793|       |                    /* inilining a modified ih264d_decode_bins_unary     */
  794|       |                    /*****************************************************/
  795|       |
  796|   349k|                    {
  797|   349k|                        UWORD32 u4_value;
  798|   349k|                        UWORD32 u4_symbol;
  799|   349k|                        bin_ctxt_model_t *ps_bin_ctxt;
  800|   349k|                        UWORD32 u4_ctx_Inc;
  801|   349k|                        u4_value = 0;
  802|       |
  803|   349k|                        u4_ctx_Inc = u4_ctx_inc & 0xf;
  804|   349k|                        ps_bin_ctxt = p_ctxt_abs_level + u4_ctx_Inc;
  805|       |
  806|   349k|                        do
  807|   522k|                        {
  808|   522k|                            {
  809|   522k|                                UWORD32 u4_qnt_int_range,
  810|   522k|                                                u4_int_range_lps;
  811|   522k|                                UWORD32 u1_mps_state;
  812|   522k|                                UWORD32 table_lookup;
  813|   522k|                                const UWORD32 *pu4_table =
  814|   522k|                                                (const UWORD32 *)ps_cab_env->cabac_table;
  815|   522k|                                UWORD32 u4_clz;
  816|       |
  817|   522k|                                u1_mps_state = (ps_bin_ctxt->u1_mps_state);
  818|   522k|                                u4_clz = CLZ(u4_code_int_range);
  819|   522k|                                u4_qnt_int_range = u4_code_int_range
  820|   522k|                                                << u4_clz;
  821|   522k|                                u4_qnt_int_range = (u4_qnt_int_range
  822|   522k|                                                >> 29) & 0x3;
  823|   522k|                                table_lookup = pu4_table[(u1_mps_state << 2)
  824|   522k|                                                + u4_qnt_int_range];
  825|   522k|                                u4_int_range_lps = table_lookup & 0xff;
  826|       |
  827|   522k|                                u4_int_range_lps = u4_int_range_lps
  828|   522k|                                                << (23 - u4_clz);
  829|   522k|                                u4_code_int_range = u4_code_int_range
  830|   522k|                                                - u4_int_range_lps;
  831|   522k|                                u4_symbol = ((u1_mps_state >> 6) & 0x1);
  832|   522k|                                u1_mps_state = (table_lookup >> 8) & 0x7F;
  833|       |
  834|   522k|                                CHECK_IF_LPS(u4_code_int_range,
  ------------------
  |  |  184|   522k|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|   522k|{                                                                                         \
  |  |  186|   522k|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 51.1k, False: 471k]
  |  |  ------------------
  |  |  187|   522k|  {                                                                                         \
  |  |  188|  51.1k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|  51.1k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|  51.1k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|  51.1k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|  51.1k|  }                                                                                         \
  |  |  193|   522k|}
  ------------------
  835|   522k|                                             u4_code_int_val_ofst, u4_symbol,
  836|   522k|                                             u4_int_range_lps, u1_mps_state,
  837|   522k|                                             table_lookup)
  838|       |
  839|   522k|                                if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
  ------------------
  |  |  114|   522k|#define ONE_RIGHT_SHIFTED_BY_9    1<<9
  ------------------
  |  Branch (839:36): [True: 10.2k, False: 512k]
  ------------------
  840|  10.2k|                                {
  841|       |
  842|  10.2k|                                    RENORM_RANGE_OFFSET(u4_code_int_range,
  ------------------
  |  |  170|  10.2k|  {                                                                                         \
  |  |  171|  10.2k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  10.2k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  10.2k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  10.2k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  10.2k|{                                                                           \
  |  |  |  |  139|  10.2k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  10.2k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  10.2k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  10.2k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  10.2k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 9.68k, False: 519]
  |  |  |  |  ------------------
  |  |  |  |  144|  10.2k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  9.68k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  10.2k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  10.2k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  10.2k|}
  |  |  ------------------
  |  |  174|  10.2k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  10.2k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  10.2k|{                                                                           \
  |  |  |  |  195|  10.2k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  10.2k|}
  |  |  ------------------
  |  |  175|  10.2k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  10.2k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  10.2k|  }
  ------------------
  843|  10.2k|                                                        u4_code_int_val_ofst,
  844|  10.2k|                                                        u4_offset, pu4_buffer)
  845|  10.2k|                                }
  846|       |
  847|   522k|                                ps_bin_ctxt->u1_mps_state = u1_mps_state;
  848|   522k|                            }
  849|       |
  850|   522k|                            u4_value++;
  851|   522k|                            ps_bin_ctxt = p_ctxt_abs_level + (u4_ctx_inc >> 4);
  852|       |
  853|   522k|                        }
  854|   522k|                        while(u4_symbol && (u4_value < UCOFF_LEVEL));
  ------------------
  |  |   41|   181k|#define UCOFF_LEVEL  14
  ------------------
  |  Branch (854:31): [True: 181k, False: 340k]
  |  Branch (854:44): [True: 173k, False: 8.63k]
  ------------------
  855|       |
  856|   349k|                        ui_prefix = u4_value - 1 + u4_symbol;
  857|   349k|                    }
  858|       |
  859|   349k|                    if(ui_prefix == UCOFF_LEVEL)
  ------------------
  |  |   41|   349k|#define UCOFF_LEVEL  14
  ------------------
  |  Branch (859:24): [True: 8.63k, False: 340k]
  ------------------
  860|  8.63k|                    {
  861|  8.63k|                        UWORD32 ui16_sufS = 0;
  862|  8.63k|                        UWORD32 u1_max_bins;
  863|  8.63k|                        UWORD32 u4_value;
  864|       |
  865|  8.63k|                        i2_abs_lvl = UCOFF_LEVEL;
  ------------------
  |  |   41|  8.63k|#define UCOFF_LEVEL  14
  ------------------
  866|       |                        /*inlining ih264d_decode_bypass_bins_unary begins*/
  867|       |
  868|  8.63k|                        {
  869|  8.63k|                            UWORD32 uc_bin;
  870|  8.63k|                            UWORD32 bits_to_flush;
  871|       |
  872|       |
  873|  8.63k|                            bits_to_flush = 0;
  874|       |                            /*renormalize to ensure there 23 bits more in the u4_code_int_val_ofst*/
  875|  8.63k|                            {
  876|  8.63k|                                UWORD32 u4_clz, read_bits;
  877|       |
  878|  8.63k|                                u4_clz = CLZ(u4_code_int_range);
  879|  8.63k|                                FLUSHBITS(u4_offset, u4_clz)
  ------------------
  |  |  193|  8.63k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  8.63k|{                                                                           \
  |  |  195|  8.63k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  8.63k|}
  ------------------
  880|  8.63k|                                NEXTBITS(read_bits, u4_offset, pu4_buffer, CABAC_BITS_TO_READ)
  ------------------
  |  |  137|  8.63k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  8.63k|{                                                                           \
  |  |  139|  8.63k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  8.63k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  8.63k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  8.63k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  8.63k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 8.32k, False: 304]
  |  |  ------------------
  |  |  144|  8.63k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  8.32k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  8.63k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  8.63k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  8.63k|}
  ------------------
  881|  8.63k|                                u4_code_int_range = u4_code_int_range << u4_clz;
  882|  8.63k|                                u4_code_int_val_ofst = (u4_code_int_val_ofst
  883|  8.63k|                                                << u4_clz) | read_bits;
  884|  8.63k|                            }
  885|       |
  886|  8.63k|                            do
  887|  14.3k|                            {
  888|  14.3k|                                bits_to_flush++;
  889|       |
  890|  14.3k|                                u4_code_int_range = u4_code_int_range >> 1;
  891|       |
  892|  14.3k|                                if(u4_code_int_val_ofst >= u4_code_int_range)
  ------------------
  |  Branch (892:36): [True: 5.78k, False: 8.60k]
  ------------------
  893|  5.78k|                                {
  894|       |                                    /* S=1 */
  895|  5.78k|                                    uc_bin = 1;
  896|  5.78k|                                    u4_code_int_val_ofst -= u4_code_int_range;
  897|  5.78k|                                }
  898|  8.60k|                                else
  899|  8.60k|                                {
  900|       |                                    /* S=0 */
  901|  8.60k|                                    uc_bin = 0;
  902|  8.60k|                                }
  903|       |
  904|  14.3k|                            }
  905|  14.3k|                            while(uc_bin && (bits_to_flush < CABAC_BITS_TO_READ));
  ------------------
  |  |  618|  5.78k|#define CABAC_BITS_TO_READ 23
  ------------------
  |  Branch (905:35): [True: 5.78k, False: 8.60k]
  |  Branch (905:45): [True: 5.75k, False: 29]
  ------------------
  906|       |
  907|  8.63k|                            u4_value = (bits_to_flush - 1);
  908|  8.63k|                        }
  909|       |                        /*inlining ih264d_decode_bypass_bins_unary ends*/
  910|       |
  911|  8.63k|                        ui16_sufS = (1 << u4_value);
  912|  8.63k|                        u1_max_bins = u4_value;
  913|       |
  914|  8.63k|                        if(u4_value > 0)
  ------------------
  |  Branch (914:28): [True: 1.18k, False: 7.44k]
  ------------------
  915|  1.18k|                        {
  916|       |                            /*inline bypassbins_flc begins*/
  917|       |
  918|  1.18k|                            if(u4_value > 10)
  ------------------
  |  Branch (918:32): [True: 208, False: 977]
  ------------------
  919|    208|                            {
  920|    208|                                UWORD32 u4_clz, read_bits;
  921|       |
  922|    208|                                u4_clz = CLZ(u4_code_int_range);
  923|    208|                                FLUSHBITS(u4_offset, u4_clz)
  ------------------
  |  |  193|    208|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|    208|{                                                                           \
  |  |  195|    208|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|    208|}
  ------------------
  924|    208|                                NEXTBITS(read_bits, u4_offset, pu4_buffer, CABAC_BITS_TO_READ)
  ------------------
  |  |  137|    208|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|    208|{                                                                           \
  |  |  139|    208|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|    208|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|    208|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|    208|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|    208|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 141, False: 67]
  |  |  ------------------
  |  |  144|    208|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|    141|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|    208|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|    208|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|    208|}
  ------------------
  925|    208|                                u4_code_int_range = u4_code_int_range << u4_clz;
  926|    208|                                u4_code_int_val_ofst = (u4_code_int_val_ofst
  927|    208|                                                << u4_clz) | read_bits;
  928|    208|                            }
  929|       |
  930|  1.18k|                            {
  931|  1.18k|                                UWORD32 ui_bins;
  932|  1.18k|                                UWORD32 uc_bin;
  933|  1.18k|                                UWORD32 bits_to_flush;
  934|       |
  935|  1.18k|                                ui_bins = 0;
  936|  1.18k|                                bits_to_flush = 0;
  937|       |
  938|  1.18k|                                do
  939|  5.75k|                                {
  940|  5.75k|                                    bits_to_flush++;
  941|       |
  942|  5.75k|                                    u4_code_int_range = u4_code_int_range >> 1;
  943|       |
  944|  5.75k|                                    if(u4_code_int_val_ofst
  ------------------
  |  Branch (944:40): [True: 2.36k, False: 3.39k]
  ------------------
  945|  5.75k|                                                    >= u4_code_int_range)
  946|  2.36k|                                    {
  947|       |                                        /* S=1 */
  948|  2.36k|                                        uc_bin = 1;
  949|  2.36k|                                        u4_code_int_val_ofst -=
  950|  2.36k|                                                        u4_code_int_range;
  951|  2.36k|                                    }
  952|  3.39k|                                    else
  953|  3.39k|                                    {
  954|       |                                        /* S=0 */
  955|  3.39k|                                        uc_bin = 0;
  956|  3.39k|                                    }
  957|       |
  958|  5.75k|                                    ui_bins = ((ui_bins << 1) | uc_bin);
  959|       |
  960|  5.75k|                                }
  961|  5.75k|                                while(bits_to_flush < u1_max_bins);
  ------------------
  |  Branch (961:39): [True: 4.56k, False: 1.18k]
  ------------------
  962|       |
  963|  1.18k|                                u4_value = ui_bins;
  964|  1.18k|                            }
  965|       |                            /*inline bypassbins_flc ends*/
  966|  1.18k|                        }
  967|       |
  968|       |                        //Value of K
  969|  8.63k|                        ui16_sufS += u4_value;
  970|  8.63k|                        i2_abs_lvl += (WORD32)ui16_sufS;
  971|  8.63k|                    }
  972|   340k|                    else
  973|   340k|                    {
  974|   340k|                        i2_abs_lvl = 1 + ui_prefix;
  975|   340k|                    }
  976|       |
  977|   349k|                    if(i2_abs_lvl > 1)
  ------------------
  |  Branch (977:24): [True: 39.5k, False: 309k]
  ------------------
  978|  39.5k|                    {
  979|  39.5k|                        u1_abs_level_gt1++;
  980|  39.5k|                    }
  981|   349k|                    if(!u1_abs_level_gt1)
  ------------------
  |  Branch (981:24): [True: 288k, False: 60.3k]
  ------------------
  982|   288k|                    {
  983|   288k|                        u1_abs_level_equal1++;
  984|   288k|                        u4_ctx_inc = (5 << 4) + MIN(u1_abs_level_equal1, 4);
  ------------------
  |  |   61|   288k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 190k, False: 98.0k]
  |  |  ------------------
  ------------------
  985|   288k|                    }
  986|  60.3k|                    else
  987|  60.3k|                    {
  988|  60.3k|                        u4_ctx_inc = (5 + MIN(u1_abs_level_gt1, 4)) << 4;
  ------------------
  |  |   61|  60.3k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 40.2k, False: 20.1k]
  |  |  ------------------
  ------------------
  989|  60.3k|                    }
  990|       |
  991|       |                    /*u4_ctx_inc = g_table_temp[u1_abs_level_gt1][u1_abs_level_equal1];*/
  992|       |
  993|       |                    /* encode coeff_sign_flag[i] */
  994|       |
  995|   349k|                    {
  996|   349k|                        u4_code_int_range = u4_code_int_range >> 1;
  997|       |
  998|   349k|                        if(u4_code_int_val_ofst >= (u4_code_int_range))
  ------------------
  |  Branch (998:28): [True: 90.9k, False: 258k]
  ------------------
  999|  90.9k|                        {
 1000|       |                            /* S=1 */
 1001|  90.9k|                            u4_code_int_val_ofst -= u4_code_int_range;
 1002|  90.9k|                            i2_abs_lvl = (-i2_abs_lvl);
 1003|  90.9k|                        }
 1004|   349k|                    }
 1005|       |
 1006|   349k|                    *pi2_coeff_data++ = i2_abs_lvl;
 1007|   349k|                    num_sig_coeffs--;
 1008|   349k|                }
 1009|   349k|            }
 1010|   349k|            while(num_sig_coeffs > 0);
  ------------------
  |  Branch (1010:19): [True: 230k, False: 118k]
  ------------------
 1011|   118k|        }
 1012|   118k|    }
 1013|       |
 1014|      0|    {
 1015|   118k|        WORD32 offset;
 1016|   118k|        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_8x8;
 1017|   118k|        offset = ALIGN4(offset);
  ------------------
  |  |   52|   118k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
 1018|   118k|        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
 1019|   118k|    }
 1020|       |
 1021|       |    /*updating structures*/
 1022|   118k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
 1023|   118k|    ps_cab_env->u4_code_int_range = u4_code_int_range;
 1024|   118k|    ps_bitstrm->u4_ofst = u4_offset;
 1025|   118k|}
ih264d_cabac_parse_8x8block:
 1072|   241k|{
 1073|   241k|    UWORD32 u4_ctxinc, u4_subblock_coded;
 1074|   241k|    UWORD32 u4_top0, u4_top1;
 1075|   241k|    UWORD32 u4_csbp = 0;
 1076|   241k|    UWORD32 u4_idx = 0;
 1077|   241k|    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
 1078|   241k|    bin_ctxt_model_t * const ps_cbf = ps_dec->p_cbf_t[u4_ctx_cat];
 1079|   241k|    bin_ctxt_model_t *ps_src_bin_ctxt;
 1080|   241k|    bin_ctxt_model_t * const ps_sig_coeff_flag =
 1081|   241k|                    ps_dec->p_significant_coeff_flag_t[u4_ctx_cat];
 1082|       |
 1083|   241k|    UWORD8 *pu1_inv_scan = ps_dec->pu1_inv_scan;
 1084|       |
 1085|       |    /*------------------------------------------------------*/
 1086|       |    /* Residual 4x4 decoding: SubBlock 0                    */
 1087|       |    /*------------------------------------------------------*/
 1088|   241k|    u4_ctxinc = ((!!pu1_top_nnz[0]) << 1) + (!!pu1_left_nnz[0]);
 1089|       |
 1090|   241k|    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
 1091|       |
 1092|   241k|    u4_top0 = ih264d_read_coeff4x4_cabac( ps_bitstrm,
 1093|   241k|                                         u4_ctx_cat, ps_sig_coeff_flag, ps_dec,
 1094|   241k|                                         ps_src_bin_ctxt);
 1095|       |
 1096|   241k|    INSERT_BIT(u4_csbp, u4_idx, u4_top0);
  ------------------
  |  |  109|   241k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|   241k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1097|       |
 1098|       |    /*------------------------------------------------------*/
 1099|       |    /* Residual 4x4 decoding: SubBlock 1                    */
 1100|       |    /*------------------------------------------------------*/
 1101|   241k|    u4_idx++;
 1102|   241k|    pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|   241k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1103|   241k|    u4_ctxinc = ((!!pu1_top_nnz[1]) << 1) + u4_top0;
 1104|       |
 1105|   241k|    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
 1106|       |
 1107|   241k|    u4_top1 = ih264d_read_coeff4x4_cabac(ps_bitstrm,
 1108|   241k|                                         u4_ctx_cat, ps_sig_coeff_flag, ps_dec,
 1109|   241k|                                         ps_src_bin_ctxt);
 1110|       |
 1111|   241k|    INSERT_BIT(u4_csbp, u4_idx, u4_top1);
  ------------------
  |  |  109|   241k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|   241k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1112|   241k|    pu1_left_nnz[0] = u4_top1;
 1113|       |
 1114|       |    /*------------------------------------------------------*/
 1115|       |    /* Residual 4x4 decoding: SubBlock 2                    */
 1116|       |    /*------------------------------------------------------*/
 1117|   241k|    u4_idx += (u4_sub_block_strd - 1);
 1118|   241k|    pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|   241k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1119|   241k|    u4_ctxinc = (u4_top0 << 1) + (!!pu1_left_nnz[1]);
 1120|       |
 1121|   241k|    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
 1122|       |
 1123|   241k|    u4_subblock_coded = ih264d_read_coeff4x4_cabac(ps_bitstrm, u4_ctx_cat,
 1124|   241k|                                                   ps_sig_coeff_flag, ps_dec,
 1125|   241k|                                                   ps_src_bin_ctxt);
 1126|       |
 1127|   241k|    INSERT_BIT(u4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|   241k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|   241k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1128|   241k|    pu1_top_nnz[0] = u4_subblock_coded;
 1129|       |
 1130|       |    /*------------------------------------------------------*/
 1131|       |    /* Residual 4x4 decoding: SubBlock 3                    */
 1132|       |    /*------------------------------------------------------*/
 1133|   241k|    u4_idx++;
 1134|   241k|    pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|   241k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1135|   241k|    u4_ctxinc = (u4_top1 << 1) + u4_subblock_coded;
 1136|       |
 1137|   241k|    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
 1138|       |
 1139|   241k|    u4_subblock_coded = ih264d_read_coeff4x4_cabac(ps_bitstrm, u4_ctx_cat,
 1140|   241k|                                                   ps_sig_coeff_flag, ps_dec,
 1141|   241k|                                                   ps_src_bin_ctxt);
 1142|       |
 1143|   241k|    INSERT_BIT(u4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|   241k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|   241k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1144|   241k|    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_subblock_coded;
 1145|       |
 1146|   241k|    return (u4_csbp);
 1147|   241k|}
ih264d_parse_residual4x4_cabac:
 1164|   372k|{
 1165|   372k|    UWORD8 u1_cbp = ps_cur_mb_info->u1_cbp;
 1166|   372k|    UWORD16 ui16_csbp = 0;
 1167|   372k|    WORD16 *pi2_residual_buf;
 1168|   372k|    UWORD8 uc_ctx_cat;
 1169|   372k|    UWORD8 *pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
 1170|   372k|    UWORD8 *pu1_left_nnz = ps_dec->pu1_left_nnz_y;
 1171|   372k|    UWORD8 *pu1_top_nnz_uv = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
 1172|   372k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
 1173|   372k|    ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
 1174|   372k|    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
 1175|   372k|    UWORD32 u4_nbr_avail = ps_dec->u1_mb_ngbr_availablity;
 1176|   372k|    WORD16 *pi2_coeff_block = NULL;
 1177|   372k|    bin_ctxt_model_t *ps_src_bin_ctxt;
 1178|       |
 1179|   372k|    UWORD8 u1_top_dc_csbp = (ps_top_ctxt->u1_yuv_dc_csbp) >> 1;
 1180|   372k|    UWORD8 u1_left_dc_csbp = (ps_dec->pu1_left_yuv_dc_csbp[0]) >> 1;
 1181|       |
 1182|       |
 1183|   372k|    if(!(u4_nbr_avail & TOP_MB_AVAILABLE_MASK))
  ------------------
  |  |   55|   372k|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  |  Branch (1183:8): [True: 58.7k, False: 313k]
  ------------------
 1184|  58.7k|    {
 1185|  58.7k|        if(p_curr_ctxt->u1_mb_type & CAB_INTRA_MASK)
  ------------------
  |  |  406|  58.7k|#define CAB_INTRA_MASK    0x04 /* 0000 0100 */
  ------------------
  |  Branch (1185:12): [True: 35.4k, False: 23.3k]
  ------------------
 1186|  35.4k|        {
 1187|  35.4k|            *(UWORD32 *)pu1_top_nnz = 0;
 1188|  35.4k|            u1_top_dc_csbp = 0;
 1189|  35.4k|            *(UWORD32 *)pu1_top_nnz_uv = 0;
 1190|  35.4k|        }
 1191|  23.3k|        else
 1192|  23.3k|        {
 1193|  23.3k|            *(UWORD32 *)pu1_top_nnz = 0x01010101;
 1194|  23.3k|            u1_top_dc_csbp = 0x3;
 1195|  23.3k|            *(UWORD32 *)pu1_top_nnz_uv = 0x01010101;
 1196|  23.3k|        }
 1197|  58.7k|    }
 1198|   313k|    else
 1199|   313k|    {
 1200|   313k|        UWORD32 *pu4_buf;
 1201|   313k|        UWORD8 *pu1_buf;
 1202|   313k|        pu1_buf = ps_cur_mb_info->ps_top_mb->pu1_nnz_y;
 1203|   313k|        pu4_buf = (UWORD32 *)pu1_buf;
 1204|   313k|        *(UWORD32 *)(pu1_top_nnz) = *pu4_buf;
 1205|       |
 1206|   313k|        pu1_buf = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv;
 1207|   313k|        pu4_buf = (UWORD32 *)pu1_buf;
 1208|   313k|        *(UWORD32 *)(pu1_top_nnz_uv) = *pu4_buf;
 1209|       |
 1210|   313k|    }
 1211|       |
 1212|   372k|    if(!(u4_nbr_avail & LEFT_MB_AVAILABLE_MASK))
  ------------------
  |  |   53|   372k|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  |  Branch (1212:8): [True: 75.1k, False: 297k]
  ------------------
 1213|  75.1k|    {
 1214|  75.1k|        if(p_curr_ctxt->u1_mb_type & CAB_INTRA_MASK)
  ------------------
  |  |  406|  75.1k|#define CAB_INTRA_MASK    0x04 /* 0000 0100 */
  ------------------
  |  Branch (1214:12): [True: 57.3k, False: 17.7k]
  ------------------
 1215|  57.3k|        {
 1216|  57.3k|            UWORD32 *pu4_buf;
 1217|  57.3k|            UWORD8 *pu1_buf;
 1218|  57.3k|            *(UWORD32 *)pu1_left_nnz = 0;
 1219|  57.3k|            u1_left_dc_csbp = 0;
 1220|  57.3k|            pu1_buf = ps_dec->pu1_left_nnz_uv;
 1221|  57.3k|            pu4_buf = (UWORD32 *)pu1_buf;
 1222|  57.3k|            *pu4_buf = 0;
 1223|  57.3k|        }
 1224|  17.7k|        else
 1225|  17.7k|        {
 1226|  17.7k|            UWORD32 *pu4_buf;
 1227|  17.7k|            UWORD8 *pu1_buf;
 1228|  17.7k|            *(UWORD32 *)pu1_left_nnz = 0x01010101;
 1229|  17.7k|            u1_left_dc_csbp = 0x3;
 1230|  17.7k|            pu1_buf = ps_dec->pu1_left_nnz_uv;
 1231|  17.7k|            pu4_buf = (UWORD32 *)pu1_buf;
 1232|  17.7k|            *pu4_buf = 0x01010101;
 1233|  17.7k|        }
 1234|  75.1k|    }
 1235|       |
 1236|   372k|    uc_ctx_cat = u1_offset ? LUMA_AC_CTXCAT : LUMA_4X4_CTXCAT;
  ------------------
  |  |   72|  43.9k|#define LUMA_AC_CTXCAT    1
  ------------------
                  uc_ctx_cat = u1_offset ? LUMA_AC_CTXCAT : LUMA_4X4_CTXCAT;
  ------------------
  |  |   73|   701k|#define LUMA_4X4_CTXCAT   2
  ------------------
  |  Branch (1236:18): [True: 43.9k, False: 328k]
  ------------------
 1237|       |
 1238|   372k|    ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6;
 1239|   372k|    ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6;
 1240|   372k|    ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6;
 1241|   372k|    ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6;
 1242|       |    // CHECK_THIS
 1243|   372k|    ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6;
 1244|   372k|    ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6;
 1245|       |
 1246|   372k|    if(u1_cbp & 0x0f)
  ------------------
  |  Branch (1246:8): [True: 116k, False: 255k]
  ------------------
 1247|   116k|    {
 1248|   116k|        if(ps_cur_mb_info->u1_tran_form8x8 == 0)
  ------------------
  |  Branch (1248:12): [True: 76.5k, False: 40.3k]
  ------------------
 1249|  76.5k|        {
 1250|       |            /*******************************************************************/
 1251|       |            /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */
 1252|       |            /*******************************************************************/
 1253|  76.5k|            if(!(u1_cbp & 0x1))
  ------------------
  |  Branch (1253:16): [True: 27.4k, False: 49.0k]
  ------------------
 1254|  27.4k|            {
 1255|  27.4k|                *(UWORD16 *)(pu1_top_nnz) = 0;
 1256|  27.4k|                *(UWORD16 *)(pu1_left_nnz) = 0;
 1257|  27.4k|            }
 1258|  49.0k|            else
 1259|  49.0k|            {
 1260|  49.0k|                ui16_csbp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 4,
 1261|  49.0k|                                                        uc_ctx_cat, ps_dec,
 1262|  49.0k|                                                        pu1_top_nnz,
 1263|  49.0k|                                                        pu1_left_nnz);
 1264|  49.0k|            }
 1265|       |
 1266|       |            /*******************************************************************/
 1267|       |            /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */
 1268|       |            /*******************************************************************/
 1269|  76.5k|            pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  76.5k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1270|  76.5k|            if(!(u1_cbp & 0x2))
  ------------------
  |  Branch (1270:16): [True: 30.8k, False: 45.6k]
  ------------------
 1271|  30.8k|            {
 1272|  30.8k|                *(UWORD16 *)(pu1_top_nnz + 2) = 0;
 1273|  30.8k|                *(UWORD16 *)(pu1_left_nnz) = 0;
 1274|  30.8k|            }
 1275|  45.6k|            else
 1276|  45.6k|            {
 1277|  45.6k|                UWORD32 u4_temp = ih264d_cabac_parse_8x8block(pi2_coeff_block,
 1278|  45.6k|                                                              4, uc_ctx_cat,
 1279|  45.6k|                                                              ps_dec,
 1280|  45.6k|                                                              (pu1_top_nnz + 2),
 1281|  45.6k|                                                              pu1_left_nnz);
 1282|  45.6k|                ui16_csbp |= (u4_temp << 2);
 1283|  45.6k|            }
 1284|       |
 1285|       |            /*******************************************************************/
 1286|       |            /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */
 1287|       |            /*******************************************************************/
 1288|  76.5k|            pi2_coeff_block += (6 * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  76.5k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1289|  76.5k|            if(!(u1_cbp & 0x4))
  ------------------
  |  Branch (1289:16): [True: 21.0k, False: 55.4k]
  ------------------
 1290|  21.0k|            {
 1291|  21.0k|                *(UWORD16 *)(pu1_top_nnz) = 0;
 1292|  21.0k|                *(UWORD16 *)(pu1_left_nnz + 2) = 0;
 1293|  21.0k|            }
 1294|  55.4k|            else
 1295|  55.4k|            {
 1296|  55.4k|                UWORD32 u4_temp = ih264d_cabac_parse_8x8block(
 1297|  55.4k|                                pi2_coeff_block, 4, uc_ctx_cat, ps_dec,
 1298|  55.4k|                                pu1_top_nnz, (pu1_left_nnz + 2));
 1299|  55.4k|                ui16_csbp |= (u4_temp << 8);
 1300|  55.4k|            }
 1301|       |
 1302|       |            /*******************************************************************/
 1303|       |            /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/
 1304|       |            /*******************************************************************/
 1305|  76.5k|            pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  76.5k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1306|  76.5k|            if(!(u1_cbp & 0x8))
  ------------------
  |  Branch (1306:16): [True: 25.0k, False: 51.4k]
  ------------------
 1307|  25.0k|            {
 1308|  25.0k|                *(UWORD16 *)(pu1_top_nnz + 2) = 0;
 1309|  25.0k|                *(UWORD16 *)(pu1_left_nnz + 2) = 0;
 1310|  25.0k|            }
 1311|  51.4k|            else
 1312|  51.4k|            {
 1313|  51.4k|                UWORD32 u4_temp = ih264d_cabac_parse_8x8block(
 1314|  51.4k|                                pi2_coeff_block, 4, uc_ctx_cat, ps_dec,
 1315|  51.4k|                                (pu1_top_nnz + 2), (pu1_left_nnz + 2));
 1316|  51.4k|                ui16_csbp |= (u4_temp << 10);
 1317|  51.4k|            }
 1318|       |
 1319|  76.5k|        }
 1320|  40.3k|        else
 1321|  40.3k|        {
 1322|  40.3k|            ui16_csbp = 0;
 1323|       |
 1324|       |            /*******************************************************************/
 1325|       |            /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */
 1326|       |            /*******************************************************************/
 1327|  40.3k|            if(!(u1_cbp & 0x1))
  ------------------
  |  Branch (1327:16): [True: 9.51k, False: 30.8k]
  ------------------
 1328|  9.51k|            {
 1329|  9.51k|                *(UWORD16 *)(pu1_top_nnz) = 0;
 1330|  9.51k|                *(UWORD16 *)(pu1_left_nnz) = 0;
 1331|  9.51k|            }
 1332|  30.8k|            else
 1333|  30.8k|            {
 1334|       |
 1335|  30.8k|                dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
 1336|       |
 1337|  30.8k|                ih264d_read_coeff8x8_cabac( ps_bitstrm,
 1338|  30.8k|                                           ps_dec, ps_cur_mb_info);
 1339|       |
 1340|  30.8k|                pu1_left_nnz[0] = 1;
 1341|  30.8k|                pu1_left_nnz[1] = 1;
 1342|       |
 1343|  30.8k|                pu1_top_nnz[0] = 1;
 1344|  30.8k|                pu1_top_nnz[1] = 1;
 1345|       |
 1346|       |                /* added to be used by BS computation module */
 1347|  30.8k|                ui16_csbp |= 0x0033;
 1348|  30.8k|            }
 1349|       |
 1350|       |            /*******************************************************************/
 1351|       |            /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */
 1352|       |            /*******************************************************************/
 1353|  40.3k|            pi2_coeff_block += 64;
 1354|       |
 1355|  40.3k|            if(!(u1_cbp & 0x2))
  ------------------
  |  Branch (1355:16): [True: 10.7k, False: 29.5k]
  ------------------
 1356|  10.7k|            {
 1357|  10.7k|                *(UWORD16 *)(pu1_top_nnz + 2) = 0;
 1358|  10.7k|                *(UWORD16 *)(pu1_left_nnz) = 0;
 1359|  10.7k|            }
 1360|  29.5k|            else
 1361|  29.5k|            {
 1362|       |
 1363|       |
 1364|  29.5k|                dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
 1365|       |
 1366|  29.5k|                ih264d_read_coeff8x8_cabac(ps_bitstrm,
 1367|  29.5k|                                           ps_dec, ps_cur_mb_info);
 1368|       |
 1369|  29.5k|                pu1_left_nnz[0] = 1;
 1370|  29.5k|                pu1_left_nnz[1] = 1;
 1371|       |
 1372|  29.5k|                pu1_top_nnz[2] = 1;
 1373|  29.5k|                pu1_top_nnz[3] = 1;
 1374|       |
 1375|       |                /* added to be used by BS computation module */
 1376|  29.5k|                ui16_csbp |= 0x00CC;
 1377|       |
 1378|  29.5k|            }
 1379|       |
 1380|       |            /*******************************************************************/
 1381|       |            /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */
 1382|       |            /*******************************************************************/
 1383|  40.3k|            pi2_coeff_block += 64;
 1384|  40.3k|            if(!(u1_cbp & 0x4))
  ------------------
  |  Branch (1384:16): [True: 9.50k, False: 30.8k]
  ------------------
 1385|  9.50k|            {
 1386|  9.50k|                *(UWORD16 *)(pu1_top_nnz) = 0;
 1387|  9.50k|                *(UWORD16 *)(pu1_left_nnz + 2) = 0;
 1388|  9.50k|            }
 1389|  30.8k|            else
 1390|  30.8k|            {
 1391|       |
 1392|  30.8k|                dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
 1393|       |
 1394|  30.8k|                ih264d_read_coeff8x8_cabac(ps_bitstrm,
 1395|  30.8k|                                           ps_dec, ps_cur_mb_info);
 1396|       |
 1397|  30.8k|                pu1_left_nnz[2] = 1;
 1398|  30.8k|                pu1_left_nnz[3] = 1;
 1399|       |
 1400|  30.8k|                pu1_top_nnz[0] = 1;
 1401|  30.8k|                pu1_top_nnz[1] = 1;
 1402|       |
 1403|       |                /* added to be used by BS computation module */
 1404|  30.8k|                ui16_csbp |= 0x3300;
 1405|  30.8k|            }
 1406|       |
 1407|       |            /*******************************************************************/
 1408|       |            /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/
 1409|       |            /*******************************************************************/
 1410|  40.3k|            pi2_coeff_block += 64;
 1411|       |
 1412|  40.3k|            if(!(u1_cbp & 0x8))
  ------------------
  |  Branch (1412:16): [True: 12.5k, False: 27.7k]
  ------------------
 1413|  12.5k|            {
 1414|  12.5k|                *(UWORD16 *)(pu1_top_nnz + 2) = 0;
 1415|  12.5k|                *(UWORD16 *)(pu1_left_nnz + 2) = 0;
 1416|  12.5k|            }
 1417|  27.7k|            else
 1418|  27.7k|            {
 1419|       |
 1420|  27.7k|                dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
 1421|       |
 1422|  27.7k|                ih264d_read_coeff8x8_cabac(ps_bitstrm,
 1423|  27.7k|                                           ps_dec, ps_cur_mb_info);
 1424|       |
 1425|  27.7k|                pu1_left_nnz[2] = 1;
 1426|  27.7k|                pu1_left_nnz[3] = 1;
 1427|       |
 1428|  27.7k|                pu1_top_nnz[2] = 1;
 1429|  27.7k|                pu1_top_nnz[3] = 1;
 1430|       |
 1431|       |                /* added to be used by BS computation module */
 1432|  27.7k|                ui16_csbp |= 0xCC00;
 1433|  27.7k|            }
 1434|  40.3k|        }
 1435|   116k|    }
 1436|   255k|    else
 1437|   255k|    {
 1438|   255k|        *(UWORD32 *)(pu1_top_nnz) = 0;
 1439|   255k|        *(UWORD32 *)(pu1_left_nnz) = 0;
 1440|   255k|    }
 1441|       |    /*--------------------------------------------------------------------*/
 1442|       |    /* Store the last row of N values to top row                          */
 1443|       |    /*--------------------------------------------------------------------*/
 1444|   372k|    ps_cur_mb_info->u2_luma_csbp = ui16_csbp;
 1445|   372k|    ps_cur_mb_info->ps_curmb->u2_luma_csbp = ui16_csbp;
 1446|   372k|    {
 1447|   372k|        WORD8 i;
 1448|   372k|        UWORD16 u2_chroma_csbp = 0;
 1449|   372k|        ps_cur_mb_info->u2_chroma_csbp = 0;
 1450|       |
 1451|   372k|        u1_cbp >>= 4;
 1452|   372k|        pu1_top_nnz = pu1_top_nnz_uv;
 1453|   372k|        pu1_left_nnz = ps_dec->pu1_left_nnz_uv;
 1454|       |        /*--------------------------------------------------------------------*/
 1455|       |        /* if Chroma Component not present OR no ac values present            */
 1456|       |        /* Set the values of N to zero                                        */
 1457|       |        /*--------------------------------------------------------------------*/
 1458|   372k|        if(u1_cbp == CBPC_ALLZERO)
  ------------------
  |  |  507|   372k|#define CBPC_ALLZERO    0
  ------------------
  |  Branch (1458:12): [True: 329k, False: 43.4k]
  ------------------
 1459|   329k|        {
 1460|   329k|            ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x1;
 1461|   329k|            *(UWORD32 *)(pu1_top_nnz) = 0;
 1462|   329k|            *(UWORD32 *)(pu1_left_nnz) = 0;
 1463|   329k|            p_curr_ctxt->u1_yuv_dc_csbp &= 0x1;
 1464|   329k|            return (0);
 1465|   329k|        }
 1466|       |
 1467|       |        /*--------------------------------------------------------------------*/
 1468|       |        /* Decode Chroma DC values                                            */
 1469|       |        /*--------------------------------------------------------------------*/
 1470|   130k|        for(i = 0; i < 2; i++)
  ------------------
  |  Branch (1470:20): [True: 86.9k, False: 43.4k]
  ------------------
 1471|  86.9k|        {
 1472|  86.9k|            UWORD8 uc_a = 1, uc_b = 1;
 1473|  86.9k|            UWORD32 u4_ctx_inc;
 1474|  86.9k|            UWORD8 uc_codedBlockFlag;
 1475|  86.9k|            UWORD8 pu1_inv_scan[4] =
 1476|  86.9k|                { 0, 1, 2, 3 };
 1477|  86.9k|            WORD32 u4_scale;
 1478|  86.9k|            WORD32 i4_mb_inter_inc;
 1479|  86.9k|            tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
 1480|  86.9k|                            (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
 1481|  86.9k|            WORD16 *pi2_coeff_data =
 1482|  86.9k|                            (WORD16 *)ps_dec->pv_parse_tu_coeff_data;
 1483|  86.9k|            WORD16 ai2_dc_coef[4];
 1484|       |
 1485|  86.9k|            INC_SYM_COUNT(&(ps_dec->s_cab_dec_env));
 1486|  86.9k|            u4_scale = (i) ?
  ------------------
  |  Branch (1486:24): [True: 43.4k, False: 43.4k]
  ------------------
 1487|  43.4k|                            (ps_dec->pu2_quant_scale_v[0]
 1488|  43.4k|                                            << ps_dec->u1_qp_v_div6) :
 1489|  86.9k|                            (ps_dec->pu2_quant_scale_u[0]
 1490|  43.4k|                                            << ps_dec->u1_qp_u_div6);
 1491|       |
 1492|       |            /*--------------------------------------------------------------------*/
 1493|       |            /* Decode Bitstream to get the DC coeff                               */
 1494|       |            /*--------------------------------------------------------------------*/
 1495|  86.9k|            uc_a = (u1_left_dc_csbp >> i) & 0x01;
 1496|  86.9k|            uc_b = (u1_top_dc_csbp >> i) & 0x01;
 1497|  86.9k|            u4_ctx_inc = (uc_a + (uc_b << 1));
 1498|       |
 1499|  86.9k|            ps_src_bin_ctxt = (ps_dec->p_cbf_t[CHROMA_DC_CTXCAT]) + u4_ctx_inc;
  ------------------
  |  |   74|  86.9k|#define CHROMA_DC_CTXCAT  3
  ------------------
 1500|       |
 1501|  86.9k|            uc_codedBlockFlag =
 1502|  86.9k|                            ih264d_read_coeff4x4_cabac(ps_bitstrm,
 1503|  86.9k|                                            CHROMA_DC_CTXCAT,
  ------------------
  |  |   74|  86.9k|#define CHROMA_DC_CTXCAT  3
  ------------------
 1504|  86.9k|                                            ps_dec->p_significant_coeff_flag_t[CHROMA_DC_CTXCAT],
  ------------------
  |  |   74|  86.9k|#define CHROMA_DC_CTXCAT  3
  ------------------
 1505|  86.9k|                                            ps_dec, ps_src_bin_ctxt);
 1506|       |
 1507|  86.9k|            i4_mb_inter_inc = (!((ps_cur_mb_info->ps_curmb->u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  86.9k|#define I_4x4_MB    0
  ------------------
  |  Branch (1507:34): [True: 12.5k, False: 74.3k]
  ------------------
 1508|  74.3k|                            || (ps_cur_mb_info->ps_curmb->u1_mb_type == I_16x16_MB)))
  ------------------
  |  |  418|  74.3k|#define I_16x16_MB  1
  ------------------
  |  Branch (1508:32): [True: 14.9k, False: 59.4k]
  ------------------
 1509|  86.9k|                            * 3;
 1510|       |
 1511|  86.9k|            if(ps_dec->s_high_profile.u1_scaling_present)
  ------------------
  |  Branch (1511:16): [True: 26.0k, False: 60.9k]
  ------------------
 1512|  26.0k|            {
 1513|  26.0k|                u4_scale *=
 1514|  26.0k|                                ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc
 1515|  26.0k|                                                + 1 + i][0];
 1516|       |
 1517|  26.0k|            }
 1518|  60.9k|            else
 1519|  60.9k|            {
 1520|  60.9k|                u4_scale <<= 4;
 1521|  60.9k|            }
 1522|       |
 1523|  86.9k|            if(uc_codedBlockFlag)
  ------------------
  |  Branch (1523:16): [True: 48.8k, False: 38.1k]
  ------------------
 1524|  48.8k|            {
 1525|  48.8k|                WORD32 i_z0, i_z1, i_z2, i_z3;
 1526|  48.8k|                WORD32 *pi4_scale;
 1527|       |
 1528|  48.8k|                SET_BIT(u1_top_dc_csbp, i);
  ------------------
  |  |  106|  48.8k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1529|  48.8k|                SET_BIT(u1_left_dc_csbp, i);
  ------------------
  |  |  106|  48.8k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1530|       |
 1531|  48.8k|                ai2_dc_coef[0] = 0;
 1532|  48.8k|                ai2_dc_coef[1] = 0;
 1533|  48.8k|                ai2_dc_coef[2] = 0;
 1534|  48.8k|                ai2_dc_coef[3] = 0;
 1535|       |
 1536|  48.8k|                ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
 1537|  48.8k|                                                 ai2_dc_coef,
 1538|  48.8k|                                                 pu1_inv_scan);
 1539|  48.8k|                i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]);
 1540|  48.8k|                i_z1 = (ai2_dc_coef[0] - ai2_dc_coef[2]);
 1541|  48.8k|                i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]);
 1542|  48.8k|                i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]);
 1543|       |
 1544|       |                /*-----------------------------------------------------------*/
 1545|       |                /* Scaling and storing the values back                       */
 1546|       |                /*-----------------------------------------------------------*/
 1547|  48.8k|                *pi2_coeff_data++ = ((i_z0 + i_z3) * u4_scale) >> 5;
 1548|  48.8k|                *pi2_coeff_data++ = ((i_z0 - i_z3) * u4_scale) >> 5;
 1549|  48.8k|                *pi2_coeff_data++ = ((i_z1 + i_z2) * u4_scale) >> 5;
 1550|  48.8k|                *pi2_coeff_data++ = ((i_z1 - i_z2) * u4_scale) >> 5;
 1551|       |
 1552|  48.8k|                ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data;
 1553|       |
 1554|  48.8k|                SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,(i+1));
  ------------------
  |  |  106|  48.8k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1555|  48.8k|            }
 1556|  38.1k|            else
 1557|  38.1k|            {
 1558|  38.1k|                CLEARBIT(u1_top_dc_csbp, i);
  ------------------
  |  |   55|  38.1k|#define CLEARBIT(a,i) ((a) &= ~(1 << i))
  ------------------
 1559|  38.1k|                CLEARBIT(u1_left_dc_csbp, i);
  ------------------
  |  |   55|  38.1k|#define CLEARBIT(a,i) ((a) &= ~(1 << i))
  ------------------
 1560|  38.1k|            }
 1561|  86.9k|        }
 1562|       |
 1563|       |        /*********************************************************************/
 1564|       |        /*                   Update the DC csbp                              */
 1565|       |        /*********************************************************************/
 1566|  43.4k|        ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x1;
 1567|  43.4k|        p_curr_ctxt->u1_yuv_dc_csbp &= 0x1;
 1568|  43.4k|        ps_dec->pu1_left_yuv_dc_csbp[0] |= (u1_left_dc_csbp << 1);
 1569|  43.4k|        p_curr_ctxt->u1_yuv_dc_csbp |= (u1_top_dc_csbp << 1);
 1570|  43.4k|        if(u1_cbp == CBPC_ACZERO)
  ------------------
  |  |  508|  43.4k|#define CBPC_ACZERO     1
  ------------------
  |  Branch (1570:12): [True: 23.7k, False: 19.7k]
  ------------------
 1571|  23.7k|        {
 1572|  23.7k|            *(UWORD32 *)(pu1_top_nnz) = 0;
 1573|  23.7k|            *(UWORD32 *)(pu1_left_nnz) = 0;
 1574|  23.7k|            return (0);
 1575|  23.7k|        }
 1576|       |        /*--------------------------------------------------------------------*/
 1577|       |        /* Decode Chroma AC values                                            */
 1578|       |        /*--------------------------------------------------------------------*/
 1579|  19.7k|        {
 1580|  19.7k|            UWORD32 u4_temp;
 1581|       |            /*****************************************************************/
 1582|       |            /* U Block  residual decoding, check cbp and proceed (subblock=0)*/
 1583|       |            /*****************************************************************/
 1584|  19.7k|            u2_chroma_csbp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 2,
 1585|  19.7k|            CHROMA_AC_CTXCAT,
  ------------------
  |  |   75|  19.7k|#define CHROMA_AC_CTXCAT  4
  ------------------
 1586|  19.7k|                                                         ps_dec, pu1_top_nnz,
 1587|  19.7k|                                                         pu1_left_nnz);
 1588|       |
 1589|  19.7k|            pi2_coeff_block += MB_CHROM_SIZE;
  ------------------
  |  |  564|  19.7k|#define MB_CHROM_SIZE                 64
  ------------------
 1590|       |            /*****************************************************************/
 1591|       |            /* V Block  residual decoding, check cbp and proceed (subblock=1)*/
 1592|       |            /*****************************************************************/
 1593|  19.7k|            u4_temp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 2,
 1594|  19.7k|            CHROMA_AC_CTXCAT,
  ------------------
  |  |   75|  19.7k|#define CHROMA_AC_CTXCAT  4
  ------------------
 1595|  19.7k|                                                  ps_dec, (pu1_top_nnz + 2),
 1596|  19.7k|                                                  (pu1_left_nnz + 2));
 1597|  19.7k|            u2_chroma_csbp |= (u4_temp << 4);
 1598|  19.7k|        }
 1599|       |        /*********************************************************************/
 1600|       |        /*                   Update the AC csbp                              */
 1601|       |        /*********************************************************************/
 1602|  19.7k|        ps_cur_mb_info->u2_chroma_csbp = u2_chroma_csbp;
 1603|  19.7k|    }
 1604|       |
 1605|      0|    return (0);
 1606|  43.4k|}

ih264d_uev:
   78|  4.92M|{
   79|  4.92M|    UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
   80|  4.92M|    UWORD32 u4_word, u4_ldz;
   81|       |
   82|       |    /***************************************************************/
   83|       |    /* Find leading zeros in next 32 bits                          */
   84|       |    /***************************************************************/
   85|  4.92M|    NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  4.92M|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  4.92M|{                                                                           \
  |  |  152|  4.92M|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  4.92M|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  4.92M|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  4.92M|                                                                            \
  |  |  156|  4.92M|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  4.92M|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 3.95M, False: 971k]
  |  |  ------------------
  |  |  158|  4.92M|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  3.95M|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  4.92M|}
  ------------------
   86|  4.92M|    u4_ldz = CLZ(u4_word);
   87|       |    /* Flush the ps_bitstrm */
   88|  4.92M|    u4_bitstream_offset += (u4_ldz + 1);
   89|       |    /* Read the suffix from the ps_bitstrm */
   90|  4.92M|    u4_word = 0;
   91|  4.92M|    if(u4_ldz)
  ------------------
  |  Branch (91:8): [True: 2.00M, False: 2.92M]
  ------------------
   92|  2.00M|        GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  2.00M|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.00M|{                                                                           \
  |  |  122|  2.00M|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.00M|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.00M|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.00M|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.00M|                                                                            \
  |  |  127|  2.00M|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 1.92M, False: 84.9k]
  |  |  ------------------
  |  |  128|  2.00M|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  1.92M|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.00M|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.00M|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.00M|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.00M|}                                                                           \
  ------------------
   93|  4.92M|    *pu4_bitstrm_ofst = u4_bitstream_offset;
   94|  4.92M|    return ((1 << u4_ldz) + u4_word - 1);
   95|  4.92M|}
ih264d_sev:
  123|  1.41M|{
  124|  1.41M|    UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  125|  1.41M|    UWORD32 u4_word, u4_ldz, u4_abs_val;
  126|       |
  127|       |    /***************************************************************/
  128|       |    /* Find leading zeros in next 32 bits                          */
  129|       |    /***************************************************************/
  130|  1.41M|    NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  1.41M|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  1.41M|{                                                                           \
  |  |  152|  1.41M|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  1.41M|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  1.41M|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  1.41M|                                                                            \
  |  |  156|  1.41M|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  1.41M|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 1.38M, False: 31.8k]
  |  |  ------------------
  |  |  158|  1.41M|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  1.38M|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  1.41M|}
  ------------------
  131|  1.41M|    u4_ldz = CLZ(u4_word);
  132|       |
  133|       |    /* Flush the ps_bitstrm */
  134|  1.41M|    u4_bitstream_offset += (u4_ldz + 1);
  135|       |
  136|       |    /* Read the suffix from the ps_bitstrm */
  137|  1.41M|    u4_word = 0;
  138|  1.41M|    if(u4_ldz)
  ------------------
  |  Branch (138:8): [True: 742k, False: 677k]
  ------------------
  139|   742k|        GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|   742k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|   742k|{                                                                           \
  |  |  122|   742k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|   742k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|   742k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|   742k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|   742k|                                                                            \
  |  |  127|   742k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 723k, False: 19.0k]
  |  |  ------------------
  |  |  128|   742k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|   723k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|   742k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|   742k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|   742k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|   742k|}                                                                           \
  ------------------
  140|       |
  141|  1.41M|    *pu4_bitstrm_ofst = u4_bitstream_offset;
  142|  1.41M|    u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  143|       |
  144|  1.41M|    if(u4_word & 0x1)
  ------------------
  |  Branch (144:8): [True: 350k, False: 1.06M]
  ------------------
  145|   350k|        return (-(WORD32)u4_abs_val);
  146|  1.06M|    else
  147|  1.06M|        return (u4_abs_val);
  148|  1.41M|}
ih264d_tev_range1:
  176|  18.0k|{
  177|  18.0k|    UWORD32 u4_code;
  178|  18.0k|    GETBIT(u4_code, *pu4_bitstrm_ofst, pu4_bitstrm_buf);
  ------------------
  |  |  105|  18.0k|#define   GETBIT(u4_code, u4_offset, pu4_bitstream)                         \
  |  |  106|  18.0k|{                                                                           \
  |  |  107|  18.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  108|  18.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  109|  18.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  110|  18.0k|    u4_code = pu4_buf[u4_word_off] << u4_bit_off;                           \
  |  |  111|  18.0k|    (u4_offset)++;                                                          \
  |  |  112|  18.0k|    u4_code = (u4_code >> 31);                                              \
  |  |  113|  18.0k|}
  ------------------
  179|  18.0k|    return (!u4_code);
  180|  18.0k|}
ih264d_cavlc_4x4res_block_totalcoeff_1:
  305|  99.1k|{
  306|       |
  307|  99.1k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  308|  99.1k|    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
  309|  99.1k|    UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF;
  310|  99.1k|    WORD32 i2_level;
  311|  99.1k|    UWORD32 u4_tot_zero, u4_ldz, u4_scan_pos;
  312|       |
  313|  99.1k|    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
  314|  99.1k|    WORD16 *pi2_coeff_data;
  315|  99.1k|    dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
  316|       |
  317|  99.1k|    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
  318|  99.1k|    ps_tu_4x4->u2_sig_coeff_map = 0;
  319|  99.1k|    pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
  320|       |
  321|       |
  322|  99.1k|    if(u4_trailing_ones)
  ------------------
  |  Branch (322:8): [True: 88.0k, False: 11.1k]
  ------------------
  323|  88.0k|    {
  324|  88.0k|        UWORD32 u4_sign;
  325|       |        /****************************************************************/
  326|       |        /* Decode Trailing One as in section 9.2.2                      */
  327|       |        /****************************************************************/
  328|  88.0k|        GETBIT(u4_sign, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  105|  88.0k|#define   GETBIT(u4_code, u4_offset, pu4_bitstream)                         \
  |  |  106|  88.0k|{                                                                           \
  |  |  107|  88.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  108|  88.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  109|  88.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  110|  88.0k|    u4_code = pu4_buf[u4_word_off] << u4_bit_off;                           \
  |  |  111|  88.0k|    (u4_offset)++;                                                          \
  |  |  112|  88.0k|    u4_code = (u4_code >> 31);                                              \
  |  |  113|  88.0k|}
  ------------------
  329|  88.0k|        i2_level = u4_sign ? -1 : 1;
  ------------------
  |  Branch (329:20): [True: 53.5k, False: 34.5k]
  ------------------
  330|  88.0k|    }
  331|  11.1k|    else
  332|  11.1k|    {
  333|       |        /****************************************************************/
  334|       |        /* Decoding Level based on prefix and suffix  as in 9.2.2       */
  335|       |        /****************************************************************/
  336|  11.1k|        UWORD32 u4_lev_suffix, u4_lev_suffix_size;
  337|  11.1k|        WORD32 u2_lev_code, u2_abs_value;
  338|  11.1k|        UWORD32 u4_lev_prefix;
  339|       |        /***************************************************************/
  340|       |        /* Find leading zeros in next 32 bits                          */
  341|       |        /***************************************************************/
  342|  11.1k|        FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|  11.1k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|  11.1k|{                                                                           \
  |  |  167|  11.1k|    UWORD32 u4_word;                                                        \
  |  |  168|  11.1k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  11.1k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  11.1k|{                                                                           \
  |  |  |  |  152|  11.1k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  11.1k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  11.1k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  11.1k|                                                                            \
  |  |  |  |  156|  11.1k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  11.1k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 10.7k, False: 398]
  |  |  |  |  ------------------
  |  |  |  |  158|  11.1k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  10.7k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  11.1k|}
  |  |  ------------------
  |  |  169|  11.1k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|  11.1k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|  11.1k|}
  ------------------
  343|  11.1k|                              pu4_bitstrm_buf);
  344|  11.1k|        u2_lev_code = (2 + MIN(u4_lev_prefix, 15));
  ------------------
  |  |   61|  11.1k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 5.74k, False: 5.36k]
  |  |  ------------------
  ------------------
  345|       |
  346|  11.1k|        if(14 == u4_lev_prefix)
  ------------------
  |  Branch (346:12): [True: 244, False: 10.8k]
  ------------------
  347|    244|            u4_lev_suffix_size = 4;
  348|  10.8k|        else if(15 <= u4_lev_prefix)
  ------------------
  |  Branch (348:17): [True: 5.36k, False: 5.50k]
  ------------------
  349|  5.36k|        {
  350|  5.36k|            u2_lev_code += 15;
  351|  5.36k|            u4_lev_suffix_size = u4_lev_prefix - 3;
  352|  5.36k|        }
  353|  5.50k|        else
  354|  5.50k|            u4_lev_suffix_size = 0;
  355|       |
  356|       |        //HP_LEVEL_PREFIX
  357|  11.1k|        if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (357:12): [True: 5.25k, False: 5.85k]
  ------------------
  358|  5.25k|        {
  359|  5.25k|            u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
  360|  5.25k|        }
  361|  11.1k|        if(u4_lev_suffix_size)
  ------------------
  |  Branch (361:12): [True: 5.60k, False: 5.50k]
  ------------------
  362|  5.60k|        {
  363|  5.60k|            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  5.60k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  5.60k|{                                                                           \
  |  |  122|  5.60k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  5.60k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  5.60k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  5.60k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  5.60k|                                                                            \
  |  |  127|  5.60k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 5.28k, False: 329]
  |  |  ------------------
  |  |  128|  5.60k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  5.28k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  5.60k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  5.60k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  5.60k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  5.60k|}                                                                           \
  ------------------
  364|  5.60k|                    u4_lev_suffix_size);
  365|  5.60k|            u2_lev_code += u4_lev_suffix;
  366|  5.60k|        }
  367|       |
  368|  11.1k|        u2_abs_value = (u2_lev_code + 2) >> 1;
  369|       |        /*********************************************************/
  370|       |        /* If Level code is odd, level is negative else positive */
  371|       |        /*********************************************************/
  372|  11.1k|        i2_level = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
  ------------------
  |  Branch (372:20): [True: 1.51k, False: 9.60k]
  ------------------
  373|       |
  374|  11.1k|    }
  375|       |
  376|       |    /****************************************************************/
  377|       |    /* Decoding total zeros as in section 9.2.3, table 9.7          */
  378|       |    /****************************************************************/
  379|  99.1k|    FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_bitstream_offset, pu4_bitstrm_buf, 8);
  ------------------
  |  |  176|  99.1k|#define   FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_offset, pu4_bitstream, u4_len)  \
  |  |  177|  99.1k|{                                                                           \
  |  |  178|  99.1k|    UWORD32 u4_word;                                                        \
  |  |  179|  99.1k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  99.1k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  99.1k|{                                                                           \
  |  |  |  |  152|  99.1k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  99.1k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  99.1k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  99.1k|                                                                            \
  |  |  |  |  156|  99.1k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  99.1k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 96.5k, False: 2.64k]
  |  |  |  |  ------------------
  |  |  |  |  158|  99.1k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  96.5k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  99.1k|}
  |  |  ------------------
  |  |  180|  99.1k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  181|  99.1k|    if(u4_ldz < u4_len)                                                     \
  |  |  ------------------
  |  |  |  Branch (181:8): [True: 92.9k, False: 6.17k]
  |  |  ------------------
  |  |  182|  99.1k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  183|  99.1k|    else                                                                    \
  |  |  184|  99.1k|    {                                                                       \
  |  |  185|  6.17k|        u4_ldz = u4_len;                                                    \
  |  |  186|  6.17k|        (u4_offset) += u4_ldz;                                              \
  |  |  187|  6.17k|    }                                                                       \
  |  |  188|  99.1k|}
  ------------------
  380|       |
  381|  99.1k|    if(u4_ldz)
  ------------------
  |  Branch (381:8): [True: 43.4k, False: 55.6k]
  ------------------
  382|  43.4k|    {
  383|  43.4k|        GETBIT(u4_tot_zero, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  105|  43.4k|#define   GETBIT(u4_code, u4_offset, pu4_bitstream)                         \
  |  |  106|  43.4k|{                                                                           \
  |  |  107|  43.4k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  108|  43.4k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  109|  43.4k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  110|  43.4k|    u4_code = pu4_buf[u4_word_off] << u4_bit_off;                           \
  |  |  111|  43.4k|    (u4_offset)++;                                                          \
  |  |  112|  43.4k|    u4_code = (u4_code >> 31);                                              \
  |  |  113|  43.4k|}
  ------------------
  384|  43.4k|        u4_tot_zero = (u4_ldz << 1) - u4_tot_zero;
  385|  43.4k|    }
  386|  55.6k|    else
  387|  55.6k|        u4_tot_zero = 0;
  388|       |
  389|       |    /***********************************************************************/
  390|       |    /* Inverse scan and store  residual coeff. Update the bitstream u4_ofst */
  391|       |    /***********************************************************************/
  392|  99.1k|    u4_scan_pos = u4_tot_zero + u4_isdc;
  393|  99.1k|    if(u4_scan_pos > 15)
  ------------------
  |  Branch (393:8): [True: 6.06k, False: 93.0k]
  ------------------
  394|  6.06k|        return -1;
  395|       |
  396|  93.0k|    SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|  93.0k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  397|  93.0k|    *pi2_coeff_data++ = i2_level;
  398|       |
  399|       |
  400|  93.0k|    {
  401|  93.0k|        WORD32 offset;
  402|  93.0k|        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
  403|  93.0k|        offset = ALIGN4(offset);
  ------------------
  |  |   52|  93.0k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
  404|  93.0k|        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
  405|  93.0k|    }
  406|       |
  407|  93.0k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
  408|  93.0k|    return 0;
  409|  99.1k|}
ih264d_cavlc_4x4res_block_totalcoeff_2to10:
  439|  66.2k|{
  440|  66.2k|    UWORD32 u4_total_zeroes;
  441|  66.2k|    WORD32 i;
  442|  66.2k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  443|  66.2k|    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
  444|  66.2k|    UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF;
  445|  66.2k|    UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16;
  446|       |    // To avoid error check at 4x4 level, allocating for 3 extra levels(16+3)
  447|       |    // since u4_trailing_ones can at the max be 3. This will be required when
  448|       |    // u4_total_coeff is less than u4_trailing_ones
  449|  66.2k|    WORD16 ai2_level_arr[19];
  450|  66.2k|    WORD16 *i2_level_arr = &ai2_level_arr[3];
  451|       |
  452|  66.2k|    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
  453|  66.2k|    WORD16 *pi2_coeff_data;
  454|  66.2k|    dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
  455|       |
  456|  66.2k|    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
  457|  66.2k|    ps_tu_4x4->u2_sig_coeff_map = 0;
  458|  66.2k|    pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
  459|       |
  460|  66.2k|    i = u4_total_coeff - 1;
  461|       |
  462|  66.2k|    if(u4_trailing_ones)
  ------------------
  |  Branch (462:8): [True: 57.5k, False: 8.73k]
  ------------------
  463|  57.5k|    {
  464|       |        /*********************************************************************/
  465|       |        /* Decode Trailing Ones                                              */
  466|       |        /* read the sign of T1's and put them in level array                 */
  467|       |        /*********************************************************************/
  468|  57.5k|        UWORD32 u4_signs, u4_cnt = u4_trailing_ones;
  469|  57.5k|        WORD16 (*ppi2_trlone_lkup)[3] =
  470|  57.5k|                        (WORD16 (*)[3])gai2_ih264d_trailing_one_level;
  471|  57.5k|        WORD16 *pi2_trlone_lkup;
  472|       |
  473|  57.5k|        GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt);
  ------------------
  |  |  120|  57.5k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  57.5k|{                                                                           \
  |  |  122|  57.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  57.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  57.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  57.5k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  57.5k|                                                                            \
  |  |  127|  57.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 54.0k, False: 3.49k]
  |  |  ------------------
  |  |  128|  57.5k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  54.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  57.5k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  57.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  57.5k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  57.5k|}                                                                           \
  ------------------
  474|       |
  475|  57.5k|        pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs];
  476|       |
  477|   182k|        while(u4_cnt)
  ------------------
  |  Branch (477:15): [True: 124k, False: 57.5k]
  ------------------
  478|   124k|        {
  479|   124k|            i2_level_arr[i--] = *pi2_trlone_lkup++;
  480|   124k|            u4_cnt--;
  481|   124k|        }
  482|  57.5k|    }
  483|       |
  484|       |    /****************************************************************/
  485|       |    /* Decoding Levels Begins                                       */
  486|       |    /****************************************************************/
  487|  66.2k|    if(i >= 0)
  ------------------
  |  Branch (487:8): [True: 28.8k, False: 37.4k]
  ------------------
  488|  28.8k|    {
  489|       |        /****************************************************************/
  490|       |        /* First level is decoded outside the loop as it has lot of     */
  491|       |        /* special cases.                                               */
  492|       |        /****************************************************************/
  493|  28.8k|        UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size;
  494|  28.8k|        WORD32 u2_lev_code, u2_abs_value;
  495|  28.8k|        UWORD32 u4_lev_prefix;
  496|       |
  497|       |        /***************************************************************/
  498|       |        /* u4_suffix_len = 0,  Find leading zeros in next 32 bits      */
  499|       |        /***************************************************************/
  500|  28.8k|        FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|  28.8k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|  28.8k|{                                                                           \
  |  |  167|  28.8k|    UWORD32 u4_word;                                                        \
  |  |  168|  28.8k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  28.8k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  28.8k|{                                                                           \
  |  |  |  |  152|  28.8k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  28.8k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  28.8k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  28.8k|                                                                            \
  |  |  |  |  156|  28.8k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  28.8k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 27.5k, False: 1.28k]
  |  |  |  |  ------------------
  |  |  |  |  158|  28.8k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  27.5k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  28.8k|}
  |  |  ------------------
  |  |  169|  28.8k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|  28.8k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|  28.8k|}
  ------------------
  501|  28.8k|                              pu4_bitstrm_buf);
  502|       |
  503|       |        /*********************************************************/
  504|       |        /* Special decoding case when trailing ones are 3        */
  505|       |        /*********************************************************/
  506|  28.8k|        u2_lev_code = MIN(15, u4_lev_prefix);
  ------------------
  |  |   61|  28.8k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 6.27k, False: 22.5k]
  |  |  ------------------
  ------------------
  507|       |
  508|  28.8k|        u2_lev_code += (3 == u4_trailing_ones) ? 0 : 2;
  ------------------
  |  Branch (508:24): [True: 10.6k, False: 18.1k]
  ------------------
  509|       |
  510|  28.8k|        if(14 == u4_lev_prefix)
  ------------------
  |  Branch (510:12): [True: 744, False: 28.1k]
  ------------------
  511|    744|            u4_lev_suffix_size = 4;
  512|  28.1k|        else if(15 <= u4_lev_prefix)
  ------------------
  |  Branch (512:17): [True: 6.31k, False: 21.8k]
  ------------------
  513|  6.31k|        {
  514|  6.31k|            u2_lev_code += 15;
  515|  6.31k|            u4_lev_suffix_size = u4_lev_prefix - 3;
  516|  6.31k|        }
  517|  21.8k|        else
  518|  21.8k|            u4_lev_suffix_size = 0;
  519|       |
  520|       |        //HP_LEVEL_PREFIX
  521|  28.8k|        if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (521:12): [True: 6.27k, False: 22.5k]
  ------------------
  522|  6.27k|        {
  523|  6.27k|            u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
  524|  6.27k|        }
  525|  28.8k|        if(u4_lev_suffix_size)
  ------------------
  |  Branch (525:12): [True: 7.05k, False: 21.8k]
  ------------------
  526|  7.05k|        {
  527|  7.05k|            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  7.05k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  7.05k|{                                                                           \
  |  |  122|  7.05k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  7.05k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  7.05k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  7.05k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  7.05k|                                                                            \
  |  |  127|  7.05k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 6.55k, False: 506]
  |  |  ------------------
  |  |  128|  7.05k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  6.55k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  7.05k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  7.05k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  7.05k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  7.05k|}                                                                           \
  ------------------
  528|  7.05k|                    u4_lev_suffix_size);
  529|  7.05k|            u2_lev_code += u4_lev_suffix;
  530|  7.05k|        }
  531|       |
  532|  28.8k|        u2_abs_value = (u2_lev_code + 2) >> 1;
  533|       |        /*********************************************************/
  534|       |        /* If Level code is odd, level is negative else positive */
  535|       |        /*********************************************************/
  536|  28.8k|        i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
  ------------------
  |  Branch (536:29): [True: 8.62k, False: 20.2k]
  ------------------
  537|       |
  538|  28.8k|        u4_suffix_len = (u2_abs_value > 3) ? 2 : 1;
  ------------------
  |  Branch (538:25): [True: 9.54k, False: 19.3k]
  ------------------
  539|       |
  540|       |        /*********************************************************/
  541|       |        /* Now loop over the remaining levels                    */
  542|       |        /*********************************************************/
  543|   118k|        while(i >= 0)
  ------------------
  |  Branch (543:15): [True: 90.0k, False: 28.8k]
  ------------------
  544|  90.0k|        {
  545|       |
  546|       |            /***************************************************************/
  547|       |            /* Find leading zeros in next 32 bits                          */
  548|       |            /***************************************************************/
  549|  90.0k|            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|  90.0k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|  90.0k|{                                                                           \
  |  |  167|  90.0k|    UWORD32 u4_word;                                                        \
  |  |  168|  90.0k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  90.0k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  90.0k|{                                                                           \
  |  |  |  |  152|  90.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  90.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  90.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  90.0k|                                                                            \
  |  |  |  |  156|  90.0k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  90.0k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 87.0k, False: 3.00k]
  |  |  |  |  ------------------
  |  |  |  |  158|  90.0k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  87.0k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  90.0k|}
  |  |  ------------------
  |  |  169|  90.0k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|  90.0k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|  90.0k|}
  ------------------
  550|  90.0k|                                  pu4_bitstrm_buf);
  551|       |
  552|  90.0k|            u4_lev_suffix_size =
  553|  90.0k|                            (15 <= u4_lev_prefix) ?
  ------------------
  |  Branch (553:29): [True: 31.8k, False: 58.1k]
  ------------------
  554|  58.1k|                                            (u4_lev_prefix - 3) : u4_suffix_len;
  555|       |
  556|       |            /*********************************************************/
  557|       |            /* Compute level code using prefix and suffix            */
  558|       |            /*********************************************************/
  559|  90.0k|            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  90.0k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  90.0k|{                                                                           \
  |  |  122|  90.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  90.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  90.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  90.0k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  90.0k|                                                                            \
  |  |  127|  90.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 86.8k, False: 3.17k]
  |  |  ------------------
  |  |  128|  90.0k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  86.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  90.0k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  90.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  90.0k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  90.0k|}                                                                           \
  ------------------
  560|  90.0k|                    u4_lev_suffix_size);
  561|  90.0k|            u2_lev_code = (MIN(15,u4_lev_prefix) << u4_suffix_len)
  ------------------
  |  |   61|  90.0k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 31.7k, False: 58.3k]
  |  |  ------------------
  ------------------
  562|  90.0k|                            + u4_lev_suffix;
  563|       |
  564|       |            //HP_LEVEL_PREFIX
  565|  90.0k|            if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (565:16): [True: 31.7k, False: 58.3k]
  ------------------
  566|  31.7k|            {
  567|  31.7k|                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
  568|  31.7k|            }
  569|  90.0k|            u2_abs_value = (u2_lev_code + 2) >> 1;
  570|       |
  571|       |            /*********************************************************/
  572|       |            /* If Level code is odd, level is negative else positive */
  573|       |            /*********************************************************/
  574|  90.0k|            i2_level_arr[i--] =
  575|  90.0k|                            (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
  ------------------
  |  Branch (575:29): [True: 27.5k, False: 62.4k]
  ------------------
  576|       |
  577|       |            /*********************************************************/
  578|       |            /* Increment suffix length if required                   */
  579|       |            /*********************************************************/
  580|  90.0k|            u4_suffix_len +=
  581|  90.0k|                            (u4_suffix_len < 6) ?
  ------------------
  |  Branch (581:29): [True: 79.5k, False: 10.5k]
  ------------------
  582|  79.5k|                                            (u2_abs_value
  583|  79.5k|                                                            > (3
  584|  79.5k|                                                                            << (u4_suffix_len
  585|  79.5k|                                                                                            - 1))) :
  586|  90.0k|                                            0;
  587|  90.0k|        }
  588|       |
  589|       |        /****************************************************************/
  590|       |        /* Decoding Levels Ends                                         */
  591|       |        /****************************************************************/
  592|  28.8k|    }
  593|       |
  594|       |    /****************************************************************/
  595|       |    /* Decoding total zeros as in section 9.2.3, table 9.7          */
  596|       |    /****************************************************************/
  597|  66.2k|    {
  598|  66.2k|        UWORD32 u4_index;
  599|  66.2k|        const UWORD8 (*ppu1_total_zero_lkup)[64] =
  600|  66.2k|                        (const UWORD8 (*)[64])gau1_ih264d_table_total_zero_2to10;
  601|       |
  602|  66.2k|        NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 6);
  ------------------
  |  |  137|  66.2k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  66.2k|{                                                                           \
  |  |  139|  66.2k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  66.2k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  66.2k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  66.2k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  66.2k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 64.1k, False: 2.14k]
  |  |  ------------------
  |  |  144|  66.2k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  64.1k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  66.2k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  66.2k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  66.2k|}
  ------------------
  603|  66.2k|        u4_total_zeroes = ppu1_total_zero_lkup[u4_total_coeff - 2][u4_index];
  604|       |
  605|  66.2k|        FLUSHBITS(u4_bitstream_offset, (u4_total_zeroes >> 4));
  ------------------
  |  |  193|  66.2k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  66.2k|{                                                                           \
  |  |  195|  66.2k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  66.2k|}
  ------------------
  606|  66.2k|        u4_total_zeroes &= 0xf;
  607|  66.2k|    }
  608|       |
  609|       |    /**************************************************************/
  610|       |    /* Decode the runs and form the coefficient buffer            */
  611|       |    /**************************************************************/
  612|  66.2k|    {
  613|  66.2k|        const UWORD8 *pu1_table_runbefore;
  614|  66.2k|        UWORD32 u4_run;
  615|  66.2k|        WORD32 k;
  616|  66.2k|        WORD32 u4_scan_pos = u4_total_coeff + u4_total_zeroes - 1 + u4_isdc;
  617|  66.2k|        WORD32 u4_zeroes_left = u4_total_zeroes;
  618|  66.2k|        k = u4_total_coeff - 1;
  619|       |
  620|       |        /**************************************************************/
  621|       |        /* Decoding Runs Begin for zeros left > 6                     */
  622|       |        /**************************************************************/
  623|  89.8k|        while((u4_zeroes_left > 6) && k)
  ------------------
  |  Branch (623:15): [True: 26.0k, False: 63.7k]
  |  Branch (623:39): [True: 23.5k, False: 2.49k]
  ------------------
  624|  23.5k|        {
  625|  23.5k|            UWORD32 u4_code;
  626|       |
  627|  23.5k|            NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
  ------------------
  |  |  137|  23.5k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  23.5k|{                                                                           \
  |  |  139|  23.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  23.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  23.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  23.5k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  23.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 22.9k, False: 627]
  |  |  ------------------
  |  |  144|  23.5k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  22.9k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  23.5k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  23.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  23.5k|}
  ------------------
  628|       |
  629|  23.5k|            if(u4_code != 0)
  ------------------
  |  Branch (629:16): [True: 12.1k, False: 11.3k]
  ------------------
  630|  12.1k|            {
  631|  12.1k|                FLUSHBITS(u4_bitstream_offset, 3);
  ------------------
  |  |  193|  12.1k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  12.1k|{                                                                           \
  |  |  195|  12.1k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  12.1k|}
  ------------------
  632|  12.1k|                u4_run = (7 - u4_code);
  633|  12.1k|            }
  634|  11.3k|            else
  635|  11.3k|            {
  636|       |
  637|  11.3k|                FIND_ONE_IN_STREAM_LEN(u4_code, u4_bitstream_offset,
  ------------------
  |  |  176|  11.3k|#define   FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_offset, pu4_bitstream, u4_len)  \
  |  |  177|  11.3k|{                                                                           \
  |  |  178|  11.3k|    UWORD32 u4_word;                                                        \
  |  |  179|  11.3k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  11.3k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  11.3k|{                                                                           \
  |  |  |  |  152|  11.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  11.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  11.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  11.3k|                                                                            \
  |  |  |  |  156|  11.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  11.3k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 11.0k, False: 338]
  |  |  |  |  ------------------
  |  |  |  |  158|  11.3k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  11.0k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  11.3k|}
  |  |  ------------------
  |  |  180|  11.3k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  181|  11.3k|    if(u4_ldz < u4_len)                                                     \
  |  |  ------------------
  |  |  |  Branch (181:8): [True: 5.06k, False: 6.31k]
  |  |  ------------------
  |  |  182|  11.3k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  183|  11.3k|    else                                                                    \
  |  |  184|  11.3k|    {                                                                       \
  |  |  185|  6.31k|        u4_ldz = u4_len;                                                    \
  |  |  186|  6.31k|        (u4_offset) += u4_ldz;                                              \
  |  |  187|  6.31k|    }                                                                       \
  |  |  188|  11.3k|}
  ------------------
  638|  11.3k|                                       pu4_bitstrm_buf, 11);
  639|  11.3k|                u4_run = (4 + u4_code);
  640|  11.3k|            }
  641|       |
  642|  23.5k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|  23.5k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  643|  23.5k|            *pi2_coeff_data++ = i2_level_arr[k--];
  644|  23.5k|            u4_zeroes_left -= (WORD32)u4_run;
  645|  23.5k|            u4_scan_pos -= (WORD32)(u4_run + 1);
  646|  23.5k|        }
  647|       |
  648|  66.2k|        if (u4_zeroes_left < 0 || u4_scan_pos < 0)
  ------------------
  |  Branch (648:13): [True: 6.82k, False: 59.4k]
  |  Branch (648:35): [True: 0, False: 59.4k]
  ------------------
  649|  6.82k|            return -1;
  650|       |
  651|       |        /**************************************************************/
  652|       |        /* Decoding Runs for 0 < zeros left <=6                       */
  653|       |        /**************************************************************/
  654|  59.4k|        pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before;
  655|   127k|        while((u4_zeroes_left > 0) && k)
  ------------------
  |  Branch (655:15): [True: 94.9k, False: 32.6k]
  |  Branch (655:39): [True: 68.1k, False: 26.7k]
  ------------------
  656|  68.1k|        {
  657|  68.1k|            UWORD32 u4_code;
  658|  68.1k|            NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
  ------------------
  |  |  137|  68.1k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  68.1k|{                                                                           \
  |  |  139|  68.1k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  68.1k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  68.1k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  68.1k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  68.1k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 66.1k, False: 2.07k]
  |  |  ------------------
  |  |  144|  68.1k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  66.1k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  68.1k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  68.1k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  68.1k|}
  ------------------
  659|       |
  660|  68.1k|            u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)];
  661|  68.1k|            u4_run = u4_code >> 2;
  662|       |
  663|  68.1k|            FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03));
  ------------------
  |  |  193|  68.1k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  68.1k|{                                                                           \
  |  |  195|  68.1k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  68.1k|}
  ------------------
  664|       |
  665|  68.1k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|  68.1k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  666|  68.1k|            *pi2_coeff_data++ = i2_level_arr[k--];
  667|  68.1k|            u4_zeroes_left -= (WORD32)u4_run;
  668|  68.1k|            u4_scan_pos -= (WORD32)(u4_run + 1);
  669|  68.1k|        }
  670|  59.4k|        if (u4_zeroes_left < 0 || u4_scan_pos < 0)
  ------------------
  |  Branch (670:13): [True: 0, False: 59.4k]
  |  Branch (670:35): [True: 0, False: 59.4k]
  ------------------
  671|      0|            return -1;
  672|       |        /**************************************************************/
  673|       |        /* Decoding Runs End                                          */
  674|       |        /**************************************************************/
  675|       |
  676|       |        /**************************************************************/
  677|       |        /* Copy the remaining coefficients                            */
  678|       |        /**************************************************************/
  679|   177k|        while(k >= 0)
  ------------------
  |  Branch (679:15): [True: 117k, False: 59.4k]
  ------------------
  680|   117k|        {
  681|       |
  682|   117k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|   117k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  683|   117k|            *pi2_coeff_data++ = i2_level_arr[k--];
  684|   117k|            u4_scan_pos--;
  685|   117k|        }
  686|  59.4k|    }
  687|       |
  688|      0|    {
  689|  59.4k|        WORD32 offset;
  690|  59.4k|        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
  691|  59.4k|        offset = ALIGN4(offset);
  ------------------
  |  |   52|  59.4k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
  692|  59.4k|        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
  693|  59.4k|    }
  694|       |
  695|  59.4k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
  696|  59.4k|    return 0;
  697|  59.4k|}
ih264d_cavlc_4x4res_block_totalcoeff_11to16:
  727|  17.1k|{
  728|  17.1k|    UWORD32 u4_total_zeroes;
  729|  17.1k|    WORD32 i;
  730|  17.1k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  731|  17.1k|    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
  732|  17.1k|    UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF;
  733|  17.1k|    UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16;
  734|       |    // To avoid error check at 4x4 level, allocating for 3 extra levels(16+3)
  735|       |    // since u4_trailing_ones can at the max be 3. This will be required when
  736|       |    // u4_total_coeff is less than u4_trailing_ones
  737|  17.1k|    WORD16 ai2_level_arr[19];//
  738|  17.1k|    WORD16 *i2_level_arr = &ai2_level_arr[3];
  739|       |
  740|  17.1k|    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
  741|  17.1k|    WORD16 *pi2_coeff_data;
  742|  17.1k|    dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
  743|       |
  744|  17.1k|    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
  745|  17.1k|    ps_tu_4x4->u2_sig_coeff_map = 0;
  746|  17.1k|    pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
  747|       |
  748|  17.1k|    i = u4_total_coeff - 1;
  749|  17.1k|    if(u4_trailing_ones)
  ------------------
  |  Branch (749:8): [True: 11.3k, False: 5.82k]
  ------------------
  750|  11.3k|    {
  751|       |        /*********************************************************************/
  752|       |        /* Decode Trailing Ones                                              */
  753|       |        /* read the sign of T1's and put them in level array                 */
  754|       |        /*********************************************************************/
  755|  11.3k|        UWORD32 u4_signs, u4_cnt = u4_trailing_ones;
  756|  11.3k|        WORD16 (*ppi2_trlone_lkup)[3] =
  757|  11.3k|                        (WORD16 (*)[3])gai2_ih264d_trailing_one_level;
  758|  11.3k|        WORD16 *pi2_trlone_lkup;
  759|       |
  760|  11.3k|        GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt);
  ------------------
  |  |  120|  11.3k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  11.3k|{                                                                           \
  |  |  122|  11.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  11.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  11.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  11.3k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  11.3k|                                                                            \
  |  |  127|  11.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 10.6k, False: 707]
  |  |  ------------------
  |  |  128|  11.3k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  10.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  11.3k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  11.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  11.3k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  11.3k|}                                                                           \
  ------------------
  761|       |
  762|  11.3k|        pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs];
  763|       |
  764|  40.6k|        while(u4_cnt)
  ------------------
  |  Branch (764:15): [True: 29.2k, False: 11.3k]
  ------------------
  765|  29.2k|        {
  766|  29.2k|            i2_level_arr[i--] = *pi2_trlone_lkup++;
  767|  29.2k|            u4_cnt--;
  768|  29.2k|        }
  769|  11.3k|    }
  770|       |
  771|       |    /****************************************************************/
  772|       |    /* Decoding Levels Begins                                       */
  773|       |    /****************************************************************/
  774|  17.1k|    if(i >= 0)
  ------------------
  |  Branch (774:8): [True: 17.1k, False: 0]
  ------------------
  775|  17.1k|    {
  776|       |        /****************************************************************/
  777|       |        /* First level is decoded outside the loop as it has lot of     */
  778|       |        /* special cases.                                               */
  779|       |        /****************************************************************/
  780|  17.1k|        UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size;
  781|  17.1k|        UWORD16 u2_lev_code, u2_abs_value;
  782|  17.1k|        UWORD32 u4_lev_prefix;
  783|       |
  784|  17.1k|        if(u4_trailing_ones < 3)
  ------------------
  |  Branch (784:12): [True: 8.97k, False: 8.17k]
  ------------------
  785|  8.97k|        {
  786|       |            /*********************************************************/
  787|       |            /* u4_suffix_len = 1                                     */
  788|       |            /*********************************************************/
  789|       |            /***************************************************************/
  790|       |            /* Find leading zeros in next 32 bits                          */
  791|       |            /***************************************************************/
  792|  8.97k|            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|  8.97k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|  8.97k|{                                                                           \
  |  |  167|  8.97k|    UWORD32 u4_word;                                                        \
  |  |  168|  8.97k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  8.97k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  8.97k|{                                                                           \
  |  |  |  |  152|  8.97k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  8.97k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  8.97k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  8.97k|                                                                            \
  |  |  |  |  156|  8.97k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  8.97k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 7.92k, False: 1.05k]
  |  |  |  |  ------------------
  |  |  |  |  158|  8.97k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  7.92k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  8.97k|}
  |  |  ------------------
  |  |  169|  8.97k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|  8.97k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|  8.97k|}
  ------------------
  793|  8.97k|                                  pu4_bitstrm_buf);
  794|       |
  795|  8.97k|            u4_lev_suffix_size =
  796|  8.97k|                            (15 <= u4_lev_prefix) ? (u4_lev_prefix - 3) : 1;
  ------------------
  |  Branch (796:29): [True: 3.40k, False: 5.57k]
  ------------------
  797|       |
  798|  8.97k|            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  8.97k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  8.97k|{                                                                           \
  |  |  122|  8.97k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  8.97k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  8.97k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  8.97k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  8.97k|                                                                            \
  |  |  127|  8.97k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 8.27k, False: 704]
  |  |  ------------------
  |  |  128|  8.97k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  8.27k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  8.97k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  8.97k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  8.97k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  8.97k|}                                                                           \
  ------------------
  799|  8.97k|                    u4_lev_suffix_size);
  800|  8.97k|            u2_lev_code = 2 + (MIN(u4_lev_prefix,15) << 1) + u4_lev_suffix;
  ------------------
  |  |   61|  8.97k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 5.57k, False: 3.40k]
  |  |  ------------------
  ------------------
  801|       |
  802|       |            //HP_LEVEL_PREFIX
  803|  8.97k|            if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (803:16): [True: 3.40k, False: 5.57k]
  ------------------
  804|  3.40k|            {
  805|  3.40k|                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
  806|  3.40k|            }
  807|  8.97k|        }
  808|  8.17k|        else
  809|  8.17k|        {
  810|       |            /*********************************************************/
  811|       |            /*u4_suffix_len = 0                                      */
  812|       |            /*********************************************************/
  813|       |            /***************************************************************/
  814|       |            /* Find leading zeros in next 32 bits                          */
  815|       |            /***************************************************************/
  816|  8.17k|            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|  8.17k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|  8.17k|{                                                                           \
  |  |  167|  8.17k|    UWORD32 u4_word;                                                        \
  |  |  168|  8.17k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  8.17k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  8.17k|{                                                                           \
  |  |  |  |  152|  8.17k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  8.17k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  8.17k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  8.17k|                                                                            \
  |  |  |  |  156|  8.17k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  8.17k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 7.73k, False: 442]
  |  |  |  |  ------------------
  |  |  |  |  158|  8.17k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  7.73k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  8.17k|}
  |  |  ------------------
  |  |  169|  8.17k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|  8.17k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|  8.17k|}
  ------------------
  817|  8.17k|                                  pu4_bitstrm_buf);
  818|       |
  819|       |            /*********************************************************/
  820|       |            /* Special decoding case when trailing ones are 3        */
  821|       |            /*********************************************************/
  822|  8.17k|            u2_lev_code = MIN(15, u4_lev_prefix);
  ------------------
  |  |   61|  8.17k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 609, False: 7.56k]
  |  |  ------------------
  ------------------
  823|       |
  824|  8.17k|            u2_lev_code += (3 == u4_trailing_ones) ? 0 : (2);
  ------------------
  |  Branch (824:28): [True: 8.17k, False: 0]
  ------------------
  825|       |
  826|  8.17k|            if(14 == u4_lev_prefix)
  ------------------
  |  Branch (826:16): [True: 72, False: 8.10k]
  ------------------
  827|     72|                u4_lev_suffix_size = 4;
  828|  8.10k|            else if(15 <= u4_lev_prefix)
  ------------------
  |  Branch (828:21): [True: 805, False: 7.29k]
  ------------------
  829|    805|            {
  830|    805|                u2_lev_code += 15;
  831|    805|                u4_lev_suffix_size = (u4_lev_prefix - 3);
  832|    805|            }
  833|  7.29k|            else
  834|  7.29k|                u4_lev_suffix_size = 0;
  835|       |
  836|       |            //HP_LEVEL_PREFIX
  837|  8.17k|            if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (837:16): [True: 609, False: 7.56k]
  ------------------
  838|    609|            {
  839|    609|                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
  840|    609|            }
  841|  8.17k|            if(u4_lev_suffix_size)
  ------------------
  |  Branch (841:16): [True: 877, False: 7.29k]
  ------------------
  842|    877|            {
  843|    877|                GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|    877|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|    877|{                                                                           \
  |  |  122|    877|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|    877|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|    877|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|    877|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|    877|                                                                            \
  |  |  127|    877|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 698, False: 179]
  |  |  ------------------
  |  |  128|    877|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|    698|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|    877|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|    877|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|    877|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|    877|}                                                                           \
  ------------------
  844|    877|                        u4_lev_suffix_size);
  845|    877|                u2_lev_code += u4_lev_suffix;
  846|    877|            }
  847|  8.17k|        }
  848|       |
  849|  17.1k|        u2_abs_value = (u2_lev_code + 2) >> 1;
  850|       |        /*********************************************************/
  851|       |        /* If Level code is odd, level is negative else positive */
  852|       |        /*********************************************************/
  853|  17.1k|        i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
  ------------------
  |  Branch (853:29): [True: 5.11k, False: 12.0k]
  ------------------
  854|       |
  855|  17.1k|        u4_suffix_len = (u2_abs_value > 3) ? 2 : 1;
  ------------------
  |  Branch (855:25): [True: 7.11k, False: 10.0k]
  ------------------
  856|       |
  857|       |        /*********************************************************/
  858|       |        /* Now loop over the remaining levels                    */
  859|       |        /*********************************************************/
  860|   223k|        while(i >= 0)
  ------------------
  |  Branch (860:15): [True: 205k, False: 17.1k]
  ------------------
  861|   205k|        {
  862|       |
  863|       |            /***************************************************************/
  864|       |            /* Find leading zeros in next 32 bits                          */
  865|       |            /***************************************************************/
  866|   205k|            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|   205k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|   205k|{                                                                           \
  |  |  167|   205k|    UWORD32 u4_word;                                                        \
  |  |  168|   205k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|   205k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|   205k|{                                                                           \
  |  |  |  |  152|   205k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|   205k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|   205k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|   205k|                                                                            \
  |  |  |  |  156|   205k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|   205k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 198k, False: 7.32k]
  |  |  |  |  ------------------
  |  |  |  |  158|   205k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|   198k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|   205k|}
  |  |  ------------------
  |  |  169|   205k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|   205k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|   205k|}
  ------------------
  867|   205k|                                  pu4_bitstrm_buf);
  868|       |
  869|   205k|            u4_lev_suffix_size =
  870|   205k|                            (15 <= u4_lev_prefix) ?
  ------------------
  |  Branch (870:29): [True: 58.1k, False: 147k]
  ------------------
  871|   147k|                                            (u4_lev_prefix - 3) : u4_suffix_len;
  872|       |
  873|       |            /*********************************************************/
  874|       |            /* Compute level code using prefix and suffix            */
  875|       |            /*********************************************************/
  876|   205k|            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|   205k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|   205k|{                                                                           \
  |  |  122|   205k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|   205k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|   205k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|   205k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|   205k|                                                                            \
  |  |  127|   205k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 196k, False: 9.09k]
  |  |  ------------------
  |  |  128|   205k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|   196k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|   205k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|   205k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|   205k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|   205k|}                                                                           \
  ------------------
  877|   205k|                    u4_lev_suffix_size);
  878|   205k|            u2_lev_code = (MIN(15,u4_lev_prefix) << u4_suffix_len)
  ------------------
  |  |   61|   205k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 57.9k, False: 147k]
  |  |  ------------------
  ------------------
  879|   205k|                            + u4_lev_suffix;
  880|       |
  881|       |            //HP_LEVEL_PREFIX
  882|   205k|            if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (882:16): [True: 57.9k, False: 147k]
  ------------------
  883|  57.9k|            {
  884|  57.9k|                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
  885|  57.9k|            }
  886|   205k|            u2_abs_value = (u2_lev_code + 2) >> 1;
  887|       |
  888|       |            /*********************************************************/
  889|       |            /* If Level code is odd, level is negative else positive */
  890|       |            /*********************************************************/
  891|   205k|            i2_level_arr[i--] =
  892|   205k|                            (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
  ------------------
  |  Branch (892:29): [True: 92.5k, False: 113k]
  ------------------
  893|       |
  894|       |            /*********************************************************/
  895|       |            /* Increment suffix length if required                   */
  896|       |            /*********************************************************/
  897|   205k|            u4_suffix_len +=
  898|   205k|                            (u4_suffix_len < 6) ?
  ------------------
  |  Branch (898:29): [True: 158k, False: 47.3k]
  ------------------
  899|   158k|                                            (u2_abs_value
  900|   158k|                                                            > (3
  901|   158k|                                                                            << (u4_suffix_len
  902|   158k|                                                                                            - 1))) :
  903|   205k|                                            0;
  904|   205k|        }
  905|       |
  906|       |        /****************************************************************/
  907|       |        /* Decoding Levels Ends                                         */
  908|       |        /****************************************************************/
  909|  17.1k|    }
  910|       |
  911|  17.1k|    if(u4_total_coeff < (16 - u4_isdc))
  ------------------
  |  Branch (911:8): [True: 7.06k, False: 10.0k]
  ------------------
  912|  7.06k|    {
  913|  7.06k|        UWORD32 u4_index;
  914|  7.06k|        const UWORD8 (*ppu1_total_zero_lkup)[16] =
  915|  7.06k|                        (const UWORD8 (*)[16])gau1_ih264d_table_total_zero_11to15;
  916|       |
  917|  7.06k|        NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 4);
  ------------------
  |  |  137|  7.06k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  7.06k|{                                                                           \
  |  |  139|  7.06k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  7.06k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  7.06k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  7.06k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  7.06k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 6.69k, False: 367]
  |  |  ------------------
  |  |  144|  7.06k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  6.69k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  7.06k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  7.06k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  7.06k|}
  ------------------
  918|  7.06k|        u4_total_zeroes = ppu1_total_zero_lkup[u4_total_coeff - 11][u4_index];
  919|       |
  920|  7.06k|        FLUSHBITS(u4_bitstream_offset, (u4_total_zeroes >> 4));
  ------------------
  |  |  193|  7.06k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  7.06k|{                                                                           \
  |  |  195|  7.06k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  7.06k|}
  ------------------
  921|  7.06k|        u4_total_zeroes &= 0xf;
  922|  7.06k|    }
  923|  10.0k|    else
  924|  10.0k|        u4_total_zeroes = 0;
  925|       |
  926|       |    /**************************************************************/
  927|       |    /* Decode the runs and form the coefficient buffer            */
  928|       |    /**************************************************************/
  929|  17.1k|    {
  930|  17.1k|        const UWORD8 *pu1_table_runbefore;
  931|  17.1k|        UWORD32 u4_run;
  932|  17.1k|        WORD32 k;
  933|  17.1k|        WORD32 u4_scan_pos = u4_total_coeff + u4_total_zeroes - 1 + u4_isdc;
  934|  17.1k|        WORD32 u4_zeroes_left = u4_total_zeroes;
  935|  17.1k|        k = u4_total_coeff - 1;
  936|       |
  937|       |        /**************************************************************/
  938|       |        /* Decoding Runs for 0 < zeros left <=6                       */
  939|       |        /**************************************************************/
  940|  17.1k|        pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before;
  941|  36.7k|        while((u4_zeroes_left > 0) && k)
  ------------------
  |  Branch (941:15): [True: 20.2k, False: 16.4k]
  |  Branch (941:39): [True: 19.5k, False: 678]
  ------------------
  942|  19.5k|        {
  943|  19.5k|            UWORD32 u4_code;
  944|  19.5k|            NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
  ------------------
  |  |  137|  19.5k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  19.5k|{                                                                           \
  |  |  139|  19.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  19.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  19.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  19.5k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  19.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 18.7k, False: 848]
  |  |  ------------------
  |  |  144|  19.5k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  18.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  19.5k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  19.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  19.5k|}
  ------------------
  945|       |
  946|  19.5k|            u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)];
  947|  19.5k|            u4_run = u4_code >> 2;
  948|       |
  949|  19.5k|            FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03));
  ------------------
  |  |  193|  19.5k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  19.5k|{                                                                           \
  |  |  195|  19.5k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  19.5k|}
  ------------------
  950|  19.5k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|  19.5k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  951|  19.5k|            *pi2_coeff_data++ = i2_level_arr[k--];
  952|  19.5k|            u4_zeroes_left -= (WORD32)u4_run;
  953|  19.5k|            u4_scan_pos -= (WORD32)(u4_run + 1);
  954|  19.5k|        }
  955|  17.1k|        if (u4_zeroes_left < 0 || u4_scan_pos < 0)
  ------------------
  |  Branch (955:13): [True: 0, False: 17.1k]
  |  Branch (955:35): [True: 0, False: 17.1k]
  ------------------
  956|      0|          return -1;
  957|       |
  958|       |        /**************************************************************/
  959|       |        /* Decoding Runs End                                          */
  960|       |        /**************************************************************/
  961|       |
  962|       |        /**************************************************************/
  963|       |        /* Copy the remaining coefficients                            */
  964|       |        /**************************************************************/
  965|   249k|        while(k >= 0)
  ------------------
  |  Branch (965:15): [True: 232k, False: 17.1k]
  ------------------
  966|   232k|        {
  967|   232k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|   232k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  968|   232k|            *pi2_coeff_data++ = i2_level_arr[k--];
  969|   232k|            u4_scan_pos--;
  970|   232k|        }
  971|  17.1k|    }
  972|       |
  973|      0|    {
  974|  17.1k|        WORD32 offset;
  975|  17.1k|        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
  976|  17.1k|        offset = ALIGN4(offset);
  ------------------
  |  |   52|  17.1k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
  977|  17.1k|        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
  978|  17.1k|    }
  979|       |
  980|  17.1k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
  981|  17.1k|    return 0;
  982|  17.1k|}
ih264d_rest_of_residual_cav_chroma_dc_block:
 1007|  86.0k|{
 1008|  86.0k|    UWORD32 u4_total_zeroes;
 1009|  86.0k|    WORD16 i;
 1010|  86.0k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1011|  86.0k|    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
 1012|  86.0k|    UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF;
 1013|  86.0k|    UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16;
 1014|       |    // To avoid error check at 4x4 level, allocating for 3 extra levels(4+3)
 1015|       |    // since u4_trailing_ones can at the max be 3. This will be required when
 1016|       |    // u4_total_coeff is less than u4_trailing_ones
 1017|  86.0k|    WORD16 ai2_level_arr[7];//
 1018|  86.0k|    WORD16 *i2_level_arr = &ai2_level_arr[3];
 1019|       |
 1020|  86.0k|    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
 1021|  86.0k|    WORD16 *pi2_coeff_data;
 1022|  86.0k|    dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
 1023|       |
 1024|  86.0k|    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
 1025|  86.0k|    ps_tu_4x4->u2_sig_coeff_map = 0;
 1026|  86.0k|    pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
 1027|       |
 1028|  86.0k|    i = u4_total_coeff - 1;
 1029|  86.0k|    if(u4_trailing_ones)
  ------------------
  |  Branch (1029:8): [True: 82.0k, False: 3.99k]
  ------------------
 1030|  82.0k|    {
 1031|       |        /*********************************************************************/
 1032|       |        /* Decode Trailing Ones                                              */
 1033|       |        /* read the sign of T1's and put them in level array                 */
 1034|       |        /*********************************************************************/
 1035|  82.0k|        UWORD32 u4_signs, u4_cnt = u4_trailing_ones;
 1036|  82.0k|        WORD16 (*ppi2_trlone_lkup)[3] =
 1037|  82.0k|                        (WORD16 (*)[3])gai2_ih264d_trailing_one_level;
 1038|  82.0k|        WORD16 *pi2_trlone_lkup;
 1039|       |
 1040|  82.0k|        GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt);
  ------------------
  |  |  120|  82.0k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  82.0k|{                                                                           \
  |  |  122|  82.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  82.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  82.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  82.0k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  82.0k|                                                                            \
  |  |  127|  82.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 78.9k, False: 3.12k]
  |  |  ------------------
  |  |  128|  82.0k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  78.9k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  82.0k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  82.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  82.0k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  82.0k|}                                                                           \
  ------------------
 1041|       |
 1042|  82.0k|        pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs];
 1043|       |
 1044|   190k|        while(u4_cnt)
  ------------------
  |  Branch (1044:15): [True: 107k, False: 82.0k]
  ------------------
 1045|   107k|        {
 1046|   107k|            i2_level_arr[i--] = *pi2_trlone_lkup++;
 1047|   107k|            u4_cnt--;
 1048|   107k|        }
 1049|  82.0k|    }
 1050|       |
 1051|       |    /****************************************************************/
 1052|       |    /* Decoding Levels Begins                                       */
 1053|       |    /****************************************************************/
 1054|  86.0k|    if(i >= 0)
  ------------------
  |  Branch (1054:8): [True: 13.3k, False: 72.7k]
  ------------------
 1055|  13.3k|    {
 1056|       |        /****************************************************************/
 1057|       |        /* First level is decoded outside the loop as it has lot of     */
 1058|       |        /* special cases.                                               */
 1059|       |        /****************************************************************/
 1060|  13.3k|        UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size;
 1061|  13.3k|        UWORD16 u2_lev_code, u2_abs_value;
 1062|  13.3k|        UWORD32 u4_lev_prefix;
 1063|       |
 1064|       |        /***************************************************************/
 1065|       |        /* u4_suffix_len = 0,  Find leading zeros in next 32 bits      */
 1066|       |        /***************************************************************/
 1067|  13.3k|        FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|  13.3k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|  13.3k|{                                                                           \
  |  |  167|  13.3k|    UWORD32 u4_word;                                                        \
  |  |  168|  13.3k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  13.3k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  13.3k|{                                                                           \
  |  |  |  |  152|  13.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  13.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  13.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  13.3k|                                                                            \
  |  |  |  |  156|  13.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  13.3k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 12.9k, False: 397]
  |  |  |  |  ------------------
  |  |  |  |  158|  13.3k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  12.9k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  13.3k|}
  |  |  ------------------
  |  |  169|  13.3k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|  13.3k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|  13.3k|}
  ------------------
 1068|  13.3k|                              pu4_bitstrm_buf);
 1069|       |
 1070|       |        /*********************************************************/
 1071|       |        /* Special decoding case when trailing ones are 3        */
 1072|       |        /*********************************************************/
 1073|  13.3k|        u2_lev_code = MIN(15, u4_lev_prefix);
  ------------------
  |  |   61|  13.3k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 2.19k, False: 11.1k]
  |  |  ------------------
  ------------------
 1074|       |
 1075|  13.3k|        u2_lev_code += (3 == u4_trailing_ones) ? 0 : (2);
  ------------------
  |  Branch (1075:24): [True: 4.35k, False: 8.98k]
  ------------------
 1076|       |
 1077|  13.3k|        if(14 == u4_lev_prefix)
  ------------------
  |  Branch (1077:12): [True: 208, False: 13.1k]
  ------------------
 1078|    208|            u4_lev_suffix_size = 4;
 1079|  13.1k|        else if(15 <= u4_lev_prefix)
  ------------------
  |  Branch (1079:17): [True: 2.24k, False: 10.8k]
  ------------------
 1080|  2.24k|        {
 1081|  2.24k|            u2_lev_code += 15;
 1082|  2.24k|            u4_lev_suffix_size = u4_lev_prefix - 3;
 1083|  2.24k|        }
 1084|  10.8k|        else
 1085|  10.8k|            u4_lev_suffix_size = 0;
 1086|       |
 1087|       |        //HP_LEVEL_PREFIX
 1088|  13.3k|        if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (1088:12): [True: 2.19k, False: 11.1k]
  ------------------
 1089|  2.19k|        {
 1090|  2.19k|            u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
 1091|  2.19k|        }
 1092|  13.3k|        if(u4_lev_suffix_size)
  ------------------
  |  Branch (1092:12): [True: 2.45k, False: 10.8k]
  ------------------
 1093|  2.45k|        {
 1094|  2.45k|            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  2.45k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.45k|{                                                                           \
  |  |  122|  2.45k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.45k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.45k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.45k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.45k|                                                                            \
  |  |  127|  2.45k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.09k, False: 354]
  |  |  ------------------
  |  |  128|  2.45k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.09k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.45k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.45k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.45k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.45k|}                                                                           \
  ------------------
 1095|  2.45k|                    u4_lev_suffix_size);
 1096|  2.45k|            u2_lev_code += u4_lev_suffix;
 1097|  2.45k|        }
 1098|       |
 1099|  13.3k|        u2_abs_value = (u2_lev_code + 2) >> 1;
 1100|       |        /*********************************************************/
 1101|       |        /* If Level code is odd, level is negative else positive */
 1102|       |        /*********************************************************/
 1103|  13.3k|        i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
  ------------------
  |  Branch (1103:29): [True: 2.30k, False: 11.0k]
  ------------------
 1104|       |
 1105|  13.3k|        u4_suffix_len = (u2_abs_value > 3) ? 2 : 1;
  ------------------
  |  Branch (1105:25): [True: 2.94k, False: 10.4k]
  ------------------
 1106|       |
 1107|       |        /*********************************************************/
 1108|       |        /* Now loop over the remaining levels                    */
 1109|       |        /*********************************************************/
 1110|  20.3k|        while(i >= 0)
  ------------------
  |  Branch (1110:15): [True: 6.99k, False: 13.3k]
  ------------------
 1111|  6.99k|        {
 1112|       |
 1113|       |            /***************************************************************/
 1114|       |            /* Find leading zeros in next 32 bits                          */
 1115|       |            /***************************************************************/
 1116|  6.99k|            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
  ------------------
  |  |  165|  6.99k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|  6.99k|{                                                                           \
  |  |  167|  6.99k|    UWORD32 u4_word;                                                        \
  |  |  168|  6.99k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  6.99k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  6.99k|{                                                                           \
  |  |  |  |  152|  6.99k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  6.99k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  6.99k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  6.99k|                                                                            \
  |  |  |  |  156|  6.99k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  6.99k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 6.70k, False: 294]
  |  |  |  |  ------------------
  |  |  |  |  158|  6.99k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  6.70k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  6.99k|}
  |  |  ------------------
  |  |  169|  6.99k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|  6.99k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|  6.99k|}
  ------------------
 1117|  6.99k|                                  pu4_bitstrm_buf);
 1118|       |
 1119|  6.99k|            u4_lev_suffix_size =
 1120|  6.99k|                            (15 <= u4_lev_prefix) ?
  ------------------
  |  Branch (1120:29): [True: 514, False: 6.48k]
  ------------------
 1121|  6.48k|                                            (u4_lev_prefix - 3) : u4_suffix_len;
 1122|       |
 1123|       |            /*********************************************************/
 1124|       |            /* Compute level code using prefix and suffix            */
 1125|       |            /*********************************************************/
 1126|  6.99k|            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  6.99k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  6.99k|{                                                                           \
  |  |  122|  6.99k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  6.99k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  6.99k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  6.99k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  6.99k|                                                                            \
  |  |  127|  6.99k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 6.51k, False: 479]
  |  |  ------------------
  |  |  128|  6.99k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  6.51k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  6.99k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  6.99k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  6.99k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  6.99k|}                                                                           \
  ------------------
 1127|  6.99k|                    u4_lev_suffix_size);
 1128|  6.99k|            u2_lev_code = (MIN(u4_lev_prefix,15) << u4_suffix_len)
  ------------------
  |  |   61|  6.99k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 6.48k, False: 514]
  |  |  ------------------
  ------------------
 1129|  6.99k|                            + u4_lev_suffix;
 1130|       |
 1131|       |            //HP_LEVEL_PREFIX
 1132|  6.99k|            if(16 <= u4_lev_prefix)
  ------------------
  |  Branch (1132:16): [True: 508, False: 6.48k]
  ------------------
 1133|    508|            {
 1134|    508|                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
 1135|    508|            }
 1136|  6.99k|            u2_abs_value = (u2_lev_code + 2) >> 1;
 1137|       |
 1138|       |            /*********************************************************/
 1139|       |            /* If Level code is odd, level is negative else positive */
 1140|       |            /*********************************************************/
 1141|  6.99k|            i2_level_arr[i--] =
 1142|  6.99k|                            (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
  ------------------
  |  Branch (1142:29): [True: 2.35k, False: 4.64k]
  ------------------
 1143|       |
 1144|       |            /*********************************************************/
 1145|       |            /* Increment suffix length if required                   */
 1146|       |            /*********************************************************/
 1147|  6.99k|            u4_suffix_len += (u2_abs_value > (3 << (u4_suffix_len - 1)));
 1148|  6.99k|        }
 1149|       |
 1150|       |        /****************************************************************/
 1151|       |        /* Decoding Levels Ends                                         */
 1152|       |        /****************************************************************/
 1153|  13.3k|    }
 1154|       |
 1155|  86.0k|    if(u4_total_coeff < 4)
  ------------------
  |  Branch (1155:8): [True: 80.0k, False: 6.01k]
  ------------------
 1156|  80.0k|    {
 1157|  80.0k|        UWORD32 u4_max_ldz = (4 - u4_total_coeff);
 1158|  80.0k|        FIND_ONE_IN_STREAM_LEN(u4_total_zeroes, u4_bitstream_offset,
  ------------------
  |  |  176|  80.0k|#define   FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_offset, pu4_bitstream, u4_len)  \
  |  |  177|  80.0k|{                                                                           \
  |  |  178|  80.0k|    UWORD32 u4_word;                                                        \
  |  |  179|  80.0k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|  80.0k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|  80.0k|{                                                                           \
  |  |  |  |  152|  80.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|  80.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|  80.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|  80.0k|                                                                            \
  |  |  |  |  156|  80.0k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|  80.0k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 77.3k, False: 2.72k]
  |  |  |  |  ------------------
  |  |  |  |  158|  80.0k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  77.3k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|  80.0k|}
  |  |  ------------------
  |  |  180|  80.0k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  181|  80.0k|    if(u4_ldz < u4_len)                                                     \
  |  |  ------------------
  |  |  |  Branch (181:8): [True: 67.2k, False: 12.8k]
  |  |  ------------------
  |  |  182|  80.0k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  183|  80.0k|    else                                                                    \
  |  |  184|  80.0k|    {                                                                       \
  |  |  185|  12.8k|        u4_ldz = u4_len;                                                    \
  |  |  186|  12.8k|        (u4_offset) += u4_ldz;                                              \
  |  |  187|  12.8k|    }                                                                       \
  |  |  188|  80.0k|}
  ------------------
 1159|  80.0k|                               pu4_bitstrm_buf, u4_max_ldz);
 1160|  80.0k|    }
 1161|  6.01k|    else
 1162|  6.01k|        u4_total_zeroes = 0;
 1163|       |
 1164|       |    /**************************************************************/
 1165|       |    /* Decode the runs and form the coefficient buffer            */
 1166|       |    /**************************************************************/
 1167|  86.0k|    {
 1168|  86.0k|        const UWORD8 *pu1_table_runbefore;
 1169|  86.0k|        UWORD32 u4_run;
 1170|  86.0k|        WORD32 u4_scan_pos = (u4_total_coeff + u4_total_zeroes - 1);
 1171|  86.0k|        UWORD32 u4_zeroes_left = u4_total_zeroes;
 1172|  86.0k|        i = u4_total_coeff - 1;
 1173|       |
 1174|       |        /**************************************************************/
 1175|       |        /* Decoding Runs for 0 < zeros left <=6                       */
 1176|       |        /**************************************************************/
 1177|  86.0k|        pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before;
 1178|  99.8k|        while(u4_zeroes_left && i)
  ------------------
  |  Branch (1178:15): [True: 37.1k, False: 62.7k]
  |  Branch (1178:33): [True: 13.7k, False: 23.3k]
  ------------------
 1179|  13.7k|        {
 1180|  13.7k|            UWORD32 u4_code;
 1181|  13.7k|            NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
  ------------------
  |  |  137|  13.7k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  13.7k|{                                                                           \
  |  |  139|  13.7k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  13.7k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  13.7k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  13.7k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  13.7k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 13.2k, False: 437]
  |  |  ------------------
  |  |  144|  13.7k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  13.2k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  13.7k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  13.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  13.7k|}
  ------------------
 1182|       |
 1183|  13.7k|            u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)];
 1184|  13.7k|            u4_run = u4_code >> 2;
 1185|       |
 1186|  13.7k|            FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03));
  ------------------
  |  |  193|  13.7k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  13.7k|{                                                                           \
  |  |  195|  13.7k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  13.7k|}
  ------------------
 1187|  13.7k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|  13.7k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1188|  13.7k|            *pi2_coeff_data++ = i2_level_arr[i--];
 1189|  13.7k|            u4_zeroes_left -= (WORD32)u4_run;
 1190|  13.7k|            u4_scan_pos -= (WORD32)(u4_run + 1);
 1191|  13.7k|        }
 1192|       |        /**************************************************************/
 1193|       |        /* Decoding Runs End                                          */
 1194|       |        /**************************************************************/
 1195|       |
 1196|       |        /**************************************************************/
 1197|       |        /* Copy the remaining coefficients                            */
 1198|       |        /**************************************************************/
 1199|   200k|        while(i >= 0)
  ------------------
  |  Branch (1199:15): [True: 114k, False: 86.0k]
  ------------------
 1200|   114k|        {
 1201|   114k|            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
  ------------------
  |  |  106|   114k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1202|   114k|            *pi2_coeff_data++ = i2_level_arr[i--];
 1203|   114k|            u4_scan_pos--;
 1204|   114k|        }
 1205|  86.0k|    }
 1206|       |
 1207|  86.0k|    {
 1208|  86.0k|        WORD32 offset;
 1209|  86.0k|        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
 1210|  86.0k|        offset = ALIGN4(offset);
  ------------------
  |  |   52|  86.0k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
 1211|  86.0k|        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
 1212|  86.0k|    }
 1213|       |
 1214|  86.0k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
 1215|  86.0k|}
ih264d_cavlc_parse4x4coeff_n0to7:
 1238|   555k|{
 1239|   555k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1240|   555k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1241|   555k|    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
 1242|   555k|    UWORD32 u4_code, u4_index, u4_ldz;
 1243|   555k|    const UWORD16 *pu2_code = (const UWORD16*)gau2_ih264d_code_gx;
 1244|   555k|    const UWORD16 *pu2_offset_num_vlc =
 1245|   555k|                    (const UWORD16 *)gau2_ih264d_offset_num_vlc_tab;
 1246|   555k|    UWORD32 u4_offset_num_vlc = pu2_offset_num_vlc[u4_n];
 1247|       |
 1248|       |
 1249|   555k|    UNUSED(pi2_coeff_block);
  ------------------
  |  |   45|   555k|#define UNUSED(x) ((void)(x))
  ------------------
 1250|   555k|    *pu4_total_coeff = 0;
 1251|   555k|    FIND_ONE_IN_STREAM_32(u4_ldz, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  165|   555k|#define   FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream)           \
  |  |  166|   555k|{                                                                           \
  |  |  167|   555k|    UWORD32 u4_word;                                                        \
  |  |  168|   555k|    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream);                         \
  |  |  ------------------
  |  |  |  |  150|   555k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  |  |  151|   555k|{                                                                           \
  |  |  |  |  152|   555k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  153|   555k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  154|   555k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  155|   555k|                                                                            \
  |  |  |  |  156|   555k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  157|   555k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (157:8): [True: 539k, False: 16.3k]
  |  |  |  |  ------------------
  |  |  |  |  158|   555k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|   539k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  159|   555k|}
  |  |  ------------------
  |  |  169|   555k|    u4_ldz = CLZ(u4_word);                                     \
  |  |  170|   555k|    (u4_offset) += (u4_ldz + 1);                                            \
  |  |  171|   555k|}
  ------------------
 1252|   555k|    NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 3);
  ------------------
  |  |  137|   555k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|   555k|{                                                                           \
  |  |  139|   555k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|   555k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|   555k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|   555k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|   555k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 535k, False: 20.1k]
  |  |  ------------------
  |  |  144|   555k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|   535k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|   555k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|   555k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|   555k|}
  ------------------
 1253|   555k|    u4_index += (u4_ldz << 3);
 1254|   555k|    u4_index += u4_offset_num_vlc;
 1255|       |
 1256|   555k|    u4_index = MIN(u4_index, 303);
  ------------------
  |  |   61|   555k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 551k, False: 4.34k]
  |  |  ------------------
  ------------------
 1257|   555k|    u4_code = pu2_code[u4_index];
 1258|       |
 1259|   555k|    FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03));
  ------------------
  |  |  193|   555k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|   555k|{                                                                           \
  |  |  195|   555k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|   555k|}
  ------------------
 1260|   555k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
 1261|   555k|    *pu4_total_coeff = (u4_code >> 4);
 1262|       |
 1263|   555k|    if(*pu4_total_coeff)
  ------------------
  |  Branch (1263:8): [True: 160k, False: 395k]
  ------------------
 1264|   160k|    {
 1265|   160k|        UWORD32 u4_trailing_ones, u4_offset, u4_total_coeff_tone;
 1266|   160k|        const UWORD8 *pu1_offset =
 1267|   160k|                        (UWORD8 *)gau1_ih264d_total_coeff_fn_ptr_offset;
 1268|   160k|        WORD32 ret;
 1269|   160k|        u4_trailing_ones = ((u4_code >> 2) & 0x03);
 1270|   160k|        u4_offset = pu1_offset[*pu4_total_coeff - 1];
 1271|   160k|        u4_total_coeff_tone = (*pu4_total_coeff << 16) | u4_trailing_ones;
 1272|       |
 1273|   160k|        ret = ps_dec->pf_cavlc_4x4res_block[u4_offset](u4_isdc,
 1274|   160k|                                                       u4_total_coeff_tone,
 1275|   160k|                                                       ps_bitstrm);
 1276|   160k|        if(ret != 0)
  ------------------
  |  Branch (1276:12): [True: 6.88k, False: 153k]
  ------------------
 1277|  6.88k|            return ERROR_CAVLC_NUM_COEFF_T;
 1278|   160k|    }
 1279|       |
 1280|   549k|    return OK;
  ------------------
  |  |  114|   549k|#define OK        0
  ------------------
 1281|   555k|}
ih264d_cavlc_parse4x4coeff_n8:
 1288|  22.6k|{
 1289|       |
 1290|  22.6k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1291|  22.6k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1292|  22.6k|    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
 1293|  22.6k|    UWORD32 u4_code;
 1294|  22.6k|    UNUSED(u4_n);
  ------------------
  |  |   45|  22.6k|#define UNUSED(x) ((void)(x))
  ------------------
 1295|  22.6k|    UNUSED(pi2_coeff_block);
  ------------------
  |  |   45|  22.6k|#define UNUSED(x) ((void)(x))
  ------------------
 1296|  22.6k|    GETBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 6);
  ------------------
  |  |  120|  22.6k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  22.6k|{                                                                           \
  |  |  122|  22.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  22.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  22.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  22.6k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  22.6k|                                                                            \
  |  |  127|  22.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 21.3k, False: 1.21k]
  |  |  ------------------
  |  |  128|  22.6k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  21.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  22.6k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  22.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  22.6k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  22.6k|}                                                                           \
  ------------------
 1297|  22.6k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
 1298|  22.6k|    *pu4_total_coeff = 0;
 1299|       |
 1300|  22.6k|    if(u4_code != 3)
  ------------------
  |  Branch (1300:8): [True: 22.0k, False: 563]
  ------------------
 1301|  22.0k|    {
 1302|  22.0k|        UWORD8 *pu1_offset = (UWORD8 *)gau1_ih264d_total_coeff_fn_ptr_offset;
 1303|  22.0k|        UWORD32 u4_trailing_ones, u4_offset, u4_total_coeff_tone;
 1304|       |
 1305|  22.0k|        *pu4_total_coeff = (u4_code >> 2) + 1;
 1306|  22.0k|        u4_trailing_ones = u4_code & 0x03;
 1307|  22.0k|        u4_offset = pu1_offset[*pu4_total_coeff - 1];
 1308|  22.0k|        u4_total_coeff_tone = (*pu4_total_coeff << 16) | u4_trailing_ones;
 1309|       |
 1310|  22.0k|        ps_dec->pf_cavlc_4x4res_block[u4_offset](u4_isdc,
 1311|  22.0k|                                                 u4_total_coeff_tone,
 1312|  22.0k|                                                 ps_bitstrm);
 1313|  22.0k|    }
 1314|       |
 1315|  22.6k|    return OK;
  ------------------
  |  |  114|  22.6k|#define OK        0
  ------------------
 1316|  22.6k|}
ih264d_cavlc_parse_chroma_dc:
 1339|  56.5k|{
 1340|  56.5k|    UWORD32 u4_total_coeff, u4_trailing_ones, u4_total_coeff_tone, u4_code;
 1341|  56.5k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1342|  56.5k|    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
 1343|  56.5k|    const UWORD8 *pu1_cav_chromdc = (const UWORD8*)gau1_ih264d_cav_chromdc_vld;
 1344|  56.5k|    UNUSED(i4_mb_inter_inc);
  ------------------
  |  |   45|  56.5k|#define UNUSED(x) ((void)(x))
  ------------------
 1345|       |    /******************************************************************/
 1346|       |    /*  Chroma DC Block for U component                               */
 1347|       |    /******************************************************************/
 1348|  56.5k|    NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 8);
  ------------------
  |  |  137|  56.5k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  56.5k|{                                                                           \
  |  |  139|  56.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  56.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  56.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  56.5k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  56.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 55.3k, False: 1.23k]
  |  |  ------------------
  |  |  144|  56.5k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  55.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  56.5k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  56.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  56.5k|}
  ------------------
 1349|       |
 1350|  56.5k|    u4_code = pu1_cav_chromdc[u4_code];
 1351|       |
 1352|  56.5k|    FLUSHBITS(u4_bitstream_offset, ((u4_code & 0x7) + 1));
  ------------------
  |  |  193|  56.5k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  56.5k|{                                                                           \
  |  |  195|  56.5k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  56.5k|}
  ------------------
 1353|  56.5k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
 1354|       |
 1355|  56.5k|    u4_total_coeff = (u4_code >> 5);
 1356|       |
 1357|  56.5k|    if(u4_total_coeff)
  ------------------
  |  Branch (1357:8): [True: 42.5k, False: 14.0k]
  ------------------
 1358|  42.5k|    {
 1359|  42.5k|        WORD32 i_z0, i_z1, i_z2, i_z3;
 1360|  42.5k|        tu_sblk4x4_coeff_data_t *ps_tu_4x4;
 1361|  42.5k|        dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
 1362|  42.5k|        WORD16 ai2_dc_coef[4];
 1363|  42.5k|        UWORD8 pu1_inv_scan[4] =
 1364|  42.5k|                        { 0, 1, 2, 3 };
 1365|  42.5k|        WORD16 *pi2_coeff_data =
 1366|  42.5k|                                    (WORD16 *)ps_dec->pv_parse_tu_coeff_data;
 1367|       |
 1368|  42.5k|        ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
 1369|       |
 1370|  42.5k|        u4_trailing_ones = ((u4_code >> 3) & 0x3);
 1371|  42.5k|        u4_total_coeff_tone = (u4_total_coeff << 16) | u4_trailing_ones;
 1372|  42.5k|        ih264d_rest_of_residual_cav_chroma_dc_block(u4_total_coeff_tone,
 1373|  42.5k|                                                    ps_bitstrm);
 1374|       |
 1375|  42.5k|        ai2_dc_coef[0] = 0;
 1376|  42.5k|        ai2_dc_coef[1] = 0;
 1377|  42.5k|        ai2_dc_coef[2] = 0;
 1378|  42.5k|        ai2_dc_coef[3] = 0;
 1379|       |
 1380|  42.5k|        ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
 1381|  42.5k|                                         ai2_dc_coef,
 1382|  42.5k|                                         pu1_inv_scan);
 1383|       |        /*-------------------------------------------------------------------*/
 1384|       |        /* Inverse 2x2 transform and scaling  of chroma DC                   */
 1385|       |        /*-------------------------------------------------------------------*/
 1386|  42.5k|        i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]);
 1387|  42.5k|        i_z1 = (ai2_dc_coef[0] - ai2_dc_coef[2]);
 1388|  42.5k|        i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]);
 1389|  42.5k|        i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]);
 1390|       |
 1391|       |        /*-----------------------------------------------------------*/
 1392|       |        /* Scaling and storing the values back                       */
 1393|       |        /*-----------------------------------------------------------*/
 1394|  42.5k|        *pi2_coeff_data++ = (WORD16)(((i_z0 + i_z3) * (WORD32)u4_scale_u) >> 5);
 1395|  42.5k|        *pi2_coeff_data++ = (WORD16)(((i_z0 - i_z3) * (WORD32)u4_scale_u) >> 5);
 1396|  42.5k|        *pi2_coeff_data++ = (WORD16)(((i_z1 + i_z2) * (WORD32)u4_scale_u) >> 5);
 1397|  42.5k|        *pi2_coeff_data++ = (WORD16)(((i_z1 - i_z2) * (WORD32)u4_scale_u) >> 5);
 1398|       |
 1399|  42.5k|        ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data;
 1400|       |
 1401|  42.5k|        SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,1);
  ------------------
  |  |  106|  42.5k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1402|  42.5k|    }
 1403|       |
 1404|       |    /******************************************************************/
 1405|       |    /*  Chroma DC Block for V component                               */
 1406|       |    /******************************************************************/
 1407|  56.5k|    pi2_coeff_block += 64;
 1408|  56.5k|    u4_bitstream_offset = ps_bitstrm->u4_ofst;
 1409|       |
 1410|  56.5k|    NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 8);
  ------------------
  |  |  137|  56.5k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|  56.5k|{                                                                           \
  |  |  139|  56.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|  56.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|  56.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|  56.5k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|  56.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 54.3k, False: 2.25k]
  |  |  ------------------
  |  |  144|  56.5k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  54.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|  56.5k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  56.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|  56.5k|}
  ------------------
 1411|       |
 1412|  56.5k|    u4_code = pu1_cav_chromdc[u4_code];
 1413|       |
 1414|  56.5k|    FLUSHBITS(u4_bitstream_offset, ((u4_code & 0x7) + 1));
  ------------------
  |  |  193|  56.5k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|  56.5k|{                                                                           \
  |  |  195|  56.5k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|  56.5k|}
  ------------------
 1415|  56.5k|    ps_bitstrm->u4_ofst = u4_bitstream_offset;
 1416|       |
 1417|  56.5k|    u4_total_coeff = (u4_code >> 5);
 1418|       |
 1419|  56.5k|    if(u4_total_coeff)
  ------------------
  |  Branch (1419:8): [True: 43.5k, False: 13.0k]
  ------------------
 1420|  43.5k|    {
 1421|  43.5k|        WORD32 i_z0, i_z1, i_z2, i_z3;
 1422|  43.5k|        tu_sblk4x4_coeff_data_t *ps_tu_4x4;
 1423|  43.5k|        dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
 1424|  43.5k|        WORD16 ai2_dc_coef[4];
 1425|  43.5k|        UWORD8 pu1_inv_scan[4] =
 1426|  43.5k|                        { 0, 1, 2, 3 };
 1427|  43.5k|        WORD16 *pi2_coeff_data =
 1428|  43.5k|                                    (WORD16 *)ps_dec->pv_parse_tu_coeff_data;
 1429|       |
 1430|  43.5k|        ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
 1431|       |
 1432|  43.5k|        u4_trailing_ones = ((u4_code >> 3) & 0x3);
 1433|  43.5k|        u4_total_coeff_tone = (u4_total_coeff << 16) | u4_trailing_ones;
 1434|  43.5k|        ih264d_rest_of_residual_cav_chroma_dc_block(u4_total_coeff_tone,
 1435|  43.5k|                                                    ps_bitstrm);
 1436|       |
 1437|  43.5k|        ai2_dc_coef[0] = 0;
 1438|  43.5k|        ai2_dc_coef[1] = 0;
 1439|  43.5k|        ai2_dc_coef[2] = 0;
 1440|  43.5k|        ai2_dc_coef[3] = 0;
 1441|       |
 1442|  43.5k|        ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
 1443|  43.5k|                                         ai2_dc_coef,
 1444|  43.5k|                                         pu1_inv_scan);
 1445|       |
 1446|       |        /*-------------------------------------------------------------------*/
 1447|       |        /* Inverse 2x2 transform and scaling  of chroma DC                   */
 1448|       |        /*-------------------------------------------------------------------*/
 1449|  43.5k|        i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]);
 1450|  43.5k|        i_z1 = (ai2_dc_coef[0] - ai2_dc_coef[2]);
 1451|  43.5k|        i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]);
 1452|  43.5k|        i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]);
 1453|       |
 1454|       |        /*-----------------------------------------------------------*/
 1455|       |        /* Scaling and storing the values back                       */
 1456|       |        /*-----------------------------------------------------------*/
 1457|  43.5k|        *pi2_coeff_data++ = (WORD16)(((i_z0 + i_z3) * (WORD32)u4_scale_v) >> 5);
 1458|  43.5k|        *pi2_coeff_data++ = (WORD16)(((i_z0 - i_z3) * (WORD32)u4_scale_v) >> 5);
 1459|  43.5k|        *pi2_coeff_data++ = (WORD16)(((i_z1 + i_z2) * (WORD32)u4_scale_v) >> 5);
 1460|  43.5k|        *pi2_coeff_data++ = (WORD16)(((i_z1 - i_z2) * (WORD32)u4_scale_v) >> 5);
 1461|       |
 1462|  43.5k|        ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data;
 1463|       |
 1464|  43.5k|        SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,2);
  ------------------
  |  |  106|  43.5k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1465|  43.5k|    }
 1466|  56.5k|}
ih264d_parse_pmb_ref_index_cavlc_range1:
 1496|  5.03k|{
 1497|  5.03k|    UWORD32 u4_i;
 1498|  5.03k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1499|  5.03k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
 1500|  5.03k|    UNUSED(u4_num_ref_idx_active_minus1);
  ------------------
  |  |   45|  5.03k|#define UNUSED(x) ((void)(x))
  ------------------
 1501|  13.7k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (1501:19): [True: 8.69k, False: 5.03k]
  ------------------
 1502|  8.69k|    {
 1503|  8.69k|        UWORD32 u4_ref_idx;
 1504|  8.69k|        u4_ref_idx = ih264d_tev_range1(pu4_bitstream_off, pu4_bitstrm_buf);
 1505|       |
 1506|       |        /* Storing Reference Idx Information */
 1507|  8.69k|        pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx;
 1508|  8.69k|    }
 1509|  5.03k|}
ih264d_parse_pmb_ref_index_cavlc:
 1540|  13.8k|{
 1541|  13.8k|    UWORD32 u4_i;
 1542|  13.8k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1543|  13.8k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
 1544|       |
 1545|  30.4k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (1545:19): [True: 17.0k, False: 13.3k]
  ------------------
 1546|  17.0k|    {
 1547|  17.0k|        UWORD32 u4_ref_idx;
 1548|       |//Inlined ih264d_uev
 1549|  17.0k|        UWORD32 u4_bitstream_offset = *pu4_bitstream_off;
 1550|  17.0k|        UWORD32 u4_word, u4_ldz;
 1551|       |
 1552|       |        /***************************************************************/
 1553|       |        /* Find leading zeros in next 32 bits                          */
 1554|       |        /***************************************************************/
 1555|  17.0k|        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  17.0k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  17.0k|{                                                                           \
  |  |  152|  17.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  17.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  17.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  17.0k|                                                                            \
  |  |  156|  17.0k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  17.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 16.6k, False: 411]
  |  |  ------------------
  |  |  158|  17.0k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  16.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  17.0k|}
  ------------------
 1556|  17.0k|        u4_ldz = CLZ(u4_word);
 1557|       |        /* Flush the ps_bitstrm */
 1558|  17.0k|        u4_bitstream_offset += (u4_ldz + 1);
 1559|       |        /* Read the suffix from the ps_bitstrm */
 1560|  17.0k|        u4_word = 0;
 1561|  17.0k|        if(u4_ldz)
  ------------------
  |  Branch (1561:12): [True: 5.48k, False: 11.5k]
  ------------------
 1562|  5.48k|            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  5.48k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  5.48k|{                                                                           \
  |  |  122|  5.48k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  5.48k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  5.48k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  5.48k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  5.48k|                                                                            \
  |  |  127|  5.48k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 5.23k, False: 246]
  |  |  ------------------
  |  |  128|  5.48k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  5.23k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  5.48k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  5.48k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  5.48k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  5.48k|}                                                                           \
  ------------------
 1563|  17.0k|        *pu4_bitstream_off = u4_bitstream_offset;
 1564|  17.0k|        u4_ref_idx = ((1 << u4_ldz) + u4_word - 1);
 1565|       |//Inlined ih264d_uev
 1566|       |
 1567|  17.0k|        if(u4_ref_idx > u4_num_ref_idx_active_minus1)
  ------------------
  |  Branch (1567:12): [True: 510, False: 16.5k]
  ------------------
 1568|    510|            return ERROR_REF_IDX;
 1569|       |
 1570|       |        /* Storing Reference Idx Information */
 1571|  16.5k|        pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx;
 1572|  16.5k|    }
 1573|  13.3k|    return OK;
  ------------------
  |  |  114|  13.3k|#define OK        0
  ------------------
 1574|  13.8k|}
ih264d_parse_bmb_ref_index_cavlc_range1:
 1604|  11.2k|{
 1605|  11.2k|    UWORD32 u4_i;
 1606|  11.2k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1607|  11.2k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
 1608|  11.2k|    UNUSED(u4_num_ref_idx_active_minus1);
  ------------------
  |  |   45|  11.2k|#define UNUSED(x) ((void)(x))
  ------------------
 1609|  30.3k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (1609:19): [True: 19.0k, False: 11.2k]
  ------------------
 1610|  19.0k|    {
 1611|  19.0k|        if(pi1_ref_idx[u4_i] > -1)
  ------------------
  |  Branch (1611:12): [True: 5.87k, False: 13.1k]
  ------------------
 1612|  5.87k|        {
 1613|  5.87k|            UWORD32 u4_ref_idx;
 1614|       |
 1615|  5.87k|            u4_ref_idx = ih264d_tev_range1(pu4_bitstream_off, pu4_bitstrm_buf);
 1616|       |
 1617|       |            /* Storing Reference Idx Information */
 1618|  5.87k|            pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx;
 1619|  5.87k|        }
 1620|  19.0k|    }
 1621|  11.2k|}
ih264d_parse_bmb_ref_index_cavlc:
 1651|  12.0k|{
 1652|  12.0k|    UWORD32 u4_i;
 1653|  12.0k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1654|  12.0k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
 1655|       |
 1656|  30.7k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (1656:19): [True: 19.1k, False: 11.5k]
  ------------------
 1657|  19.1k|    {
 1658|  19.1k|        if(pi1_ref_idx[u4_i] > -1)
  ------------------
  |  Branch (1658:12): [True: 14.3k, False: 4.75k]
  ------------------
 1659|  14.3k|        {
 1660|  14.3k|            UWORD32 u4_ref_idx;
 1661|       |//inlining ih264d_uev
 1662|  14.3k|            UWORD32 u4_bitstream_offset = *pu4_bitstream_off;
 1663|  14.3k|            UWORD32 u4_word, u4_ldz;
 1664|       |
 1665|       |            /***************************************************************/
 1666|       |            /* Find leading zeros in next 32 bits                          */
 1667|       |            /***************************************************************/
 1668|  14.3k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  14.3k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  14.3k|{                                                                           \
  |  |  152|  14.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  14.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  14.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  14.3k|                                                                            \
  |  |  156|  14.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  14.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 12.8k, False: 1.50k]
  |  |  ------------------
  |  |  158|  14.3k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  12.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  14.3k|}
  ------------------
 1669|  14.3k|            u4_ldz = CLZ(u4_word);
 1670|       |            /* Flush the ps_bitstrm */
 1671|  14.3k|            u4_bitstream_offset += (u4_ldz + 1);
 1672|       |            /* Read the suffix from the ps_bitstrm */
 1673|  14.3k|            u4_word = 0;
 1674|  14.3k|            if(u4_ldz)
  ------------------
  |  Branch (1674:16): [True: 2.43k, False: 11.9k]
  ------------------
 1675|  2.43k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  2.43k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.43k|{                                                                           \
  |  |  122|  2.43k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.43k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.43k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.43k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.43k|                                                                            \
  |  |  127|  2.43k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.19k, False: 239]
  |  |  ------------------
  |  |  128|  2.43k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.19k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.43k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.43k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.43k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.43k|}                                                                           \
  ------------------
 1676|  14.3k|            *pu4_bitstream_off = u4_bitstream_offset;
 1677|  14.3k|            u4_ref_idx = ((1 << u4_ldz) + u4_word - 1);
 1678|       |//inlining ih264d_uev
 1679|  14.3k|            if(u4_ref_idx > u4_num_ref_idx_active_minus1)
  ------------------
  |  Branch (1679:16): [True: 536, False: 13.8k]
  ------------------
 1680|    536|                return ERROR_REF_IDX;
 1681|       |
 1682|       |            /* Storing Reference Idx Information */
 1683|  13.8k|            pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx;
 1684|  13.8k|        }
 1685|  19.1k|    }
 1686|  11.5k|    return OK;
  ------------------
  |  |  114|  11.5k|#define OK        0
  ------------------
 1687|  12.0k|}
ih264d_cavlc_parse_8x8block_both_available:
 1736|  77.9k|{
 1737|  77.9k|    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
 1738|  77.9k|    UWORD32 u4_top0, u4_top1;
 1739|  77.9k|    UWORD32 *pu4_dummy;
 1740|  77.9k|    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
 1741|  77.9k|                                      UWORD32 u4_isdc,
 1742|  77.9k|                                      WORD32 u4_n,
 1743|  77.9k|                                      struct _DecStruct *ps_dec,
 1744|  77.9k|                                      UWORD32 *pu4_dummy) =
 1745|  77.9k|                                      ps_dec->pf_cavlc_parse4x4coeff;
 1746|  77.9k|    UWORD32 u4_idx = 0;
 1747|  77.9k|    UWORD8 *puc_temp;
 1748|  77.9k|    WORD32 ret;
 1749|       |
 1750|  77.9k|    *pu4_csbp = 0;
 1751|       |    /* need to change the inverse scan matrices here */
 1752|  77.9k|    puc_temp = ps_dec->pu1_inv_scan;
 1753|       |
 1754|       |    /*------------------------------------------------------*/
 1755|       |    /* Residual 4x4 decoding: SubBlock 0                    */
 1756|       |    /*------------------------------------------------------*/
 1757|  77.9k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (1757:8): [True: 35.8k, False: 42.1k]
  ------------------
 1758|  35.8k|    {
 1759|  35.8k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (1759:12): [True: 35.8k, False: 0]
  ------------------
 1760|  35.8k|        {
 1761|  35.8k|            ps_dec->pu1_inv_scan =
 1762|  35.8k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
 1763|  35.8k|        }
 1764|      0|        else
 1765|      0|        {
 1766|      0|            ps_dec->pu1_inv_scan =
 1767|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0];
 1768|      0|        }
 1769|  35.8k|    }
 1770|  77.9k|    u4_n = (pu1_top_nnz[0] + pu1_left_nnz[0] + 1) >> 1;
 1771|  77.9k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 1772|  77.9k|                                             u4_n, ps_dec, &u4_num_coeff);
 1773|  77.9k|    if(ret != OK)
  ------------------
  |  |  114|  77.9k|#define OK        0
  ------------------
  |  Branch (1773:8): [True: 576, False: 77.3k]
  ------------------
 1774|    576|        return ret;
 1775|       |
 1776|  77.3k|    u4_top0 = u4_num_coeff;
 1777|  77.3k|    u4_subblock_coded = (u4_num_coeff != 0);
 1778|  77.3k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  77.3k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  77.3k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1779|       |
 1780|       |    /*------------------------------------------------------*/
 1781|       |    /* Residual 4x4 decoding: SubBlock 1                    */
 1782|       |    /*------------------------------------------------------*/
 1783|  77.3k|    u4_idx++;
 1784|  77.3k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (1784:8): [True: 35.8k, False: 41.5k]
  ------------------
 1785|  35.8k|    {
 1786|  35.8k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (1786:12): [True: 35.8k, False: 0]
  ------------------
 1787|  35.8k|        {
 1788|  35.8k|            ps_dec->pu1_inv_scan =
 1789|  35.8k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
 1790|  35.8k|        }
 1791|      0|        else
 1792|      0|        {
 1793|      0|            ps_dec->pu1_inv_scan =
 1794|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1];
 1795|      0|        }
 1796|  35.8k|    }
 1797|  41.5k|    else
 1798|  41.5k|    {
 1799|  41.5k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  41.5k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1800|  41.5k|    }
 1801|  77.3k|    u4_n = (pu1_top_nnz[1] + u4_num_coeff + 1) >> 1;
 1802|  77.3k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 1803|  77.3k|                                             u4_n, ps_dec, &u4_num_coeff);
 1804|  77.3k|    if(ret != OK)
  ------------------
  |  |  114|  77.3k|#define OK        0
  ------------------
  |  Branch (1804:8): [True: 348, False: 77.0k]
  ------------------
 1805|    348|        return ret;
 1806|       |
 1807|  77.0k|    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
 1808|  77.0k|    u4_subblock_coded = (u4_num_coeff != 0);
 1809|  77.0k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  77.0k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  77.0k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1810|       |
 1811|       |    /*------------------------------------------------------*/
 1812|       |    /* Residual 4x4 decoding: SubBlock 2                    */
 1813|       |    /*------------------------------------------------------*/
 1814|  77.0k|    u4_idx += (u4_sub_block_strd - 1);
 1815|  77.0k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (1815:8): [True: 35.7k, False: 41.2k]
  ------------------
 1816|  35.7k|    {
 1817|  35.7k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (1817:12): [True: 35.7k, False: 0]
  ------------------
 1818|  35.7k|        {
 1819|  35.7k|            ps_dec->pu1_inv_scan =
 1820|  35.7k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
 1821|  35.7k|        }
 1822|      0|        else
 1823|      0|        {
 1824|      0|            ps_dec->pu1_inv_scan =
 1825|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2];
 1826|      0|        }
 1827|  35.7k|    }
 1828|  41.2k|    else
 1829|  41.2k|    {
 1830|  41.2k|        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  41.2k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1831|  41.2k|    }
 1832|  77.0k|    u4_n = (u4_top0 + pu1_left_nnz[1] + 1) >> 1;
 1833|  77.0k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 1834|  77.0k|                                             u4_n, ps_dec, &u4_num_coeff);
 1835|  77.0k|    if(ret != OK)
  ------------------
  |  |  114|  77.0k|#define OK        0
  ------------------
  |  Branch (1835:8): [True: 483, False: 76.5k]
  ------------------
 1836|    483|        return ret;
 1837|       |
 1838|  76.5k|    pu1_top_nnz[0] = u4_num_coeff;
 1839|  76.5k|    u4_subblock_coded = (u4_num_coeff != 0);
 1840|  76.5k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  76.5k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  76.5k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1841|       |
 1842|       |    /*------------------------------------------------------*/
 1843|       |    /* Residual 4x4 decoding: SubBlock 3                    */
 1844|       |    /*------------------------------------------------------*/
 1845|  76.5k|    u4_idx++;
 1846|  76.5k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (1846:8): [True: 35.7k, False: 40.8k]
  ------------------
 1847|  35.7k|    {
 1848|  35.7k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (1848:12): [True: 35.7k, False: 0]
  ------------------
 1849|  35.7k|        {
 1850|  35.7k|            ps_dec->pu1_inv_scan =
 1851|  35.7k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
 1852|  35.7k|        }
 1853|      0|        else
 1854|      0|        {
 1855|      0|            ps_dec->pu1_inv_scan =
 1856|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3];
 1857|      0|        }
 1858|  35.7k|    }
 1859|  40.8k|    else
 1860|  40.8k|    {
 1861|  40.8k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  40.8k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1862|  40.8k|    }
 1863|  76.5k|    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
 1864|  76.5k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 1865|  76.5k|                                             u4_n, ps_dec, &u4_num_coeff);
 1866|  76.5k|    if(ret != OK)
  ------------------
  |  |  114|  76.5k|#define OK        0
  ------------------
  |  Branch (1866:8): [True: 466, False: 76.1k]
  ------------------
 1867|    466|        return ret;
 1868|       |
 1869|  76.1k|    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
 1870|  76.1k|    u4_subblock_coded = (u4_num_coeff != 0);
 1871|  76.1k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  76.1k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  76.1k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1872|       |
 1873|  76.1k|    ps_dec->pu1_inv_scan = puc_temp;
 1874|       |
 1875|  76.1k|    return OK;
  ------------------
  |  |  114|  76.1k|#define OK        0
  ------------------
 1876|  76.5k|}
ih264d_cavlc_parse_8x8block_left_available:
 1926|  21.0k|{
 1927|  21.0k|    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
 1928|  21.0k|    UWORD32 u4_top0, u4_top1;
 1929|  21.0k|    UWORD32 *pu4_dummy;
 1930|  21.0k|    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
 1931|  21.0k|                                      UWORD32 u4_isdc,
 1932|  21.0k|                                      WORD32 u4_n,
 1933|  21.0k|                                      struct _DecStruct *ps_dec,
 1934|  21.0k|                                      UWORD32 *pu4_dummy) =
 1935|  21.0k|                                      ps_dec->pf_cavlc_parse4x4coeff;
 1936|  21.0k|    UWORD32 u4_idx = 0;
 1937|  21.0k|    UWORD8 *puc_temp;
 1938|  21.0k|    WORD32 ret;
 1939|       |
 1940|  21.0k|    *pu4_csbp = 0;
 1941|  21.0k|    puc_temp = ps_dec->pu1_inv_scan;
 1942|       |
 1943|       |    /*------------------------------------------------------*/
 1944|       |    /* Residual 4x4 decoding: SubBlock 0                    */
 1945|       |    /*------------------------------------------------------*/
 1946|  21.0k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (1946:8): [True: 6.78k, False: 14.2k]
  ------------------
 1947|  6.78k|    {
 1948|  6.78k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (1948:12): [True: 6.78k, False: 0]
  ------------------
 1949|  6.78k|        {
 1950|  6.78k|            ps_dec->pu1_inv_scan =
 1951|  6.78k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
 1952|  6.78k|        }
 1953|      0|        else
 1954|      0|        {
 1955|      0|            ps_dec->pu1_inv_scan =
 1956|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0];
 1957|      0|        }
 1958|  6.78k|    }
 1959|  21.0k|    u4_n = pu1_left_nnz[0];
 1960|  21.0k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 1961|  21.0k|                                             u4_n, ps_dec, &u4_num_coeff);
 1962|  21.0k|    if(ret != OK)
  ------------------
  |  |  114|  21.0k|#define OK        0
  ------------------
  |  Branch (1962:8): [True: 198, False: 20.8k]
  ------------------
 1963|    198|        return ret;
 1964|       |
 1965|  20.8k|    u4_top0 = u4_num_coeff;
 1966|  20.8k|    u4_subblock_coded = (u4_num_coeff != 0);
 1967|  20.8k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  20.8k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  20.8k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1968|       |
 1969|       |    /*------------------------------------------------------*/
 1970|       |    /* Residual 4x4 decoding: SubBlock 1                    */
 1971|       |    /*------------------------------------------------------*/
 1972|  20.8k|    u4_idx++;
 1973|  20.8k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (1973:8): [True: 6.77k, False: 14.1k]
  ------------------
 1974|  6.77k|    {
 1975|  6.77k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (1975:12): [True: 6.77k, False: 0]
  ------------------
 1976|  6.77k|        {
 1977|  6.77k|            ps_dec->pu1_inv_scan =
 1978|  6.77k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
 1979|  6.77k|        }
 1980|      0|        else
 1981|      0|        {
 1982|      0|            ps_dec->pu1_inv_scan =
 1983|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1];
 1984|      0|        }
 1985|  6.77k|    }
 1986|  14.1k|    else
 1987|  14.1k|    {
 1988|  14.1k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  14.1k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 1989|  14.1k|    }
 1990|  20.8k|    u4_n = u4_num_coeff;
 1991|  20.8k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 1992|  20.8k|                                             u4_n, ps_dec, &u4_num_coeff);
 1993|  20.8k|    if(ret != OK)
  ------------------
  |  |  114|  20.8k|#define OK        0
  ------------------
  |  Branch (1993:8): [True: 217, False: 20.6k]
  ------------------
 1994|    217|        return ret;
 1995|       |
 1996|  20.6k|    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
 1997|  20.6k|    u4_subblock_coded = (u4_num_coeff != 0);
 1998|  20.6k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  20.6k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  20.6k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 1999|       |
 2000|       |    /*------------------------------------------------------*/
 2001|       |    /* Residual 4x4 decoding: SubBlock 2                    */
 2002|       |    /*------------------------------------------------------*/
 2003|  20.6k|    u4_idx += (u4_sub_block_strd - 1);
 2004|  20.6k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2004:8): [True: 6.77k, False: 13.8k]
  ------------------
 2005|  6.77k|    {
 2006|  6.77k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2006:12): [True: 6.77k, False: 0]
  ------------------
 2007|  6.77k|        {
 2008|  6.77k|            ps_dec->pu1_inv_scan =
 2009|  6.77k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
 2010|  6.77k|        }
 2011|      0|        else
 2012|      0|        {
 2013|      0|            ps_dec->pu1_inv_scan =
 2014|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2];
 2015|      0|        }
 2016|  6.77k|    }
 2017|  13.8k|    else
 2018|  13.8k|    {
 2019|  13.8k|        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  13.8k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2020|  13.8k|    }
 2021|  20.6k|    u4_n = (u4_top0 + pu1_left_nnz[1] + 1) >> 1;
 2022|  20.6k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2023|  20.6k|                                             u4_n, ps_dec, &u4_num_coeff);
 2024|  20.6k|    if(ret != OK)
  ------------------
  |  |  114|  20.6k|#define OK        0
  ------------------
  |  Branch (2024:8): [True: 267, False: 20.4k]
  ------------------
 2025|    267|        return ret;
 2026|       |
 2027|  20.4k|    pu1_top_nnz[0] = u4_num_coeff;
 2028|  20.4k|    u4_subblock_coded = (u4_num_coeff != 0);
 2029|  20.4k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  20.4k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  20.4k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2030|       |
 2031|       |    /*------------------------------------------------------*/
 2032|       |    /* Residual 4x4 decoding: SubBlock 3                    */
 2033|       |    /*------------------------------------------------------*/
 2034|  20.4k|    u4_idx++;
 2035|  20.4k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2035:8): [True: 6.74k, False: 13.6k]
  ------------------
 2036|  6.74k|    {
 2037|  6.74k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2037:12): [True: 6.74k, False: 0]
  ------------------
 2038|  6.74k|        {
 2039|  6.74k|            ps_dec->pu1_inv_scan =
 2040|  6.74k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
 2041|  6.74k|        }
 2042|      0|        else
 2043|      0|        {
 2044|      0|            ps_dec->pu1_inv_scan =
 2045|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3];
 2046|      0|        }
 2047|  6.74k|    }
 2048|  13.6k|    else
 2049|  13.6k|    {
 2050|  13.6k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  13.6k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2051|  13.6k|    }
 2052|  20.4k|    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
 2053|  20.4k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2054|  20.4k|                                             u4_n, ps_dec, &u4_num_coeff);
 2055|  20.4k|    if(ret != OK)
  ------------------
  |  |  114|  20.4k|#define OK        0
  ------------------
  |  Branch (2055:8): [True: 315, False: 20.0k]
  ------------------
 2056|    315|        return ret;
 2057|       |
 2058|  20.0k|    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
 2059|  20.0k|    u4_subblock_coded = (u4_num_coeff != 0);
 2060|  20.0k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  20.0k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  20.0k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2061|       |
 2062|  20.0k|    ps_dec->pu1_inv_scan = puc_temp;
 2063|       |
 2064|  20.0k|    return OK;
  ------------------
  |  |  114|  20.0k|#define OK        0
  ------------------
 2065|  20.4k|}
ih264d_cavlc_parse_8x8block_top_available:
 2115|  23.2k|{
 2116|  23.2k|    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
 2117|  23.2k|    UWORD32 u4_top0, u4_top1;
 2118|  23.2k|    UWORD32 *pu4_dummy;
 2119|  23.2k|    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
 2120|  23.2k|                                      UWORD32 u4_isdc,
 2121|  23.2k|                                      WORD32 u4_n,
 2122|  23.2k|                                      struct _DecStruct *ps_dec,
 2123|  23.2k|                                      UWORD32 *pu4_dummy) =
 2124|  23.2k|                                      ps_dec->pf_cavlc_parse4x4coeff;
 2125|  23.2k|    UWORD32 u4_idx = 0;
 2126|  23.2k|    UWORD8 *puc_temp;
 2127|  23.2k|    WORD32 ret;
 2128|       |
 2129|  23.2k|    *pu4_csbp = 0;
 2130|  23.2k|    puc_temp = ps_dec->pu1_inv_scan;
 2131|       |
 2132|       |    /*------------------------------------------------------*/
 2133|       |    /* Residual 4x4 decoding: SubBlock 0                    */
 2134|       |    /*------------------------------------------------------*/
 2135|  23.2k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2135:8): [True: 4.90k, False: 18.3k]
  ------------------
 2136|  4.90k|    {
 2137|  4.90k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2137:12): [True: 4.90k, False: 0]
  ------------------
 2138|  4.90k|        {
 2139|  4.90k|            ps_dec->pu1_inv_scan =
 2140|  4.90k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
 2141|  4.90k|        }
 2142|      0|        else
 2143|      0|        {
 2144|      0|            ps_dec->pu1_inv_scan =
 2145|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0];
 2146|      0|        }
 2147|  4.90k|    }
 2148|  23.2k|    u4_n = pu1_top_nnz[0];
 2149|  23.2k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2150|  23.2k|                                             u4_n, ps_dec, &u4_num_coeff);
 2151|  23.2k|    if(ret != OK)
  ------------------
  |  |  114|  23.2k|#define OK        0
  ------------------
  |  Branch (2151:8): [True: 323, False: 22.8k]
  ------------------
 2152|    323|        return ret;
 2153|       |
 2154|  22.8k|    u4_top0 = u4_num_coeff;
 2155|  22.8k|    u4_subblock_coded = (u4_num_coeff != 0);
 2156|  22.8k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  22.8k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  22.8k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2157|       |
 2158|       |    /*------------------------------------------------------*/
 2159|       |    /* Residual 4x4 decoding: SubBlock 1                    */
 2160|       |    /*------------------------------------------------------*/
 2161|  22.8k|    u4_idx++;
 2162|  22.8k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2162:8): [True: 4.87k, False: 18.0k]
  ------------------
 2163|  4.87k|    {
 2164|  4.87k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2164:12): [True: 4.87k, False: 0]
  ------------------
 2165|  4.87k|        {
 2166|  4.87k|            ps_dec->pu1_inv_scan =
 2167|  4.87k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
 2168|  4.87k|        }
 2169|      0|        else
 2170|      0|        {
 2171|      0|            ps_dec->pu1_inv_scan =
 2172|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1];
 2173|      0|        }
 2174|  4.87k|    }
 2175|  18.0k|    else
 2176|  18.0k|    {
 2177|  18.0k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  18.0k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2178|  18.0k|    }
 2179|  22.8k|    u4_n = (pu1_top_nnz[1] + u4_num_coeff + 1) >> 1;
 2180|  22.8k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2181|  22.8k|                                             u4_n, ps_dec, &u4_num_coeff);
 2182|  22.8k|    if(ret != OK)
  ------------------
  |  |  114|  22.8k|#define OK        0
  ------------------
  |  Branch (2182:8): [True: 343, False: 22.5k]
  ------------------
 2183|    343|        return ret;
 2184|       |
 2185|  22.5k|    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
 2186|  22.5k|    u4_subblock_coded = (u4_num_coeff != 0);
 2187|  22.5k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  22.5k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  22.5k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2188|       |
 2189|       |    /*------------------------------------------------------*/
 2190|       |    /* Residual 4x4 decoding: SubBlock 2                    */
 2191|       |    /*------------------------------------------------------*/
 2192|  22.5k|    u4_idx += (u4_sub_block_strd - 1);
 2193|  22.5k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2193:8): [True: 4.86k, False: 17.6k]
  ------------------
 2194|  4.86k|    {
 2195|  4.86k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2195:12): [True: 4.86k, False: 0]
  ------------------
 2196|  4.86k|        {
 2197|  4.86k|            ps_dec->pu1_inv_scan =
 2198|  4.86k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
 2199|  4.86k|        }
 2200|      0|        else
 2201|      0|        {
 2202|      0|            ps_dec->pu1_inv_scan =
 2203|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2];
 2204|      0|        }
 2205|  4.86k|    }
 2206|  17.6k|    else
 2207|  17.6k|    {
 2208|  17.6k|        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  17.6k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2209|  17.6k|    }
 2210|  22.5k|    u4_n = u4_top0;
 2211|  22.5k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2212|  22.5k|                                             u4_n, ps_dec, &u4_num_coeff);
 2213|  22.5k|    if(ret != OK)
  ------------------
  |  |  114|  22.5k|#define OK        0
  ------------------
  |  Branch (2213:8): [True: 430, False: 22.1k]
  ------------------
 2214|    430|        return ret;
 2215|       |
 2216|  22.1k|    pu1_top_nnz[0] = u4_num_coeff;
 2217|  22.1k|    u4_subblock_coded = (u4_num_coeff != 0);
 2218|  22.1k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  22.1k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  22.1k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2219|       |
 2220|       |    /*------------------------------------------------------*/
 2221|       |    /* Residual 4x4 decoding: SubBlock 3                    */
 2222|       |    /*------------------------------------------------------*/
 2223|  22.1k|    u4_idx++;
 2224|  22.1k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2224:8): [True: 4.85k, False: 17.2k]
  ------------------
 2225|  4.85k|    {
 2226|  4.85k|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2226:12): [True: 4.85k, False: 0]
  ------------------
 2227|  4.85k|        {
 2228|  4.85k|            ps_dec->pu1_inv_scan =
 2229|  4.85k|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
 2230|  4.85k|        }
 2231|      0|        else
 2232|      0|        {
 2233|      0|            ps_dec->pu1_inv_scan =
 2234|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3];
 2235|      0|        }
 2236|  4.85k|    }
 2237|  17.2k|    else
 2238|  17.2k|    {
 2239|  17.2k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  17.2k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2240|  17.2k|    }
 2241|  22.1k|    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
 2242|  22.1k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2243|  22.1k|                                             u4_n, ps_dec, &u4_num_coeff);
 2244|  22.1k|    if(ret != OK)
  ------------------
  |  |  114|  22.1k|#define OK        0
  ------------------
  |  Branch (2244:8): [True: 539, False: 21.5k]
  ------------------
 2245|    539|        return ret;
 2246|       |
 2247|  21.5k|    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
 2248|  21.5k|    u4_subblock_coded = (u4_num_coeff != 0);
 2249|  21.5k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  21.5k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  21.5k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2250|       |
 2251|  21.5k|    ps_dec->pu1_inv_scan = puc_temp;
 2252|       |
 2253|  21.5k|    return OK;
  ------------------
  |  |  114|  21.5k|#define OK        0
  ------------------
 2254|  22.1k|}
ih264d_cavlc_parse_8x8block_none_available:
 2304|  13.0k|{
 2305|  13.0k|    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
 2306|  13.0k|    UWORD32 u4_top0, u4_top1;
 2307|  13.0k|    UWORD32 *pu4_dummy;
 2308|  13.0k|    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
 2309|  13.0k|                                      UWORD32 u4_isdc,
 2310|  13.0k|                                      WORD32 u4_n,
 2311|  13.0k|                                      struct _DecStruct *ps_dec,
 2312|  13.0k|                                      UWORD32 *pu4_dummy) =
 2313|  13.0k|                                      ps_dec->pf_cavlc_parse4x4coeff;
 2314|  13.0k|    UWORD32 u4_idx = 0;
 2315|  13.0k|    UWORD8 *puc_temp;
 2316|  13.0k|    WORD32 ret;
 2317|       |
 2318|  13.0k|    *pu4_csbp = 0;
 2319|  13.0k|    puc_temp = ps_dec->pu1_inv_scan;
 2320|       |
 2321|       |    /*------------------------------------------------------*/
 2322|       |    /* Residual 4x4 decoding: SubBlock 0                    */
 2323|       |    /*------------------------------------------------------*/
 2324|  13.0k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2324:8): [True: 538, False: 12.5k]
  ------------------
 2325|    538|    {
 2326|    538|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2326:12): [True: 538, False: 0]
  ------------------
 2327|    538|        {
 2328|    538|            ps_dec->pu1_inv_scan =
 2329|    538|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
 2330|    538|        }
 2331|      0|        else
 2332|      0|        {
 2333|      0|            ps_dec->pu1_inv_scan =
 2334|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0];
 2335|      0|        }
 2336|    538|    }
 2337|  13.0k|    ret = pf_cavlc_parse4x4coeff[0](pi2_coeff_block, u4_isdc, 0,
 2338|  13.0k|                                    ps_dec, &u4_num_coeff);
 2339|  13.0k|    if(ret != OK)
  ------------------
  |  |  114|  13.0k|#define OK        0
  ------------------
  |  Branch (2339:8): [True: 1.19k, False: 11.8k]
  ------------------
 2340|  1.19k|        return ret;
 2341|       |
 2342|  11.8k|    u4_top0 = u4_num_coeff;
 2343|  11.8k|    u4_subblock_coded = (u4_num_coeff != 0);
 2344|  11.8k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  11.8k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  11.8k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2345|       |
 2346|       |    /*------------------------------------------------------*/
 2347|       |    /* Residual 4x4 decoding: SubBlock 1                    */
 2348|       |    /*------------------------------------------------------*/
 2349|  11.8k|    u4_idx++;
 2350|  11.8k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2350:8): [True: 531, False: 11.3k]
  ------------------
 2351|    531|    {
 2352|    531|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2352:12): [True: 531, False: 0]
  ------------------
 2353|    531|        {
 2354|    531|            ps_dec->pu1_inv_scan =
 2355|    531|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
 2356|    531|        }
 2357|      0|        else
 2358|      0|        {
 2359|      0|            ps_dec->pu1_inv_scan =
 2360|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1];
 2361|      0|        }
 2362|    531|    }
 2363|  11.3k|    else
 2364|  11.3k|    {
 2365|  11.3k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  11.3k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2366|  11.3k|    }
 2367|  11.8k|    u4_n = u4_num_coeff;
 2368|  11.8k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2369|  11.8k|                                             u4_n, ps_dec, &u4_num_coeff);
 2370|  11.8k|    if(ret != OK)
  ------------------
  |  |  114|  11.8k|#define OK        0
  ------------------
  |  Branch (2370:8): [True: 331, False: 11.5k]
  ------------------
 2371|    331|        return ret;
 2372|       |
 2373|  11.5k|    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
 2374|  11.5k|    u4_subblock_coded = (u4_num_coeff != 0);
 2375|  11.5k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  11.5k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  11.5k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2376|       |
 2377|       |    /*------------------------------------------------------*/
 2378|       |    /* Residual 4x4 decoding: SubBlock 2                    */
 2379|       |    /*------------------------------------------------------*/
 2380|  11.5k|    u4_idx += (u4_sub_block_strd - 1);
 2381|  11.5k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2381:8): [True: 529, False: 11.0k]
  ------------------
 2382|    529|    {
 2383|    529|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2383:12): [True: 529, False: 0]
  ------------------
 2384|    529|        {
 2385|    529|            ps_dec->pu1_inv_scan =
 2386|    529|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
 2387|    529|        }
 2388|      0|        else
 2389|      0|        {
 2390|      0|            ps_dec->pu1_inv_scan =
 2391|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2];
 2392|      0|        }
 2393|    529|    }
 2394|  11.0k|    else
 2395|  11.0k|    {
 2396|  11.0k|        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  11.0k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2397|  11.0k|    }
 2398|  11.5k|    u4_n = u4_top0;
 2399|  11.5k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2400|  11.5k|                                             u4_n, ps_dec, &u4_num_coeff);
 2401|  11.5k|    if(ret != OK)
  ------------------
  |  |  114|  11.5k|#define OK        0
  ------------------
  |  Branch (2401:8): [True: 194, False: 11.3k]
  ------------------
 2402|    194|        return ret;
 2403|       |
 2404|  11.3k|    pu1_top_nnz[0] = u4_num_coeff;
 2405|  11.3k|    u4_subblock_coded = (u4_num_coeff != 0);
 2406|  11.3k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  11.3k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  11.3k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2407|       |
 2408|       |    /*------------------------------------------------------*/
 2409|       |    /* Residual 4x4 decoding: SubBlock 3                    */
 2410|       |    /*------------------------------------------------------*/
 2411|  11.3k|    u4_idx++;
 2412|  11.3k|    if(u1_tran_form8x8)
  ------------------
  |  Branch (2412:8): [True: 518, False: 10.8k]
  ------------------
 2413|    518|    {
 2414|    518|        if(!u1_mb_field_decodingflag)
  ------------------
  |  Branch (2414:12): [True: 518, False: 0]
  ------------------
 2415|    518|        {
 2416|    518|            ps_dec->pu1_inv_scan =
 2417|    518|                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
 2418|    518|        }
 2419|      0|        else
 2420|      0|        {
 2421|      0|            ps_dec->pu1_inv_scan =
 2422|      0|                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3];
 2423|      0|        }
 2424|    518|    }
 2425|  10.8k|    else
 2426|  10.8k|    {
 2427|  10.8k|        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
  ------------------
  |  |  617|  10.8k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2428|  10.8k|    }
 2429|  11.3k|    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
 2430|  11.3k|    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
 2431|  11.3k|                                             u4_n, ps_dec, &u4_num_coeff);
 2432|  11.3k|    if(ret != OK)
  ------------------
  |  |  114|  11.3k|#define OK        0
  ------------------
  |  Branch (2432:8): [True: 382, False: 10.9k]
  ------------------
 2433|    382|        return ret;
 2434|       |
 2435|  10.9k|    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
 2436|  10.9k|    u4_subblock_coded = (u4_num_coeff != 0);
 2437|  10.9k|    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
  ------------------
  |  |  109|  10.9k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  10.9k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
 2438|       |
 2439|  10.9k|    ps_dec->pu1_inv_scan = puc_temp;
 2440|       |
 2441|  10.9k|    return OK;
  ------------------
  |  |  114|  10.9k|#define OK        0
  ------------------
 2442|  11.3k|}
ih264d_parse_residual4x4_cavlc:
 2459|  94.0k|{
 2460|  94.0k|    UWORD8 u1_cbp = ps_cur_mb_info->u1_cbp;
 2461|  94.0k|    UWORD16 ui16_csbp = 0;
 2462|  94.0k|    UWORD32 u4_nbr_avl;
 2463|  94.0k|    WORD16 *pi2_residual_buf;
 2464|       |
 2465|  94.0k|    UWORD8 u1_is_top_mb_avail;
 2466|  94.0k|    UWORD8 u1_is_left_mb_avail;
 2467|       |
 2468|  94.0k|    UWORD8 *pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
 2469|  94.0k|    UWORD8 *pu1_left_nnz = ps_dec->pu1_left_nnz_y;
 2470|  94.0k|    WORD16 *pi2_coeff_block = NULL;
 2471|  94.0k|    UWORD32 *pu4_dummy;
 2472|  94.0k|    WORD32 ret;
 2473|       |
 2474|  94.0k|    WORD32 (**pf_cavlc_parse_8x8block)(WORD16 *pi2_coeff_block,
 2475|  94.0k|                                       UWORD32 u4_sub_block_strd,
 2476|  94.0k|                                       UWORD32 u4_isdc,
 2477|  94.0k|                                       struct _DecStruct *ps_dec,
 2478|  94.0k|                                       UWORD8 *pu1_top_nnz,
 2479|  94.0k|                                       UWORD8 *pu1_left_nnz,
 2480|  94.0k|                                       UWORD8 u1_tran_form8x8,
 2481|  94.0k|                                       UWORD8 u1_mb_field_decodingflag,
 2482|  94.0k|                                       UWORD32 *pu4_dummy) = ps_dec->pf_cavlc_parse_8x8block;
 2483|       |
 2484|       |
 2485|  94.0k|    {
 2486|  94.0k|        UWORD8 uc_temp = ps_dec->u1_mb_ngbr_availablity;
 2487|  94.0k|        u1_is_top_mb_avail = BOOLEAN(uc_temp & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|  94.0k|#define BOOLEAN(x) (!!(x))
  ------------------
 2488|  94.0k|        u1_is_left_mb_avail = BOOLEAN(uc_temp & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|  94.0k|#define BOOLEAN(x) (!!(x))
  ------------------
 2489|  94.0k|        u4_nbr_avl = (u1_is_top_mb_avail << 1) | u1_is_left_mb_avail;
 2490|  94.0k|    }
 2491|       |
 2492|  94.0k|    ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6;
 2493|  94.0k|    ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6;
 2494|  94.0k|    ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6;
 2495|  94.0k|    ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6;
 2496|  94.0k|    ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6;
 2497|  94.0k|    ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6;
 2498|       |
 2499|  94.0k|    if(u1_cbp & 0xf)
  ------------------
  |  Branch (2499:8): [True: 52.2k, False: 41.7k]
  ------------------
 2500|  52.2k|    {
 2501|  52.2k|        pu1_top_nnz[0] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0];
 2502|  52.2k|        pu1_top_nnz[1] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[1];
 2503|  52.2k|        pu1_top_nnz[2] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[2];
 2504|  52.2k|        pu1_top_nnz[3] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[3];
 2505|       |
 2506|       |        /*******************************************************************/
 2507|       |        /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */
 2508|       |        /*******************************************************************/
 2509|  52.2k|        if(!(u1_cbp & 0x1))
  ------------------
  |  Branch (2509:12): [True: 9.08k, False: 43.1k]
  ------------------
 2510|  9.08k|        {
 2511|  9.08k|            *(UWORD16 *)(pu1_top_nnz) = 0;
 2512|  9.08k|            *(UWORD16 *)(pu1_left_nnz) = 0;
 2513|       |
 2514|  9.08k|        }
 2515|  43.1k|        else
 2516|  43.1k|        {
 2517|  43.1k|            UWORD32 u4_temp;
 2518|  43.1k|            ret = pf_cavlc_parse_8x8block[u4_nbr_avl](
 2519|  43.1k|                        pi2_coeff_block, 4, u1_offset, ps_dec, pu1_top_nnz,
 2520|  43.1k|                        pu1_left_nnz, ps_cur_mb_info->u1_tran_form8x8,
 2521|  43.1k|                        ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
 2522|  43.1k|            if(ret != OK)
  ------------------
  |  |  114|  43.1k|#define OK        0
  ------------------
  |  Branch (2522:16): [True: 1.10k, False: 42.0k]
  ------------------
 2523|  1.10k|                return ret;
 2524|  42.0k|            ui16_csbp = u4_temp;
 2525|  42.0k|        }
 2526|       |
 2527|       |        /*******************************************************************/
 2528|       |        /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */
 2529|       |        /*******************************************************************/
 2530|  51.1k|        if(ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (2530:12): [True: 16.6k, False: 34.5k]
  ------------------
 2531|  16.6k|        {
 2532|  16.6k|            pi2_coeff_block += 64;
 2533|  16.6k|        }
 2534|  34.5k|        else
 2535|  34.5k|        {
 2536|  34.5k|            pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  34.5k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2537|  34.5k|        }
 2538|       |
 2539|  51.1k|        if(!(u1_cbp & 0x2))
  ------------------
  |  Branch (2539:12): [True: 31.2k, False: 19.8k]
  ------------------
 2540|  31.2k|        {
 2541|  31.2k|            *(UWORD16 *)(pu1_top_nnz + 2) = 0;
 2542|  31.2k|            *(UWORD16 *)(pu1_left_nnz) = 0;
 2543|  31.2k|        }
 2544|  19.8k|        else
 2545|  19.8k|        {
 2546|  19.8k|            UWORD32 u4_temp = (u4_nbr_avl | 0x1);
 2547|  19.8k|            ret = pf_cavlc_parse_8x8block[u4_temp](
 2548|  19.8k|                        pi2_coeff_block, 4, u1_offset, ps_dec,
 2549|  19.8k|                        (pu1_top_nnz + 2), pu1_left_nnz,
 2550|  19.8k|                        ps_cur_mb_info->u1_tran_form8x8,
 2551|  19.8k|                        ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
 2552|  19.8k|            if(ret != OK)
  ------------------
  |  |  114|  19.8k|#define OK        0
  ------------------
  |  Branch (2552:16): [True: 1.03k, False: 18.8k]
  ------------------
 2553|  1.03k|                return ret;
 2554|  18.8k|            ui16_csbp |= (u4_temp << 2);
 2555|  18.8k|        }
 2556|       |
 2557|       |        /*******************************************************************/
 2558|       |        /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */
 2559|       |        /*******************************************************************/
 2560|  50.1k|        if(ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (2560:12): [True: 16.5k, False: 33.5k]
  ------------------
 2561|  16.5k|        {
 2562|  16.5k|            pi2_coeff_block += 64;
 2563|  16.5k|        }
 2564|  33.5k|        else
 2565|  33.5k|        {
 2566|  33.5k|            pi2_coeff_block += (6 * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  33.5k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2567|  33.5k|        }
 2568|       |
 2569|  50.1k|        if(!(u1_cbp & 0x4))
  ------------------
  |  Branch (2569:12): [True: 29.7k, False: 20.3k]
  ------------------
 2570|  29.7k|        {
 2571|  29.7k|            *(UWORD16 *)(pu1_top_nnz) = 0;
 2572|  29.7k|            *(UWORD16 *)(pu1_left_nnz + 2) = 0;
 2573|  29.7k|        }
 2574|  20.3k|        else
 2575|  20.3k|        {
 2576|  20.3k|            UWORD32 u4_temp = (u4_nbr_avl | 0x2);
 2577|  20.3k|            ret = pf_cavlc_parse_8x8block[u4_temp](
 2578|  20.3k|                        pi2_coeff_block, 4, u1_offset, ps_dec, pu1_top_nnz,
 2579|  20.3k|                        (pu1_left_nnz + 2), ps_cur_mb_info->u1_tran_form8x8,
 2580|  20.3k|                        ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
 2581|  20.3k|            if(ret != OK)
  ------------------
  |  |  114|  20.3k|#define OK        0
  ------------------
  |  Branch (2581:16): [True: 1.42k, False: 18.9k]
  ------------------
 2582|  1.42k|                return ret;
 2583|  18.9k|            ui16_csbp |= (u4_temp << 8);
 2584|  18.9k|        }
 2585|       |
 2586|       |        /*******************************************************************/
 2587|       |        /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/
 2588|       |        /*******************************************************************/
 2589|  48.7k|        if(ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (2589:12): [True: 16.4k, False: 32.2k]
  ------------------
 2590|  16.4k|        {
 2591|  16.4k|            pi2_coeff_block += 64;
 2592|  16.4k|        }
 2593|  32.2k|        else
 2594|  32.2k|        {
 2595|  32.2k|            pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK);
  ------------------
  |  |  617|  32.2k|#define NUM_COEFFS_IN_4x4BLK 16
  ------------------
 2596|  32.2k|        }
 2597|       |
 2598|  48.7k|        if(!(u1_cbp & 0x8))
  ------------------
  |  Branch (2598:12): [True: 28.5k, False: 20.1k]
  ------------------
 2599|  28.5k|        {
 2600|  28.5k|            *(UWORD16 *)(pu1_top_nnz + 2) = 0;
 2601|  28.5k|            *(UWORD16 *)(pu1_left_nnz + 2) = 0;
 2602|  28.5k|        }
 2603|  20.1k|        else
 2604|  20.1k|        {
 2605|  20.1k|            UWORD32 u4_temp;
 2606|  20.1k|            ret = pf_cavlc_parse_8x8block[0x3](
 2607|  20.1k|                        pi2_coeff_block, 4, u1_offset, ps_dec,
 2608|  20.1k|                        (pu1_top_nnz + 2), (pu1_left_nnz + 2),
 2609|  20.1k|                        ps_cur_mb_info->u1_tran_form8x8,
 2610|  20.1k|                        ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
 2611|  20.1k|            if(ret != OK)
  ------------------
  |  |  114|  20.1k|#define OK        0
  ------------------
  |  Branch (2611:16): [True: 989, False: 19.1k]
  ------------------
 2612|    989|                return ret;
 2613|  19.1k|            ui16_csbp |= (u4_temp << 10);
 2614|  19.1k|        }
 2615|  48.7k|    }
 2616|  41.7k|    else
 2617|  41.7k|    {
 2618|  41.7k|        *(UWORD32 *)(pu1_top_nnz) = 0;
 2619|  41.7k|        *(UWORD32 *)(pu1_left_nnz) = 0;
 2620|  41.7k|    }
 2621|       |
 2622|  89.4k|    ps_cur_mb_info->u2_luma_csbp = ui16_csbp;
 2623|  89.4k|    ps_cur_mb_info->ps_curmb->u2_luma_csbp = ui16_csbp;
 2624|       |
 2625|  89.4k|    {
 2626|  89.4k|        UWORD16 u2_chroma_csbp = 0;
 2627|  89.4k|        ps_cur_mb_info->u2_chroma_csbp = 0;
 2628|  89.4k|        pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
 2629|  89.4k|        pu1_left_nnz = ps_dec->pu1_left_nnz_uv;
 2630|       |
 2631|  89.4k|        u1_cbp >>= 4;
 2632|       |        /*--------------------------------------------------------------------*/
 2633|       |        /* if Chroma Component not present OR no ac values present            */
 2634|       |        /* Set the values of N to zero                                        */
 2635|       |        /*--------------------------------------------------------------------*/
 2636|  89.4k|        if(u1_cbp == CBPC_ALLZERO || u1_cbp == CBPC_ACZERO)
  ------------------
  |  |  507|   178k|#define CBPC_ALLZERO    0
  ------------------
                      if(u1_cbp == CBPC_ALLZERO || u1_cbp == CBPC_ACZERO)
  ------------------
  |  |  508|  56.5k|#define CBPC_ACZERO     1
  ------------------
  |  Branch (2636:12): [True: 32.9k, False: 56.5k]
  |  Branch (2636:38): [True: 40.0k, False: 16.5k]
  ------------------
 2637|  72.9k|        {
 2638|  72.9k|            *(UWORD32 *)(pu1_top_nnz) = 0;
 2639|  72.9k|            *(UWORD32 *)(pu1_left_nnz) = 0;
 2640|  72.9k|        }
 2641|       |
 2642|  89.4k|        if(u1_cbp == CBPC_ALLZERO)
  ------------------
  |  |  507|  89.4k|#define CBPC_ALLZERO    0
  ------------------
  |  Branch (2642:12): [True: 32.9k, False: 56.5k]
  ------------------
 2643|  32.9k|        {
 2644|  32.9k|            return (0);
 2645|  32.9k|        }
 2646|       |        /*--------------------------------------------------------------------*/
 2647|       |        /* Decode Chroma DC values                                            */
 2648|       |        /*--------------------------------------------------------------------*/
 2649|  56.5k|        {
 2650|  56.5k|            WORD32 u4_scale_u;
 2651|  56.5k|            WORD32 u4_scale_v;
 2652|  56.5k|            WORD32 i4_mb_inter_inc;
 2653|  56.5k|            u4_scale_u = ps_dec->pu2_quant_scale_u[0] << ps_dec->u1_qp_u_div6;
 2654|  56.5k|            u4_scale_v = ps_dec->pu2_quant_scale_v[0] << ps_dec->u1_qp_v_div6;
 2655|  56.5k|            i4_mb_inter_inc = (!((ps_cur_mb_info->ps_curmb->u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  56.5k|#define I_4x4_MB    0
  ------------------
  |  Branch (2655:34): [True: 13.2k, False: 43.3k]
  ------------------
 2656|  43.3k|                            || (ps_cur_mb_info->ps_curmb->u1_mb_type == I_16x16_MB)))
  ------------------
  |  |  418|  43.3k|#define I_16x16_MB  1
  ------------------
  |  Branch (2656:32): [True: 18.9k, False: 24.4k]
  ------------------
 2657|  56.5k|                            * 3;
 2658|       |
 2659|  56.5k|            if(ps_dec->s_high_profile.u1_scaling_present)
  ------------------
  |  Branch (2659:16): [True: 10.2k, False: 46.3k]
  ------------------
 2660|  10.2k|            {
 2661|  10.2k|                u4_scale_u *=
 2662|  10.2k|                                ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc
 2663|  10.2k|                                                + 1][0];
 2664|  10.2k|                u4_scale_v *=
 2665|  10.2k|                                ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc
 2666|  10.2k|                                                + 2][0];
 2667|       |
 2668|  10.2k|            }
 2669|  46.3k|            else
 2670|  46.3k|            {
 2671|  46.3k|                u4_scale_u <<= 4;
 2672|  46.3k|                u4_scale_v <<= 4;
 2673|  46.3k|            }
 2674|       |
 2675|  56.5k|            ih264d_cavlc_parse_chroma_dc(ps_cur_mb_info,pi2_coeff_block, ps_dec->ps_bitstrm,
 2676|  56.5k|                                         u4_scale_u, u4_scale_v,
 2677|  56.5k|                                         i4_mb_inter_inc);
 2678|  56.5k|        }
 2679|       |
 2680|  56.5k|        if(u1_cbp == CBPC_ACZERO)
  ------------------
  |  |  508|  56.5k|#define CBPC_ACZERO     1
  ------------------
  |  Branch (2680:12): [True: 40.0k, False: 16.5k]
  ------------------
 2681|  40.0k|            return (0);
 2682|       |
 2683|  16.5k|        pu1_top_nnz[0] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[0];
 2684|  16.5k|        pu1_top_nnz[1] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[1];
 2685|  16.5k|        pu1_top_nnz[2] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[2];
 2686|  16.5k|        pu1_top_nnz[3] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[3];
 2687|       |        /*--------------------------------------------------------------------*/
 2688|       |        /* Decode Chroma AC values                                            */
 2689|       |        /*--------------------------------------------------------------------*/
 2690|  16.5k|        {
 2691|  16.5k|            UWORD32 u4_temp;
 2692|       |            /*****************************************************************/
 2693|       |            /* U Block  residual decoding, check cbp and proceed (subblock=0)*/
 2694|       |            /*****************************************************************/
 2695|  16.5k|            ret = pf_cavlc_parse_8x8block[u4_nbr_avl](
 2696|  16.5k|                        pi2_coeff_block, 2, 1, ps_dec, pu1_top_nnz,
 2697|  16.5k|                        pu1_left_nnz, 0, 0, &u4_temp);
 2698|  16.5k|            if(ret != OK)
  ------------------
  |  |  114|  16.5k|#define OK        0
  ------------------
  |  Branch (2698:16): [True: 1.43k, False: 15.1k]
  ------------------
 2699|  1.43k|                return ret;
 2700|  15.1k|            u2_chroma_csbp = u4_temp;
 2701|       |
 2702|  15.1k|            pi2_coeff_block += MB_CHROM_SIZE;
  ------------------
  |  |  564|  15.1k|#define MB_CHROM_SIZE                 64
  ------------------
 2703|       |            /*****************************************************************/
 2704|       |            /* V Block  residual decoding, check cbp and proceed (subblock=1)*/
 2705|       |            /*****************************************************************/
 2706|  15.1k|            ret = pf_cavlc_parse_8x8block[u4_nbr_avl](pi2_coeff_block, 2, 1,
 2707|  15.1k|                                                      ps_dec,
 2708|  15.1k|                                                      (pu1_top_nnz + 2),
 2709|  15.1k|                                                      (pu1_left_nnz + 2), 0,
 2710|  15.1k|                                                      0, &u4_temp);
 2711|  15.1k|            if(ret != OK)
  ------------------
  |  |  114|  15.1k|#define OK        0
  ------------------
  |  Branch (2711:16): [True: 617, False: 14.5k]
  ------------------
 2712|    617|                return ret;
 2713|  14.5k|            u2_chroma_csbp |= (u4_temp << 4);
 2714|  14.5k|        }
 2715|       |
 2716|      0|        ps_cur_mb_info->u2_chroma_csbp = u2_chroma_csbp;
 2717|  14.5k|    }
 2718|  14.5k|    return OK;
  ------------------
  |  |  114|  14.5k|#define OK        0
  ------------------
 2719|  15.1k|}

ih264d_get_pre_sei_params:
   86|   169k|{
   87|   169k|    if((NULL != ps_dec->ps_sei) &&
  ------------------
  |  Branch (87:8): [True: 169k, False: 0]
  ------------------
   88|   169k|        ((0 == ps_dec->ps_sei->s_sei_ccv_params.u1_ccv_cancel_flag) &&
  ------------------
  |  Branch (88:10): [True: 169k, False: 167]
  ------------------
   89|   169k|        (0 == ps_dec->ps_sei->s_sei_ccv_params.u1_ccv_persistence_flag)))
  ------------------
  |  Branch (89:9): [True: 169k, False: 101]
  ------------------
   90|   169k|    {
   91|   169k|        ps_dec->ps_sei->u1_sei_ccv_params_present_flag = 0;
   92|   169k|        memset(&ps_dec->ps_sei->s_sei_ccv_params, 0, sizeof(sei_ccv_params_t));
   93|   169k|    }
   94|       |
   95|   169k|    if((NULL != ps_dec->ps_cur_sps) &&
  ------------------
  |  Branch (95:8): [True: 139k, False: 30.0k]
  ------------------
   96|   139k|        ((1 == ps_dec->ps_cur_sps->u1_vui_parameters_present_flag) &&
  ------------------
  |  Branch (96:10): [True: 17.5k, False: 121k]
  ------------------
   97|  17.5k|        ((2 != ps_dec->ps_cur_sps->s_vui.u1_colour_primaries) &&
  ------------------
  |  Branch (97:10): [True: 2.06k, False: 15.5k]
  ------------------
   98|  2.06k|        (2 != ps_dec->ps_cur_sps->s_vui.u1_matrix_coeffs) &&
  ------------------
  |  Branch (98:9): [True: 1.97k, False: 84]
  ------------------
   99|  1.97k|        (2 != ps_dec->ps_cur_sps->s_vui.u1_tfr_chars) &&
  ------------------
  |  Branch (99:9): [True: 1.90k, False: 67]
  ------------------
  100|  1.90k|        (4 != ps_dec->ps_cur_sps->s_vui.u1_tfr_chars) &&
  ------------------
  |  Branch (100:9): [True: 1.84k, False: 68]
  ------------------
  101|  1.84k|        (5 != ps_dec->ps_cur_sps->s_vui.u1_tfr_chars))))
  ------------------
  |  Branch (101:9): [True: 1.64k, False: 196]
  ------------------
  102|  1.64k|    {
  103|  1.64k|        if((1 == ps_dec->ps_sei_parse->u1_sei_ccv_params_present_flag) ||
  ------------------
  |  Branch (103:12): [True: 90, False: 1.55k]
  ------------------
  104|  1.55k|            (IDR_SLICE_NAL == u1_nal_unit_type))
  ------------------
  |  |  328|  1.55k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (104:13): [True: 661, False: 894]
  ------------------
  105|    751|        {
  106|    751|            ps_dec->ps_sei->u1_sei_ccv_params_present_flag =
  107|    751|                        ps_dec->ps_sei_parse->u1_sei_ccv_params_present_flag;
  108|    751|            ps_dec->ps_sei->s_sei_ccv_params = ps_dec->ps_sei_parse->s_sei_ccv_params;
  109|    751|        }
  110|  1.64k|    }
  111|   167k|    else
  112|   167k|    {
  113|   167k|        ps_dec->ps_sei->u1_sei_ccv_params_present_flag = 0;
  114|   167k|        memset(&ps_dec->ps_sei->s_sei_ccv_params, 0, sizeof(sei_ccv_params_t));
  115|   167k|    }
  116|       |
  117|   169k|    if(IDR_SLICE_NAL == u1_nal_unit_type)
  ------------------
  |  |  328|   169k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (117:8): [True: 107k, False: 62.5k]
  ------------------
  118|   107k|    {
  119|   107k|        ps_dec->ps_sei->u1_sei_mdcv_params_present_flag =
  120|   107k|                        ps_dec->ps_sei_parse->u1_sei_mdcv_params_present_flag;
  121|   107k|        ps_dec->ps_sei->s_sei_mdcv_params = ps_dec->ps_sei_parse->s_sei_mdcv_params;
  122|   107k|        ps_dec->ps_sei->u1_sei_cll_params_present_flag =
  123|   107k|                        ps_dec->ps_sei_parse->u1_sei_cll_params_present_flag;
  124|   107k|        ps_dec->ps_sei->s_sei_cll_params = ps_dec->ps_sei_parse->s_sei_cll_params;
  125|   107k|        ps_dec->ps_sei->u1_sei_ave_params_present_flag =
  126|   107k|                        ps_dec->ps_sei_parse->u1_sei_ave_params_present_flag;
  127|   107k|        ps_dec->ps_sei->s_sei_ave_params = ps_dec->ps_sei_parse->s_sei_ave_params;
  128|   107k|        ps_dec->ps_sei->u1_sei_sii_params_present_flag =
  129|   107k|            ps_dec->ps_sei_parse->u1_sei_sii_params_present_flag;
  130|   107k|        ps_dec->ps_sei->s_sei_sii_params = ps_dec->ps_sei_parse->s_sei_sii_params;
  131|   107k|    }
  132|       |
  133|   169k|    if(NULL != ps_dec->ps_sei)
  ------------------
  |  Branch (133:8): [True: 169k, False: 0]
  ------------------
  134|   169k|    {
  135|   169k|        ps_dec->ps_sei->u1_sei_fgc_params_present_flag =
  136|   169k|            ps_dec->ps_sei_parse->u1_sei_fgc_params_present_flag;
  137|   169k|        ps_dec->ps_sei->s_sei_fgc_params = ps_dec->ps_sei_parse->s_sei_fgc_params;
  138|   169k|    }
  139|       |
  140|   169k|    ps_dec->ps_sei_parse->u1_sei_mdcv_params_present_flag = 0;
  141|   169k|    memset(&ps_dec->ps_sei_parse->s_sei_mdcv_params, 0, sizeof(sei_mdcv_params_t));
  142|   169k|    ps_dec->ps_sei_parse->u1_sei_cll_params_present_flag = 0;
  143|   169k|    memset(&ps_dec->ps_sei_parse->s_sei_cll_params, 0, sizeof(sei_cll_params_t));
  144|   169k|    ps_dec->ps_sei_parse->u1_sei_ave_params_present_flag = 0;
  145|   169k|    memset(&ps_dec->ps_sei_parse->s_sei_ave_params, 0, sizeof(sei_ave_params_t));
  146|   169k|    ps_dec->ps_sei_parse->u1_sei_ccv_params_present_flag = 0;
  147|   169k|    memset(&ps_dec->ps_sei_parse->s_sei_ccv_params, 0, sizeof(sei_ccv_params_t));
  148|   169k|    ps_dec->ps_sei_parse->u1_sei_sii_params_present_flag = 0;
  149|   169k|    memset(&ps_dec->ps_sei_parse->s_sei_sii_params, 0, sizeof(sei_sii_params_t));
  150|   169k|}
ih264d_parse_sei:
  207|  2.24k|{
  208|  2.24k|    UNUSED(ps_dec);
  ------------------
  |  |   45|  2.24k|#define UNUSED(x) ((void)(x))
  ------------------
  209|  2.24k|    UNUSED(ps_bitstrm);
  ------------------
  |  |   45|  2.24k|#define UNUSED(x) ((void)(x))
  ------------------
  210|  2.24k|    return (0);
  211|  2.24k|}
ih264d_correct_level_idc:
  500|   108k|{
  501|   108k|    UWORD32 u4_max_mbs_allowed;
  502|       |
  503|   108k|    switch(u4_level_idc)
  504|   108k|    {
  505|  1.62k|        case H264_LEVEL_1_0:
  ------------------
  |  |  291|  1.62k|#define H264_LEVEL_1_0     10
  ------------------
  |  Branch (505:9): [True: 1.62k, False: 107k]
  ------------------
  506|  1.62k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_10;
  ------------------
  |  |  321|  1.62k|#define MAX_MBS_LEVEL_10 99
  ------------------
  507|  1.62k|            break;
  508|  15.2k|        case H264_LEVEL_1_1:
  ------------------
  |  |  292|  15.2k|#define H264_LEVEL_1_1     11
  ------------------
  |  Branch (508:9): [True: 15.2k, False: 93.6k]
  ------------------
  509|  15.2k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_11;
  ------------------
  |  |  320|  15.2k|#define MAX_MBS_LEVEL_11 396
  ------------------
  510|  15.2k|            break;
  511|  1.91k|        case H264_LEVEL_1_2:
  ------------------
  |  |  293|  1.91k|#define H264_LEVEL_1_2     12
  ------------------
  |  Branch (511:9): [True: 1.91k, False: 106k]
  ------------------
  512|  1.91k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_12;
  ------------------
  |  |  319|  1.91k|#define MAX_MBS_LEVEL_12 396
  ------------------
  513|  1.91k|            break;
  514|    941|        case H264_LEVEL_1_3:
  ------------------
  |  |  294|    941|#define H264_LEVEL_1_3     13
  ------------------
  |  Branch (514:9): [True: 941, False: 107k]
  ------------------
  515|    941|            u4_max_mbs_allowed = MAX_MBS_LEVEL_13;
  ------------------
  |  |  318|    941|#define MAX_MBS_LEVEL_13 396
  ------------------
  516|    941|            break;
  517|  20.2k|        case H264_LEVEL_2_0:
  ------------------
  |  |  295|  20.2k|#define H264_LEVEL_2_0     20
  ------------------
  |  Branch (517:9): [True: 20.2k, False: 88.5k]
  ------------------
  518|  20.2k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_20;
  ------------------
  |  |  317|  20.2k|#define MAX_MBS_LEVEL_20 396
  ------------------
  519|  20.2k|            break;
  520|  2.79k|        case H264_LEVEL_2_1:
  ------------------
  |  |  296|  2.79k|#define H264_LEVEL_2_1     21
  ------------------
  |  Branch (520:9): [True: 2.79k, False: 106k]
  ------------------
  521|  2.79k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_21;
  ------------------
  |  |  316|  2.79k|#define MAX_MBS_LEVEL_21 792
  ------------------
  522|  2.79k|            break;
  523|  1.01k|        case H264_LEVEL_2_2:
  ------------------
  |  |  297|  1.01k|#define H264_LEVEL_2_2     22
  ------------------
  |  Branch (523:9): [True: 1.01k, False: 107k]
  ------------------
  524|  1.01k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_22;
  ------------------
  |  |  315|  1.01k|#define MAX_MBS_LEVEL_22 1620
  ------------------
  525|  1.01k|            break;
  526|  1.51k|        case H264_LEVEL_3_0:
  ------------------
  |  |  298|  1.51k|#define H264_LEVEL_3_0     30
  ------------------
  |  Branch (526:9): [True: 1.51k, False: 107k]
  ------------------
  527|  1.51k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_30;
  ------------------
  |  |  314|  1.51k|#define MAX_MBS_LEVEL_30 1620
  ------------------
  528|  1.51k|            break;
  529|  6.91k|        case H264_LEVEL_3_1:
  ------------------
  |  |  299|  6.91k|#define H264_LEVEL_3_1     31
  ------------------
  |  Branch (529:9): [True: 6.91k, False: 101k]
  ------------------
  530|  6.91k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_31;
  ------------------
  |  |  313|  6.91k|#define MAX_MBS_LEVEL_31 3600
  ------------------
  531|  6.91k|            break;
  532|  24.9k|        case H264_LEVEL_3_2:
  ------------------
  |  |  300|  24.9k|#define H264_LEVEL_3_2     32
  ------------------
  |  Branch (532:9): [True: 24.9k, False: 83.8k]
  ------------------
  533|  24.9k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_32;
  ------------------
  |  |  312|  24.9k|#define MAX_MBS_LEVEL_32 5120
  ------------------
  534|  24.9k|            break;
  535|  5.88k|        case H264_LEVEL_4_0:
  ------------------
  |  |  301|  5.88k|#define H264_LEVEL_4_0     40
  ------------------
  |  Branch (535:9): [True: 5.88k, False: 102k]
  ------------------
  536|  5.88k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_40;
  ------------------
  |  |  311|  5.88k|#define MAX_MBS_LEVEL_40 8192
  ------------------
  537|  5.88k|            break;
  538|  1.10k|        case H264_LEVEL_4_1:
  ------------------
  |  |  302|  1.10k|#define H264_LEVEL_4_1     41
  ------------------
  |  Branch (538:9): [True: 1.10k, False: 107k]
  ------------------
  539|  1.10k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_41;
  ------------------
  |  |  310|  1.10k|#define MAX_MBS_LEVEL_41 8192
  ------------------
  540|  1.10k|            break;
  541|  4.26k|        case H264_LEVEL_4_2:
  ------------------
  |  |  303|  4.26k|#define H264_LEVEL_4_2     42
  ------------------
  |  Branch (541:9): [True: 4.26k, False: 104k]
  ------------------
  542|  4.26k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_42;
  ------------------
  |  |  309|  4.26k|#define MAX_MBS_LEVEL_42 8704
  ------------------
  543|  4.26k|            break;
  544|     93|        case H264_LEVEL_5_0:
  ------------------
  |  |  304|     93|#define H264_LEVEL_5_0     50
  ------------------
  |  Branch (544:9): [True: 93, False: 108k]
  ------------------
  545|     93|            u4_max_mbs_allowed = MAX_MBS_LEVEL_50;
  ------------------
  |  |  308|     93|#define MAX_MBS_LEVEL_50 22080
  ------------------
  546|     93|            break;
  547|      0|        case H264_LEVEL_5_1:
  ------------------
  |  |  305|      0|#define H264_LEVEL_5_1     51
  ------------------
  |  Branch (547:9): [True: 0, False: 108k]
  ------------------
  548|  20.3k|        default:
  ------------------
  |  Branch (548:9): [True: 20.3k, False: 88.5k]
  ------------------
  549|  20.3k|            u4_max_mbs_allowed = MAX_MBS_LEVEL_51;
  ------------------
  |  |  307|  20.3k|#define MAX_MBS_LEVEL_51 36864
  ------------------
  550|  20.3k|            break;
  551|       |
  552|   108k|    }
  553|       |
  554|       |    /*correct of the level is incorrect*/
  555|   108k|    if(u4_total_mbs > u4_max_mbs_allowed)
  ------------------
  |  Branch (555:8): [True: 1.67k, False: 107k]
  ------------------
  556|  1.67k|    {
  557|  1.67k|        if(u4_total_mbs > MAX_MBS_LEVEL_50)
  ------------------
  |  |  308|  1.67k|#define MAX_MBS_LEVEL_50 22080
  ------------------
  |  Branch (557:12): [True: 71, False: 1.60k]
  ------------------
  558|     71|            u4_level_idc = H264_LEVEL_5_1;
  ------------------
  |  |  305|     71|#define H264_LEVEL_5_1     51
  ------------------
  559|  1.60k|        else if(u4_total_mbs > MAX_MBS_LEVEL_42)
  ------------------
  |  |  309|  1.60k|#define MAX_MBS_LEVEL_42 8704
  ------------------
  |  Branch (559:17): [True: 100, False: 1.50k]
  ------------------
  560|    100|            u4_level_idc = H264_LEVEL_5_0;
  ------------------
  |  |  304|    100|#define H264_LEVEL_5_0     50
  ------------------
  561|  1.50k|        else if(u4_total_mbs > MAX_MBS_LEVEL_41)
  ------------------
  |  |  310|  1.50k|#define MAX_MBS_LEVEL_41 8192
  ------------------
  |  Branch (561:17): [True: 165, False: 1.34k]
  ------------------
  562|    165|            u4_level_idc = H264_LEVEL_4_2;
  ------------------
  |  |  303|    165|#define H264_LEVEL_4_2     42
  ------------------
  563|  1.34k|        else if(u4_total_mbs > MAX_MBS_LEVEL_40)
  ------------------
  |  |  311|  1.34k|#define MAX_MBS_LEVEL_40 8192
  ------------------
  |  Branch (563:17): [True: 0, False: 1.34k]
  ------------------
  564|      0|            u4_level_idc = H264_LEVEL_4_1;
  ------------------
  |  |  302|      0|#define H264_LEVEL_4_1     41
  ------------------
  565|  1.34k|        else if(u4_total_mbs > MAX_MBS_LEVEL_32)
  ------------------
  |  |  312|  1.34k|#define MAX_MBS_LEVEL_32 5120
  ------------------
  |  Branch (565:17): [True: 125, False: 1.21k]
  ------------------
  566|    125|            u4_level_idc = H264_LEVEL_4_0;
  ------------------
  |  |  301|    125|#define H264_LEVEL_4_0     40
  ------------------
  567|  1.21k|        else if(u4_total_mbs > MAX_MBS_LEVEL_31)
  ------------------
  |  |  313|  1.21k|#define MAX_MBS_LEVEL_31 3600
  ------------------
  |  Branch (567:17): [True: 167, False: 1.04k]
  ------------------
  568|    167|            u4_level_idc = H264_LEVEL_3_2;
  ------------------
  |  |  300|    167|#define H264_LEVEL_3_2     32
  ------------------
  569|  1.04k|        else if(u4_total_mbs > MAX_MBS_LEVEL_30)
  ------------------
  |  |  314|  1.04k|#define MAX_MBS_LEVEL_30 1620
  ------------------
  |  Branch (569:17): [True: 150, False: 899]
  ------------------
  570|    150|            u4_level_idc = H264_LEVEL_3_1;
  ------------------
  |  |  299|    150|#define H264_LEVEL_3_1     31
  ------------------
  571|    899|        else if(u4_total_mbs > MAX_MBS_LEVEL_21)
  ------------------
  |  |  316|    899|#define MAX_MBS_LEVEL_21 792
  ------------------
  |  Branch (571:17): [True: 169, False: 730]
  ------------------
  572|    169|            u4_level_idc = H264_LEVEL_3_0;
  ------------------
  |  |  298|    169|#define H264_LEVEL_3_0     30
  ------------------
  573|    730|        else if(u4_total_mbs > MAX_MBS_LEVEL_20)
  ------------------
  |  |  317|    730|#define MAX_MBS_LEVEL_20 396
  ------------------
  |  Branch (573:17): [True: 521, False: 209]
  ------------------
  574|    521|            u4_level_idc = H264_LEVEL_2_1;
  ------------------
  |  |  296|    521|#define H264_LEVEL_2_1     21
  ------------------
  575|    209|        else if(u4_total_mbs > MAX_MBS_LEVEL_10)
  ------------------
  |  |  321|    209|#define MAX_MBS_LEVEL_10 99
  ------------------
  |  Branch (575:17): [True: 209, False: 0]
  ------------------
  576|    209|            u4_level_idc = H264_LEVEL_2_0;
  ------------------
  |  |  295|    209|#define H264_LEVEL_2_0     20
  ------------------
  577|  1.67k|    }
  578|       |
  579|   108k|    return (u4_level_idc);
  580|       |
  581|   108k|}

ih264d_parse_imb_cavlc:
   92|  54.0k|{
   93|  54.0k|    WORD32 i4_delta_qp;
   94|  54.0k|    UWORD32 u4_temp;
   95|  54.0k|    UWORD32 ui_is_top_mb_available;
   96|  54.0k|    UWORD32 ui_is_left_mb_available;
   97|  54.0k|    UWORD32 u4_cbp;
   98|  54.0k|    UWORD32 u4_offset;
   99|  54.0k|    UWORD32 *pu4_bitstrm_buf;
  100|  54.0k|    WORD32 ret;
  101|       |
  102|  54.0k|    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
  103|  54.0k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  104|  54.0k|    UNUSED(u4_mb_num);
  ------------------
  |  |   45|  54.0k|#define UNUSED(x) ((void)(x))
  ------------------
  105|  54.0k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
  106|  54.0k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  107|       |
  108|  54.0k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  109|       |
  110|  54.0k|    u4_temp = ps_dec->u1_mb_ngbr_availablity;
  111|  54.0k|    ui_is_top_mb_available = BOOLEAN(u4_temp & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|  54.0k|#define BOOLEAN(x) (!!(x))
  ------------------
  112|  54.0k|    ui_is_left_mb_available = BOOLEAN(u4_temp & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|  54.0k|#define BOOLEAN(x) (!!(x))
  ------------------
  113|       |
  114|  54.0k|    pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  115|       |
  116|  54.0k|    if(u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  54.0k|#define I_4x4_MB    0
  ------------------
  |  Branch (116:8): [True: 13.1k, False: 40.9k]
  ------------------
  117|  13.1k|    {
  118|  13.1k|        ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
  ------------------
  |  |  417|  13.1k|#define I_4x4_MB    0
  ------------------
  119|  13.1k|        u4_offset = 0;
  120|       |
  121|       |        /*--------------------------------------------------------------------*/
  122|       |        /* Read transform_size_8x8_flag if present                            */
  123|       |        /*--------------------------------------------------------------------*/
  124|  13.1k|        if(ps_dec->s_high_profile.u1_transform8x8_present)
  ------------------
  |  Branch (124:12): [True: 7.45k, False: 5.65k]
  ------------------
  125|  7.45k|        {
  126|  7.45k|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
  127|  7.45k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  128|  7.45k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  129|  7.45k|        }
  130|       |
  131|       |        /*--------------------------------------------------------------------*/
  132|       |        /* Read the IntraPrediction modes for LUMA                            */
  133|       |        /*--------------------------------------------------------------------*/
  134|  13.1k|        if (!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (134:13): [True: 5.86k, False: 7.24k]
  ------------------
  135|  5.86k|        {
  136|  5.86k|            UWORD8 *pu1_temp;
  137|  5.86k|            ih264d_read_intra_pred_modes(ps_dec,
  138|  5.86k|                                          ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
  139|  5.86k|                                          ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16),
  140|  5.86k|                                          ps_cur_mb_info->u1_tran_form8x8);
  141|  5.86k|            pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
  142|  5.86k|            pu1_temp += 32;
  143|  5.86k|            ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
  144|  5.86k|        }
  145|  7.24k|        else
  146|  7.24k|        {
  147|  7.24k|            UWORD8 *pu1_temp;
  148|  7.24k|            ih264d_read_intra_pred_modes(ps_dec,
  149|  7.24k|                                          ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
  150|  7.24k|                                          ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4),
  151|  7.24k|                                          ps_cur_mb_info->u1_tran_form8x8);
  152|  7.24k|            pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
  153|  7.24k|            pu1_temp += 8;
  154|  7.24k|            ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
  155|  7.24k|        }
  156|       |        /*--------------------------------------------------------------------*/
  157|       |        /* Read the IntraPrediction mode for CHROMA                           */
  158|       |        /*--------------------------------------------------------------------*/
  159|       |//Inlined ih264d_uev
  160|  13.1k|        {
  161|  13.1k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  162|  13.1k|            UWORD32 u4_word, u4_ldz, u4_temp;
  163|       |
  164|       |            /***************************************************************/
  165|       |            /* Find leading zeros in next 32 bits                          */
  166|       |            /***************************************************************/
  167|  13.1k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  13.1k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  13.1k|{                                                                           \
  |  |  152|  13.1k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  13.1k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  13.1k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  13.1k|                                                                            \
  |  |  156|  13.1k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  13.1k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 12.6k, False: 479]
  |  |  ------------------
  |  |  158|  13.1k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  12.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  13.1k|}
  ------------------
  168|  13.1k|            u4_ldz = CLZ(u4_word);
  169|       |            /* Flush the ps_bitstrm */
  170|  13.1k|            u4_bitstream_offset += (u4_ldz + 1);
  171|       |            /* Read the suffix from the ps_bitstrm */
  172|  13.1k|            u4_word = 0;
  173|  13.1k|            if(u4_ldz)
  ------------------
  |  Branch (173:16): [True: 3.02k, False: 10.0k]
  ------------------
  174|  3.02k|            {
  175|  3.02k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  3.02k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  3.02k|{                                                                           \
  |  |  122|  3.02k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  3.02k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  3.02k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  3.02k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  3.02k|                                                                            \
  |  |  127|  3.02k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.86k, False: 162]
  |  |  ------------------
  |  |  128|  3.02k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.86k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  3.02k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  3.02k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  3.02k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  3.02k|}                                                                           \
  ------------------
  176|  3.02k|                        u4_ldz);
  177|  3.02k|            }
  178|  13.1k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  179|  13.1k|            u4_temp = ((1 << u4_ldz) + u4_word - 1);
  180|  13.1k|            if(u4_temp > 3)
  ------------------
  |  Branch (180:16): [True: 752, False: 12.3k]
  ------------------
  181|    752|            {
  182|    752|                return ERROR_CHROMA_PRED_MODE;
  183|    752|            }
  184|  12.3k|            ps_cur_mb_info->u1_chroma_pred_mode = u4_temp;
  185|  12.3k|            COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode);
  186|  12.3k|        }
  187|       |        /*--------------------------------------------------------------------*/
  188|       |        /* Read the Coded block pattern                                       */
  189|       |        /*--------------------------------------------------------------------*/
  190|      0|        {
  191|  12.3k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  192|  12.3k|            UWORD32 u4_word, u4_ldz;
  193|       |
  194|       |            /***************************************************************/
  195|       |            /* Find leading zeros in next 32 bits                          */
  196|       |            /***************************************************************/
  197|  12.3k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  12.3k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  12.3k|{                                                                           \
  |  |  152|  12.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  12.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  12.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  12.3k|                                                                            \
  |  |  156|  12.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  12.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 11.9k, False: 372]
  |  |  ------------------
  |  |  158|  12.3k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  11.9k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  12.3k|}
  ------------------
  198|  12.3k|            u4_ldz = CLZ(u4_word);
  199|       |            /* Flush the ps_bitstrm */
  200|  12.3k|            u4_bitstream_offset += (u4_ldz + 1);
  201|       |            /* Read the suffix from the ps_bitstrm */
  202|  12.3k|            u4_word = 0;
  203|  12.3k|            if(u4_ldz)
  ------------------
  |  Branch (203:16): [True: 4.37k, False: 7.98k]
  ------------------
  204|  4.37k|            {
  205|  4.37k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  4.37k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  4.37k|{                                                                           \
  |  |  122|  4.37k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  4.37k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  4.37k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  4.37k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  4.37k|                                                                            \
  |  |  127|  4.37k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 4.07k, False: 296]
  |  |  ------------------
  |  |  128|  4.37k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  4.07k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  4.37k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  4.37k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  4.37k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  4.37k|}                                                                           \
  ------------------
  206|  4.37k|                        u4_ldz);
  207|  4.37k|            }
  208|  12.3k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  209|  12.3k|            u4_cbp = ((1 << u4_ldz) + u4_word - 1);
  210|  12.3k|        }
  211|  12.3k|        if(u4_cbp > 47)
  ------------------
  |  Branch (211:12): [True: 212, False: 12.1k]
  ------------------
  212|    212|        {
  213|    212|            return ERROR_CBP;
  214|    212|        }
  215|       |
  216|  12.1k|        u4_cbp = gau1_ih264d_cbp_table[u4_cbp][0];
  217|  12.1k|        COPYTHECONTEXT("coded_block_pattern", u1_cbp);
  218|  12.1k|        ps_cur_mb_info->u1_cbp = u4_cbp;
  219|       |
  220|       |        /*--------------------------------------------------------------------*/
  221|       |        /* Read mb_qp_delta                                                   */
  222|       |        /*--------------------------------------------------------------------*/
  223|  12.1k|        if(ps_cur_mb_info->u1_cbp)
  ------------------
  |  Branch (223:12): [True: 11.0k, False: 1.13k]
  ------------------
  224|  11.0k|        {
  225|  11.0k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  226|  11.0k|            UWORD32 u4_word, u4_ldz, u4_abs_val;
  227|       |
  228|       |            /***************************************************************/
  229|       |            /* Find leading zeros in next 32 bits                          */
  230|       |            /***************************************************************/
  231|  11.0k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  11.0k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  11.0k|{                                                                           \
  |  |  152|  11.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  11.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  11.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  11.0k|                                                                            \
  |  |  156|  11.0k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  11.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 10.5k, False: 447]
  |  |  ------------------
  |  |  158|  11.0k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  10.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  11.0k|}
  ------------------
  232|  11.0k|            u4_ldz = CLZ(u4_word);
  233|       |
  234|       |            /* Flush the ps_bitstrm */
  235|  11.0k|            u4_bitstream_offset += (u4_ldz + 1);
  236|       |
  237|       |            /* Read the suffix from the ps_bitstrm */
  238|  11.0k|            u4_word = 0;
  239|  11.0k|            if(u4_ldz)
  ------------------
  |  Branch (239:16): [True: 1.58k, False: 9.42k]
  ------------------
  240|  1.58k|            {
  241|  1.58k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  1.58k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  1.58k|{                                                                           \
  |  |  122|  1.58k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  1.58k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  1.58k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  1.58k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  1.58k|                                                                            \
  |  |  127|  1.58k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 1.49k, False: 87]
  |  |  ------------------
  |  |  128|  1.58k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  1.49k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  1.58k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  1.58k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  1.58k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  1.58k|}                                                                           \
  ------------------
  242|  1.58k|                        u4_ldz);
  243|  1.58k|            }
  244|       |
  245|  11.0k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  246|  11.0k|            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  247|       |
  248|  11.0k|            if(u4_word & 0x1)
  ------------------
  |  Branch (248:16): [True: 1.06k, False: 9.95k]
  ------------------
  249|  1.06k|            {
  250|  1.06k|                i4_delta_qp = (-(WORD32)u4_abs_val);
  251|  1.06k|            }
  252|  9.95k|            else
  253|  9.95k|            {
  254|  9.95k|                i4_delta_qp = (u4_abs_val);
  255|  9.95k|            }
  256|       |
  257|  11.0k|            if((i4_delta_qp < -26) || (i4_delta_qp > 25))
  ------------------
  |  Branch (257:16): [True: 77, False: 10.9k]
  |  Branch (257:39): [True: 115, False: 10.8k]
  ------------------
  258|    192|            {
  259|    192|                return ERROR_INV_RANGE_QP_T;
  260|    192|            }
  261|       |
  262|  10.8k|            COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
  263|  10.8k|            if(i4_delta_qp != 0)
  ------------------
  |  Branch (263:16): [True: 1.39k, False: 9.42k]
  ------------------
  264|  1.39k|            {
  265|  1.39k|                ret = ih264d_update_qp(ps_dec, (WORD8)i4_delta_qp);
  266|  1.39k|                if(ret != OK)
  ------------------
  |  |  114|  1.39k|#define OK        0
  ------------------
  |  Branch (266:20): [True: 0, False: 1.39k]
  ------------------
  267|      0|                    return ret;
  268|  1.39k|            }
  269|  10.8k|        }
  270|       |
  271|  12.1k|    }
  272|  40.9k|    else
  273|  40.9k|    {
  274|  40.9k|        u4_offset = 1;
  275|  40.9k|        ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
  ------------------
  |  |  418|  40.9k|#define I_16x16_MB  1
  ------------------
  276|       |        /*-------------------------------------------------------------------*/
  277|       |        /* Read the IntraPrediction mode for CHROMA                          */
  278|       |        /*-------------------------------------------------------------------*/
  279|  40.9k|        {
  280|  40.9k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  281|  40.9k|            UWORD32 u4_word, u4_ldz;
  282|       |
  283|       |            /***************************************************************/
  284|       |            /* Find leading zeros in next 32 bits                          */
  285|       |            /***************************************************************/
  286|  40.9k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  40.9k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  40.9k|{                                                                           \
  |  |  152|  40.9k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  40.9k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  40.9k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  40.9k|                                                                            \
  |  |  156|  40.9k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  40.9k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 40.3k, False: 609]
  |  |  ------------------
  |  |  158|  40.9k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  40.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  40.9k|}
  ------------------
  287|  40.9k|            u4_ldz = CLZ(u4_word);
  288|       |            /* Flush the ps_bitstrm */
  289|  40.9k|            u4_bitstream_offset += (u4_ldz + 1);
  290|       |            /* Read the suffix from the ps_bitstrm */
  291|  40.9k|            u4_word = 0;
  292|  40.9k|            if(u4_ldz)
  ------------------
  |  Branch (292:16): [True: 10.0k, False: 30.9k]
  ------------------
  293|  10.0k|            {
  294|  10.0k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  10.0k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  10.0k|{                                                                           \
  |  |  122|  10.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  10.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  10.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  10.0k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  10.0k|                                                                            \
  |  |  127|  10.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 9.62k, False: 418]
  |  |  ------------------
  |  |  128|  10.0k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  9.62k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  10.0k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  10.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  10.0k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  10.0k|}                                                                           \
  ------------------
  295|  10.0k|                        u4_ldz);
  296|  10.0k|            }
  297|  40.9k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  298|  40.9k|            u4_temp = ((1 << u4_ldz) + u4_word - 1);
  299|       |
  300|       |//Inlined ih264d_uev
  301|       |
  302|  40.9k|            if(u4_temp > 3)
  ------------------
  |  Branch (302:16): [True: 874, False: 40.1k]
  ------------------
  303|    874|            {
  304|    874|                return ERROR_CHROMA_PRED_MODE;
  305|    874|            }
  306|  40.1k|            ps_cur_mb_info->u1_chroma_pred_mode = u4_temp;
  307|  40.1k|            COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode);
  308|  40.1k|        }
  309|       |        /*-------------------------------------------------------------------*/
  310|       |        /* Read the Coded block pattern                                      */
  311|       |        /*-------------------------------------------------------------------*/
  312|      0|        u4_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2];
  313|  40.1k|        ps_cur_mb_info->u1_cbp = u4_cbp;
  314|       |
  315|       |        /*-------------------------------------------------------------------*/
  316|       |        /* Read mb_qp_delta                                                  */
  317|       |        /*-------------------------------------------------------------------*/
  318|  40.1k|        {
  319|  40.1k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  320|  40.1k|            UWORD32 u4_word, u4_ldz, u4_abs_val;
  321|       |
  322|       |            /***************************************************************/
  323|       |            /* Find leading zeros in next 32 bits                          */
  324|       |            /***************************************************************/
  325|  40.1k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  40.1k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  40.1k|{                                                                           \
  |  |  152|  40.1k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  40.1k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  40.1k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  40.1k|                                                                            \
  |  |  156|  40.1k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  40.1k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 39.5k, False: 522]
  |  |  ------------------
  |  |  158|  40.1k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  39.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  40.1k|}
  ------------------
  326|  40.1k|            u4_ldz = CLZ(u4_word);
  327|       |
  328|       |            /* Flush the ps_bitstrm */
  329|  40.1k|            u4_bitstream_offset += (u4_ldz + 1);
  330|       |
  331|       |            /* Read the suffix from the ps_bitstrm */
  332|  40.1k|            u4_word = 0;
  333|  40.1k|            if(u4_ldz)
  ------------------
  |  Branch (333:16): [True: 21.6k, False: 18.4k]
  ------------------
  334|  21.6k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  21.6k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  21.6k|{                                                                           \
  |  |  122|  21.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  21.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  21.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  21.6k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  21.6k|                                                                            \
  |  |  127|  21.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 19.7k, False: 1.93k]
  |  |  ------------------
  |  |  128|  21.6k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  19.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  21.6k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  21.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  21.6k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  21.6k|}                                                                           \
  ------------------
  335|  40.1k|                        u4_ldz);
  336|       |
  337|  40.1k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  338|  40.1k|            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  339|       |
  340|  40.1k|            if(u4_word & 0x1)
  ------------------
  |  Branch (340:16): [True: 12.9k, False: 27.1k]
  ------------------
  341|  12.9k|                i4_delta_qp = (-(WORD32)u4_abs_val);
  342|  27.1k|            else
  343|  27.1k|                i4_delta_qp = (u4_abs_val);
  344|       |
  345|  40.1k|            if((i4_delta_qp < -26) || (i4_delta_qp > 25))
  ------------------
  |  Branch (345:16): [True: 104, False: 39.9k]
  |  Branch (345:39): [True: 163, False: 39.8k]
  ------------------
  346|    267|                return ERROR_INV_RANGE_QP_T;
  347|       |
  348|  40.1k|        }
  349|       |//inlinined ih264d_sev
  350|  39.8k|        COPYTHECONTEXT("Delta quant", i1_delta_qp);
  351|       |
  352|  39.8k|        if(i4_delta_qp != 0)
  ------------------
  |  Branch (352:12): [True: 21.3k, False: 18.4k]
  ------------------
  353|  21.3k|        {
  354|  21.3k|            ret = ih264d_update_qp(ps_dec, (WORD8)i4_delta_qp);
  355|  21.3k|            if(ret != OK)
  ------------------
  |  |  114|  21.3k|#define OK        0
  ------------------
  |  Branch (355:16): [True: 0, False: 21.3k]
  ------------------
  356|      0|                return ret;
  357|  21.3k|        }
  358|       |
  359|  39.8k|        {
  360|  39.8k|            WORD16 i_scaleFactor;
  361|  39.8k|            UWORD32 ui_N = 0;
  362|  39.8k|            WORD16 *pi2_scale_matrix_ptr;
  363|       |            /*******************************************************************/
  364|       |            /* for luma DC coefficients the scaling is done during the parsing */
  365|       |            /* to preserve the precision                                       */
  366|       |            /*******************************************************************/
  367|  39.8k|            if(ps_dec->s_high_profile.u1_scaling_present)
  ------------------
  |  Branch (367:16): [True: 6.49k, False: 33.3k]
  ------------------
  368|  6.49k|            {
  369|  6.49k|                pi2_scale_matrix_ptr =
  370|  6.49k|                                ps_dec->s_high_profile.i2_scalinglist4x4[0];
  371|  6.49k|            }
  372|  33.3k|            else
  373|  33.3k|            {
  374|  33.3k|                i_scaleFactor = 16;
  375|  33.3k|                pi2_scale_matrix_ptr = &i_scaleFactor;
  376|  33.3k|            }
  377|       |
  378|       |            /*---------------------------------------------------------------*/
  379|       |            /* Decode DC coefficients                                        */
  380|       |            /*---------------------------------------------------------------*/
  381|       |            /*---------------------------------------------------------------*/
  382|       |            /* Calculation of N                                              */
  383|       |            /*---------------------------------------------------------------*/
  384|  39.8k|            if(ui_is_left_mb_available)
  ------------------
  |  Branch (384:16): [True: 26.3k, False: 13.4k]
  ------------------
  385|  26.3k|            {
  386|       |
  387|  26.3k|                if(ui_is_top_mb_available)
  ------------------
  |  Branch (387:20): [True: 19.9k, False: 6.42k]
  ------------------
  388|  19.9k|                {
  389|  19.9k|                    ui_N = ((ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0]
  390|  19.9k|                                    + ps_dec->pu1_left_nnz_y[0] + 1) >> 1);
  391|  19.9k|                }
  392|  6.42k|                else
  393|  6.42k|                {
  394|  6.42k|                    ui_N = ps_dec->pu1_left_nnz_y[0];
  395|  6.42k|                }
  396|  26.3k|            }
  397|  13.4k|            else if(ui_is_top_mb_available)
  ------------------
  |  Branch (397:21): [True: 10.4k, False: 2.99k]
  ------------------
  398|  10.4k|            {
  399|  10.4k|                ui_N = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0];
  400|  10.4k|            }
  401|       |
  402|  39.8k|            {
  403|  39.8k|                WORD16 pi2_dc_coef[16];
  404|  39.8k|                WORD32 pi4_tmp[16];
  405|  39.8k|                tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
  406|  39.8k|                                (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
  407|  39.8k|                WORD16 *pi2_coeff_block =
  408|  39.8k|                                (WORD16 *)ps_dec->pv_parse_tu_coeff_data;
  409|  39.8k|                UWORD32 u4_num_coeff;
  410|  39.8k|                ps_tu_4x4->u2_sig_coeff_map = 0;
  411|       |
  412|  39.8k|                ret = ps_dec->pf_cavlc_parse4x4coeff[(ui_N > 7)](pi2_dc_coef, 0, ui_N,
  413|  39.8k|                                                                 ps_dec, &u4_num_coeff);
  414|  39.8k|                if(ret != OK)
  ------------------
  |  |  114|  39.8k|#define OK        0
  ------------------
  |  Branch (414:20): [True: 189, False: 39.6k]
  ------------------
  415|    189|                    return ret;
  416|       |
  417|  39.6k|                if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  39.6k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 191, False: 39.4k]
  |  |  ------------------
  ------------------
  418|    191|                    return ERROR_EOB_TERMINATE_T;
  419|  39.4k|                if(ps_tu_4x4->u2_sig_coeff_map)
  ------------------
  |  Branch (419:20): [True: 9.98k, False: 29.4k]
  ------------------
  420|  9.98k|                {
  421|  9.98k|                    memset(pi2_dc_coef,0,sizeof(pi2_dc_coef));
  422|  9.98k|                    ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
  423|  9.98k|                                                     pi2_dc_coef,
  424|  9.98k|                                                     ps_dec->pu1_inv_scan);
  425|       |
  426|  9.98k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  9.98k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  427|  9.98k|                    ps_dec->pf_ihadamard_scaling_4x4(pi2_dc_coef,
  428|  9.98k|                                                     pi2_coeff_block,
  429|  9.98k|                                                     ps_dec->pu2_quant_scale_y,
  430|  9.98k|                                                     (UWORD16 *)pi2_scale_matrix_ptr,
  431|  9.98k|                                                     ps_dec->u1_qp_y_div6,
  432|  9.98k|                                                     pi4_tmp);
  433|  9.98k|                    pi2_coeff_block += 16;
  434|  9.98k|                    ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_block;
  435|  9.98k|                    SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0);
  ------------------
  |  |  106|  9.98k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  436|  9.98k|                }
  437|       |
  438|  39.4k|            }
  439|  39.4k|        }
  440|  39.4k|    }
  441|       |
  442|       |
  443|  51.4k|    if(u4_cbp)
  ------------------
  |  Branch (443:8): [True: 29.2k, False: 22.1k]
  ------------------
  444|  29.2k|    {
  445|       |
  446|  29.2k|        ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info,
  447|  29.2k|                                       (UWORD8)u4_offset);
  448|  29.2k|        if(ret != OK)
  ------------------
  |  |  114|  29.2k|#define OK        0
  ------------------
  |  Branch (448:12): [True: 1.58k, False: 27.6k]
  ------------------
  449|  1.58k|            return ret;
  450|  27.6k|        if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  27.6k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 688, False: 26.9k]
  |  |  ------------------
  ------------------
  451|    688|            return ERROR_EOB_TERMINATE_T;
  452|       |
  453|       |        /* Store Left Mb NNZ and TOP chroma NNZ */
  454|  27.6k|    }
  455|  22.1k|    else
  456|  22.1k|    {
  457|  22.1k|        ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6;
  458|  22.1k|        ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6;
  459|  22.1k|        ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6;
  460|  22.1k|        ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6;
  461|  22.1k|        ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6;
  462|  22.1k|        ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6;
  463|  22.1k|        ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|  22.1k|#define CAVLC  0
  ------------------
  464|  22.1k|    }
  465|       |
  466|  49.1k|    return OK;
  ------------------
  |  |  114|  49.1k|#define OK        0
  ------------------
  467|  51.4k|}
ih264d_parse_imb_cabac:
  485|  78.2k|{
  486|  78.2k|    WORD8 i1_delta_qp;
  487|  78.2k|    UWORD8 u1_cbp;
  488|  78.2k|    UWORD8 u1_offset;
  489|       |    /* Variables for handling Cabac contexts */
  490|  78.2k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
  491|  78.2k|    ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
  492|  78.2k|    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
  493|  78.2k|    bin_ctxt_model_t *p_bin_ctxt;
  494|       |
  495|  78.2k|    UWORD8 u1_intra_chrom_pred_mode;
  496|  78.2k|    UWORD8 u1_dc_block_flag = 0;
  497|  78.2k|    WORD32 ret;
  498|       |
  499|  78.2k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  500|       |
  501|  78.2k|    if(ps_left_ctxt == ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (501:8): [True: 15.1k, False: 63.1k]
  ------------------
  502|  15.1k|    {
  503|  15.1k|        ps_dec->pu1_left_yuv_dc_csbp[0] = 0xf;
  504|  15.1k|    }
  505|       |
  506|  78.2k|    if(ps_dec->ps_cur_slice->u1_slice_type != I_SLICE)
  ------------------
  |  |  370|  78.2k|#define I_SLICE  2
  ------------------
  |  Branch (506:8): [True: 5.30k, False: 72.9k]
  ------------------
  507|  5.30k|    {
  508|  5.30k|        WORD32 *pi4_buf;
  509|  5.30k|        WORD8 *pi1_buf;
  510|  5.30k|        MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|  5.30k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  5.30k|{                                                               \
  |  |  654|  5.30k|    memset(pu4_start,value,16);                                 \
  |  |  655|  5.30k|}
  ------------------
  511|  5.30k|        *((UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc) = 0;
  512|  5.30k|        MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
  ------------------
  |  |  652|  5.30k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  5.30k|{                                                               \
  |  |  654|  5.30k|    memset(pu4_start,value,16);                                 \
  |  |  655|  5.30k|}
  ------------------
  513|  5.30k|        memset(p_curr_ctxt->i1_ref_idx, 0, 4);
  514|  5.30k|    }
  515|       |
  516|  78.2k|    if(u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  78.2k|#define I_4x4_MB    0
  ------------------
  |  Branch (516:8): [True: 39.2k, False: 39.0k]
  ------------------
  517|  39.2k|    {
  518|  39.2k|        ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
  ------------------
  |  |  417|  39.2k|#define I_4x4_MB    0
  ------------------
  519|  39.2k|        p_curr_ctxt->u1_mb_type = CAB_I4x4;
  ------------------
  |  |  394|  39.2k|#define CAB_I4x4          0x00 /* 0000 00x0 */
  ------------------
  520|  39.2k|        u1_offset = 0;
  521|       |
  522|  39.2k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
  523|  39.2k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  524|       |
  525|       |        /*--------------------------------------------------------------------*/
  526|       |        /* Read transform_size_8x8_flag if present                            */
  527|       |        /*--------------------------------------------------------------------*/
  528|  39.2k|        if(ps_dec->s_high_profile.u1_transform8x8_present)
  ------------------
  |  Branch (528:12): [True: 21.5k, False: 17.6k]
  ------------------
  529|  21.5k|        {
  530|  21.5k|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
  531|  21.5k|                            ps_dec, ps_cur_mb_info);
  532|  21.5k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  533|  21.5k|            p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
  534|  21.5k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  535|  21.5k|        }
  536|  17.6k|        else
  537|  17.6k|        {
  538|  17.6k|            p_curr_ctxt->u1_transform8x8_ctxt = 0;
  539|  17.6k|        }
  540|       |
  541|       |        /*--------------------------------------------------------------------*/
  542|       |        /* Read the IntraPrediction modes for LUMA                            */
  543|       |        /*--------------------------------------------------------------------*/
  544|  39.2k|        if (!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (544:13): [True: 17.9k, False: 21.2k]
  ------------------
  545|  17.9k|        {
  546|  17.9k|            UWORD8 *pu1_temp;
  547|  17.9k|            ih264d_read_intra_pred_modes_cabac(
  548|  17.9k|                            ps_dec,
  549|  17.9k|                            ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
  550|  17.9k|                            ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16),
  551|  17.9k|                            ps_cur_mb_info->u1_tran_form8x8);
  552|  17.9k|            pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
  553|  17.9k|            pu1_temp += 32;
  554|  17.9k|            ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
  555|  17.9k|        }
  556|  21.2k|        else
  557|  21.2k|        {
  558|  21.2k|            UWORD8 *pu1_temp;
  559|  21.2k|            ih264d_read_intra_pred_modes_cabac(
  560|  21.2k|                            ps_dec,
  561|  21.2k|                            ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
  562|  21.2k|                            ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4),
  563|  21.2k|                            ps_cur_mb_info->u1_tran_form8x8);
  564|  21.2k|            pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
  565|  21.2k|            pu1_temp += 8;
  566|  21.2k|            ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
  567|  21.2k|        }
  568|       |        /*--------------------------------------------------------------------*/
  569|       |        /* Read the IntraPrediction mode for CHROMA                           */
  570|       |        /*--------------------------------------------------------------------*/
  571|  39.2k|        u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec);
  572|  39.2k|        COPYTHECONTEXT("intra_chroma_pred_mode", u1_intra_chrom_pred_mode);
  573|  39.2k|        p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode =
  574|  39.2k|                        u1_intra_chrom_pred_mode;
  575|       |
  576|       |        /*--------------------------------------------------------------------*/
  577|       |        /* Read the Coded block pattern                                       */
  578|       |        /*--------------------------------------------------------------------*/
  579|  39.2k|        u1_cbp = ih264d_parse_ctx_cbp_cabac(ps_dec);
  580|  39.2k|        COPYTHECONTEXT("coded_block_pattern", u1_cbp);
  581|  39.2k|        ps_cur_mb_info->u1_cbp = u1_cbp;
  582|  39.2k|        p_curr_ctxt->u1_cbp = u1_cbp;
  583|       |
  584|       |        /*--------------------------------------------------------------------*/
  585|       |        /* Read mb_qp_delta                                                   */
  586|       |        /*--------------------------------------------------------------------*/
  587|  39.2k|        if(ps_cur_mb_info->u1_cbp)
  ------------------
  |  Branch (587:12): [True: 26.4k, False: 12.7k]
  ------------------
  588|  26.4k|        {
  589|  26.4k|            ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp);
  590|  26.4k|            if(ret != OK)
  ------------------
  |  |  114|  26.4k|#define OK        0
  ------------------
  |  Branch (590:16): [True: 129, False: 26.3k]
  ------------------
  591|    129|                return ret;
  592|  26.3k|            COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
  593|  26.3k|            if(i1_delta_qp != 0)
  ------------------
  |  Branch (593:16): [True: 4.66k, False: 21.6k]
  ------------------
  594|  4.66k|            {
  595|  4.66k|                ret = ih264d_update_qp(ps_dec, i1_delta_qp);
  596|  4.66k|                if(ret != OK)
  ------------------
  |  |  114|  4.66k|#define OK        0
  ------------------
  |  Branch (596:20): [True: 0, False: 4.66k]
  ------------------
  597|      0|                    return ret;
  598|  4.66k|            }
  599|  26.3k|        }
  600|  12.7k|        else
  601|  12.7k|            ps_dec->i1_prev_mb_qp_delta = 0;
  602|  39.0k|        p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
  603|  39.0k|    }
  604|  39.0k|    else
  605|  39.0k|    {
  606|  39.0k|        u1_offset = 1;
  607|  39.0k|        ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
  ------------------
  |  |  418|  39.0k|#define I_16x16_MB  1
  ------------------
  608|  39.0k|        p_curr_ctxt->u1_mb_type = CAB_I16x16;
  ------------------
  |  |  395|  39.0k|#define CAB_I16x16        0x01 /* 0000 00x1 */
  ------------------
  609|  39.0k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
  610|  39.0k|        p_curr_ctxt->u1_transform8x8_ctxt = 0;
  611|  39.0k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  612|       |        /*--------------------------------------------------------------------*/
  613|       |        /* Read the IntraPrediction mode for CHROMA                           */
  614|       |        /*--------------------------------------------------------------------*/
  615|  39.0k|        u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec);
  616|  39.0k|        if(u1_intra_chrom_pred_mode > 3)
  ------------------
  |  Branch (616:12): [True: 0, False: 39.0k]
  ------------------
  617|      0|            return ERROR_CHROMA_PRED_MODE;
  618|       |
  619|  39.0k|        COPYTHECONTEXT("Chroma intra_chroma_pred_mode pred mode", u1_intra_chrom_pred_mode);
  620|  39.0k|        p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode =
  621|  39.0k|                        u1_intra_chrom_pred_mode;
  622|       |
  623|       |        /*--------------------------------------------------------------------*/
  624|       |        /* Read the Coded block pattern                                       */
  625|       |        /*--------------------------------------------------------------------*/
  626|  39.0k|        u1_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2];
  627|  39.0k|        ps_cur_mb_info->u1_cbp = u1_cbp;
  628|  39.0k|        p_curr_ctxt->u1_cbp = u1_cbp;
  629|       |
  630|       |        /*--------------------------------------------------------------------*/
  631|       |        /* Read mb_qp_delta                                                   */
  632|       |        /*--------------------------------------------------------------------*/
  633|  39.0k|        ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp);
  634|  39.0k|        if(ret != OK)
  ------------------
  |  |  114|  39.0k|#define OK        0
  ------------------
  |  Branch (634:12): [True: 70, False: 38.9k]
  ------------------
  635|     70|            return ret;
  636|  38.9k|        COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
  637|  38.9k|        if(i1_delta_qp != 0)
  ------------------
  |  Branch (637:12): [True: 2.40k, False: 36.5k]
  ------------------
  638|  2.40k|        {
  639|  2.40k|            ret = ih264d_update_qp(ps_dec, i1_delta_qp);
  640|  2.40k|            if(ret != OK)
  ------------------
  |  |  114|  2.40k|#define OK        0
  ------------------
  |  Branch (640:16): [True: 0, False: 2.40k]
  ------------------
  641|      0|                return ret;
  642|  2.40k|        }
  643|       |
  644|  38.9k|        {
  645|  38.9k|            WORD16 i_scaleFactor;
  646|  38.9k|            WORD16* pi2_scale_matrix_ptr;
  647|       |            /*******************************************************************/
  648|       |            /* for luma DC coefficients the scaling is done during the parsing */
  649|       |            /* to preserve the precision                                       */
  650|       |            /*******************************************************************/
  651|  38.9k|            if(ps_dec->s_high_profile.u1_scaling_present)
  ------------------
  |  Branch (651:16): [True: 371, False: 38.6k]
  ------------------
  652|    371|            {
  653|    371|                pi2_scale_matrix_ptr =
  654|    371|                                ps_dec->s_high_profile.i2_scalinglist4x4[0];
  655|       |
  656|    371|            }
  657|  38.6k|            else
  658|  38.6k|            {
  659|  38.6k|                i_scaleFactor = 16;
  660|  38.6k|                pi2_scale_matrix_ptr = &i_scaleFactor;
  661|  38.6k|            }
  662|  38.9k|            {
  663|  38.9k|                ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
  664|  38.9k|                UWORD8 uc_a, uc_b;
  665|  38.9k|                UWORD32 u4_ctx_inc;
  666|       |
  667|  38.9k|                INC_SYM_COUNT(&(ps_dec->s_cab_dec_env));
  668|       |
  669|       |                /* if MbAddrN not available then CondTermN = 1 */
  670|  38.9k|                uc_b = ((ps_top_ctxt->u1_yuv_dc_csbp) & 0x01);
  671|       |
  672|       |                /* if MbAddrN not available then CondTermN = 1 */
  673|  38.9k|                uc_a = ((ps_dec->pu1_left_yuv_dc_csbp[0]) & 0x01);
  674|       |
  675|  38.9k|                u4_ctx_inc = (uc_a + (uc_b << 1));
  676|       |
  677|  38.9k|                {
  678|  38.9k|                    WORD16 pi2_dc_coef[16];
  679|  38.9k|                    tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
  680|  38.9k|                                    (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
  681|  38.9k|                    WORD16 *pi2_coeff_block =
  682|  38.9k|                                    (WORD16 *)ps_dec->pv_parse_tu_coeff_data;
  683|       |
  684|  38.9k|                    p_bin_ctxt = (ps_dec->p_cbf_t[LUMA_DC_CTXCAT]) + u4_ctx_inc;
  ------------------
  |  |   71|  38.9k|#define LUMA_DC_CTXCAT    0
  ------------------
  685|       |
  686|  38.9k|                    u1_dc_block_flag =
  687|  38.9k|                                    ih264d_read_coeff4x4_cabac(ps_bitstrm,
  688|  38.9k|                                                    LUMA_DC_CTXCAT,
  ------------------
  |  |   71|  38.9k|#define LUMA_DC_CTXCAT    0
  ------------------
  689|  38.9k|                                                    ps_dec->p_significant_coeff_flag_t[LUMA_DC_CTXCAT],
  ------------------
  |  |   71|  38.9k|#define LUMA_DC_CTXCAT    0
  ------------------
  690|  38.9k|                                                    ps_dec, p_bin_ctxt);
  691|       |
  692|       |                    /* Store coded_block_flag */
  693|  38.9k|                    p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
  694|  38.9k|                    p_curr_ctxt->u1_yuv_dc_csbp |= u1_dc_block_flag;
  695|  38.9k|                    if(u1_dc_block_flag)
  ------------------
  |  Branch (695:24): [True: 22.7k, False: 16.2k]
  ------------------
  696|  22.7k|                    {
  697|  22.7k|                        WORD32 pi4_tmp[16];
  698|  22.7k|                        memset(pi2_dc_coef,0,sizeof(pi2_dc_coef));
  699|  22.7k|                        ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
  700|  22.7k|                                                         pi2_dc_coef,
  701|  22.7k|                                                         ps_dec->pu1_inv_scan);
  702|       |
  703|  22.7k|                        PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  22.7k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  704|  22.7k|                        ps_dec->pf_ihadamard_scaling_4x4(pi2_dc_coef,
  705|  22.7k|                                                         pi2_coeff_block,
  706|  22.7k|                                                         ps_dec->pu2_quant_scale_y,
  707|  22.7k|                                                         (UWORD16 *)pi2_scale_matrix_ptr,
  708|  22.7k|                                                         ps_dec->u1_qp_y_div6,
  709|  22.7k|                                                         pi4_tmp);
  710|  22.7k|                        pi2_coeff_block += 16;
  711|  22.7k|                        ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_block;
  712|  22.7k|                        SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0);
  ------------------
  |  |  106|  22.7k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  713|  22.7k|                    }
  714|       |
  715|  38.9k|                }
  716|       |
  717|  38.9k|            }
  718|  38.9k|        }
  719|  38.9k|    }
  720|       |
  721|  78.0k|    ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
  722|  78.0k|    ps_dec->pu1_left_yuv_dc_csbp[0] |= u1_dc_block_flag;
  723|       |
  724|  78.0k|    ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, u1_offset);
  725|  78.0k|    if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  78.0k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 2.67k, False: 75.3k]
  |  |  ------------------
  ------------------
  726|  2.67k|        return ERROR_EOB_TERMINATE_T;
  727|  75.3k|    return OK;
  ------------------
  |  |  114|  75.3k|#define OK        0
  ------------------
  728|  78.0k|}
ih264d_parse_ipcm_mb:
 1215|  2.78k|{
 1216|  2.78k|    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
 1217|  2.78k|    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 1218|  2.78k|    UWORD8 *pu1_y, *pu1_u, *pu1_v;
 1219|  2.78k|    WORD32 ret;
 1220|       |
 1221|  2.78k|    UWORD32 u4_rec_width_y, u4_rec_width_uv;
 1222|  2.78k|    UWORD32 u1_num_mb_pair;
 1223|  2.78k|    UWORD8 u1_x, u1_y;
 1224|       |    /* CHANGED CODE */
 1225|  2.78k|    tfr_ctxt_t *ps_frame_buf;
 1226|  2.78k|    UWORD8 u1_mb_field_decoding_flag;
 1227|  2.78k|    UWORD32 *pu4_buf;
 1228|  2.78k|    UWORD8 *pu1_buf;
 1229|       |    /* CHANGED CODE */
 1230|       |
 1231|  2.78k|    if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1231:8): [True: 1.34k, False: 1.43k]
  ------------------
 1232|  1.34k|    {
 1233|  1.34k|        ps_frame_buf = &ps_dec->s_tran_addrecon_parse;
 1234|  1.34k|    }
 1235|  1.43k|    else
 1236|  1.43k|    {
 1237|  1.43k|        ps_frame_buf = &ps_dec->s_tran_addrecon;
 1238|  1.43k|    }
 1239|       |    /* align bistream to byte boundary. */
 1240|       |    /* pcm_alignment_zero_bit discarded */
 1241|       |    /* For XX GotoByteBoundary */
 1242|  2.78k|    if(ps_bitstrm->u4_ofst & 0x07)
  ------------------
  |  Branch (1242:8): [True: 2.52k, False: 256]
  ------------------
 1243|  2.52k|    {
 1244|  2.52k|        ps_bitstrm->u4_ofst += 8;
 1245|  2.52k|        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
 1246|  2.52k|    }
 1247|       |
 1248|       |    /*  Store left Nnz as 16 for each 4x4 blk */
 1249|       |
 1250|  2.78k|    pu1_buf = ps_dec->pu1_left_nnz_y;
 1251|  2.78k|    pu4_buf = (UWORD32 *)pu1_buf;
 1252|  2.78k|    *pu4_buf = 0x10101010;
 1253|  2.78k|    pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
 1254|  2.78k|    pu4_buf = (UWORD32 *)pu1_buf;
 1255|  2.78k|    *pu4_buf = 0x10101010;
 1256|  2.78k|    pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
 1257|  2.78k|    pu4_buf = (UWORD32 *)pu1_buf;
 1258|  2.78k|    *pu4_buf = 0x10101010;
 1259|  2.78k|    pu1_buf = ps_dec->pu1_left_nnz_uv;
 1260|  2.78k|    pu4_buf = (UWORD32 *)pu1_buf;
 1261|  2.78k|    *pu4_buf = 0x10101010;
 1262|  2.78k|    ps_cur_mb_info->u1_cbp = 0xff;
 1263|       |
 1264|  2.78k|    ps_dec->i1_prev_mb_qp_delta = 0;
 1265|       |    /* Get neighbour MB's */
 1266|  2.78k|    u1_num_mb_pair = (u4_mbNum >> u1_mbaff);
 1267|       |
 1268|       |    /*****************************************************************************/
 1269|       |    /* calculate the RECON buffer YUV pointers for the PCM data                  */
 1270|       |    /*****************************************************************************/
 1271|       |    /* CHANGED CODE  */
 1272|  2.78k|    u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
 1273|  2.78k|    pu1_y = ps_frame_buf->pu1_dest_y + (u1_num_mb_pair << 4);
 1274|  2.78k|    pu1_u = ps_frame_buf->pu1_dest_u + (u1_num_mb_pair << 4);
 1275|  2.78k|    pu1_v = pu1_u + 1;
 1276|       |
 1277|  2.78k|    u4_rec_width_y = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
 1278|  2.78k|    u4_rec_width_uv = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
 1279|       |    /* CHANGED CODE  */
 1280|       |
 1281|  2.78k|    if(u1_mbaff)
  ------------------
  |  Branch (1281:8): [True: 0, False: 2.78k]
  ------------------
 1282|      0|    {
 1283|      0|        UWORD8 u1_top_mb;
 1284|       |
 1285|      0|        u1_top_mb = ps_cur_mb_info->u1_topmb;
 1286|       |
 1287|      0|        if(u1_top_mb == 0)
  ------------------
  |  Branch (1287:12): [True: 0, False: 0]
  ------------------
 1288|      0|        {
 1289|      0|            pu1_y += (u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (1289:23): [True: 0, False: 0]
  ------------------
 1290|      0|                            (u4_rec_width_y >> 1) : (u4_rec_width_y << 4));
 1291|      0|            pu1_u += (u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (1291:23): [True: 0, False: 0]
  ------------------
 1292|      0|                            (u4_rec_width_uv) : (u4_rec_width_uv << 4));
 1293|      0|            pu1_v = pu1_u + 1;
 1294|      0|        }
 1295|      0|    }
 1296|       |
 1297|       |    /* Read Luma samples */
 1298|  47.2k|    for(u1_y = 0; u1_y < 16; u1_y++)
  ------------------
  |  Branch (1298:19): [True: 44.4k, False: 2.78k]
  ------------------
 1299|  44.4k|    {
 1300|   756k|        for(u1_x = 0; u1_x < 16; u1_x++)
  ------------------
  |  Branch (1300:23): [True: 711k, False: 44.4k]
  ------------------
 1301|   711k|            pu1_y[u1_x] = ih264d_get_bits_h264(ps_bitstrm, 8);
 1302|       |
 1303|  44.4k|        pu1_y += u4_rec_width_y;
 1304|  44.4k|    }
 1305|       |
 1306|       |    /* Read Chroma samples */
 1307|  25.0k|    for(u1_y = 0; u1_y < 8; u1_y++)
  ------------------
  |  Branch (1307:19): [True: 22.2k, False: 2.78k]
  ------------------
 1308|  22.2k|    {
 1309|   200k|        for(u1_x = 0; u1_x < 8; u1_x++)
  ------------------
  |  Branch (1309:23): [True: 177k, False: 22.2k]
  ------------------
 1310|   177k|            pu1_u[u1_x * YUV420SP_FACTOR] = ih264d_get_bits_h264(ps_bitstrm, 8);
  ------------------
  |  |  119|   177k|#define YUV420SP_FACTOR 2
  ------------------
 1311|       |
 1312|  22.2k|        pu1_u += u4_rec_width_uv;
 1313|  22.2k|    }
 1314|       |
 1315|  25.0k|    for(u1_y = 0; u1_y < 8; u1_y++)
  ------------------
  |  Branch (1315:19): [True: 22.2k, False: 2.78k]
  ------------------
 1316|  22.2k|    {
 1317|   200k|        for(u1_x = 0; u1_x < 8; u1_x++)
  ------------------
  |  Branch (1317:23): [True: 177k, False: 22.2k]
  ------------------
 1318|   177k|            pu1_v[u1_x * YUV420SP_FACTOR] = ih264d_get_bits_h264(ps_bitstrm, 8);
  ------------------
  |  |  119|   177k|#define YUV420SP_FACTOR 2
  ------------------
 1319|       |
 1320|  22.2k|        pu1_v += u4_rec_width_uv;
 1321|  22.2k|    }
 1322|       |
 1323|  2.78k|    if(CABAC == ps_dec->ps_cur_pps->u1_entropy_coding_mode)
  ------------------
  |  |  339|  2.78k|#define CABAC  1
  ------------------
  |  Branch (1323:8): [True: 1.54k, False: 1.23k]
  ------------------
 1324|  1.54k|    {
 1325|  1.54k|        UWORD32 *pu4_buf;
 1326|  1.54k|        UWORD8 *pu1_buf;
 1327|  1.54k|        ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
 1328|       |        /* Re-initialize the cabac decoding engine. */
 1329|  1.54k|        ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
 1330|  1.54k|        if(ret != OK)
  ------------------
  |  |  114|  1.54k|#define OK        0
  ------------------
  |  Branch (1330:12): [True: 969, False: 580]
  ------------------
 1331|    969|            return ret;
 1332|       |        /* update the cabac contetxs */
 1333|    580|        p_curr_ctxt->u1_mb_type = CAB_I_PCM;
  ------------------
  |  |  407|    580|#define CAB_I_PCM         0x20 /* 001x xxxx */
  ------------------
 1334|    580|        p_curr_ctxt->u1_cbp = 47;
 1335|    580|        p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
 1336|    580|        p_curr_ctxt->u1_transform8x8_ctxt = 0;
 1337|    580|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 1338|       |
 1339|    580|        pu1_buf = ps_dec->pu1_left_nnz_y;
 1340|    580|        pu4_buf = (UWORD32 *)pu1_buf;
 1341|    580|        *pu4_buf = 0x01010101;
 1342|       |
 1343|    580|        pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
 1344|    580|        pu4_buf = (UWORD32 *)pu1_buf;
 1345|    580|        *pu4_buf = 0x01010101;
 1346|       |
 1347|    580|        pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
 1348|    580|        pu4_buf = (UWORD32 *)pu1_buf;
 1349|    580|        *pu4_buf = 0x01010101;
 1350|       |
 1351|    580|        pu1_buf = ps_dec->pu1_left_nnz_uv;
 1352|    580|        pu4_buf = (UWORD32 *)pu1_buf;
 1353|    580|        *pu4_buf = 0x01010101;
 1354|       |
 1355|    580|        p_curr_ctxt->u1_yuv_dc_csbp = 0x7;
 1356|    580|        ps_dec->pu1_left_yuv_dc_csbp[0] = 0x7;
 1357|    580|        if(ps_dec->ps_cur_slice->u1_slice_type != I_SLICE)
  ------------------
  |  |  370|    580|#define I_SLICE  2
  ------------------
  |  Branch (1357:12): [True: 132, False: 448]
  ------------------
 1358|    132|        {
 1359|       |
 1360|    132|            MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|    132|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|    132|{                                                               \
  |  |  654|    132|    memset(pu4_start,value,16);                                 \
  |  |  655|    132|}
  ------------------
 1361|    132|            memset(ps_dec->pi1_left_ref_idx_ctxt_inc, 0, 4);
 1362|    132|            MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
  ------------------
  |  |  652|    132|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|    132|{                                                               \
  |  |  654|    132|    memset(pu4_start,value,16);                                 \
  |  |  655|    132|}
  ------------------
 1363|    132|            memset(p_curr_ctxt->i1_ref_idx, 0, 4);
 1364|       |
 1365|    132|        }
 1366|    580|    }
 1367|  1.81k|    return OK;
  ------------------
  |  |  114|  1.81k|#define OK        0
  ------------------
 1368|  2.78k|}

ih264d_parse_mb_type_intra_cabac:
   70|  89.5k|{
   71|  89.5k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
   72|  89.5k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
   73|  89.5k|    ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
   74|  89.5k|    ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
   75|  89.5k|    bin_ctxt_model_t *ps_mb_bin_ctxt = ps_dec->p_mb_type_t;
   76|  89.5k|    WORD8 u1_mb_type, u1_bin;
   77|  89.5k|    UWORD32 u4_cxt_inc;
   78|       |
   79|  89.5k|    u4_cxt_inc = 0;
   80|  89.5k|    if(!u1_inter)
  ------------------
  |  Branch (80:8): [True: 80.9k, False: 8.55k]
  ------------------
   81|  80.9k|    {
   82|  80.9k|        if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (82:12): [True: 65.4k, False: 15.5k]
  ------------------
   83|  65.4k|            u4_cxt_inc += ((ps_left_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0);
  ------------------
  |  |  394|  65.4k|#define CAB_I4x4          0x00 /* 0000 00x0 */
  ------------------
  |  Branch (83:28): [True: 36.8k, False: 28.5k]
  ------------------
   84|  80.9k|        if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (84:12): [True: 58.3k, False: 22.6k]
  ------------------
   85|  58.3k|            u4_cxt_inc += ((ps_top_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0);
  ------------------
  |  |  394|  58.3k|#define CAB_I4x4          0x00 /* 0000 00x0 */
  ------------------
  |  Branch (85:28): [True: 34.5k, False: 23.8k]
  ------------------
   86|  80.9k|    }
   87|  8.55k|    else
   88|  8.55k|    {
   89|  8.55k|        ps_mb_bin_ctxt = ps_mb_bin_ctxt + 3 + (ps_dec->u1_B << 1);
   90|  8.55k|    }
   91|       |
   92|       |    /* b0 */
   93|  89.5k|    u1_mb_type = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
   94|  89.5k|                                          ps_cab_env);
   95|  89.5k|    if(u1_mb_type)
  ------------------
  |  Branch (95:8): [True: 45.6k, False: 43.9k]
  ------------------
   96|  45.6k|    {
   97|       |        /* I16x16 or I_PCM mode */
   98|       |        /* b1 */
   99|  45.6k|        u1_bin = ih264d_decode_terminate(ps_cab_env, ps_bitstrm);
  100|  45.6k|        if(u1_bin == 0)
  ------------------
  |  Branch (100:12): [True: 44.0k, False: 1.54k]
  ------------------
  101|  44.0k|        {
  102|       |            /* I16x16 mode */
  103|       |            /* Read b2 and b3 */
  104|  44.0k|            u4_cxt_inc = (u1_inter) ? 0x021 : 0x043;
  ------------------
  |  Branch (104:26): [True: 3.17k, False: 40.9k]
  ------------------
  105|       |
  106|  44.0k|            u1_bin = ih264d_decode_bins(2, u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
  107|  44.0k|                                        ps_cab_env);
  108|       |
  109|  44.0k|            if(u1_bin & 0x01)
  ------------------
  |  Branch (109:16): [True: 7.46k, False: 36.6k]
  ------------------
  110|  7.46k|                u1_mb_type += 4;
  111|       |
  112|  44.0k|            if(u1_bin & 0x02)
  ------------------
  |  Branch (112:16): [True: 2.38k, False: 41.6k]
  ------------------
  113|  2.38k|                u1_mb_type += 12;
  114|       |
  115|  44.0k|            if(u1_bin & 0x01)
  ------------------
  |  Branch (115:16): [True: 7.46k, False: 36.6k]
  ------------------
  116|  7.46k|            {
  117|       |                /* since b3=1, Read three bins */
  118|  7.46k|                u4_cxt_inc = (u1_inter) ? 0x0332 : 0x0765;
  ------------------
  |  Branch (118:30): [True: 598, False: 6.87k]
  ------------------
  119|  7.46k|                u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc, ps_mb_bin_ctxt,
  120|  7.46k|                                                    ps_bitstrm, ps_cab_env);
  121|       |
  122|  7.46k|            }
  123|  36.6k|            else
  124|  36.6k|            {
  125|       |                /* Read two bins */
  126|  36.6k|                u4_cxt_inc = (u1_inter) ? 0x033 : 0x076;
  ------------------
  |  Branch (126:30): [True: 2.57k, False: 34.0k]
  ------------------
  127|  36.6k|                u1_bin = (UWORD8)ih264d_decode_bins(2, u4_cxt_inc, ps_mb_bin_ctxt,
  128|  36.6k|                                                    ps_bitstrm, ps_cab_env);
  129|  36.6k|            }
  130|  44.0k|            u1_mb_type += u1_bin;
  131|  44.0k|        }
  132|  1.54k|        else
  133|  1.54k|        {
  134|       |            /* I_PCM mode */
  135|       |            /* b1=1 */
  136|  1.54k|            u1_mb_type = 25;
  137|  1.54k|        }
  138|  45.6k|    }
  139|  89.5k|    return (u1_mb_type);
  140|  89.5k|}
ih264d_parse_mb_type_cabac:
  155|   258k|{
  156|   258k|    const UWORD8 uc_slice_type = ps_dec->ps_cur_slice->u1_slice_type;
  157|   258k|    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
  158|   258k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
  159|   258k|    ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
  160|   258k|    ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
  161|   258k|    WORD8 c_ctxt_inc;
  162|   258k|    bin_ctxt_model_t *ps_mb_bin_ctxt = ps_dec->p_mb_type_t;
  163|   258k|    WORD8 u1_mb_type = 0, u1_bin;
  164|   258k|    UWORD32 u4_cxt_inc;
  165|       |
  166|   258k|    INC_SYM_COUNT(ps_cab_env);
  167|       |
  168|   258k|    c_ctxt_inc = 0;
  169|       |
  170|   258k|    if(uc_slice_type == SI_SLICE)
  ------------------
  |  |  372|   258k|#define SI_SLICE 4
  ------------------
  |  Branch (170:8): [True: 0, False: 258k]
  ------------------
  171|      0|    {
  172|       |        /* b0 */
  173|      0|        if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (173:12): [True: 0, False: 0]
  ------------------
  174|      0|            c_ctxt_inc += ((ps_left_ctxt->u1_mb_type != CAB_SI4x4) ? 1 : 0);
  ------------------
  |  |  399|      0|#define CAB_SI4x4         0x08 /* 0000 10x0 */
  ------------------
  |  Branch (174:28): [True: 0, False: 0]
  ------------------
  175|      0|        if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (175:12): [True: 0, False: 0]
  ------------------
  176|      0|            c_ctxt_inc += ((ps_top_ctxt->u1_mb_type != CAB_SI4x4) ? 1 : 0);
  ------------------
  |  |  399|      0|#define CAB_SI4x4         0x08 /* 0000 10x0 */
  ------------------
  |  Branch (176:28): [True: 0, False: 0]
  ------------------
  177|       |
  178|      0|        u4_cxt_inc = c_ctxt_inc;
  179|      0|        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
  180|      0|                                           ps_cab_env);
  181|      0|        if(u1_bin == 0)
  ------------------
  |  Branch (181:12): [True: 0, False: 0]
  ------------------
  182|      0|        {
  183|       |            /* SI MB */
  184|      0|            u1_mb_type = 0;
  185|      0|        }
  186|      0|        else
  187|      0|        {
  188|      0|            u1_mb_type = 1 + ih264d_parse_mb_type_intra_cabac(0, ps_dec);
  189|      0|        }
  190|      0|    }
  191|   258k|    else if(uc_slice_type == P_SLICE)
  ------------------
  |  |  368|   258k|#define P_SLICE  0
  ------------------
  |  Branch (191:13): [True: 119k, False: 138k]
  ------------------
  192|   119k|    {
  193|       |        /* P Slice */
  194|       |        /* b0 */
  195|   119k|        u4_cxt_inc = 0;
  196|   119k|        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
  197|   119k|                                           ps_cab_env);
  198|   119k|        if(!u1_bin)
  ------------------
  |  Branch (198:12): [True: 112k, False: 7.05k]
  ------------------
  199|   112k|        {
  200|       |            /* Inter MB types */
  201|       |            /* b1 */
  202|   112k|            u4_cxt_inc = 0x01;
  203|   112k|            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
  204|   112k|                                               ps_bitstrm, ps_cab_env);
  205|       |            /* b2 */
  206|   112k|            u4_cxt_inc = u1_bin + 2;
  207|   112k|            u1_mb_type = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
  208|   112k|                                                  ps_bitstrm, ps_cab_env);
  209|   112k|            u1_mb_type = (u1_bin << 1) + u1_mb_type;
  210|   112k|            if(u1_mb_type)
  ------------------
  |  Branch (210:16): [True: 53.2k, False: 59.1k]
  ------------------
  211|  53.2k|                u1_mb_type = 4 - u1_mb_type;
  212|   112k|        }
  213|  7.05k|        else
  214|  7.05k|        {
  215|       |            /* Intra Prefix 1 found */
  216|       |            /* Intra MB type */
  217|  7.05k|            u1_mb_type = 5 + ih264d_parse_mb_type_intra_cabac(1, ps_dec);
  218|  7.05k|        }
  219|   119k|    }
  220|   138k|    else if(uc_slice_type == B_SLICE)
  ------------------
  |  |  369|   138k|#define B_SLICE  1
  ------------------
  |  Branch (220:13): [True: 138k, False: 0]
  ------------------
  221|   138k|    {
  222|   138k|        WORD8 a, b;
  223|       |        /* B Slice */
  224|       |        /* b0 */
  225|       |        /* a = b = 0, if B slice and MB is a SKIP or B_DIRECT16x16 */
  226|   138k|        a = 0;
  227|   138k|        b = 0;
  228|   138k|        u1_mb_type = 0;
  229|   138k|        if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (229:12): [True: 127k, False: 11.7k]
  ------------------
  230|   127k|            a = ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
  ------------------
  |  |  405|   127k|#define CAB_BD16x16_MASK  0x07 /* 0000 0111 */
  ------------------
                          a = ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
  ------------------
  |  |  396|   127k|#define CAB_BD16x16       0x04 /* 0000 0100 */
  ------------------
  231|   138k|        if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (231:12): [True: 116k, False: 22.2k]
  ------------------
  232|   116k|            b = ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
  ------------------
  |  |  405|   116k|#define CAB_BD16x16_MASK  0x07 /* 0000 0111 */
  ------------------
                          b = ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
  ------------------
  |  |  396|   116k|#define CAB_BD16x16       0x04 /* 0000 0100 */
  ------------------
  233|       |
  234|   138k|        u4_cxt_inc = a + b;
  235|       |
  236|   138k|        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
  237|   138k|                                           ps_cab_env);
  238|       |
  239|   138k|        if(u1_bin)
  ------------------
  |  Branch (239:12): [True: 133k, False: 5.30k]
  ------------------
  240|   133k|        {
  241|       |
  242|       |            /* b1 */
  243|   133k|            u4_cxt_inc = 0x03;
  244|   133k|            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
  245|   133k|                                               ps_bitstrm, ps_cab_env);
  246|       |
  247|   133k|            if(!u1_bin)
  ------------------
  |  Branch (247:16): [True: 29.9k, False: 103k]
  ------------------
  248|  29.9k|            {
  249|       |                /* b2 */
  250|  29.9k|                u4_cxt_inc = 0x05;
  251|  29.9k|                u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
  252|  29.9k|                                                   ps_bitstrm, ps_cab_env);
  253|       |
  254|  29.9k|                u1_mb_type = u1_bin + 1;
  255|  29.9k|            }
  256|   103k|            else
  257|   103k|            {
  258|   103k|                u1_mb_type = 3;
  259|       |                /* b2 */
  260|   103k|                u4_cxt_inc = 0x04;
  261|   103k|                u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
  262|   103k|                                                   ps_bitstrm, ps_cab_env);
  263|       |
  264|   103k|                if(u1_bin)
  ------------------
  |  Branch (264:20): [True: 87.0k, False: 16.6k]
  ------------------
  265|  87.0k|                {
  266|  87.0k|                    u1_mb_type += 8;
  267|       |                    /* b3 */
  268|  87.0k|                    u4_cxt_inc = 0x05;
  269|  87.0k|                    u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
  270|  87.0k|                                                       ps_bitstrm, ps_cab_env);
  271|       |
  272|  87.0k|                    if(!u1_bin)
  ------------------
  |  Branch (272:24): [True: 31.6k, False: 55.4k]
  ------------------
  273|  31.6k|                    {
  274|  31.6k|                        u1_mb_type++;
  275|       |                        /* b4, b5, b6 */
  276|  31.6k|                        u4_cxt_inc = 0x0555;
  277|  31.6k|                        u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc,
  278|  31.6k|                                                            ps_mb_bin_ctxt,
  279|  31.6k|                                                            ps_bitstrm,
  280|  31.6k|                                                            ps_cab_env);
  281|       |
  282|       |
  283|       |
  284|  31.6k|                        u1_mb_type += u1_bin;
  285|  31.6k|                    }
  286|  55.4k|                    else
  287|  55.4k|                    {
  288|       |                        /* b4 */
  289|  55.4k|                        u4_cxt_inc = 0x05;
  290|  55.4k|                        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
  291|  55.4k|                                                           ps_mb_bin_ctxt,
  292|  55.4k|                                                           ps_bitstrm,
  293|  55.4k|                                                           ps_cab_env);
  294|       |
  295|  55.4k|                        if(u1_bin)
  ------------------
  |  Branch (295:28): [True: 50.4k, False: 4.99k]
  ------------------
  296|  50.4k|                        {
  297|       |                            /* b5 */
  298|  50.4k|                            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
  299|  50.4k|                                                               ps_mb_bin_ctxt,
  300|  50.4k|                                                               ps_bitstrm,
  301|  50.4k|                                                               ps_cab_env);
  302|       |
  303|  50.4k|                            u1_mb_type += (u1_bin ? 11 : 0);
  ------------------
  |  Branch (303:44): [True: 47.7k, False: 2.69k]
  ------------------
  304|  50.4k|                        }
  305|  4.99k|                        else
  306|  4.99k|                        {
  307|  4.99k|                            u1_mb_type = 20;
  308|       |                            /* b5 */
  309|  4.99k|                            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
  310|  4.99k|                                                               ps_mb_bin_ctxt,
  311|  4.99k|                                                               ps_bitstrm,
  312|  4.99k|                                                               ps_cab_env);
  313|       |
  314|  4.99k|                            if(!u1_bin)
  ------------------
  |  Branch (314:32): [True: 3.48k, False: 1.50k]
  ------------------
  315|  3.48k|                            {
  316|       |                                /* b6 */
  317|  3.48k|                                u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
  318|  3.48k|                                                                   ps_mb_bin_ctxt,
  319|  3.48k|                                                                   ps_bitstrm,
  320|  3.48k|                                                                   ps_cab_env);
  321|       |
  322|  3.48k|                                u1_mb_type += u1_bin;
  323|  3.48k|                            }
  324|  1.50k|                            else
  325|  1.50k|                            {
  326|       |                                /* Intra Prefix 111101 found */
  327|       |                                /* Intra MB type */
  328|  1.50k|                                u1_mb_type =
  329|  1.50k|                                                23
  330|  1.50k|                                                                + ih264d_parse_mb_type_intra_cabac(
  331|  1.50k|                                                                                1,
  332|  1.50k|                                                                                ps_dec);
  333|  1.50k|                            }
  334|  4.99k|                        }
  335|  55.4k|                    }
  336|  87.0k|                }
  337|  16.6k|                else
  338|  16.6k|                {
  339|       |                    /* b3, b4, b5 */
  340|  16.6k|                    u4_cxt_inc = 0x0555;
  341|  16.6k|                    u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc,
  342|  16.6k|                                                        ps_mb_bin_ctxt, ps_bitstrm,
  343|  16.6k|                                                        ps_cab_env);
  344|       |
  345|       |
  346|       |
  347|       |
  348|  16.6k|                    u1_mb_type += u1_bin;
  349|  16.6k|                }
  350|   103k|            }
  351|   133k|        }
  352|   138k|    }
  353|   258k|    return ((UWORD32)u1_mb_type);
  354|   258k|}
ih264d_parse_submb_type_cabac:
  372|   267k|{
  373|   267k|    WORD8 u1_sub_mb_type, u1_bin;
  374|       |
  375|   267k|    INC_SYM_COUNT(ps_cab_env);
  376|       |
  377|   267k|    u1_sub_mb_type = 0;
  378|   267k|    u1_bin = (UWORD8)ih264d_decode_bin(0, ps_sub_mb_cxt, ps_bitstrm,
  379|   267k|                                       ps_cab_env);
  380|       |
  381|   267k|    if(u1_slc_type_b ^ u1_bin)
  ------------------
  |  Branch (381:8): [True: 43.2k, False: 224k]
  ------------------
  382|  43.2k|        return 0;
  383|       |
  384|   224k|    if(!u1_slc_type_b)
  ------------------
  |  Branch (384:8): [True: 47.0k, False: 177k]
  ------------------
  385|  47.0k|    {
  386|       |        /* P Slice */
  387|  47.0k|        u1_sub_mb_type = 1;
  388|  47.0k|        u1_bin = (UWORD8)ih264d_decode_bin(1, ps_sub_mb_cxt, ps_bitstrm,
  389|  47.0k|                                           ps_cab_env);
  390|  47.0k|        if(u1_bin == 1)
  ------------------
  |  Branch (390:12): [True: 38.7k, False: 8.37k]
  ------------------
  391|  38.7k|        {
  392|  38.7k|            u1_bin = (UWORD8)ih264d_decode_bin(2, ps_sub_mb_cxt, ps_bitstrm,
  393|  38.7k|                                               ps_cab_env);
  394|  38.7k|            u1_sub_mb_type = (2 + (!u1_bin));
  395|  38.7k|        }
  396|       |
  397|  47.0k|        return u1_sub_mb_type;
  398|  47.0k|    }
  399|   177k|    else
  400|   177k|    {
  401|       |        /* B Slice */
  402|       |
  403|       |        /* b1 */
  404|   177k|        u1_bin = (UWORD8)ih264d_decode_bin(1, ps_sub_mb_cxt, ps_bitstrm,
  405|   177k|                                           ps_cab_env);
  406|   177k|        if(u1_bin)
  ------------------
  |  Branch (406:12): [True: 44.8k, False: 132k]
  ------------------
  407|  44.8k|        {
  408|       |            /* b2 */
  409|  44.8k|            u1_bin = (UWORD8)ih264d_decode_bin(2, ps_sub_mb_cxt, ps_bitstrm,
  410|  44.8k|                                               ps_cab_env);
  411|  44.8k|            if(u1_bin)
  ------------------
  |  Branch (411:16): [True: 3.72k, False: 41.1k]
  ------------------
  412|  3.72k|            {
  413|       |                /* b3 */
  414|  3.72k|                u1_sub_mb_type = 7;
  415|  3.72k|                u1_bin = (UWORD8)ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
  416|  3.72k|                                                   ps_cab_env);
  417|  3.72k|                u1_sub_mb_type += u1_bin << 2;
  418|  3.72k|                u1_bin = !u1_bin;
  419|       |                /* b4 */
  420|  3.72k|                if(u1_bin == 0)
  ------------------
  |  Branch (420:20): [True: 1.60k, False: 2.11k]
  ------------------
  421|  1.60k|                {
  422|  1.60k|                    u1_bin = ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
  423|  1.60k|                                               ps_cab_env);
  424|  1.60k|                }
  425|  2.11k|                else
  426|  2.11k|                {
  427|  2.11k|                    u1_bin = (UWORD8)ih264d_decode_bins(2, 0x33, ps_sub_mb_cxt,
  428|  2.11k|                                                        ps_bitstrm, ps_cab_env);
  429|  2.11k|                }
  430|       |
  431|  3.72k|                return (u1_sub_mb_type + u1_bin);
  432|  3.72k|            }
  433|  41.1k|            else
  434|  41.1k|            {
  435|       |                /* b3 */
  436|  41.1k|                u1_bin = (UWORD8)ih264d_decode_bins(2, 0x33, ps_sub_mb_cxt,
  437|  41.1k|                                                    ps_bitstrm, ps_cab_env);
  438|  41.1k|                return (3 + u1_bin);
  439|  41.1k|            }
  440|  44.8k|        }
  441|   132k|        else
  442|   132k|        {
  443|       |            /* b2 */
  444|   132k|            u1_bin = (UWORD8)ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
  445|   132k|                                               ps_cab_env);
  446|   132k|            return (1 + u1_bin);
  447|   132k|        }
  448|   177k|    }
  449|   224k|}
ih264d_parse_ref_idx_cabac:
  473|   314k|{
  474|   314k|    UWORD8 u1_a, u1_b;
  475|   314k|    UWORD32 u4_cxt_inc;
  476|   314k|    UWORD8 u1_blk_no, u1_i, u1_idx_lft, u1_idx_top;
  477|   314k|    WORD8 i1_ref_idx;
  478|       |
  479|  1.05M|    for(u1_blk_no = 0, u1_i = 0; u1_i < u1_num_part; u1_i++, pi1_ref_idx++)
  ------------------
  |  Branch (479:34): [True: 743k, False: 313k]
  ------------------
  480|   743k|    {
  481|   743k|        u1_idx_lft = ((u1_blk_no & 0x02) >> 1) + u1_b2;
  482|   743k|        u1_idx_top = (u1_blk_no & 0x01) + u1_b2;
  483|   743k|        i1_ref_idx = *pi1_ref_idx;
  484|       |
  485|   743k|        if(i1_ref_idx > 0)
  ------------------
  |  Branch (485:12): [True: 302k, False: 441k]
  ------------------
  486|   302k|        {
  487|   302k|            u1_a = pi1_lft_cxt[u1_idx_lft] > 0;
  488|   302k|            u1_b = pi1_top_cxt[u1_idx_top] > 0;
  489|       |
  490|   302k|            u4_cxt_inc = u1_a + (u1_b << 1);
  491|   302k|            u4_cxt_inc = (u4_cxt_inc | 0x55540);
  492|       |
  493|   302k|            i1_ref_idx = (WORD8)ih264d_decode_bins_unary(32, u4_cxt_inc,
  494|   302k|                                                         ps_ref_cxt, ps_bitstrm,
  495|   302k|                                                         ps_cab_env);
  496|       |
  497|   302k|            if((i1_ref_idx > u1_max_ref_minus1) || (i1_ref_idx < 0))
  ------------------
  |  Branch (497:16): [True: 792, False: 301k]
  |  Branch (497:52): [True: 0, False: 301k]
  ------------------
  498|    792|            {
  499|    792|                return ERROR_REF_IDX;
  500|    792|            }
  501|       |
  502|   301k|            *pi1_ref_idx = i1_ref_idx;
  503|       |
  504|   301k|            INC_SYM_COUNT(ps_cab_env);
  505|       |
  506|   301k|        }
  507|       |
  508|       |        /* Storing Reference Idx Information */
  509|   742k|        pi1_lft_cxt[u1_idx_lft] = i1_ref_idx;
  510|   742k|        pi1_top_cxt[u1_idx_top] = i1_ref_idx;
  511|   742k|        u1_blk_no = u1_blk_no + 1 + (u1_mb_mode & 0x01);
  512|   742k|    }
  513|       |    /* if(!u1_sub_mb) */
  514|   313k|    if(u1_num_part != 4)
  ------------------
  |  Branch (514:8): [True: 213k, False: 99.9k]
  ------------------
  515|   213k|    {
  516|   213k|        pi1_lft_cxt[(!(u1_mb_mode & 0x1)) + u1_b2] = pi1_lft_cxt[u1_b2];
  517|   213k|        pi1_top_cxt[(!(u1_mb_mode & 0x2)) + u1_b2] = pi1_top_cxt[u1_b2];
  518|   213k|    }
  519|   313k|    return OK;
  ------------------
  |  |  114|   313k|#define OK        0
  ------------------
  520|   314k|}
ih264d_parse_mb_qp_delta_cabac:
  536|   172k|{
  537|   172k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
  538|   172k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  539|       |
  540|   172k|    UWORD8 u1_code_num;
  541|   172k|    bin_ctxt_model_t *ps_mb_qp_delta_ctxt = ps_dec->p_mb_qp_delta_t;
  542|   172k|    UWORD32 u4_cxt_inc;
  543|       |
  544|   172k|    INC_SYM_COUNT(ps_cab_env);
  545|       |
  546|   172k|    u4_cxt_inc = (!(!(ps_dec->i1_prev_mb_qp_delta)));
  547|       |
  548|   172k|    u1_code_num = 0;
  549|   172k|    u4_cxt_inc = (u4_cxt_inc | 0x33320);
  550|       |    /* max number of bins = 53,
  551|       |     since Range for MbQpDelta= -26 to +25 inclusive, UNARY code */
  552|   172k|    u1_code_num = ih264d_decode_bins_unary(32, u4_cxt_inc, ps_mb_qp_delta_ctxt,
  553|   172k|                                          ps_bitstrm, ps_cab_env);
  554|   172k|    if(u1_code_num == 32)
  ------------------
  |  Branch (554:8): [True: 990, False: 171k]
  ------------------
  555|    990|    {
  556|       |        /* Read remaining 21 bins */
  557|    990|        UWORD8 uc_codeNumX;
  558|    990|        u4_cxt_inc = 0x33333;
  559|    990|        uc_codeNumX = ih264d_decode_bins_unary(21, u4_cxt_inc, ps_mb_qp_delta_ctxt,
  560|    990|                                               ps_bitstrm, ps_cab_env);
  561|    990|        u1_code_num = u1_code_num + uc_codeNumX;
  562|    990|    }
  563|       |
  564|   172k|    *pi1_mb_qp_delta = (u1_code_num + 1) >> 1;
  565|       |    /* Table 9.3: If code_num is even Syntax Element has -ve value */
  566|   172k|    if(!(u1_code_num & 0x01))
  ------------------
  |  Branch (566:8): [True: 152k, False: 20.3k]
  ------------------
  567|   152k|        *pi1_mb_qp_delta = -(*pi1_mb_qp_delta);
  568|       |
  569|       |    /* Range of MbQpDelta= -26 to +25 inclusive */
  570|   172k|    if((*pi1_mb_qp_delta < -26) || (*pi1_mb_qp_delta > 25))
  ------------------
  |  Branch (570:8): [True: 0, False: 172k]
  |  Branch (570:36): [True: 860, False: 171k]
  ------------------
  571|    860|        return ERROR_INV_RANGE_QP_T;
  572|   171k|    ps_dec->i1_prev_mb_qp_delta = *pi1_mb_qp_delta;
  573|   171k|    return OK;
  ------------------
  |  |  114|   171k|#define OK        0
  ------------------
  574|   172k|}
ih264d_parse_chroma_pred_mode_cabac:
  588|  88.0k|{
  589|  88.0k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
  590|  88.0k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  591|  88.0k|    ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
  592|  88.0k|    ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
  593|  88.0k|    WORD8 i1_chroma_pred_mode, a, b;
  594|  88.0k|    UWORD32 u4_cxt_inc;
  595|       |
  596|  88.0k|    INC_SYM_COUNT(ps_cab_env);
  597|       |
  598|       |    /* Binarization is TU and Cmax=3 */
  599|  88.0k|    i1_chroma_pred_mode = 0;
  600|  88.0k|    a = 0;
  601|  88.0k|    b = 0;
  602|       |
  603|  88.0k|    a = ((ps_left_ctxt->u1_intra_chroma_pred_mode != 0) ? 1 : 0);
  ------------------
  |  Branch (603:10): [True: 35.9k, False: 52.0k]
  ------------------
  604|       |
  605|  88.0k|    b = ((ps_top_ctxt->u1_intra_chroma_pred_mode != 0) ? 1 : 0);
  ------------------
  |  Branch (605:10): [True: 31.7k, False: 56.2k]
  ------------------
  606|  88.0k|    u4_cxt_inc = a + b;
  607|       |
  608|  88.0k|    u4_cxt_inc = (u4_cxt_inc | 0x330);
  609|       |
  610|  88.0k|    i1_chroma_pred_mode = ih264d_decode_bins_tunary(
  611|  88.0k|                    3, u4_cxt_inc, ps_dec->p_intra_chroma_pred_mode_t,
  612|  88.0k|                    ps_bitstrm, ps_cab_env);
  613|       |
  614|  88.0k|    return (i1_chroma_pred_mode);
  615|  88.0k|}
ih264d_parse_transform8x8flag_cabac:
  635|  57.3k|{
  636|  57.3k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
  637|  57.3k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  638|  57.3k|    ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
  639|  57.3k|    ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
  640|  57.3k|    UWORD8 u1_transform_8x8flag;
  641|  57.3k|    UWORD8 u1_mb_ngbr_avail = ps_cur_mb_info->u1_mb_ngbr_availablity;
  642|       |
  643|  57.3k|    WORD8 a, b;
  644|  57.3k|    UWORD32 u4_cxt_inc;
  645|       |
  646|       |    /* for calculating the context increment for transform8x8 u4_flag */
  647|       |    /* it reads transform8x8 u4_flag of the neighbors through */
  648|       |
  649|       |    /* Binarization is FLC */
  650|  57.3k|    a = 0;
  651|  57.3k|    b = 0;
  652|       |
  653|  57.3k|    if(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK)
  ------------------
  |  |   53|  57.3k|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  |  Branch (653:8): [True: 39.0k, False: 18.3k]
  ------------------
  654|  39.0k|    {
  655|  39.0k|        a = ps_left_ctxt->u1_transform8x8_ctxt;
  656|  39.0k|    }
  657|  57.3k|    if(u1_mb_ngbr_avail & TOP_MB_AVAILABLE_MASK)
  ------------------
  |  |   55|  57.3k|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  |  Branch (657:8): [True: 42.9k, False: 14.4k]
  ------------------
  658|  42.9k|    {
  659|  42.9k|        b = ps_top_ctxt->u1_transform8x8_ctxt;
  660|       |
  661|  42.9k|    }
  662|       |
  663|  57.3k|    u4_cxt_inc = a + b;
  664|       |
  665|  57.3k|    u1_transform_8x8flag = ih264d_decode_bin(
  666|  57.3k|                    u4_cxt_inc, ps_dec->s_high_profile.ps_transform8x8_flag,
  667|  57.3k|                    ps_bitstrm, ps_cab_env);
  668|       |
  669|  57.3k|    return (u1_transform_8x8flag);
  670|  57.3k|}
ih264d_read_intra_pred_modes_cabac:
  695|  43.9k|{
  696|  43.9k|    WORD32 i4x4_luma_blk_idx = 0;
  697|  43.9k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  698|  43.9k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
  699|  43.9k|    bin_ctxt_model_t *ps_ctxt_ipred_luma_mpm, *ps_ctx_ipred_luma_rm;
  700|  43.9k|    WORD32 i4_rem_intra4x4_pred_mode;
  701|  43.9k|    UWORD32 u4_prev_intra4x4_pred_mode_flag;
  702|  43.9k|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
  703|  43.9k|    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
  704|       |
  705|  43.9k|    ps_ctxt_ipred_luma_mpm = ps_dec->p_prev_intra4x4_pred_mode_flag_t;
  706|  43.9k|    ps_ctx_ipred_luma_rm = ps_dec->p_rem_intra4x4_pred_mode_t;
  707|  43.9k|    SWITCHOFFTRACE;
  708|       |
  709|  43.9k|    i4x4_luma_blk_idx = (0 == u1_tran_form8x8) ? 16 : 4;
  ------------------
  |  Branch (709:25): [True: 19.7k, False: 24.1k]
  ------------------
  710|       |
  711|  43.9k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  712|  43.9k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  713|       |
  714|  43.9k|    do
  715|   412k|    {
  716|       |
  717|   412k|        DECODE_ONE_BIN_MACRO(ps_ctxt_ipred_luma_mpm, u4_code_int_range,
  ------------------
  |  |  217|   412k|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|   412k|{                                                                                       \
  |  |  219|   412k|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|   412k|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|   412k|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|   412k|                                                                                        \
  |  |  223|   412k|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|   412k|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|   412k|    UWORD32 table_lookup_m;                                                               \
  |  |  226|   412k|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|   412k|                                                                                        \
  |  |  228|   412k|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|   412k|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|   412k|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|   412k|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|   412k|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|   412k|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|   412k|                                                                                        \
  |  |  235|   412k|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|   412k|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|   412k|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|   412k|    /*if mps*/                                                                          \
  |  |  239|   412k|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|   412k|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 60.3k, False: 352k]
  |  |  ------------------
  |  |  241|   412k|  {                                                                                     \
  |  |  242|  60.3k|                                                                                        \
  |  |  243|  60.3k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|  60.3k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|  60.3k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|  60.3k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|  60.3k|  }                                                                                     \
  |  |  248|   412k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|   825k|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 4.04k, False: 408k]
  |  |  ------------------
  |  |  249|   412k|    {                                                                                   \
  |  |  250|  4.04k|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|  4.04k|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|  4.04k|                                                                                        \
  |  |  253|  4.04k|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|  4.04k|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|  4.04k|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|  4.04k|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  4.04k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  4.04k|{                                                                           \
  |  |  |  |  139|  4.04k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  4.04k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  4.04k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  4.04k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  4.04k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 3.69k, False: 346]
  |  |  |  |  ------------------
  |  |  |  |  144|  4.04k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  3.69k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  4.04k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  4.04k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  4.04k|}
  |  |  ------------------
  |  |  257|  4.04k|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|  4.04k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  4.04k|{                                                                           \
  |  |  |  |  195|  4.04k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  4.04k|}
  |  |  ------------------
  |  |  258|  4.04k|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|  4.04k|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|  4.04k|                                                                                        \
  |  |  261|  4.04k|                                                                                        \
  |  |  262|  4.04k|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|  4.04k|    }                                                                                   \
  |  |  264|   412k|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|   412k|}
  ------------------
  718|   412k|                             u4_code_int_val_ofst, pu4_table, ps_bitstrm,
  719|   412k|                             u4_prev_intra4x4_pred_mode_flag)
  720|   412k|        *pu1_prev_intra4x4_pred_mode_flag = u4_prev_intra4x4_pred_mode_flag;
  721|       |
  722|   412k|        i4_rem_intra4x4_pred_mode = -1;
  723|   412k|        if(!u4_prev_intra4x4_pred_mode_flag)
  ------------------
  |  Branch (723:12): [True: 99.9k, False: 312k]
  ------------------
  724|  99.9k|        {
  725|       |
  726|       |            /*inlining DecodeDecisionBins_FLC*/
  727|       |
  728|  99.9k|            {
  729|       |
  730|  99.9k|                UWORD8 u1_max_bins = 3;
  731|  99.9k|                UWORD32 u4_value;
  732|  99.9k|                UWORD32 u4_symbol, i;
  733|       |
  734|  99.9k|                i = 0;
  735|  99.9k|                u4_value = 0;
  736|       |
  737|  99.9k|                do
  738|   299k|                {
  739|       |
  740|   299k|                    DECODE_ONE_BIN_MACRO(ps_ctx_ipred_luma_rm, u4_code_int_range,
  ------------------
  |  |  217|   299k|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|   299k|{                                                                                       \
  |  |  219|   299k|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|   299k|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|   299k|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|   299k|                                                                                        \
  |  |  223|   299k|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|   299k|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|   299k|    UWORD32 table_lookup_m;                                                               \
  |  |  226|   299k|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|   299k|                                                                                        \
  |  |  228|   299k|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|   299k|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|   299k|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|   299k|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|   299k|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|   299k|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|   299k|                                                                                        \
  |  |  235|   299k|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|   299k|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|   299k|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|   299k|    /*if mps*/                                                                          \
  |  |  239|   299k|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|   299k|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 82.7k, False: 217k]
  |  |  ------------------
  |  |  241|   299k|  {                                                                                     \
  |  |  242|  82.7k|                                                                                        \
  |  |  243|  82.7k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|  82.7k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|  82.7k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|  82.7k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|  82.7k|  }                                                                                     \
  |  |  248|   299k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|   599k|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 6.50k, False: 293k]
  |  |  ------------------
  |  |  249|   299k|    {                                                                                   \
  |  |  250|  6.50k|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|  6.50k|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|  6.50k|                                                                                        \
  |  |  253|  6.50k|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|  6.50k|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|  6.50k|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|  6.50k|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  6.50k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  6.50k|{                                                                           \
  |  |  |  |  139|  6.50k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  6.50k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  6.50k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  6.50k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  6.50k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 5.70k, False: 798]
  |  |  |  |  ------------------
  |  |  |  |  144|  6.50k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  5.70k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  6.50k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  6.50k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  6.50k|}
  |  |  ------------------
  |  |  257|  6.50k|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|  6.50k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  6.50k|{                                                                           \
  |  |  |  |  195|  6.50k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  6.50k|}
  |  |  ------------------
  |  |  258|  6.50k|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|  6.50k|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|  6.50k|                                                                                        \
  |  |  261|  6.50k|                                                                                        \
  |  |  262|  6.50k|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|  6.50k|    }                                                                                   \
  |  |  264|   299k|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|   299k|}
  ------------------
  741|   299k|                                         u4_code_int_val_ofst, pu4_table,
  742|   299k|                                         ps_bitstrm, u4_symbol)
  743|       |
  744|   299k|                    INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
  745|       |
  746|   299k|                    u4_value = u4_value | (u4_symbol << i);
  747|       |
  748|   299k|                    i++;
  749|   299k|                }
  750|   299k|                while(i < u1_max_bins);
  ------------------
  |  Branch (750:23): [True: 199k, False: 99.9k]
  ------------------
  751|       |
  752|  99.9k|                i4_rem_intra4x4_pred_mode = (u4_value);
  753|       |
  754|  99.9k|            }
  755|       |
  756|  99.9k|        }
  757|       |
  758|   412k|        (*pu1_rem_intra4x4_pred_mode) = i4_rem_intra4x4_pred_mode;
  759|       |
  760|   412k|        COPYTHECONTEXT("intra4x4_pred_mode", i4_rem_intra4x4_pred_mode);
  761|       |
  762|   412k|        pu1_prev_intra4x4_pred_mode_flag++;
  763|   412k|        pu1_rem_intra4x4_pred_mode++;
  764|       |
  765|   412k|        i4x4_luma_blk_idx--;
  766|   412k|    }
  767|   412k|    while(i4x4_luma_blk_idx);
  ------------------
  |  Branch (767:11): [True: 368k, False: 43.9k]
  ------------------
  768|       |
  769|  43.9k|    ps_cab_env->u4_code_int_range = u4_code_int_range;
  770|  43.9k|    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
  771|       |
  772|  43.9k|    return (0);
  773|       |
  774|  43.9k|}
ih264d_parse_ctx_cbp_cabac:
  790|   329k|{
  791|       |
  792|   329k|    UWORD32 u4_cxt_inc;
  793|   329k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
  794|   329k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  795|   329k|    ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
  796|   329k|    ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
  797|   329k|    bin_ctxt_model_t *ps_ctxt_cbp_luma = ps_dec->p_cbp_luma_t, *ps_bin_ctxt;
  798|   329k|    WORD8 c_Cbp; //,i,j;
  799|   329k|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
  800|   329k|    UWORD32 u4_offset, *pu4_buffer;
  801|   329k|    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
  802|       |
  803|   329k|    INC_SYM_COUNT(ps_cab_env);
  804|       |
  805|       |
  806|       |
  807|       |    /* CBP Luma, FL, Cmax = 15, L = 4 */
  808|   329k|    u4_cxt_inc = (!((ps_top_ctxt->u1_cbp >> 2) & 0x01)) << 1;
  809|   329k|    u4_cxt_inc += !((ps_left_ctxt->u1_cbp >> 1) & 0x01);
  810|       |
  811|   329k|    u4_offset = ps_bitstrm->u4_ofst;
  812|   329k|    pu4_buffer = ps_bitstrm->pu4_buffer;
  813|       |
  814|   329k|    u4_code_int_range = ps_cab_env->u4_code_int_range;
  815|   329k|    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
  816|       |    /*renormalize to ensure there 23 bits more in the u4_code_int_val_ofst*/
  817|   329k|    {
  818|   329k|        UWORD32 u4_clz, read_bits;
  819|       |
  820|   329k|        u4_clz = CLZ(u4_code_int_range);
  821|   329k|        FLUSHBITS(u4_offset, u4_clz)
  ------------------
  |  |  193|   329k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  194|   329k|{                                                                           \
  |  |  195|   329k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  196|   329k|}
  ------------------
  822|   329k|        NEXTBITS(read_bits, u4_offset, pu4_buffer, 23)
  ------------------
  |  |  137|   329k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  138|   329k|{                                                                           \
  |  |  139|   329k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  140|   329k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  141|   329k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  142|   329k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  143|   329k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (143:8): [True: 320k, False: 9.32k]
  |  |  ------------------
  |  |  144|   329k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|   320k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  145|   329k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|   329k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  146|   329k|}
  ------------------
  823|   329k|        u4_code_int_range = u4_code_int_range << u4_clz;
  824|   329k|        u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz) | read_bits;
  825|   329k|    }
  826|       |
  827|   329k|    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
  828|       |
  829|       |    /*inlining DecodeDecision_onebin without renorm*/
  830|   329k|    {
  831|       |
  832|   329k|        UWORD32 u4_qnt_int_range, u4_int_range_lps;
  833|   329k|        UWORD32 u4_symbol, u1_mps_state;
  834|   329k|        UWORD32 table_lookup;
  835|   329k|        UWORD32 u4_clz;
  836|       |
  837|   329k|        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
  838|       |
  839|   329k|        u4_clz = CLZ(u4_code_int_range);
  840|   329k|        u4_qnt_int_range = u4_code_int_range << u4_clz;
  841|   329k|        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  842|       |
  843|   329k|        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
  844|   329k|        u4_int_range_lps = table_lookup & 0xff;
  845|       |
  846|   329k|        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  847|   329k|        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  848|       |
  849|   329k|        u4_symbol = ((u1_mps_state >> 6) & 0x1);
  850|       |
  851|       |        /*if mps*/
  852|   329k|        u1_mps_state = (table_lookup >> 8) & 0x7F;
  853|       |
  854|   329k|        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
  ------------------
  |  |  184|   329k|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|   329k|{                                                                                         \
  |  |  186|   329k|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 23.6k, False: 305k]
  |  |  ------------------
  |  |  187|   329k|  {                                                                                         \
  |  |  188|  23.6k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|  23.6k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|  23.6k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|  23.6k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|  23.6k|  }                                                                                         \
  |  |  193|   329k|}
  ------------------
  855|   329k|                     u4_int_range_lps, u1_mps_state, table_lookup)
  856|       |
  857|   329k|        INC_BIN_COUNT(ps_cab_env);
  858|       |
  859|   329k|        ps_bin_ctxt->u1_mps_state = u1_mps_state;
  860|       |
  861|   329k|        c_Cbp = u4_symbol;
  862|       |
  863|   329k|    }
  864|       |
  865|   329k|    u4_cxt_inc = (!((ps_top_ctxt->u1_cbp >> 3) & 0x01)) << 1;
  866|   329k|    u4_cxt_inc += !(c_Cbp & 0x01);
  867|   329k|    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
  868|       |    /*inlining DecodeDecision_onebin without renorm*/
  869|       |
  870|   329k|    {
  871|       |
  872|   329k|        UWORD32 u4_qnt_int_range, u4_int_range_lps;
  873|   329k|        UWORD32 u4_symbol, u1_mps_state;
  874|   329k|        UWORD32 table_lookup;
  875|   329k|        UWORD32 u4_clz;
  876|       |
  877|   329k|        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
  878|       |
  879|   329k|        u4_clz = CLZ(u4_code_int_range);
  880|   329k|        u4_qnt_int_range = u4_code_int_range << u4_clz;
  881|   329k|        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  882|       |
  883|   329k|        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
  884|   329k|        u4_int_range_lps = table_lookup & 0xff;
  885|       |
  886|   329k|        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  887|   329k|        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  888|       |
  889|   329k|        u4_symbol = ((u1_mps_state >> 6) & 0x1);
  890|       |
  891|       |        /*if mps*/
  892|   329k|        u1_mps_state = (table_lookup >> 8) & 0x7F;
  893|       |
  894|   329k|        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
  ------------------
  |  |  184|   329k|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|   329k|{                                                                                         \
  |  |  186|   329k|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 20.3k, False: 309k]
  |  |  ------------------
  |  |  187|   329k|  {                                                                                         \
  |  |  188|  20.3k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|  20.3k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|  20.3k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|  20.3k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|  20.3k|  }                                                                                         \
  |  |  193|   329k|}
  ------------------
  895|   329k|                     u4_int_range_lps, u1_mps_state, table_lookup)
  896|       |
  897|   329k|        INC_BIN_COUNT(ps_cab_env);
  898|       |
  899|   329k|        ps_bin_ctxt->u1_mps_state = u1_mps_state;
  900|       |
  901|   329k|        c_Cbp |= u4_symbol << 1;
  902|       |
  903|   329k|    }
  904|       |
  905|   329k|    u4_cxt_inc = (!(c_Cbp & 0x01)) << 1;
  906|   329k|    u4_cxt_inc += !((ps_left_ctxt->u1_cbp >> 3) & 0x01);
  907|   329k|    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
  908|       |    /*inlining DecodeDecision_onebin without renorm*/
  909|       |
  910|   329k|    {
  911|       |
  912|   329k|        UWORD32 u4_qnt_int_range, u4_int_range_lps;
  913|   329k|        UWORD32 u4_symbol, u1_mps_state;
  914|   329k|        UWORD32 table_lookup;
  915|   329k|        UWORD32 u4_clz;
  916|       |
  917|   329k|        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
  918|       |
  919|   329k|        u4_clz = CLZ(u4_code_int_range);
  920|   329k|        u4_qnt_int_range = u4_code_int_range << u4_clz;
  921|   329k|        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  922|       |
  923|   329k|        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
  924|   329k|        u4_int_range_lps = table_lookup & 0xff;
  925|       |
  926|   329k|        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  927|   329k|        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  928|       |
  929|   329k|        u4_symbol = ((u1_mps_state >> 6) & 0x1);
  930|       |
  931|       |        /*if mps*/
  932|   329k|        u1_mps_state = (table_lookup >> 8) & 0x7F;
  933|       |
  934|   329k|        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
  ------------------
  |  |  184|   329k|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|   329k|{                                                                                         \
  |  |  186|   329k|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 22.5k, False: 306k]
  |  |  ------------------
  |  |  187|   329k|  {                                                                                         \
  |  |  188|  22.5k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|  22.5k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|  22.5k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|  22.5k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|  22.5k|  }                                                                                         \
  |  |  193|   329k|}
  ------------------
  935|   329k|                     u4_int_range_lps, u1_mps_state, table_lookup)
  936|       |
  937|   329k|        INC_BIN_COUNT(ps_cab_env);
  938|       |
  939|   329k|        ps_bin_ctxt->u1_mps_state = u1_mps_state;
  940|       |
  941|   329k|        c_Cbp |= u4_symbol << 2;
  942|       |
  943|   329k|    }
  944|       |
  945|   329k|    u4_cxt_inc = (!((c_Cbp >> 1) & 0x01)) << 1;
  946|   329k|    u4_cxt_inc += !((c_Cbp >> 2) & 0x01);
  947|   329k|    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
  948|       |    /*inlining DecodeDecision_onebin without renorm*/
  949|       |
  950|   329k|    {
  951|       |
  952|   329k|        UWORD32 u4_qnt_int_range, u4_int_range_lps;
  953|   329k|        UWORD32 u4_symbol, u1_mps_state;
  954|   329k|        UWORD32 table_lookup;
  955|   329k|        UWORD32 u4_clz;
  956|       |
  957|   329k|        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
  958|       |
  959|   329k|        u4_clz = CLZ(u4_code_int_range);
  960|   329k|        u4_qnt_int_range = u4_code_int_range << u4_clz;
  961|   329k|        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
  962|       |
  963|   329k|        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
  964|   329k|        u4_int_range_lps = table_lookup & 0xff;
  965|       |
  966|   329k|        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
  967|   329k|        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
  968|       |
  969|   329k|        u4_symbol = ((u1_mps_state >> 6) & 0x1);
  970|       |
  971|       |        /*if mps*/
  972|   329k|        u1_mps_state = (table_lookup >> 8) & 0x7F;
  973|       |
  974|   329k|        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
  ------------------
  |  |  184|   329k|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|   329k|{                                                                                         \
  |  |  186|   329k|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 22.3k, False: 307k]
  |  |  ------------------
  |  |  187|   329k|  {                                                                                         \
  |  |  188|  22.3k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|  22.3k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|  22.3k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|  22.3k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|  22.3k|  }                                                                                         \
  |  |  193|   329k|}
  ------------------
  975|   329k|                     u4_int_range_lps, u1_mps_state, table_lookup)
  976|       |
  977|   329k|        INC_BIN_COUNT(ps_cab_env);
  978|       |
  979|   329k|        ps_bin_ctxt->u1_mps_state = u1_mps_state;
  980|       |
  981|   329k|        c_Cbp |= u4_symbol << 3;
  982|       |
  983|   329k|    }
  984|       |
  985|   329k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
  ------------------
  |  |  113|   329k|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  ------------------
  |  Branch (985:8): [True: 0, False: 329k]
  ------------------
  986|      0|    {
  987|       |
  988|      0|        RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
  ------------------
  |  |  170|      0|  {                                                                                         \
  |  |  171|      0|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|      0|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|      0|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|      0|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|      0|{                                                                           \
  |  |  |  |  139|      0|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|      0|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|      0|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|      0|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|      0|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 0, False: 0]
  |  |  |  |  ------------------
  |  |  |  |  144|      0|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|      0|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|      0|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|      0|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|      0|}
  |  |  ------------------
  |  |  174|      0|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|      0|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|      0|{                                                                           \
  |  |  |  |  195|      0|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|      0|}
  |  |  ------------------
  |  |  175|      0|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|      0|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|      0|  }
  ------------------
  989|      0|                            pu4_buffer)
  990|       |
  991|      0|    }
  992|       |
  993|   329k|    {
  994|   329k|        UWORD32 u4_cxt_inc;
  995|   329k|        WORD8 a, b, c, d;
  996|   329k|        bin_ctxt_model_t *p_CtxtCbpChroma = ps_dec->p_cbp_chroma_t;
  997|       |
  998|       |        /* CBP Chroma, TU, Cmax = 2 */
  999|   329k|        a = 0;
 1000|   329k|        b = 0;
 1001|   329k|        c = 0;
 1002|   329k|        d = 0;
 1003|       |
 1004|   329k|        {
 1005|   329k|            a = (ps_top_ctxt->u1_cbp > 15) ? 2 : 0;
  ------------------
  |  Branch (1005:17): [True: 23.2k, False: 306k]
  ------------------
 1006|   329k|            c = (ps_top_ctxt->u1_cbp > 31) ? 2 : 0;
  ------------------
  |  Branch (1006:17): [True: 13.4k, False: 315k]
  ------------------
 1007|   329k|        }
 1008|       |
 1009|   329k|        {
 1010|   329k|            b = (ps_left_ctxt->u1_cbp > 15) ? 1 : 0;
  ------------------
  |  Branch (1010:17): [True: 19.5k, False: 309k]
  ------------------
 1011|   329k|            d = (ps_left_ctxt->u1_cbp > 31) ? 1 : 0;
  ------------------
  |  Branch (1011:17): [True: 10.8k, False: 318k]
  ------------------
 1012|   329k|        }
 1013|   329k|        u4_cxt_inc = a + b;
 1014|   329k|        u4_cxt_inc = (u4_cxt_inc | ((4 + c + d) << 4));
 1015|       |
 1016|       |        /*inlining ih264d_decode_bins_tunary */
 1017|       |
 1018|   329k|        {
 1019|       |
 1020|   329k|            UWORD8 u1_max_bins = 2;
 1021|   329k|            UWORD32 u4_ctx_inc = u4_cxt_inc;
 1022|       |
 1023|   329k|            UWORD32 u4_value;
 1024|   329k|            UWORD32 u4_symbol;
 1025|   329k|            UWORD8 u4_ctx_Inc;
 1026|   329k|            bin_ctxt_model_t *ps_bin_ctxt;
 1027|   329k|            u4_value = 0;
 1028|       |
 1029|   329k|            do
 1030|   365k|            {
 1031|   365k|                u4_ctx_Inc = u4_ctx_inc & 0xF;
 1032|   365k|                u4_ctx_inc = u4_ctx_inc >> 4;
 1033|       |
 1034|   365k|                ps_bin_ctxt = p_CtxtCbpChroma + u4_ctx_Inc;
 1035|       |                /*inlining DecodeDecision_onebin*/
 1036|   365k|                {
 1037|       |
 1038|   365k|                    UWORD32 u4_qnt_int_range, u4_int_range_lps;
 1039|       |
 1040|   365k|                    UWORD32 u1_mps_state;
 1041|   365k|                    UWORD32 table_lookup;
 1042|   365k|                    UWORD32 u4_clz;
 1043|       |
 1044|   365k|                    u1_mps_state = (ps_bin_ctxt->u1_mps_state);
 1045|       |
 1046|   365k|                    u4_clz = CLZ(u4_code_int_range);
 1047|   365k|                    u4_qnt_int_range = u4_code_int_range << u4_clz;
 1048|   365k|                    u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
 1049|       |
 1050|   365k|                    table_lookup = pu4_table[(u1_mps_state << 2)
 1051|   365k|                                    + u4_qnt_int_range];
 1052|   365k|                    u4_int_range_lps = table_lookup & 0xff;
 1053|       |
 1054|   365k|                    u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
 1055|   365k|                    u4_code_int_range = u4_code_int_range - u4_int_range_lps;
 1056|       |
 1057|   365k|                    u4_symbol = ((u1_mps_state >> 6) & 0x1);
 1058|       |
 1059|       |                    /*if mps*/
 1060|   365k|                    u1_mps_state = (table_lookup >> 8) & 0x7F;
 1061|       |
 1062|   365k|                    CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  184|   365k|                    u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m)                     \
  |  |  185|   365k|{                                                                                         \
  |  |  186|   365k|  if(u4_codeIntValOffset_m >= u4_codeIntRange_m)                                            \
  |  |  ------------------
  |  |  |  Branch (186:6): [True: 28.6k, False: 336k]
  |  |  ------------------
  |  |  187|   365k|  {                                                                                         \
  |  |  188|  28.6k|      u4_symbol_m = 1 - u4_symbol_m;                                                        \
  |  |  189|  28.6k|      u4_codeIntValOffset_m -= u4_codeIntRange_m;                                           \
  |  |  190|  28.6k|      u4_codeIntRange_m = u4_codeIntRangeLPS_m;                                             \
  |  |  191|  28.6k|      u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                       \
  |  |  192|  28.6k|  }                                                                                         \
  |  |  193|   365k|}
  ------------------
 1063|   365k|                                 u4_symbol, u4_int_range_lps, u1_mps_state,
 1064|   365k|                                 table_lookup)
 1065|       |
 1066|   365k|                    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
  ------------------
  |  |  113|   365k|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  ------------------
  |  Branch (1066:24): [True: 139, False: 365k]
  ------------------
 1067|    139|                    {
 1068|    139|                        RENORM_RANGE_OFFSET(u4_code_int_range,
  ------------------
  |  |  170|    139|  {                                                                                         \
  |  |  171|    139|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|    139|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|    139|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|    139|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|    139|{                                                                           \
  |  |  |  |  139|    139|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|    139|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|    139|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|    139|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|    139|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 73, False: 66]
  |  |  |  |  ------------------
  |  |  |  |  144|    139|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|     73|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|    139|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|    139|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|    139|}
  |  |  ------------------
  |  |  174|    139|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|    139|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|    139|{                                                                           \
  |  |  |  |  195|    139|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|    139|}
  |  |  ------------------
  |  |  175|    139|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|    139|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|    139|  }
  ------------------
 1069|    139|                                            u4_code_int_val_ofst, u4_offset,
 1070|    139|                                            pu4_buffer)
 1071|    139|                    }
 1072|   365k|                    ps_bin_ctxt->u1_mps_state = u1_mps_state;
 1073|   365k|                }
 1074|       |
 1075|   365k|                INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(
 1076|   365k|                                ps_cab_env);
 1077|       |
 1078|   365k|                u4_value++;
 1079|   365k|            }
 1080|   365k|            while((u4_value < u1_max_bins) & (u4_symbol));
  ------------------
  |  Branch (1080:19): [True: 36.0k, False: 329k]
  ------------------
 1081|       |
 1082|   329k|            u4_value = u4_value - 1 + u4_symbol;
 1083|       |
 1084|   329k|            a = (u4_value);
 1085|       |
 1086|   329k|        }
 1087|       |
 1088|   329k|c_Cbp = (c_Cbp | (a << 4));
 1089|   329k|}
 1090|       |
 1091|   329k|ps_bitstrm->u4_ofst = u4_offset;
 1092|       |
 1093|   329k|ps_cab_env->u4_code_int_range = u4_code_int_range;
 1094|   329k|ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
 1095|       |
 1096|   329k|return (c_Cbp);
 1097|   329k|}
ih264d_get_mvd_cabac:
 1119|   956k|{
 1120|   956k|    UWORD8 u1_abs_mvd_x = 0, u1_abs_mvd_y = 0;
 1121|   956k|    UWORD8 u1_sub_mb_x, u1_sub_mb_y;
 1122|   956k|    UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt;
 1123|   956k|    WORD16 *pi2_mv;
 1124|       |
 1125|   956k|    u1_sub_mb_x = (UWORD8)(u1_sub_mb & 0x03);
 1126|   956k|    u1_sub_mb_y = (UWORD8)(u1_sub_mb >> 2);
 1127|   956k|    pu1_top_mv_ctxt = &ps_dec->ps_curr_ctxt_mb_info->u1_mv[u1_sub_mb_x][u1_b2];
 1128|   956k|    pu1_lft_mv_ctxt = &ps_dec->pu1_left_mv_ctxt_inc[u1_sub_mb_y][u1_b2];
 1129|   956k|    pi2_mv = &ps_mv->i2_mv[u1_b2];
 1130|       |
 1131|   956k|    if(u1_dec_mvd)
  ------------------
  |  Branch (1131:8): [True: 676k, False: 280k]
  ------------------
 1132|   676k|    {
 1133|   676k|        WORD16 i2_mv_x, i2_mv_y;
 1134|   676k|        WORD32 i2_temp;
 1135|   676k|        {
 1136|   676k|            decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
 1137|   676k|            dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
 1138|   676k|            UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a,
 1139|   676k|                            u2_abs_mvd_y_b;
 1140|       |
 1141|   676k|            u2_abs_mvd_x_b = (UWORD16)pu1_top_mv_ctxt[0];
 1142|   676k|            u2_abs_mvd_y_b = (UWORD16)pu1_top_mv_ctxt[1];
 1143|   676k|            u2_abs_mvd_x_a = (UWORD16)pu1_lft_mv_ctxt[0];
 1144|   676k|            u2_abs_mvd_y_a = (UWORD16)pu1_lft_mv_ctxt[1];
 1145|       |
 1146|   676k|            i2_temp = u2_abs_mvd_x_a + u2_abs_mvd_x_b;
 1147|       |
 1148|   676k|            i2_mv_x = ih264d_parse_mvd_cabac(ps_bitstrm, ps_cab_env,
 1149|   676k|                                             ps_dec->p_mvd_x_t, i2_temp);
 1150|       |
 1151|   676k|            i2_temp = u2_abs_mvd_y_a + u2_abs_mvd_y_b;
 1152|       |
 1153|   676k|            i2_mv_y = ih264d_parse_mvd_cabac(ps_bitstrm, ps_cab_env,
 1154|   676k|                                             ps_dec->p_mvd_y_t, i2_temp);
 1155|   676k|        }
 1156|       |
 1157|       |        /***********************************************************************/
 1158|       |        /* Store the abs_mvd_values in cabac contexts                          */
 1159|       |        /* The follownig code can be easily optimzed if mvX, mvY clip values   */
 1160|       |        /* are packed in 16 bits follwed by memcpy                             */
 1161|       |        /***********************************************************************/
 1162|   676k|        u1_abs_mvd_x = CLIP3(0, 127, ABS(i2_mv_x));
  ------------------
  |  |   77|  1.35M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 676k]
  |  |  |  Branch (77:33): [True: 81.8k, False: 594k]
  |  |  |  Branch (77:54): [True: 1.33k, False: 675k]
  |  |  |  Branch (77:56): [True: 81.8k, False: 594k]
  |  |  |  Branch (77:77): [True: 81.2k, False: 593k]
  |  |  ------------------
  ------------------
 1163|   676k|        u1_abs_mvd_y = CLIP3(0, 127, ABS(i2_mv_y));
  ------------------
  |  |   77|  1.35M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 676k]
  |  |  |  Branch (77:33): [True: 61.6k, False: 614k]
  |  |  |  Branch (77:54): [True: 1.32k, False: 675k]
  |  |  |  Branch (77:56): [True: 61.6k, False: 614k]
  |  |  |  Branch (77:77): [True: 60.8k, False: 614k]
  |  |  ------------------
  ------------------
 1164|       |
 1165|   676k|        COPYTHECONTEXT("MVD", i2_mv_x);COPYTHECONTEXT("MVD", i2_mv_y);
 1166|       |
 1167|       |        /* Storing Mv residuals */
 1168|   676k|        pi2_mv[0] = i2_mv_x;
 1169|   676k|        pi2_mv[1] = i2_mv_y;
 1170|   676k|    }
 1171|       |
 1172|       |    /***************************************************************/
 1173|       |    /* Store abs_mvd_values cabac contexts                         */
 1174|       |    /***************************************************************/
 1175|   956k|    {
 1176|   956k|        UWORD8 u1_i;
 1177|  3.36M|        for(u1_i = 0; u1_i < u1_part_wd; u1_i++, pu1_top_mv_ctxt += 4)
  ------------------
  |  Branch (1177:23): [True: 2.40M, False: 956k]
  ------------------
 1178|  2.40M|        {
 1179|  2.40M|            pu1_top_mv_ctxt[0] = u1_abs_mvd_x;
 1180|  2.40M|            pu1_top_mv_ctxt[1] = u1_abs_mvd_y;
 1181|  2.40M|        }
 1182|       |
 1183|  3.18M|        for(u1_i = 0; u1_i < u1_part_ht; u1_i++, pu1_lft_mv_ctxt += 4)
  ------------------
  |  Branch (1183:23): [True: 2.22M, False: 956k]
  ------------------
 1184|  2.22M|        {
 1185|  2.22M|            pu1_lft_mv_ctxt[0] = u1_abs_mvd_x;
 1186|  2.22M|            pu1_lft_mv_ctxt[1] = u1_abs_mvd_y;
 1187|  2.22M|        }
 1188|   956k|    }
 1189|   956k|}
ih264d_parse_mvd_cabac:
 1222|  1.35M|{
 1223|  1.35M|    WORD8 k;
 1224|  1.35M|    WORD16 i2_suf;
 1225|  1.35M|    WORD16 i2_mvd;
 1226|  1.35M|    UWORD16 u2_abs_mvd;
 1227|  1.35M|    UWORD32 u4_ctx_inc;
 1228|  1.35M|    UWORD32 u4_prefix;
 1229|  1.35M|    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
 1230|  1.35M|    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
 1231|       |
 1232|       |    /*  if mvd < 9                                                  */
 1233|       |    /*  mvd =  Prefix                                                   */
 1234|       |    /*  else                                                            */
 1235|       |    /*  mvd = Prefix + Suffix                                           */
 1236|       |    /*  decode sign bit                                                 */
 1237|       |    /*  Prefix TU decoding Cmax =Ucoff and Suffix 3rd order Exp-Golomb  */
 1238|       |
 1239|  1.35M|    u2_abs_mvd = (UWORD16)i4_temp;
 1240|  1.35M|    u4_ctx_inc = 1;
 1241|       |
 1242|  1.35M|    if(u2_abs_mvd < 3)
  ------------------
  |  Branch (1242:8): [True: 978k, False: 373k]
  ------------------
 1243|   978k|        u4_ctx_inc = 0;
 1244|   373k|    else if(u2_abs_mvd > 32)
  ------------------
  |  Branch (1244:13): [True: 15.0k, False: 358k]
  ------------------
 1245|  15.0k|        u4_ctx_inc = 2;
 1246|       |
 1247|  1.35M|    u4_ctx_inc = (u4_ctx_inc | 0x65430);
 1248|       |
 1249|       |    /*inlining modified version of ih264d_decode_bins_unary*/
 1250|       |
 1251|  1.35M|    {
 1252|  1.35M|        UWORD8 u1_max_bins = 9;
 1253|  1.35M|        UWORD32 u4_value;
 1254|  1.35M|        UWORD32 u4_symbol;
 1255|  1.35M|        bin_ctxt_model_t *ps_bin_ctxt;
 1256|  1.35M|        UWORD32 u4_ctx_Inc;
 1257|       |
 1258|  1.35M|        u4_value = 0;
 1259|  1.35M|        u4_code_int_range = ps_cab_env->u4_code_int_range;
 1260|  1.35M|        u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
 1261|       |
 1262|  1.35M|        do
 1263|  3.14M|        {
 1264|  3.14M|            u4_ctx_Inc = u4_ctx_inc & 0xf;
 1265|  3.14M|            u4_ctx_inc = u4_ctx_inc >> 4;
 1266|       |
 1267|  3.14M|            ps_bin_ctxt = p_ctxt_mvd + u4_ctx_Inc;
 1268|       |
 1269|  3.14M|            DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range,
  ------------------
  |  |  217|  3.14M|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|  3.14M|{                                                                                       \
  |  |  219|  3.14M|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|  3.14M|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|  3.14M|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|  3.14M|                                                                                        \
  |  |  223|  3.14M|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|  3.14M|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|  3.14M|    UWORD32 table_lookup_m;                                                               \
  |  |  226|  3.14M|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|  3.14M|                                                                                        \
  |  |  228|  3.14M|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|  3.14M|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|  3.14M|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|  3.14M|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|  3.14M|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|  3.14M|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|  3.14M|                                                                                        \
  |  |  235|  3.14M|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|  3.14M|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|  3.14M|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|  3.14M|    /*if mps*/                                                                          \
  |  |  239|  3.14M|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|  3.14M|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 323k, False: 2.82M]
  |  |  ------------------
  |  |  241|  3.14M|  {                                                                                     \
  |  |  242|   323k|                                                                                        \
  |  |  243|   323k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|   323k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|   323k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|   323k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|   323k|  }                                                                                     \
  |  |  248|  3.14M|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|  6.29M|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 32.8k, False: 3.11M]
  |  |  ------------------
  |  |  249|  3.14M|    {                                                                                   \
  |  |  250|  32.8k|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|  32.8k|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|  32.8k|                                                                                        \
  |  |  253|  32.8k|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|  32.8k|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|  32.8k|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|  32.8k|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  32.8k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  32.8k|{                                                                           \
  |  |  |  |  139|  32.8k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  32.8k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  32.8k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  32.8k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  32.8k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 31.3k, False: 1.50k]
  |  |  |  |  ------------------
  |  |  |  |  144|  32.8k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  31.3k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  32.8k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  32.8k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  32.8k|}
  |  |  ------------------
  |  |  257|  32.8k|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|  32.8k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  32.8k|{                                                                           \
  |  |  |  |  195|  32.8k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  32.8k|}
  |  |  ------------------
  |  |  258|  32.8k|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|  32.8k|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|  32.8k|                                                                                        \
  |  |  261|  32.8k|                                                                                        \
  |  |  262|  32.8k|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|  32.8k|    }                                                                                   \
  |  |  264|  3.14M|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|  3.14M|}
  ------------------
 1270|  3.14M|                                 u4_code_int_val_ofst, pu4_table, ps_bitstrm,
 1271|  3.14M|                                 u4_symbol)
 1272|       |
 1273|  3.14M|            INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
 1274|       |
 1275|  3.14M|            u4_value++;
 1276|       |
 1277|  3.14M|        }
 1278|  3.14M|        while(u4_symbol && u4_value < 5);
  ------------------
  |  Branch (1278:15): [True: 2.04M, False: 1.10M]
  |  Branch (1278:28): [True: 1.79M, False: 251k]
  ------------------
 1279|       |
 1280|  1.35M|        ps_bin_ctxt = p_ctxt_mvd + 6;
 1281|       |
 1282|  1.35M|        if(u4_symbol && (u4_value < u1_max_bins))
  ------------------
  |  Branch (1282:12): [True: 251k, False: 1.10M]
  |  Branch (1282:25): [True: 251k, False: 0]
  ------------------
 1283|   251k|        {
 1284|       |
 1285|   251k|            do
 1286|   967k|            {
 1287|       |
 1288|   967k|                DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range,
  ------------------
  |  |  217|   967k|                     p_DecBitStream_arg,u4_symbol)                                           \
  |  |  218|   967k|{                                                                                       \
  |  |  219|   967k|    bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg;                           \
  |  |  220|   967k|    dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg;                 \
  |  |  221|   967k|    const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg;                         \
  |  |  222|   967k|                                                                                        \
  |  |  223|   967k|    UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m;                                    \
  |  |  224|   967k|    UWORD32 u1_mps_state_m;                                                               \
  |  |  225|   967k|    UWORD32 table_lookup_m;                                                               \
  |  |  226|   967k|    UWORD32 u4_clz_m;                                                                     \
  |  |  227|   967k|                                                                                        \
  |  |  228|   967k|    u1_mps_state_m = (p_binCtxt_m->u1_mps_state);                                           \
  |  |  229|   967k|    u4_clz_m = CLZ(u4_code_int_range);                                                  \
  |  |  230|   967k|    u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m;                                   \
  |  |  231|   967k|    u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3;                          \
  |  |  232|   967k|    table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m];                 \
  |  |  233|   967k|    u4_codeIntRangeLPS_m = table_lookup_m & 0xff;                                           \
  |  |  234|   967k|                                                                                        \
  |  |  235|   967k|    u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m);                           \
  |  |  236|   967k|    u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m;                             \
  |  |  237|   967k|    u4_symbol = ((u1_mps_state_m>> 6) & 0x1);                                             \
  |  |  238|   967k|    /*if mps*/                                                                          \
  |  |  239|   967k|    u1_mps_state_m = (table_lookup_m >> 8) & 0x7F;                                          \
  |  |  240|   967k|    if(u4_code_int_val_ofst >= u4_code_int_range)                                          \
  |  |  ------------------
  |  |  |  Branch (240:8): [True: 22.2k, False: 945k]
  |  |  ------------------
  |  |  241|   967k|  {                                                                                     \
  |  |  242|  22.2k|                                                                                        \
  |  |  243|  22.2k|    u4_symbol = 1 - u4_symbol;                                                          \
  |  |  244|  22.2k|    u4_code_int_val_ofst -= u4_code_int_range;                                             \
  |  |  245|  22.2k|    u4_code_int_range = u4_codeIntRangeLPS_m;                                               \
  |  |  246|  22.2k|    u1_mps_state_m = (table_lookup_m >> 15) & 0x7F;                                         \
  |  |  247|  22.2k|  }                                                                                     \
  |  |  248|   967k|    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)                                        \
  |  |  ------------------
  |  |  |  |  113|  1.93M|#define ONE_RIGHT_SHIFTED_BY_8 1<<8
  |  |  ------------------
  |  |  |  Branch (248:8): [True: 4.70k, False: 963k]
  |  |  ------------------
  |  |  249|   967k|    {                                                                                   \
  |  |  250|  4.70k|        UWORD32 *pu4_buffer,u4_offset;                                                  \
  |  |  251|  4.70k|        UWORD32 read_bits,u4_clz_m  ;                                                     \
  |  |  252|  4.70k|                                                                                        \
  |  |  253|  4.70k|        pu4_buffer = p_DecBitStream_m->pu4_buffer;                                         \
  |  |  254|  4.70k|        u4_offset = p_DecBitStream_m->u4_ofst;                                          \
  |  |  255|  4.70k|        u4_clz_m = CLZ(u4_code_int_range);                                              \
  |  |  256|  4.70k|        NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  4.70k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  4.70k|{                                                                           \
  |  |  |  |  139|  4.70k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  4.70k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  4.70k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  4.70k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  4.70k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 4.44k, False: 262]
  |  |  |  |  ------------------
  |  |  |  |  144|  4.70k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  4.44k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  4.70k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  4.70k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  4.70k|}
  |  |  ------------------
  |  |  257|  4.70k|        FLUSHBITS(u4_offset,(u4_clz_m))                                                   \
  |  |  ------------------
  |  |  |  |  193|  4.70k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  4.70k|{                                                                           \
  |  |  |  |  195|  4.70k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  4.70k|}
  |  |  ------------------
  |  |  258|  4.70k|        u4_code_int_range = u4_code_int_range << u4_clz_m;                                    \
  |  |  259|  4.70k|        u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits;               \
  |  |  260|  4.70k|                                                                                        \
  |  |  261|  4.70k|                                                                                        \
  |  |  262|  4.70k|        p_DecBitStream_m->u4_ofst = u4_offset;                                          \
  |  |  263|  4.70k|    }                                                                                   \
  |  |  264|   967k|    p_binCtxt_m->u1_mps_state = u1_mps_state_m;                                             \
  |  |  265|   967k|}
  ------------------
 1289|   967k|                                     u4_code_int_val_ofst, pu4_table,
 1290|   967k|                                     ps_bitstrm, u4_symbol)
 1291|       |
 1292|   967k|                INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
 1293|   967k|                u4_value++;
 1294|   967k|            }
 1295|   967k|            while(u4_symbol && (u4_value < u1_max_bins));
  ------------------
  |  Branch (1295:19): [True: 945k, False: 22.2k]
  |  Branch (1295:32): [True: 716k, False: 229k]
  ------------------
 1296|       |
 1297|   251k|        }
 1298|       |
 1299|  1.35M|        ps_cab_env->u4_code_int_range = u4_code_int_range;
 1300|  1.35M|        ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
 1301|  1.35M|        u4_value = u4_value - 1 + u4_symbol;
 1302|  1.35M|        u4_prefix = (u4_value);
 1303|  1.35M|    }
 1304|       |
 1305|  1.35M|    i2_mvd = u4_prefix;
 1306|       |
 1307|  1.35M|    if(i2_mvd == 9)
  ------------------
  |  Branch (1307:8): [True: 229k, False: 1.12M]
  ------------------
 1308|   229k|    {
 1309|       |        /* Read Suffix */
 1310|   229k|        k = ih264d_decode_bypass_bins_unary(ps_cab_env, ps_bitstrm);
 1311|   229k|        i2_suf = (k > 15) ? INT16_MAX : ((1 << k) - 1);
  ------------------
  |  Branch (1311:18): [True: 134, False: 229k]
  ------------------
 1312|   229k|        k = k + 3;
 1313|   229k|        i2_suf = (i2_suf << 3);
 1314|   229k|        i2_mvd += i2_suf;
 1315|   229k|        i2_suf = ih264d_decode_bypass_bins(ps_cab_env, k, ps_bitstrm);
 1316|   229k|        i2_mvd += i2_suf;
 1317|   229k|    }
 1318|       |    /* Read Sign bit */
 1319|  1.35M|    if(!i2_mvd)
  ------------------
  |  Branch (1319:8): [True: 659k, False: 693k]
  ------------------
 1320|   659k|        return (i2_mvd);
 1321|       |
 1322|   693k|    else
 1323|   693k|    {
 1324|   693k|        UWORD32 u4_code_int_val_ofst, u4_code_int_range;
 1325|       |
 1326|   693k|        u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
 1327|   693k|        u4_code_int_range = ps_cab_env->u4_code_int_range;
 1328|       |
 1329|   693k|        if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
  ------------------
  |  |  114|   693k|#define ONE_RIGHT_SHIFTED_BY_9    1<<9
  ------------------
  |  Branch (1329:12): [True: 9.58k, False: 683k]
  ------------------
 1330|  9.58k|        {
 1331|  9.58k|            UWORD32 *pu4_buffer, u4_offset;
 1332|       |
 1333|  9.58k|            pu4_buffer = ps_bitstrm->pu4_buffer;
 1334|  9.58k|            u4_offset = ps_bitstrm->u4_ofst;
 1335|       |
 1336|  9.58k|            RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst,
  ------------------
  |  |  170|  9.58k|  {                                                                                         \
  |  |  171|  9.58k|    UWORD32 read_bits_m,u4_clz_m  ;                                                         \
  |  |  172|  9.58k|    u4_clz_m = CLZ(u4_codeIntRange_m);                                                  \
  |  |  173|  9.58k|    NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m)                            \
  |  |  ------------------
  |  |  |  |  137|  9.58k|#define     NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits)         \
  |  |  |  |  138|  9.58k|{                                                                           \
  |  |  |  |  139|  9.58k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  |  |  140|  9.58k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  |  |  141|  9.58k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  |  |  142|  9.58k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  |  |  143|  9.58k|    if(u4_bit_off)                                                          \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (143:8): [True: 8.94k, False: 642]
  |  |  |  |  ------------------
  |  |  |  |  144|  9.58k|        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  8.94k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  145|  9.58k|    u4_word = u4_word >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  |  |  ------------------
  |  |  |  |  |  |   45|  9.58k|#define INT_IN_BITS         32
  |  |  |  |  ------------------
  |  |  |  |  146|  9.58k|}
  |  |  ------------------
  |  |  174|  9.58k|    FLUSHBITS(u4_offset_m,(u4_clz_m))                                                       \
  |  |  ------------------
  |  |  |  |  193|  9.58k|#define   FLUSHBITS(u4_offset, u4_no_bits)                                  \
  |  |  |  |  194|  9.58k|{                                                                           \
  |  |  |  |  195|  9.58k|        (u4_offset) += (u4_no_bits);                                        \
  |  |  |  |  196|  9.58k|}
  |  |  ------------------
  |  |  175|  9.58k|    u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m;                                      \
  |  |  176|  9.58k|    u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m;              \
  |  |  177|  9.58k|  }
  ------------------
 1337|  9.58k|                                u4_offset, pu4_buffer)
 1338|  9.58k|            ps_bitstrm->u4_ofst = u4_offset;
 1339|  9.58k|        }
 1340|       |
 1341|   693k|        u4_code_int_range = u4_code_int_range >> 1;
 1342|       |
 1343|   693k|        if(u4_code_int_val_ofst >= u4_code_int_range)
  ------------------
  |  Branch (1343:12): [True: 143k, False: 549k]
  ------------------
 1344|   143k|        {
 1345|       |            /* S=1 */
 1346|   143k|            u4_code_int_val_ofst -= u4_code_int_range;
 1347|   143k|            i2_mvd = (-i2_mvd);
 1348|   143k|        }
 1349|       |
 1350|   693k|        ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
 1351|   693k|        ps_cab_env->u4_code_int_range = u4_code_int_range;
 1352|       |
 1353|   693k|        return (i2_mvd);
 1354|       |
 1355|   693k|    }
 1356|  1.35M|}

ih264d_parse_pmb_cavlc:
   87|  55.8k|{
   88|  55.8k|    UWORD32 u1_num_mb_part;
   89|  55.8k|    UWORD32 uc_sub_mb;
   90|  55.8k|    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
   91|  55.8k|    UWORD32 * const pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
   92|  55.8k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
   93|       |
   94|  55.8k|    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
   95|  55.8k|                    + u4_num_mbsNby2;
   96|  55.8k|    WORD8 * pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
   97|  55.8k|    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
   98|  55.8k|    const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
   99|  55.8k|    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
  100|       |
  101|  55.8k|    UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  102|  55.8k|    UWORD32 u4_sum_mb_mode_pack = 0;
  103|  55.8k|    WORD32 ret;
  104|       |
  105|  55.8k|    UWORD8 u1_no_submb_part_size_lt8x8_flag = 1;
  106|  55.8k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
  107|  55.8k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  108|       |
  109|  55.8k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  110|       |
  111|  55.8k|    ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
  112|  55.8k|    uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  453|  55.8k|#define PRED_8x8    3
  ------------------
                  uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  454|  55.8k|#define PRED_8x8R0  4
  ------------------
  113|       |
  114|       |    /* Reading the subMB type */
  115|  55.8k|    if(uc_sub_mb)
  ------------------
  |  Branch (115:8): [True: 2.38k, False: 53.4k]
  ------------------
  116|  2.38k|    {
  117|  2.38k|        WORD32 i;
  118|  2.38k|        UWORD8 u1_colz = (PRED_8x8 << 6);
  ------------------
  |  |  453|  2.38k|#define PRED_8x8    3
  ------------------
  119|       |
  120|  10.5k|        for(i = 0; i < 4; i++)
  ------------------
  |  Branch (120:20): [True: 8.64k, False: 1.88k]
  ------------------
  121|  8.64k|        {
  122|  8.64k|            UWORD32 ui_sub_mb_mode;
  123|       |
  124|       |            //Inlined ih264d_uev
  125|  8.64k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  126|  8.64k|            UWORD32 u4_word, u4_ldz;
  127|       |
  128|       |            /***************************************************************/
  129|       |            /* Find leading zeros in next 32 bits                          */
  130|       |            /***************************************************************/
  131|  8.64k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  8.64k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  8.64k|{                                                                           \
  |  |  152|  8.64k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  8.64k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  8.64k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  8.64k|                                                                            \
  |  |  156|  8.64k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  8.64k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 8.15k, False: 488]
  |  |  ------------------
  |  |  158|  8.64k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  8.15k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  8.64k|}
  ------------------
  132|  8.64k|            u4_ldz = CLZ(u4_word);
  133|       |            /* Flush the ps_bitstrm */
  134|  8.64k|            u4_bitstream_offset += (u4_ldz + 1);
  135|       |            /* Read the suffix from the ps_bitstrm */
  136|  8.64k|            u4_word = 0;
  137|  8.64k|            if(u4_ldz)
  ------------------
  |  Branch (137:16): [True: 5.41k, False: 3.22k]
  ------------------
  138|  5.41k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  5.41k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  5.41k|{                                                                           \
  |  |  122|  5.41k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  5.41k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  5.41k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  5.41k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  5.41k|                                                                            \
  |  |  127|  5.41k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 5.02k, False: 397]
  |  |  ------------------
  |  |  128|  5.41k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  5.02k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  5.41k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  5.41k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  5.41k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  5.41k|}                                                                           \
  ------------------
  139|  8.64k|                        u4_ldz);
  140|  8.64k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  141|  8.64k|            ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1);
  142|       |            //Inlined ih264d_uev
  143|       |
  144|  8.64k|            if(ui_sub_mb_mode > 3)
  ------------------
  |  Branch (144:16): [True: 503, False: 8.14k]
  ------------------
  145|    503|            {
  146|    503|                return ERROR_SUB_MB_TYPE;
  147|    503|            }
  148|  8.14k|            else
  149|  8.14k|            {
  150|  8.14k|                u4_sum_mb_mode_pack = (u4_sum_mb_mode_pack << 8) | ui_sub_mb_mode;
  151|       |                /* Storing collocated information */
  152|  8.14k|                *pu1_col_info++ = u1_colz | (UWORD8)(ui_sub_mb_mode << 4);
  153|       |
  154|  8.14k|                COPYTHECONTEXT("sub_mb_type", ui_sub_mb_mode);
  155|  8.14k|            }
  156|       |
  157|       |            /* check if Motion compensation is done below 8x8 */
  158|  8.14k|            if(ui_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  8.14k|#define P_L0_8x8    0
  ------------------
  |  Branch (158:16): [True: 4.91k, False: 3.22k]
  ------------------
  159|  4.91k|            {
  160|  4.91k|                u1_no_submb_part_size_lt8x8_flag = 0;
  161|  4.91k|            }
  162|  8.14k|        }
  163|       |
  164|       |        //
  165|  1.88k|        u1_num_mb_part = 4;
  166|  1.88k|    }
  167|  53.4k|    else
  168|  53.4k|    {
  169|  53.4k|        *pu1_col_info++ = (u1_mb_type << 6);
  170|  53.4k|        if(u1_mb_type)
  ------------------
  |  Branch (170:12): [True: 16.4k, False: 36.9k]
  ------------------
  171|  16.4k|            *pu1_col_info++ = (u1_mb_type << 6);
  172|  53.4k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
  173|       |
  174|  53.4k|    }
  175|       |
  176|       |    /* Decoding reference index 0: For simple profile the following   */
  177|       |    /* conditions are always true (mb_field_decoding_flag == 0);      */
  178|       |    /* (MbPartPredMode != PredL1)                                     */
  179|       |
  180|  55.3k|    {
  181|       |
  182|  55.3k|        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  183|  55.3k|        UWORD8 uc_num_ref_idx_l0_active_minus1 =
  184|  55.3k|                        (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
  185|  55.3k|                                        << (u1_mbaff & uc_field)) - 1;
  186|       |
  187|  55.3k|        if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
  ------------------
  |  |  454|  55.3k|#define PRED_8x8R0  4
  ------------------
  |  Branch (187:12): [True: 18.9k, False: 36.4k]
  ------------------
  188|  18.9k|        {
  189|  18.9k|            if(1 == uc_num_ref_idx_l0_active_minus1)
  ------------------
  |  Branch (189:16): [True: 5.03k, False: 13.8k]
  ------------------
  190|  5.03k|                ih264d_parse_pmb_ref_index_cavlc_range1(
  191|  5.03k|                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
  192|  5.03k|                                uc_num_ref_idx_l0_active_minus1);
  193|  13.8k|            else
  194|  13.8k|            {
  195|  13.8k|                ret = ih264d_parse_pmb_ref_index_cavlc(
  196|  13.8k|                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
  197|  13.8k|                                uc_num_ref_idx_l0_active_minus1);
  198|  13.8k|                if(ret != OK)
  ------------------
  |  |  114|  13.8k|#define OK        0
  ------------------
  |  Branch (198:20): [True: 510, False: 13.3k]
  ------------------
  199|    510|                    return ret;
  200|  13.8k|            }
  201|  18.9k|        }
  202|  36.4k|        else
  203|  36.4k|        {
  204|       |            /* When there exists only a single frame to predict from */
  205|  36.4k|            UWORD32 uc_i;
  206|  87.8k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (206:27): [True: 51.3k, False: 36.4k]
  ------------------
  207|       |                /* Storing Reference Idx Information */
  208|  51.3k|                pi1_ref_idx[uc_i] = 0;
  209|  36.4k|        }
  210|  55.3k|    }
  211|       |
  212|  54.8k|    {
  213|  54.8k|        UWORD8 u1_p_idx;
  214|  54.8k|        UWORD32 uc_i;
  215|  54.8k|        parse_part_params_t * ps_part = ps_dec->ps_part;
  216|  54.8k|        UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
  217|  54.8k|        UWORD32 u4_sub_mb_num;
  218|  54.8k|        const UWORD8 * pu1_top_left_sub_mb_indx;
  219|  54.8k|        mv_pred_t * ps_mv, *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
  220|       |        /* Loading the table pointers */
  221|  54.8k|        const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
  222|  54.8k|        const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
  223|  54.8k|        const UWORD8 * pu1_sub_mb_indx_mod =
  224|  54.8k|                        (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
  225|  54.8k|                                        + (uc_sub_mb * 6);
  226|  54.8k|        const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
  227|  54.8k|        const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
  228|  54.8k|        const UWORD8 * pu1_num_sub_mb_part =
  229|  54.8k|                        (const UWORD8 *)gau1_ih264d_num_submb_part;
  230|       |
  231|  54.8k|        UWORD16 u2_sub_mb_num = 0x028A;
  232|       |
  233|       |        /*********************************************************/
  234|       |        /* default initialisations for condition (uc_sub_mb == 0) */
  235|       |        /* i.e. all are subpartitions of 8x8                     */
  236|       |        /*********************************************************/
  237|  54.8k|        u1_sub_mb_mode = 0;
  238|  54.8k|        u1_num_subpart = 1;
  239|  54.8k|        u1_mb_part_width = pu1_mb_partw[u1_mb_type];
  240|  54.8k|        u1_mb_part_height = pu1_mb_parth[u1_mb_type];
  241|  54.8k|        pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
  242|  54.8k|        u4_sub_mb_num = 0;
  243|       |
  244|       |        /* Loop on number of partitions */
  245|   131k|        for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (245:37): [True: 76.4k, False: 54.8k]
  ------------------
  246|  76.4k|        {
  247|  76.4k|            UWORD8 uc_j;
  248|  76.4k|            if(uc_sub_mb)
  ------------------
  |  Branch (248:16): [True: 7.28k, False: 69.2k]
  ------------------
  249|  7.28k|            {
  250|  7.28k|                u1_sub_mb_mode = u4_sum_mb_mode_pack >> 24;
  251|  7.28k|                u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
  252|  7.28k|                u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
  253|  7.28k|                u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
  254|  7.28k|                pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
  255|  7.28k|                u4_sub_mb_num = u2_sub_mb_num >> 12;
  256|  7.28k|                u4_sum_mb_mode_pack <<= 8;
  257|  7.28k|                u2_sub_mb_num <<= 4;
  258|  7.28k|            }
  259|       |
  260|       |            /* Loop on Number of sub-partitions */
  261|   160k|            for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
  ------------------
  |  Branch (261:27): [True: 83.5k, False: 76.4k]
  ------------------
  262|  83.5k|            {
  263|  83.5k|                WORD16 i2_mvx, i2_mvy;
  264|  83.5k|                u4_sub_mb_num += *pu1_top_left_sub_mb_indx;
  265|  83.5k|                ps_mv = ps_mv_start + u4_sub_mb_num;
  266|       |
  267|       |                /* Reading the differential Mv from the bitstream */
  268|       |                //i2_mvx = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  269|       |                //inlining ih264d_sev
  270|  83.5k|                {
  271|  83.5k|                    UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  272|  83.5k|                    UWORD32 u4_word, u4_ldz, u4_abs_val;
  273|       |
  274|       |                    /***************************************************************/
  275|       |                    /* Find leading zeros in next 32 bits                          */
  276|       |                    /***************************************************************/
  277|  83.5k|                    NEXTBITS_32(u4_word, u4_bitstream_offset,
  ------------------
  |  |  150|  83.5k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  83.5k|{                                                                           \
  |  |  152|  83.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  83.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  83.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  83.5k|                                                                            \
  |  |  156|  83.5k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  83.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 80.0k, False: 3.52k]
  |  |  ------------------
  |  |  158|  83.5k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  80.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  83.5k|}
  ------------------
  278|  83.5k|                                pu4_bitstrm_buf);
  279|  83.5k|                    u4_ldz = CLZ(u4_word);
  280|       |
  281|       |                    /* Flush the ps_bitstrm */
  282|  83.5k|                    u4_bitstream_offset += (u4_ldz + 1);
  283|       |
  284|       |                    /* Read the suffix from the ps_bitstrm */
  285|  83.5k|                    u4_word = 0;
  286|  83.5k|                    if(u4_ldz)
  ------------------
  |  Branch (286:24): [True: 40.8k, False: 42.7k]
  ------------------
  287|  40.8k|                        GETBITS(u4_word, u4_bitstream_offset,
  ------------------
  |  |  120|  40.8k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  40.8k|{                                                                           \
  |  |  122|  40.8k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  40.8k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  40.8k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  40.8k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  40.8k|                                                                            \
  |  |  127|  40.8k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 38.2k, False: 2.62k]
  |  |  ------------------
  |  |  128|  40.8k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  38.2k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  40.8k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  40.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  40.8k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  40.8k|}                                                                           \
  ------------------
  288|  83.5k|                                pu4_bitstrm_buf, u4_ldz);
  289|       |
  290|  83.5k|                    *pu4_bitstrm_ofst = u4_bitstream_offset;
  291|  83.5k|                    u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  292|       |
  293|  83.5k|                    if(u4_word & 0x1)
  ------------------
  |  Branch (293:24): [True: 16.4k, False: 67.1k]
  ------------------
  294|  16.4k|                        i2_mvx = (-(WORD32)u4_abs_val);
  295|  67.1k|                    else
  296|  67.1k|                        i2_mvx = (u4_abs_val);
  297|  83.5k|                }
  298|       |                //inlinined ih264d_sev
  299|  83.5k|                COPYTHECONTEXT("MVD", i2_mvx);
  300|  83.5k|                i2_mvy = ih264d_sev(pu4_bitstrm_ofst,
  301|  83.5k|                                     pu4_bitstrm_buf);
  302|  83.5k|                COPYTHECONTEXT("MVD", i2_mvy);
  303|       |
  304|       |                /* Storing Info for partitions */
  305|  83.5k|                ps_part->u1_is_direct = PART_NOT_DIRECT;
  ------------------
  |  |  570|  83.5k|#define PART_NOT_DIRECT                0
  ------------------
  306|  83.5k|                ps_part->u1_sub_mb_num = u4_sub_mb_num;
  307|  83.5k|                ps_part->u1_partheight = u1_mb_part_height;
  308|  83.5k|                ps_part->u1_partwidth = u1_mb_part_width;
  309|       |
  310|       |                /* Storing Mv residuals */
  311|  83.5k|                ps_mv->i2_mv[0] = i2_mvx;
  312|  83.5k|                ps_mv->i2_mv[1] = i2_mvy;
  313|       |
  314|       |                /* Increment partition Index */
  315|  83.5k|                u1_p_idx++;
  316|  83.5k|                ps_part++;
  317|  83.5k|            }
  318|  76.4k|        }
  319|  54.8k|        ps_parse_mb_data->u1_num_part = u1_p_idx;
  320|  54.8k|        ps_dec->ps_part = ps_part;
  321|  54.8k|    }
  322|       |
  323|  54.8k|    {
  324|  54.8k|        UWORD32 u4_cbp;
  325|       |
  326|       |        /* Read the Coded block pattern */
  327|  54.8k|        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  328|  54.8k|        UWORD32 u4_word, u4_ldz;
  329|       |
  330|       |        /***************************************************************/
  331|       |        /* Find leading zeros in next 32 bits                          */
  332|       |        /***************************************************************/
  333|  54.8k|        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  54.8k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  54.8k|{                                                                           \
  |  |  152|  54.8k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  54.8k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  54.8k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  54.8k|                                                                            \
  |  |  156|  54.8k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  54.8k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 53.5k, False: 1.27k]
  |  |  ------------------
  |  |  158|  54.8k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  53.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  54.8k|}
  ------------------
  334|  54.8k|        u4_ldz = CLZ(u4_word);
  335|       |        /* Flush the ps_bitstrm */
  336|  54.8k|        u4_bitstream_offset += (u4_ldz + 1);
  337|       |        /* Read the suffix from the ps_bitstrm */
  338|  54.8k|        u4_word = 0;
  339|  54.8k|        if(u4_ldz)
  ------------------
  |  Branch (339:12): [True: 14.5k, False: 40.2k]
  ------------------
  340|  14.5k|            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  14.5k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  14.5k|{                                                                           \
  |  |  122|  14.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  14.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  14.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  14.5k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  14.5k|                                                                            \
  |  |  127|  14.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 13.8k, False: 789]
  |  |  ------------------
  |  |  128|  14.5k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  13.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  14.5k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  14.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  14.5k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  14.5k|}                                                                           \
  ------------------
  341|  54.8k|        *pu4_bitstrm_ofst = u4_bitstream_offset;
  342|  54.8k|        u4_cbp = ((1 << u4_ldz) + u4_word - 1);
  343|       |
  344|  54.8k|        if(u4_cbp > 47)
  ------------------
  |  Branch (344:12): [True: 1.28k, False: 53.5k]
  ------------------
  345|  1.28k|            return ERROR_CBP;
  346|       |
  347|  53.5k|        u4_cbp = *((UWORD8*)gau1_ih264d_cbp_inter + u4_cbp);
  348|  53.5k|        COPYTHECONTEXT("coded_block_pattern", u4_cbp);
  349|  53.5k|        ps_cur_mb_info->u1_cbp = u4_cbp;
  350|       |
  351|       |        /* Read the transform8x8 u4_flag if present */
  352|  53.5k|        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & 0xf)
  ------------------
  |  Branch (352:12): [True: 6.02k, False: 47.4k]
  |  Branch (352:64): [True: 2.38k, False: 3.64k]
  ------------------
  353|  2.38k|                        && u1_no_submb_part_size_lt8x8_flag)
  ------------------
  |  Branch (353:28): [True: 2.17k, False: 205]
  ------------------
  354|  2.17k|        {
  355|  2.17k|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
  356|  2.17k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  357|  2.17k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  358|  2.17k|        }
  359|       |
  360|       |        /* Read mb_qp_delta */
  361|  53.5k|        if(u4_cbp)
  ------------------
  |  Branch (361:12): [True: 13.3k, False: 40.2k]
  ------------------
  362|  13.3k|        {
  363|  13.3k|            WORD32 i_temp;
  364|       |
  365|  13.3k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  366|  13.3k|            UWORD32 u4_word, u4_ldz, u4_abs_val;
  367|       |
  368|       |            /***************************************************************/
  369|       |            /* Find leading zeros in next 32 bits                          */
  370|       |            /***************************************************************/
  371|  13.3k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  13.3k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  13.3k|{                                                                           \
  |  |  152|  13.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  13.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  13.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  13.3k|                                                                            \
  |  |  156|  13.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  13.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 12.6k, False: 677]
  |  |  ------------------
  |  |  158|  13.3k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  12.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  13.3k|}
  ------------------
  372|  13.3k|            u4_ldz = CLZ(u4_word);
  373|       |
  374|       |            /* Flush the ps_bitstrm */
  375|  13.3k|            u4_bitstream_offset += (u4_ldz + 1);
  376|       |
  377|       |            /* Read the suffix from the ps_bitstrm */
  378|  13.3k|            u4_word = 0;
  379|  13.3k|            if(u4_ldz)
  ------------------
  |  Branch (379:16): [True: 4.62k, False: 8.68k]
  ------------------
  380|  4.62k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
  ------------------
  |  |  120|  4.62k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  4.62k|{                                                                           \
  |  |  122|  4.62k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  4.62k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  4.62k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  4.62k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  4.62k|                                                                            \
  |  |  127|  4.62k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 3.79k, False: 827]
  |  |  ------------------
  |  |  128|  4.62k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  3.79k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  4.62k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  4.62k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  4.62k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  4.62k|}                                                                           \
  ------------------
  381|  13.3k|                        u4_ldz);
  382|       |
  383|  13.3k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  384|  13.3k|            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  385|       |
  386|  13.3k|            if(u4_word & 0x1)
  ------------------
  |  Branch (386:16): [True: 2.18k, False: 11.1k]
  ------------------
  387|  2.18k|                i_temp = (-(WORD32)u4_abs_val);
  388|  11.1k|            else
  389|  11.1k|                i_temp = (u4_abs_val);
  390|       |
  391|  13.3k|            if((i_temp < -26) || (i_temp > 25))
  ------------------
  |  Branch (391:16): [True: 111, False: 13.1k]
  |  Branch (391:34): [True: 279, False: 12.9k]
  ------------------
  392|    390|                return ERROR_INV_RANGE_QP_T;
  393|       |            //inlinined ih264d_sev
  394|       |
  395|  12.9k|            COPYTHECONTEXT("mb_qp_delta", i_temp);
  396|  12.9k|            if(i_temp)
  ------------------
  |  Branch (396:16): [True: 4.23k, False: 8.68k]
  ------------------
  397|  4.23k|            {
  398|  4.23k|                ret = ih264d_update_qp(ps_dec, (WORD8)i_temp);
  399|  4.23k|                if(ret != OK)
  ------------------
  |  |  114|  4.23k|#define OK        0
  ------------------
  |  Branch (399:20): [True: 0, False: 4.23k]
  ------------------
  400|      0|                    return ret;
  401|  4.23k|            }
  402|       |
  403|  12.9k|            ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
  404|  12.9k|            if(ret != OK)
  ------------------
  |  |  114|  12.9k|#define OK        0
  ------------------
  |  Branch (404:16): [True: 2.79k, False: 10.1k]
  ------------------
  405|  2.79k|                return ret;
  406|  10.1k|            if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  10.1k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 733, False: 9.39k]
  |  |  ------------------
  ------------------
  407|    733|                return ERROR_EOB_TERMINATE_T;
  408|  10.1k|        }
  409|  40.2k|        else
  410|  40.2k|        {
  411|  40.2k|            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|  40.2k|#define CAVLC  0
  ------------------
  412|  40.2k|        }
  413|       |
  414|       |
  415|       |
  416|  53.5k|    }
  417|       |
  418|  49.6k|    return OK;
  ------------------
  |  |  114|  49.6k|#define OK        0
  ------------------
  419|  53.5k|}
ih264d_parse_pmb_cabac:
  436|  95.5k|{
  437|  95.5k|    UWORD32 u1_num_mb_part;
  438|  95.5k|    UWORD32 uc_sub_mb;
  439|  95.5k|    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
  440|  95.5k|                    + u4_num_mbsNby2;
  441|  95.5k|    WORD8 * pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
  442|  95.5k|    const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
  443|  95.5k|    const UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  444|  95.5k|    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
  445|  95.5k|    UWORD32 u1_mb_mc_mode = u1_mb_type;
  446|  95.5k|    ctxt_inc_mb_info_t * p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
  447|  95.5k|    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
  448|  95.5k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  449|  95.5k|    UWORD32 u4_sub_mb_pack = 0;
  450|  95.5k|    WORD32 ret;
  451|       |
  452|  95.5k|    UWORD8 u1_no_submb_part_size_lt8x8_flag = 1;
  453|  95.5k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
  454|  95.5k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  455|       |
  456|  95.5k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  457|       |
  458|  95.5k|    p_curr_ctxt->u1_mb_type = CAB_P;
  ------------------
  |  |  398|  95.5k|#define CAB_P             0x07 /* 0000 0111 */
  ------------------
  459|  95.5k|    ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
  460|  95.5k|    uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  453|  95.5k|#define PRED_8x8    3
  ------------------
                  uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  454|  95.5k|#define PRED_8x8R0  4
  ------------------
  461|       |
  462|       |    /* Reading the subMB type */
  463|  95.5k|    if(uc_sub_mb)
  ------------------
  |  Branch (463:8): [True: 15.4k, False: 80.0k]
  ------------------
  464|  15.4k|    {
  465|       |
  466|  15.4k|        UWORD8 u1_colz = (PRED_8x8 << 6);
  ------------------
  |  |  453|  15.4k|#define PRED_8x8    3
  ------------------
  467|  15.4k|        u1_mb_mc_mode = 0;
  468|       |
  469|  15.4k|        {
  470|  15.4k|            UWORD8 u1_sub_mb_mode;
  471|  15.4k|            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
  472|  15.4k|                            0, ps_cab_env, ps_bitstrm,
  473|  15.4k|                            ps_dec->p_sub_mb_type_t);
  474|  15.4k|            if(u1_sub_mb_mode > 3)
  ------------------
  |  Branch (474:16): [True: 0, False: 15.4k]
  ------------------
  475|      0|                return ERROR_SUB_MB_TYPE;
  476|       |
  477|  15.4k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
  478|       |            /* Storing collocated information */
  479|  15.4k|            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
  480|  15.4k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
  481|       |            /* check if Motion compensation is done below 8x8 */
  482|  15.4k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  15.4k|#define P_L0_8x8    0
  ------------------
  |  Branch (482:16): [True: 9.23k, False: 6.22k]
  ------------------
  483|  9.23k|            {
  484|  9.23k|                u1_no_submb_part_size_lt8x8_flag = 0;
  485|  9.23k|            }
  486|  15.4k|        }
  487|      0|        {
  488|  15.4k|            UWORD8 u1_sub_mb_mode;
  489|  15.4k|            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
  490|  15.4k|                            0, ps_cab_env, ps_bitstrm,
  491|  15.4k|                            ps_dec->p_sub_mb_type_t);
  492|  15.4k|            if(u1_sub_mb_mode > 3)
  ------------------
  |  Branch (492:16): [True: 0, False: 15.4k]
  ------------------
  493|      0|                return ERROR_SUB_MB_TYPE;
  494|       |
  495|  15.4k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
  496|       |            /* Storing collocated information */
  497|  15.4k|            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
  498|  15.4k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
  499|       |            /* check if Motion compensation is done below 8x8 */
  500|  15.4k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  15.4k|#define P_L0_8x8    0
  ------------------
  |  Branch (500:16): [True: 9.27k, False: 6.18k]
  ------------------
  501|  9.27k|            {
  502|  9.27k|                u1_no_submb_part_size_lt8x8_flag = 0;
  503|  9.27k|            }
  504|  15.4k|        }
  505|      0|        {
  506|  15.4k|            UWORD8 u1_sub_mb_mode;
  507|  15.4k|            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
  508|  15.4k|                            0, ps_cab_env, ps_bitstrm,
  509|  15.4k|                            ps_dec->p_sub_mb_type_t);
  510|  15.4k|            if(u1_sub_mb_mode > 3)
  ------------------
  |  Branch (510:16): [True: 0, False: 15.4k]
  ------------------
  511|      0|                return ERROR_SUB_MB_TYPE;
  512|       |
  513|  15.4k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
  514|       |            /* Storing collocated information */
  515|  15.4k|            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
  516|  15.4k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
  517|       |            /* check if Motion compensation is done below 8x8 */
  518|  15.4k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  15.4k|#define P_L0_8x8    0
  ------------------
  |  Branch (518:16): [True: 9.18k, False: 6.27k]
  ------------------
  519|  9.18k|            {
  520|  9.18k|                u1_no_submb_part_size_lt8x8_flag = 0;
  521|  9.18k|            }
  522|  15.4k|        }
  523|      0|        {
  524|  15.4k|            UWORD8 u1_sub_mb_mode;
  525|  15.4k|            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
  526|  15.4k|                            0, ps_cab_env, ps_bitstrm,
  527|  15.4k|                            ps_dec->p_sub_mb_type_t);
  528|  15.4k|            if(u1_sub_mb_mode > 3)
  ------------------
  |  Branch (528:16): [True: 0, False: 15.4k]
  ------------------
  529|      0|                return ERROR_SUB_MB_TYPE;
  530|       |
  531|  15.4k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
  532|       |            /* Storing collocated information */
  533|  15.4k|            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
  534|  15.4k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
  535|       |            /* check if Motion compensation is done below 8x8 */
  536|  15.4k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  15.4k|#define P_L0_8x8    0
  ------------------
  |  Branch (536:16): [True: 9.06k, False: 6.39k]
  ------------------
  537|  9.06k|            {
  538|  9.06k|                u1_no_submb_part_size_lt8x8_flag = 0;
  539|  9.06k|            }
  540|  15.4k|        }
  541|      0|        u1_num_mb_part = 4;
  542|  15.4k|    }
  543|  80.0k|    else
  544|  80.0k|    {
  545|  80.0k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
  546|       |        /* Storing collocated Mb and SubMb mode information */
  547|  80.0k|        *pu1_col_info++ = (u1_mb_type << 6);
  548|  80.0k|        if(u1_mb_type)
  ------------------
  |  Branch (548:12): [True: 32.0k, False: 47.9k]
  ------------------
  549|  32.0k|            *pu1_col_info++ = (u1_mb_type << 6);
  550|  80.0k|    }
  551|       |    /* Decoding reference index 0: For simple profile the following   */
  552|       |    /* conditions are always true (mb_field_decoding_flag == 0);      */
  553|       |    /* (MbPartPredMode != PredL1)                                     */
  554|  95.5k|    {
  555|  95.5k|        WORD8 * pi1_top_ref_idx_ctx_inc_arr = p_curr_ctxt->i1_ref_idx;
  556|  95.5k|        WORD8 * pi1_left_ref_idx_ctxt_inc = ps_dec->pi1_left_ref_idx_ctxt_inc;
  557|  95.5k|        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  558|  95.5k|        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  559|  95.5k|        UWORD8 uc_num_ref_idx_l0_active_minus1 =
  560|  95.5k|                        (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
  561|  95.5k|                                        << (u1_mbaff & uc_field)) - 1;
  562|       |
  563|  95.5k|        if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
  ------------------
  |  |  454|  95.5k|#define PRED_8x8R0  4
  ------------------
  |  Branch (563:12): [True: 50.0k, False: 45.4k]
  ------------------
  564|  50.0k|        {
  565|       |            /* force the routine to decode ref idx for each partition */
  566|  50.0k|            *((UWORD32 *)pi1_ref_idx) = 0x01010101;
  567|  50.0k|            ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0,
  568|  50.0k|                                             uc_num_ref_idx_l0_active_minus1,
  569|  50.0k|                                             u1_mb_mc_mode, pi1_ref_idx,
  570|  50.0k|                                             pi1_left_ref_idx_ctxt_inc,
  571|  50.0k|                                             pi1_top_ref_idx_ctx_inc_arr, ps_cab_env,
  572|  50.0k|                                             ps_bitstrm, ps_dec->p_ref_idx_t);
  573|  50.0k|            if(ret != OK)
  ------------------
  |  |  114|  50.0k|#define OK        0
  ------------------
  |  Branch (573:16): [True: 150, False: 49.9k]
  ------------------
  574|    150|                return ret;
  575|  50.0k|        }
  576|  45.4k|        else
  577|  45.4k|        {
  578|       |            /* When there exists only a single frame to predict from */
  579|  45.4k|            pi1_left_ref_idx_ctxt_inc[0] = 0;
  580|  45.4k|            pi1_left_ref_idx_ctxt_inc[1] = 0;
  581|  45.4k|            pi1_top_ref_idx_ctx_inc_arr[0] = 0;
  582|  45.4k|            pi1_top_ref_idx_ctx_inc_arr[1] = 0;
  583|  45.4k|            *((UWORD32 *)pi1_ref_idx) = 0;
  584|  45.4k|        }
  585|  95.5k|    }
  586|       |
  587|  95.4k|    {
  588|  95.4k|        UWORD8 u1_p_idx;
  589|  95.4k|        UWORD32 uc_i;
  590|  95.4k|        parse_part_params_t * ps_part = ps_dec->ps_part;
  591|  95.4k|        UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
  592|  95.4k|        UWORD32 u4_sub_mb_num;
  593|  95.4k|        const UWORD8 * pu1_top_left_sub_mb_indx;
  594|  95.4k|        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
  595|  95.4k|        UWORD16 u2_sub_mb_num_pack = 0x028A;
  596|       |
  597|       |        /* Loading the table pointers */
  598|  95.4k|        const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
  599|  95.4k|        const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
  600|  95.4k|        const UWORD8 * pu1_sub_mb_indx_mod =
  601|  95.4k|                        (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
  602|  95.4k|                                        + (uc_sub_mb * 6);
  603|  95.4k|        const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
  604|  95.4k|        const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
  605|  95.4k|        const UWORD8 * pu1_num_sub_mb_part =
  606|  95.4k|                        (const UWORD8 *)gau1_ih264d_num_submb_part;
  607|       |
  608|       |        /*********************************************************/
  609|       |        /* default initialisations for condition (uc_sub_mb == 0) */
  610|       |        /* i.e. all are subpartitions of 8x8                     */
  611|       |        /*********************************************************/
  612|  95.4k|        u1_sub_mb_mode = 0;
  613|  95.4k|        u1_num_subpart = 1;
  614|  95.4k|        u1_mb_part_width = pu1_mb_partw[u1_mb_type];
  615|  95.4k|        u1_mb_part_height = pu1_mb_parth[u1_mb_type];
  616|  95.4k|        pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
  617|  95.4k|        u4_sub_mb_num = 0;
  618|       |
  619|       |        /* Loop on number of partitions */
  620|   268k|        for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (620:37): [True: 173k, False: 95.4k]
  ------------------
  621|   173k|        {
  622|   173k|            UWORD8 uc_j;
  623|   173k|            if(uc_sub_mb)
  ------------------
  |  Branch (623:16): [True: 61.3k, False: 112k]
  ------------------
  624|  61.3k|            {
  625|  61.3k|                u1_sub_mb_mode = u4_sub_mb_pack >> 24;
  626|  61.3k|                u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
  627|  61.3k|                u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
  628|  61.3k|                u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
  629|  61.3k|                pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
  630|  61.3k|                u4_sub_mb_num = u2_sub_mb_num_pack >> 12;
  631|  61.3k|                u4_sub_mb_pack <<= 8;
  632|  61.3k|                u2_sub_mb_num_pack <<= 4;
  633|  61.3k|            }
  634|       |            /* Loop on Number of sub-partitions */
  635|   401k|            for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
  ------------------
  |  Branch (635:27): [True: 227k, False: 173k]
  ------------------
  636|   227k|            {
  637|   227k|                mv_pred_t * ps_mv;
  638|       |
  639|   227k|                u4_sub_mb_num += *pu1_top_left_sub_mb_indx;
  640|   227k|                ps_mv = ps_mv_start + u4_sub_mb_num;
  641|       |
  642|       |                /* Storing Info for partitions */
  643|   227k|                ps_part->u1_is_direct = PART_NOT_DIRECT;
  ------------------
  |  |  570|   227k|#define PART_NOT_DIRECT                0
  ------------------
  644|   227k|                ps_part->u1_sub_mb_num = u4_sub_mb_num;
  645|   227k|                ps_part->u1_partheight = u1_mb_part_height;
  646|   227k|                ps_part->u1_partwidth = u1_mb_part_width;
  647|       |
  648|       |                /* Increment partition Index */
  649|   227k|                u1_p_idx++;
  650|   227k|                ps_part++;
  651|       |
  652|   227k|                ih264d_get_mvd_cabac(u4_sub_mb_num, 0, u1_mb_part_width,
  653|   227k|                                     u1_mb_part_height, 1, ps_dec, ps_mv);
  654|   227k|            }
  655|   173k|        }
  656|  95.4k|        ps_parse_mb_data->u1_num_part = u1_p_idx;
  657|  95.4k|        ps_dec->ps_part = ps_part;
  658|  95.4k|    }
  659|  95.4k|    {
  660|  95.4k|        UWORD8 u1_cbp;
  661|       |
  662|       |        /* Read the Coded block pattern */
  663|  95.4k|        u1_cbp = (WORD8)ih264d_parse_ctx_cbp_cabac(ps_dec);
  664|  95.4k|        COPYTHECONTEXT("coded_block_pattern", u1_cbp);
  665|  95.4k|        ps_cur_mb_info->u1_cbp = u1_cbp;
  666|  95.4k|        p_curr_ctxt->u1_cbp = u1_cbp;
  667|  95.4k|        p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
  668|  95.4k|        p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
  669|  95.4k|        ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
  670|       |
  671|  95.4k|        if(u1_cbp > 47)
  ------------------
  |  Branch (671:12): [True: 0, False: 95.4k]
  ------------------
  672|      0|            return ERROR_CBP;
  673|       |
  674|  95.4k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
  675|  95.4k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  676|       |
  677|       |        /* Read the transform8x8 u4_flag if present */
  678|  95.4k|        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & 0xf)
  ------------------
  |  Branch (678:12): [True: 18.3k, False: 77.0k]
  |  Branch (678:64): [True: 10.5k, False: 7.80k]
  ------------------
  679|  10.5k|                        && u1_no_submb_part_size_lt8x8_flag)
  ------------------
  |  Branch (679:28): [True: 9.46k, False: 1.05k]
  ------------------
  680|  9.46k|        {
  681|  9.46k|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
  682|  9.46k|                            ps_dec, ps_cur_mb_info);
  683|  9.46k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  684|  9.46k|            p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
  685|  9.46k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  686|       |
  687|  9.46k|        }
  688|  85.9k|        else
  689|  85.9k|        {
  690|  85.9k|            p_curr_ctxt->u1_transform8x8_ctxt = 0;
  691|  85.9k|        }
  692|       |
  693|       |        /* Read mb_qp_delta */
  694|  95.4k|        if(u1_cbp)
  ------------------
  |  Branch (694:12): [True: 32.5k, False: 62.9k]
  ------------------
  695|  32.5k|        {
  696|  32.5k|            WORD8 c_temp;
  697|  32.5k|            ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
  698|  32.5k|            if(ret != OK)
  ------------------
  |  |  114|  32.5k|#define OK        0
  ------------------
  |  Branch (698:16): [True: 72, False: 32.4k]
  ------------------
  699|     72|                return ret;
  700|  32.4k|            COPYTHECONTEXT("mb_qp_delta", c_temp);
  701|  32.4k|            if(c_temp != 0)
  ------------------
  |  Branch (701:16): [True: 4.45k, False: 27.9k]
  ------------------
  702|  4.45k|            {
  703|  4.45k|                ret = ih264d_update_qp(ps_dec, c_temp);
  704|  4.45k|                if(ret != OK)
  ------------------
  |  |  114|  4.45k|#define OK        0
  ------------------
  |  Branch (704:20): [True: 0, False: 4.45k]
  ------------------
  705|      0|                    return ret;
  706|  4.45k|            }
  707|  32.4k|        }
  708|  62.9k|        else
  709|  62.9k|            ps_dec->i1_prev_mb_qp_delta = 0;
  710|       |
  711|       |
  712|       |
  713|  95.3k|        ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0);
  714|  95.3k|        if(EXCEED_OFFSET(ps_dec->ps_bitstrm))
  ------------------
  |  |   93|  95.3k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 2.40k, False: 92.9k]
  |  |  ------------------
  ------------------
  715|  2.40k|            return ERROR_EOB_TERMINATE_T;
  716|  95.3k|    }
  717|  92.9k|    return OK;
  ------------------
  |  |  114|  92.9k|#define OK        0
  ------------------
  718|  95.3k|}
ih264d_update_nnz_for_skipmb:
  755|  13.2M|{
  756|  13.2M|    UWORD32 *pu4_buf;
  757|  13.2M|    UWORD8 *pu1_buf;
  758|  13.2M|    UNUSED(u1_entrpy);
  ------------------
  |  |   45|  13.2M|#define UNUSED(x) ((void)(x))
  ------------------
  759|  13.2M|    pu1_buf = ps_dec->pu1_left_nnz_y;
  760|  13.2M|    pu4_buf = (UWORD32 *)pu1_buf;
  761|  13.2M|    *pu4_buf = 0;
  762|  13.2M|    pu1_buf = ps_dec->pu1_left_nnz_uv;
  763|  13.2M|    pu4_buf = (UWORD32 *)pu1_buf;
  764|  13.2M|    *pu4_buf = 0;
  765|  13.2M|    pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
  766|  13.2M|    pu4_buf = (UWORD32 *)pu1_buf;
  767|  13.2M|    *pu4_buf = 0;
  768|  13.2M|    pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
  769|  13.2M|    pu4_buf = (UWORD32 *)pu1_buf;
  770|  13.2M|    *pu4_buf = 0;
  771|  13.2M|    ps_cur_mb_info->ps_curmb->u2_luma_csbp = 0;
  772|  13.2M|    ps_cur_mb_info->u2_luma_csbp = 0;
  773|  13.2M|    ps_cur_mb_info->u2_chroma_csbp = 0;
  774|  13.2M|}

ih264d_form_pred_weight_matrix:
   90|  16.6k|{
   91|  16.6k|    dec_slice_params_t *ps_cur_slice;
   92|  16.6k|    UWORD8 uc_num_ref_idx_l0_active, uc_num_ref_idx_l1_active;
   93|  16.6k|    UWORD8 i, j;
   94|  16.6k|    UWORD32 *pu4_mat_iwt_ofst;
   95|  16.6k|    UWORD16 i2_idx;
   96|  16.6k|    UWORD32 *pui32_weight_offset_l0, *pui32_weight_offset_l1;
   97|  16.6k|    UWORD32 u4_temp;
   98|       |
   99|  16.6k|    ps_cur_slice = ps_dec->ps_cur_slice;
  100|  16.6k|    uc_num_ref_idx_l0_active = ps_cur_slice->u1_num_ref_idx_lx_active[0];
  101|  16.6k|    uc_num_ref_idx_l1_active = ps_cur_slice->u1_num_ref_idx_lx_active[1];
  102|       |
  103|  16.6k|    pu4_mat_iwt_ofst = ps_dec->pu4_wts_ofsts_mat;
  104|       |
  105|  16.6k|    if(ps_cur_slice->u1_slice_type == B_SLICE)
  ------------------
  |  |  369|  16.6k|#define B_SLICE  1
  ------------------
  |  Branch (105:8): [True: 3.78k, False: 12.8k]
  ------------------
  106|  3.78k|    {
  107|  13.5k|        for(i = 0; i < uc_num_ref_idx_l0_active; i++)
  ------------------
  |  Branch (107:20): [True: 9.73k, False: 3.78k]
  ------------------
  108|  9.73k|        {
  109|  9.73k|            pui32_weight_offset_l0 = ps_cur_slice->u4_wt_ofst_lx[0][i];
  110|  33.5k|            for(j = 0; j < uc_num_ref_idx_l1_active; j++)
  ------------------
  |  Branch (110:24): [True: 23.8k, False: 9.73k]
  ------------------
  111|  23.8k|            {
  112|  23.8k|                pui32_weight_offset_l1 = ps_cur_slice->u4_wt_ofst_lx[1][j];
  113|  23.8k|                i2_idx = i * uc_num_ref_idx_l0_active + j;
  114|  23.8k|                i2_idx = X3(i2_idx);
  ------------------
  |  |   92|  23.8k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  115|       |                /*        u4_temp = (pui32_weight_offset_l0[0] | (pui32_weight_offset_l1[0] << 16));
  116|       |                 pu4_mat_iwt_ofst[0] = u4_temp;
  117|       |                 u4_temp = (pui32_weight_offset_l0[1] | (pui32_weight_offset_l1[1] << 16));
  118|       |                 pu4_mat_iwt_ofst[1] = u4_temp;
  119|       |                 u4_temp = (pui32_weight_offset_l0[2] | (pui32_weight_offset_l1[2] << 16));
  120|       |                 pu4_mat_iwt_ofst[2] = u4_temp;
  121|       |                 pu4_mat_iwt_ofst += 3;*/
  122|  23.8k|                pu4_mat_iwt_ofst[0] = pui32_weight_offset_l0[0];
  123|  23.8k|                pu4_mat_iwt_ofst[1] = pui32_weight_offset_l1[0];
  124|  23.8k|                pu4_mat_iwt_ofst[2] = pui32_weight_offset_l0[1];
  125|  23.8k|                pu4_mat_iwt_ofst[3] = pui32_weight_offset_l1[1];
  126|  23.8k|                pu4_mat_iwt_ofst[4] = pui32_weight_offset_l0[2];
  127|  23.8k|                pu4_mat_iwt_ofst[5] = pui32_weight_offset_l1[2];
  128|  23.8k|                pu4_mat_iwt_ofst += 6;
  129|  23.8k|            }
  130|  9.73k|        }
  131|  3.78k|    }
  132|  12.8k|    else
  133|  12.8k|    {
  134|  34.5k|        for(i = 0; i < uc_num_ref_idx_l0_active; i++)
  ------------------
  |  Branch (134:20): [True: 21.6k, False: 12.8k]
  ------------------
  135|  21.6k|        {
  136|  21.6k|            pui32_weight_offset_l0 = ps_cur_slice->u4_wt_ofst_lx[0][i];
  137|  21.6k|            i2_idx = X3(i);
  ------------------
  |  |   92|  21.6k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  138|  21.6k|            u4_temp = (UWORD32)pui32_weight_offset_l0[0];
  139|  21.6k|            pu4_mat_iwt_ofst[0] = u4_temp;
  140|  21.6k|            u4_temp = (UWORD32)pui32_weight_offset_l0[1];
  141|  21.6k|            pu4_mat_iwt_ofst[2] = u4_temp;
  142|  21.6k|            u4_temp = (UWORD32)pui32_weight_offset_l0[2];
  143|  21.6k|            pu4_mat_iwt_ofst[4] = u4_temp;
  144|  21.6k|            pu4_mat_iwt_ofst += 6;
  145|  21.6k|        }
  146|  12.8k|    }
  147|  16.6k|}
ih264d_end_of_pic_dispbuf_mgr:
  750|   126k|{
  751|   126k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
  752|   126k|    UWORD8 u1_num_of_users = 0;
  753|   126k|    WORD32 ret;
  754|       |
  755|   126k|    H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
  756|   126k|    if(1)
  ------------------
  |  Branch (756:8): [True: 126k, Folded]
  ------------------
  757|   126k|    {
  758|       |
  759|   126k|        {
  760|   126k|            ih264d_delete_nonref_nondisplay_pics(ps_dec->ps_dpb_mgr);
  761|   126k|            if(ps_cur_slice->u1_mmco_equalto5
  ------------------
  |  Branch (761:16): [True: 2.51k, False: 123k]
  ------------------
  762|   123k|                            || (ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL))
  ------------------
  |  |  328|   123k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (762:32): [True: 104k, False: 19.1k]
  ------------------
  763|   107k|            {
  764|   107k|                ps_dec->ps_cur_pic->i4_poc = 0;
  765|   107k|                if(ps_dec->u4_total_mbs_coded
  ------------------
  |  Branch (765:20): [True: 107k, False: 104]
  ------------------
  766|   107k|                                == (ps_dec->ps_cur_sps->u4_max_mb_addr + 1))
  767|   107k|                    ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
  768|   107k|                ih264d_release_display_bufs(ps_dec);
  769|   107k|            }
  770|   126k|            if(IVD_DECODE_FRAME_OUT != ps_dec->e_frm_out_mode)
  ------------------
  |  Branch (770:16): [True: 126k, False: 0]
  ------------------
  771|   126k|            {
  772|   126k|                ret = ih264d_assign_display_seq(ps_dec);
  773|   126k|                if(ret != OK)
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  |  Branch (773:20): [True: 0, False: 126k]
  ------------------
  774|      0|                    return ret;
  775|   126k|            }
  776|   126k|        }
  777|       |
  778|   126k|        if(ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (778:12): [True: 109k, False: 16.6k]
  ------------------
  779|   109k|        {
  780|       |            /* Mark pic buf as needed for reference */
  781|   109k|            ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
  782|   109k|                                     ps_dec->u1_pic_buf_id,
  783|   109k|                                     BUF_MGR_REF);
  ------------------
  |  |   50|   109k|#define BUF_MGR_REF          (1 << 2)
  ------------------
  784|       |            /* Mark mv buf as needed for reference */
  785|   109k|            ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
  786|   109k|                                     ps_dec->as_buf_id_info_map[ps_dec->u1_pic_buf_id].mv_buf_id,
  787|   109k|                                     BUF_MGR_REF);
  ------------------
  |  |   50|   109k|#define BUF_MGR_REF          (1 << 2)
  ------------------
  788|   109k|            ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] = 1;
  789|   109k|        }
  790|       |
  791|       |        /* 420 consumer */
  792|       |        /* Increment the number of users by 1 for display based upon */
  793|       |        /*the SEEK KEY FRAME control sent to decoder                 */
  794|   126k|        if(((0 == ps_dec->u1_last_pic_not_decoded)
  ------------------
  |  Branch (794:13): [True: 126k, False: 0]
  ------------------
  795|   126k|                        && (0
  ------------------
  |  Branch (795:28): [True: 126k, False: 0]
  ------------------
  796|   126k|                                        == (ps_dec->ps_cur_pic->u4_pack_slc_typ
  797|   126k|                                                        & ps_dec->u4_skip_frm_mask)))
  798|      0|                        || (ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL))
  ------------------
  |  |  328|      0|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (798:28): [True: 0, False: 0]
  ------------------
  799|   126k|        {
  800|       |            /* Mark pic buf as needed for display */
  801|   126k|            ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
  802|   126k|                                     ps_dec->u1_pic_buf_id,
  803|   126k|                                     BUF_MGR_IO);
  ------------------
  |  |   53|   126k|#define BUF_MGR_IO           (1 << 3)
  ------------------
  804|       |
  805|   126k|        }
  806|       |
  807|   126k|        if(!ps_cur_slice->u1_field_pic_flag
  ------------------
  |  Branch (807:12): [True: 126k, False: 0]
  ------------------
  808|      0|                        || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
                                      || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
  ------------------
  |  |   66|      0|#define BOT_FIELD_ONLY      0x01
  ------------------
  |  Branch (808:28): [True: 0, False: 0]
  ------------------
  809|      0|                                        != ps_dec->u1_top_bottom_decoded))
  810|   126k|        {
  811|   126k|            pic_buffer_t *ps_cur_pic = ps_dec->ps_cur_pic;
  812|   126k|            ps_cur_pic->u2_disp_width = ps_dec->u2_disp_width;
  813|   126k|            ps_cur_pic->u2_disp_height = ps_dec->u2_disp_height >> 1;
  814|       |
  815|   126k|            ps_cur_pic->u2_crop_offset_y = ps_dec->u2_crop_offset_y;
  816|   126k|            ps_cur_pic->u2_crop_offset_uv = ps_dec->u2_crop_offset_uv;
  817|   126k|            ps_cur_pic->u1_pic_type = 0;
  818|   126k|            {
  819|   126k|                WORD64 i8_display_poc;
  820|   126k|                i8_display_poc = (WORD64)ps_dec->i4_prev_max_display_seq +
  821|   126k|                            ps_dec->ps_cur_pic->i4_poc;
  822|   126k|                if(IS_OUT_OF_RANGE_S32(i8_display_poc))
  ------------------
  |  |   58|   126k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 126k]
  |  |  |  Branch (58:54): [True: 257, False: 126k]
  |  |  ------------------
  ------------------
  823|    257|                {
  824|    257|                    ps_dec->i4_prev_max_display_seq = 0;
  825|    257|                }
  826|   126k|            }
  827|   126k|            ret = ih264d_insert_pic_in_display_list(
  828|   126k|                            ps_dec->ps_dpb_mgr,
  829|   126k|                            ps_dec->u1_pic_buf_id,
  830|   126k|                            ps_dec->i4_prev_max_display_seq
  831|   126k|                                            + ps_dec->ps_cur_pic->i4_poc,
  832|   126k|                            ps_dec->ps_cur_pic->i4_frame_num);
  833|   126k|            if(ret != OK)
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  |  Branch (833:16): [True: 0, False: 126k]
  ------------------
  834|      0|                return ret;
  835|       |
  836|   126k|            {
  837|   126k|                ivd_video_decode_op_t * ps_dec_output =
  838|   126k|                                (ivd_video_decode_op_t *)ps_dec->pv_dec_out;
  839|       |
  840|   126k|                ps_dec_output->u4_frame_decoded_flag = 1;
  841|   126k|            }
  842|   126k|            if(ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] == 0)
  ------------------
  |  Branch (842:16): [True: 16.6k, False: 109k]
  ------------------
  843|  16.6k|            {
  844|  16.6k|                ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
  845|  16.6k|                                      ps_dec->as_buf_id_info_map[ps_dec->u1_pic_buf_id].mv_buf_id,
  846|  16.6k|                                      BUF_MGR_REF);
  ------------------
  |  |   50|  16.6k|#define BUF_MGR_REF          (1 << 2)
  ------------------
  847|  16.6k|                ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] = 0;
  848|       |
  849|  16.6k|            }
  850|   126k|        }
  851|      0|        else
  852|      0|        {
  853|      0|            H264_DEC_DEBUG_PRINT("pic not inserted display %d %d\n",
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  854|      0|                                 ps_cur_slice->u1_field_pic_flag,
  855|      0|                                 ps_dec->u1_second_field);
  856|      0|        }
  857|       |
  858|   126k|        if(!ps_cur_slice->u1_field_pic_flag
  ------------------
  |  Branch (858:12): [True: 126k, False: 0]
  ------------------
  859|      0|                        || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
                                      || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
  ------------------
  |  |   66|      0|#define BOT_FIELD_ONLY      0x01
  ------------------
  |  Branch (859:28): [True: 0, False: 0]
  ------------------
  860|      0|                                        == ps_dec->u1_top_bottom_decoded))
  861|   126k|        {
  862|   126k|            if(IVD_DECODE_FRAME_OUT == ps_dec->e_frm_out_mode)
  ------------------
  |  Branch (862:16): [True: 0, False: 126k]
  ------------------
  863|      0|            {
  864|      0|                ret = ih264d_assign_display_seq(ps_dec);
  865|      0|                if(ret != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (865:20): [True: 0, False: 0]
  ------------------
  866|      0|                    return ret;
  867|      0|            }
  868|   126k|        }
  869|   126k|    }
  870|       |
  871|   126k|    H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
  872|       |
  873|   126k|    return OK;
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  874|   126k|}
ih264d_deblock_picture:
  894|   126k|{
  895|   126k|    dec_struct_t *ps_dec = (dec_struct_t *)ptr;
  896|       |
  897|   126k|    {
  898|       |        /*Deblock picture only if all the mb's in the frame have been decoded*/
  899|   126k|        if(ps_dec->u1_pic_decode_done == 1)
  ------------------
  |  Branch (899:12): [True: 126k, False: 104]
  ------------------
  900|   126k|        {
  901|   126k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag
  ------------------
  |  Branch (901:16): [True: 0, False: 126k]
  ------------------
  902|   126k|                            || ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (902:32): [True: 0, False: 126k]
  ------------------
  903|      0|            {
  904|      0|                ps_dec->p_DeblockPicture[ps_dec->ps_cur_slice->u1_mbaff_frame_flag](
  905|      0|                                ps_dec);
  906|      0|            }
  907|   126k|            else
  908|       |
  909|   126k|            {
  910|       |
  911|   126k|                ih264d_deblock_picture_progressive(ps_dec);
  912|   126k|            }
  913|       |
  914|   126k|        }
  915|   126k|    }
  916|       |
  917|   126k|}
ih264d_deblock_display:
  930|   126k|{
  931|   126k|    WORD32 ret;
  932|       |    /* Call deblocking */
  933|   126k|    ih264d_deblock_picture(ps_dec);
  934|       |
  935|   126k|    ret = ih264d_end_of_pic_dispbuf_mgr(ps_dec);
  936|   126k|    if(ret != OK)
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  |  Branch (936:8): [True: 0, False: 126k]
  ------------------
  937|      0|        return ret;
  938|       |
  939|   126k|    return OK;
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  940|   126k|}
ih264d_end_of_pic:
  956|   126k|{
  957|   126k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
  958|   126k|    WORD32 ret;
  959|       |
  960|   126k|    {
  961|   126k|        dec_err_status_t * ps_err = ps_dec->ps_dec_err_status;
  962|   126k|        if(ps_err->u1_err_flag & REJECT_CUR_PIC)
  ------------------
  |  |  602|   126k|#define REJECT_CUR_PIC    (0x01)
  ------------------
  |  Branch (962:12): [True: 0, False: 126k]
  ------------------
  963|      0|        {
  964|      0|            ih264d_err_pic_dispbuf_mgr(ps_dec);
  965|      0|            return ERROR_NEW_FRAME_EXPECTED;
  966|      0|        }
  967|   126k|    }
  968|       |
  969|   126k|    H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
  970|   126k|    ret = ih264d_end_of_pic_processing(ps_dec);
  971|   126k|    if(ret != OK)
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  |  Branch (971:8): [True: 0, False: 126k]
  ------------------
  972|      0|        return ret;
  973|       |    /*--------------------------------------------------------------------*/
  974|       |    /* ih264d_decode_pic_order_cnt - calculate the Pic Order Cnt                    */
  975|       |    /* Needed to detect end of picture                                    */
  976|       |    /*--------------------------------------------------------------------*/
  977|       |
  978|   126k|    H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
  979|       |
  980|   126k|    return OK;
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  981|   126k|}
ih264d_fix_error_in_dpb:
  997|   136k|{
  998|       |    /*--------------------------------------------------------------------*/
  999|       |    /* If there is common node in lt_list and st_list then delete it from */
 1000|       |    /* st_list                                                            */
 1001|       |    /*--------------------------------------------------------------------*/
 1002|   136k|    UWORD8 no_of_nodes_deleted = 0;
 1003|   136k|    UWORD8 lt_ref_num = ps_dec->ps_dpb_mgr->u1_num_lt_ref_bufs;
 1004|   136k|    struct dpb_info_t *ps_lt_curr_dpb = ps_dec->ps_dpb_mgr->ps_dpb_ht_head;
 1005|   153k|    while(lt_ref_num && ps_lt_curr_dpb)
  ------------------
  |  Branch (1005:11): [True: 17.1k, False: 136k]
  |  Branch (1005:25): [True: 17.1k, False: 0]
  ------------------
 1006|  17.1k|    {
 1007|  17.1k|        if(ps_dec->ps_dpb_mgr->ps_dpb_st_head
  ------------------
  |  Branch (1007:12): [True: 1.57k, False: 15.5k]
  ------------------
 1008|  1.57k|                && ((ps_lt_curr_dpb->s_bot_field.u1_reference_info
  ------------------
  |  Branch (1008:20): [True: 0, False: 1.57k]
  ------------------
 1009|  1.57k|                        | ps_lt_curr_dpb->s_top_field.u1_reference_info)
 1010|  1.57k|                        == (IS_SHORT_TERM | IS_LONG_TERM)))
  ------------------
  |  |  596|  1.57k|#define IS_SHORT_TERM  1
  ------------------
                                      == (IS_SHORT_TERM | IS_LONG_TERM)))
  ------------------
  |  |  597|  1.57k|#define IS_LONG_TERM   2
  ------------------
 1011|      0|        {
 1012|      0|            struct dpb_info_t *ps_st_next_dpb = ps_dec->ps_dpb_mgr->ps_dpb_st_head;
 1013|      0|            struct dpb_info_t *ps_st_curr_dpb = ps_dec->ps_dpb_mgr->ps_dpb_st_head;
 1014|      0|            UWORD8 st_ref_num = ps_dec->ps_dpb_mgr->u1_num_st_ref_bufs;
 1015|      0|            while(st_ref_num && ps_st_curr_dpb)
  ------------------
  |  Branch (1015:19): [True: 0, False: 0]
  |  Branch (1015:33): [True: 0, False: 0]
  ------------------
 1016|      0|            {
 1017|      0|                if(ps_st_curr_dpb == ps_lt_curr_dpb)
  ------------------
  |  Branch (1017:20): [True: 0, False: 0]
  ------------------
 1018|      0|                {
 1019|      0|                    if(st_ref_num == ps_dec->ps_dpb_mgr->u1_num_st_ref_bufs)
  ------------------
  |  Branch (1019:24): [True: 0, False: 0]
  ------------------
 1020|      0|                    {
 1021|      0|                        ps_dec->ps_dpb_mgr->ps_dpb_st_head =
 1022|      0|                                ps_dec->ps_dpb_mgr->ps_dpb_st_head->ps_prev_short;
 1023|      0|                        ps_st_curr_dpb = ps_dec->ps_dpb_mgr->ps_dpb_st_head;
 1024|      0|                    }
 1025|      0|                    else
 1026|      0|                    {
 1027|      0|                        ps_st_next_dpb->ps_prev_short = ps_st_curr_dpb->ps_prev_short;
 1028|      0|                    }
 1029|      0|                    ps_dec->ps_dpb_mgr->u1_num_st_ref_bufs--;
 1030|      0|                    no_of_nodes_deleted++;
 1031|      0|                    break;
 1032|      0|                }
 1033|      0|                ps_st_next_dpb = ps_st_curr_dpb;
 1034|      0|                ps_st_curr_dpb = ps_st_curr_dpb->ps_prev_short;
 1035|      0|                st_ref_num--;
 1036|      0|            }
 1037|      0|        }
 1038|  17.1k|        ps_lt_curr_dpb = ps_lt_curr_dpb->ps_prev_long;
 1039|  17.1k|        lt_ref_num--;
 1040|  17.1k|    }
 1041|   136k|    return no_of_nodes_deleted;
 1042|   136k|}

ih264d_decode_spatial_direct:
   75|   602k|{
   76|   602k|    mv_pred_t s_mv_pred, *ps_mv;
   77|   602k|    UWORD32 u4_sub_mb_num;
   78|   602k|    UWORD8 u1_col_zero_flag, u1_direct_zero_pred_flag = 0;
   79|   602k|    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
   80|   602k|    mv_pred_t *ps_mv_ntop_start;
   81|   602k|    mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
   82|   602k|    UWORD8 partition_size, sub_partition, u1_mb_partw, u1_mb_parth;
   83|   602k|    UWORD8 i;
   84|   602k|    WORD8 i1_pred, i1_ref_frame0, i1_ref_frame1;
   85|   602k|    struct pic_buffer_t *ps_ref_frame = NULL, *ps_col_pic, *ps_pic_buff0 = NULL,
   86|   602k|                    *ps_pic_buff1 = NULL;
   87|       |
   88|   602k|    UWORD8 u1_zero_pred_cond_f, u1_zero_pred_cond_b;
   89|   602k|    WORD16 i2_def_mv[2], i2_spat_pred_mv[4], *pi2_final_mv0, *pi2_final_mv1;
   90|   602k|    UWORD16 ui2_mask_fwd = 0, ui2_mask_bwd = 0, u2_mask = 0;
   91|   602k|    UWORD32 *pui32_weight_ofsts = NULL;
   92|   602k|    directmv_t s_mvdirect;
   93|   602k|    UWORD8 u1_colz;
   94|   602k|    UWORD8 u1_final_ref_idx = 0;
   95|   602k|    const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
   96|   602k|    const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
   97|   602k|    const UWORD16 sub_mask_table[] =
   98|   602k|        { 0x33, 0x3, 0x11, 0x1 };
   99|   602k|    const UWORD16 mask_table[] =
  100|   602k|        { 0xffff, /*16x16 NA */
  101|   602k|          0xff, /* 16x8*/
  102|   602k|          0x3333, /* 8x16*/
  103|   602k|          0x33 };/* 8x8*/
  104|   602k|    mv_pred_t s_temp_mv_pred;
  105|   602k|    WORD32 ret = 0;
  106|       |
  107|       |    /* CHANGED CODE */
  108|   602k|    ps_mv_ntop_start = ps_dec->ps_mv_cur + (u4_mb_num << 4)
  109|   602k|                    - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
  110|       |
  111|       |    /* assign default values for MotionVector as zero */
  112|   602k|    i2_def_mv[0] = 0;
  113|   602k|    i2_def_mv[1] = 0;
  114|       |
  115|   602k|    u1_direct_zero_pred_flag = ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb_start,
  116|   602k|                                              ps_mv_ntop_start, &s_mv_pred, 0, 4,
  117|   602k|                                              0, 1, B_DIRECT_SPATIAL);
  ------------------
  |  |  489|   602k|#define B_DIRECT_SPATIAL  26
  ------------------
  118|       |
  119|   602k|    i2_spat_pred_mv[0] = s_mv_pred.i2_mv[0];
  120|   602k|    i2_spat_pred_mv[1] = s_mv_pred.i2_mv[1];
  121|   602k|    i2_spat_pred_mv[2] = s_mv_pred.i2_mv[2];
  122|   602k|    i2_spat_pred_mv[3] = s_mv_pred.i2_mv[3];
  123|       |
  124|   602k|    i1_ref_frame0 = s_mv_pred.i1_ref_frame[0];
  125|   602k|    i1_ref_frame1 = s_mv_pred.i1_ref_frame[1];
  126|       |
  127|   602k|    i1_ref_frame0 = (i1_ref_frame0 < 0) ? -1 : i1_ref_frame0;
  ------------------
  |  Branch (127:21): [True: 37.6k, False: 565k]
  ------------------
  128|   602k|    i1_ref_frame1 = (i1_ref_frame1 < 0) ? -1 : i1_ref_frame1;
  ------------------
  |  Branch (128:21): [True: 19.1k, False: 583k]
  ------------------
  129|       |
  130|   602k|    i1_pred = 0;
  131|       |
  132|   602k|    {
  133|   602k|        WORD8 u1_ref_idx, u1_ref_idx1;
  134|   602k|        UWORD32 uc_Idx, uc_Idx1;
  135|   602k|        UWORD8 u1_scale_ref = (ps_dec->ps_cur_slice->u1_mbaff_frame_flag
  ------------------
  |  Branch (135:32): [True: 0, False: 602k]
  ------------------
  136|      0|                        && ps_cur_mb_info->u1_mb_field_decodingflag);
  ------------------
  |  Branch (136:28): [True: 0, False: 0]
  ------------------
  137|   602k|        u1_final_ref_idx = i1_ref_frame0;
  138|   602k|        if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (138:12): [True: 565k, False: 37.6k]
  ------------------
  139|   565k|        {
  140|       |            /* convert RefIdx if it is MbAff */
  141|   565k|            u1_ref_idx = i1_ref_frame0;
  142|   565k|            u1_ref_idx1 = i1_ref_frame0;
  143|   565k|            if(u1_scale_ref)
  ------------------
  |  Branch (143:16): [True: 0, False: 565k]
  ------------------
  144|      0|            {
  145|      0|                u1_ref_idx1 = u1_ref_idx >> 1;
  146|      0|                if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb))
  ------------------
  |  Branch (146:20): [True: 0, False: 0]
  ------------------
  147|      0|                    u1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  148|      0|            }
  149|       |            /* If i1_ref_frame0 < 0 then refIdxCol is obtained from ps_pic_buff1 */
  150|   565k|            ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u1_ref_idx1];
  151|   565k|            ps_ref_frame = ps_pic_buff0;
  152|   565k|            i1_pred = PRED_L0;
  ------------------
  |  |  483|   565k|#define PRED_L0   1
  ------------------
  153|   565k|        }
  154|       |
  155|   602k|        if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (155:12): [True: 583k, False: 19.1k]
  ------------------
  156|   583k|        {
  157|       |            /* convert RefIdx if it is MbAff */
  158|   583k|            u1_ref_idx = i1_ref_frame1;
  159|   583k|            u1_ref_idx1 = i1_ref_frame1;
  160|   583k|            if(u1_scale_ref)
  ------------------
  |  Branch (160:16): [True: 0, False: 583k]
  ------------------
  161|      0|            {
  162|      0|                u1_ref_idx1 = u1_ref_idx >> 1;
  163|      0|                if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb))
  ------------------
  |  Branch (163:20): [True: 0, False: 0]
  ------------------
  164|      0|                    u1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  165|      0|            }
  166|   583k|            ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][u1_ref_idx1];
  167|   583k|            i1_pred = i1_pred | PRED_L1;
  ------------------
  |  |  484|   583k|#define PRED_L1   2
  ------------------
  168|   583k|        }
  169|   602k|        if(i1_ref_frame0 < 0)
  ------------------
  |  Branch (169:12): [True: 37.6k, False: 565k]
  ------------------
  170|  37.6k|        {
  171|  37.6k|            ps_ref_frame = ps_pic_buff1;
  172|  37.6k|            u1_final_ref_idx = i1_ref_frame1;
  173|  37.6k|        }
  174|       |
  175|   602k|        u1_zero_pred_cond_f = (u1_direct_zero_pred_flag) || (i1_ref_frame0 < 0);
  ------------------
  |  Branch (175:31): [True: 9.36k, False: 593k]
  |  Branch (175:61): [True: 37.6k, False: 555k]
  ------------------
  176|   602k|        u1_zero_pred_cond_b = (u1_direct_zero_pred_flag) || (i1_ref_frame1 < 0);
  ------------------
  |  Branch (176:31): [True: 9.36k, False: 593k]
  |  Branch (176:61): [True: 19.1k, False: 574k]
  ------------------
  177|       |
  178|   602k|        if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
  ------------------
  |  Branch (178:12): [True: 477k, False: 125k]
  ------------------
  179|   477k|        {
  180|   477k|            uc_Idx = ((i1_ref_frame0 < 1) ? 0 : i1_ref_frame0)
  ------------------
  |  Branch (180:23): [True: 475k, False: 2.14k]
  ------------------
  181|   477k|                            * ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
  182|   477k|            if(u1_scale_ref)
  ------------------
  |  Branch (182:16): [True: 0, False: 477k]
  ------------------
  183|      0|                uc_Idx >>= 1;
  184|   477k|            uc_Idx1 = (i1_ref_frame1 < 0) ? 0 : i1_ref_frame1;
  ------------------
  |  Branch (184:23): [True: 12.2k, False: 465k]
  ------------------
  185|   477k|            uc_Idx += (u1_scale_ref) ? (uc_Idx1 >> 1) : uc_Idx1;
  ------------------
  |  Branch (185:23): [True: 0, False: 477k]
  ------------------
  186|   477k|            pui32_weight_ofsts =
  187|   477k|                            (UWORD32*)&ps_dec->pu4_wt_ofsts[2 * X3(uc_Idx)];
  ------------------
  |  |   92|   477k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  188|       |
  189|   477k|            if(i1_ref_frame0 < 0)
  ------------------
  |  Branch (189:16): [True: 32.4k, False: 444k]
  ------------------
  190|  32.4k|                pui32_weight_ofsts += 1;
  191|       |
  192|   477k|            if(u1_scale_ref && (ps_dec->ps_cur_pps->u1_wted_bipred_idc == 2))
  ------------------
  |  Branch (192:16): [True: 0, False: 477k]
  |  Branch (192:32): [True: 0, False: 0]
  ------------------
  193|      0|            {
  194|      0|                WORD16 i2_ref_idx;
  195|      0|                i2_ref_idx = MAX(i1_ref_frame0, 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  196|      0|                i2_ref_idx *= (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
  197|      0|                                << 1);
  198|      0|                i2_ref_idx += MAX(i1_ref_frame1, 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  199|      0|                if(!ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (199:20): [True: 0, False: 0]
  ------------------
  200|      0|                    i2_ref_idx +=
  201|      0|                                    (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
  202|      0|                                                    << 1)
  203|      0|                                                    * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
  204|      0|                                                                    << 1);
  205|      0|                pui32_weight_ofsts = (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2
  206|      0|                                * X3(i2_ref_idx)];
  ------------------
  |  |   92|      0|#define X3(a)   (((a) << 1) + (a))
  ------------------
  207|      0|            }
  208|   477k|        }
  209|   602k|    }
  210|       |
  211|   602k|    s_temp_mv_pred.i1_ref_frame[0] = i1_ref_frame0;
  212|   602k|    s_temp_mv_pred.i1_ref_frame[1] = i1_ref_frame1;
  213|   602k|    s_temp_mv_pred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
  214|   602k|    s_temp_mv_pred.u1_pic_type = ps_ref_frame->u1_pic_type;
  215|       |
  216|       |    /**********************************************************************/
  217|       |    /* Call the function which gets the number of partitions and          */
  218|       |    /* partition info of colocated Mb                                     */
  219|       |    /**********************************************************************/
  220|       |
  221|   602k|    ps_dec->pf_parse_mvdirect(ps_dec, ps_dec->ps_col_pic, &s_mvdirect, u1_wd_x,
  222|   602k|                           ps_dec->i4_submb_ofst, ps_cur_mb_info);
  223|   602k|    ps_col_pic = ps_dec->ps_col_pic;
  224|   602k|    if((s_mvdirect.u1_col_zeroflag_change == 0) || u1_direct_zero_pred_flag)
  ------------------
  |  Branch (224:8): [True: 599k, False: 3.22k]
  |  Branch (224:52): [True: 95, False: 3.13k]
  ------------------
  225|   599k|    {
  226|   599k|        WORD16 i2_mv_x, i2_mv_y, i2_mvX1, i2_mvY1;
  227|       |        /* Most probable case */
  228|   599k|        u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag
  229|   599k|                        + s_mvdirect.i4_mv_indices[0]);
  230|   599k|        u1_col_zero_flag = u1_col_zero_flag & 0x01;
  231|       |
  232|   599k|        if(u1_zero_pred_cond_f || ((i1_ref_frame0 == 0) && (u1_col_zero_flag == 1)))
  ------------------
  |  Branch (232:12): [True: 46.2k, False: 553k]
  |  Branch (232:36): [True: 551k, False: 2.30k]
  |  Branch (232:60): [True: 255k, False: 296k]
  ------------------
  233|   301k|        {
  234|   301k|            i2_mv_x = 0;
  235|   301k|            i2_mv_y = 0;
  236|   301k|        }
  237|   298k|        else
  238|   298k|        {
  239|   298k|            i2_mv_x = i2_spat_pred_mv[0];
  240|   298k|            i2_mv_y = i2_spat_pred_mv[1];
  241|       |
  242|   298k|        }
  243|       |
  244|   599k|        if(u1_zero_pred_cond_b || ((i1_ref_frame1 == 0) && (u1_col_zero_flag == 1)))
  ------------------
  |  Branch (244:12): [True: 28.0k, False: 571k]
  |  Branch (244:36): [True: 568k, False: 2.88k]
  |  Branch (244:60): [True: 256k, False: 312k]
  ------------------
  245|   284k|        {
  246|   284k|            i2_mvX1 = 0;
  247|   284k|            i2_mvY1 = 0;
  248|   284k|        }
  249|   315k|        else
  250|   315k|        {
  251|   315k|            i2_mvX1 = i2_spat_pred_mv[2];
  252|   315k|            i2_mvY1 = i2_spat_pred_mv[3];
  253|   315k|        }
  254|       |
  255|   599k|        u4_sub_mb_num = ps_dec->u1_sub_mb_num;
  256|   599k|        u1_mb_partw = (u1_wd_x >> 2);
  257|       |
  258|       |
  259|   599k|        if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (259:12): [True: 562k, False: 36.8k]
  ------------------
  260|   562k|        {
  261|   562k|            {
  262|   562k|               pred_info_pkd_t *ps_pred_pkd;
  263|   562k|               WORD16 i2_mv[2];
  264|   562k|               WORD8 i1_ref_idx= 0;
  265|       |
  266|   562k|               i2_mv[0] = i2_mv_x;
  267|   562k|               i2_mv[1] = i2_mv_y;
  268|       |
  269|   562k|               ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  270|   562k|            ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_partw,u4_sub_mb_num,i1_pred,
  271|   562k|                            ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  272|   562k|                            ps_pic_buff0->u1_pic_type);
  273|   562k|            ps_dec->u4_pred_info_pkd_idx++;
  274|   562k|            ps_cur_mb_info->u1_num_pred_parts++;
  275|       |
  276|       |
  277|   562k|            }
  278|       |
  279|   562k|        }
  280|       |
  281|   599k|        if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (281:12): [True: 581k, False: 18.6k]
  ------------------
  282|   581k|        {
  283|   581k|            {
  284|   581k|                pred_info_pkd_t *ps_pred_pkd;
  285|   581k|               WORD16 i2_mv[2];
  286|   581k|               WORD8 i1_ref_idx= 0;
  287|       |
  288|   581k|               i2_mv[0] = i2_mvX1;
  289|   581k|               i2_mv[1] = i2_mvY1;
  290|       |
  291|   581k|               ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  292|   581k|            ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_partw,u4_sub_mb_num,i1_pred,
  293|   581k|                            ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  294|   581k|                            ps_pic_buff1->u1_pic_type);
  295|   581k|            ps_dec->u4_pred_info_pkd_idx++;
  296|   581k|            ps_cur_mb_info->u1_num_pred_parts++;
  297|       |
  298|       |
  299|   581k|            }
  300|   581k|        }
  301|       |
  302|       |
  303|       |        /* Replication optimisation */
  304|   599k|        s_temp_mv_pred.i2_mv[0] = i2_mv_x;
  305|   599k|        s_temp_mv_pred.i2_mv[1] = i2_mv_y;
  306|   599k|        s_temp_mv_pred.i2_mv[2] = i2_mvX1;
  307|   599k|        s_temp_mv_pred.i2_mv[3] = i2_mvY1;
  308|       |
  309|       |        /* Calculating colocated zero information */
  310|   599k|        {
  311|       |            /*************************************/
  312|       |            /* If(bit2 and bit3 set)             */
  313|       |            /* then                              */
  314|       |            /*  (bit0 and bit1) => submmbmode    */
  315|       |            /*  (bit2 and bit3) => mbmode        */
  316|       |            /* else                              */
  317|       |            /*  (bit0 and bit1) => mbmode        */
  318|       |            /*************************************/
  319|       |            /*UWORD8 u1_packed_mb_sub_mb_mode = sub_partition ?
  320|       |             (s_mvdirect.i1_partitionsize[0]) : ((s_mvdirect.i1_partitionsize[0]) << 2);*/
  321|   599k|            UWORD8 u1_packed_mb_sub_mb_mode = (u1_mb_partw == 2) ? 0x03 : 0;
  ------------------
  |  Branch (321:47): [True: 3.77k, False: 596k]
  ------------------
  322|       |
  323|   599k|            if(i1_ref_frame0 < 0)
  ------------------
  |  Branch (323:16): [True: 36.8k, False: 562k]
  ------------------
  324|  36.8k|            {
  325|  36.8k|                i2_mv_x = i2_mvX1;
  326|  36.8k|                i2_mv_y = i2_mvY1;
  327|  36.8k|            }
  328|       |
  329|       |            /* Change from left shift 4 to 6 - Varun */
  330|   599k|            u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1)
  331|   599k|                            | ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1)
  ------------------
  |  |  100|   597k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 71.0k, False: 526k]
  |  |  ------------------
  ------------------
  |  Branch (331:32): [True: 597k, False: 2.68k]
  |  Branch (331:59): [True: 506k, False: 90.3k]
  ------------------
  332|   506k|                                            && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|   506k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 40.7k, False: 466k]
  |  |  ------------------
  ------------------
  |  Branch (332:48): [True: 460k, False: 46.3k]
  ------------------
  333|   599k|            u1_colz |= (u1_packed_mb_sub_mb_mode << 6);
  334|   599k|        }
  335|   599k|        ps_mv = ps_mv_nmb_start + u4_sub_mb_num;
  336|   599k|        ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u4_sub_mb_num, u1_colz,
  337|   599k|                           u1_mb_partw, u1_mb_partw);
  338|   599k|        if(u1_wd_x == MB_SIZE)
  ------------------
  |  |  554|   599k|#define MB_SIZE             16
  ------------------
  |  Branch (338:12): [True: 596k, False: 3.77k]
  ------------------
  339|   596k|            ps_dec->u1_currB_type = 0;
  340|       |
  341|       |
  342|       |
  343|   599k|        return OK;
  ------------------
  |  |  114|   599k|#define OK        0
  ------------------
  344|   599k|    }
  345|       |    /***************************************************************************/
  346|       |    /* If present MB is 16x16 and the partition of colocated Mb is >= PRED_8x8 */
  347|       |    /* i.e 8x8 or less than 8x8 partitions then set up DMA for (0,0) and       */
  348|       |    /* spatially predicted motion vector and do the multiplexing after         */
  349|       |    /* motion compensation                                                     */
  350|       |    /***************************************************************************/
  351|       |
  352|       |
  353|  3.13k|    if((u1_wd_x == MB_SIZE) && (s_mvdirect.i1_num_partitions > 2))
  ------------------
  |  |  554|  3.13k|#define MB_SIZE             16
  ------------------
  |  Branch (353:8): [True: 3.03k, False: 92]
  |  Branch (353:32): [True: 1.86k, False: 1.17k]
  ------------------
  354|  1.86k|    {
  355|  1.86k|        ps_cur_mb_info->u1_Mux = 1;
  356|  1.86k|        if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (356:12): [True: 1.55k, False: 315]
  ------------------
  357|  1.55k|        {
  358|       |
  359|  1.55k|            {
  360|  1.55k|                pred_info_pkd_t *ps_pred_pkd;
  361|  1.55k|               WORD8 i1_ref_idx= 0;
  362|       |
  363|  1.55k|               ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  364|  1.55k|            ih264d_fill_pred_info(&(i2_spat_pred_mv[0]),4,4,0,i1_pred,
  365|  1.55k|                            ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  366|  1.55k|                            ps_pic_buff0->u1_pic_type);
  367|  1.55k|            ps_dec->u4_pred_info_pkd_idx++;
  368|  1.55k|            ps_cur_mb_info->u1_num_pred_parts++;
  369|       |
  370|       |
  371|  1.55k|            }
  372|       |
  373|       |            /******    (0,0) Motion vectors DMA     *****/
  374|  1.55k|            {
  375|  1.55k|                pred_info_pkd_t *ps_pred_pkd;
  376|  1.55k|               WORD16 i2_mv[2];
  377|  1.55k|               WORD8 i1_ref_idx= 0;
  378|       |
  379|  1.55k|               i2_mv[0] = 0;
  380|  1.55k|               i2_mv[1] = 0;
  381|       |
  382|  1.55k|               ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  383|  1.55k|            ih264d_fill_pred_info(i2_mv,4,4,0,i1_pred,
  384|  1.55k|                            ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  385|  1.55k|                            ps_pic_buff0->u1_pic_type);
  386|  1.55k|            ps_dec->u4_pred_info_pkd_idx++;
  387|  1.55k|            ps_cur_mb_info->u1_num_pred_parts++;
  388|       |
  389|       |
  390|  1.55k|            }
  391|  1.55k|        }
  392|  1.86k|        if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (392:12): [True: 1.52k, False: 341]
  ------------------
  393|  1.52k|        {
  394|  1.52k|            {
  395|  1.52k|                pred_info_pkd_t *ps_pred_pkd;
  396|  1.52k|               WORD16 i2_mv[2];
  397|  1.52k|               WORD8 i1_ref_idx= 0;
  398|       |
  399|  1.52k|               ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  400|  1.52k|            ih264d_fill_pred_info(&(i2_spat_pred_mv[2]),4,4,0,i1_pred,
  401|  1.52k|                            ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  402|  1.52k|                            ps_pic_buff1->u1_pic_type);
  403|  1.52k|            ps_dec->u4_pred_info_pkd_idx++;
  404|  1.52k|            ps_cur_mb_info->u1_num_pred_parts++;
  405|       |
  406|       |
  407|  1.52k|            }
  408|       |
  409|       |            /******    (0,0) Motion vectors DMA     *****/
  410|       |
  411|  1.52k|            {
  412|  1.52k|                pred_info_pkd_t *ps_pred_pkd;
  413|  1.52k|               WORD16 i2_mv[2];
  414|  1.52k|               WORD8 i1_ref_idx= 0;
  415|       |
  416|  1.52k|               i2_mv[0] = 0;
  417|  1.52k|               i2_mv[1] = 0;
  418|       |
  419|  1.52k|               ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  420|  1.52k|            ih264d_fill_pred_info(i2_mv,4,4,0,i1_pred,
  421|  1.52k|                            ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  422|  1.52k|                            ps_pic_buff1->u1_pic_type);
  423|  1.52k|            ps_dec->u4_pred_info_pkd_idx++;
  424|  1.52k|            ps_cur_mb_info->u1_num_pred_parts++;
  425|       |
  426|       |
  427|  1.52k|            }
  428|  1.52k|        }
  429|  1.86k|    }
  430|       |
  431|       |    /*u1_col = *(ps_col_pic->pu1_col_zero_flag + s_mvdirect.i4_mv_indices[0]);
  432|       |     u1_col &= 1;
  433|       |     u1_init = 0;*/
  434|       |
  435|  19.7k|    for(i = 0; i < s_mvdirect.i1_num_partitions; i++)
  ------------------
  |  Branch (435:16): [True: 16.5k, False: 3.13k]
  ------------------
  436|  16.5k|    {
  437|  16.5k|        partition_size = s_mvdirect.i1_partitionsize[i];
  438|  16.5k|        u4_sub_mb_num = s_mvdirect.i1_submb_num[i];
  439|       |
  440|  16.5k|        sub_partition = partition_size >> 2;
  441|  16.5k|        partition_size &= 0x3;
  442|  16.5k|        u1_mb_partw = pu1_mb_partw[partition_size];
  443|  16.5k|        u1_mb_parth = pu1_mb_parth[partition_size];
  444|  16.5k|        u2_mask = mask_table[partition_size];
  445|  16.5k|        if(sub_partition != 0)
  ------------------
  |  Branch (445:12): [True: 13.6k, False: 2.93k]
  ------------------
  446|  13.6k|        {
  447|  13.6k|            u1_mb_partw >>= 1;
  448|  13.6k|            u1_mb_parth >>= 1;
  449|  13.6k|            u2_mask = sub_mask_table[partition_size];
  450|  13.6k|        }
  451|       |
  452|  16.5k|        u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag
  453|  16.5k|                        + s_mvdirect.i4_mv_indices[i]);
  454|  16.5k|        u1_col_zero_flag = u1_col_zero_flag & 0x01;
  455|       |
  456|       |        /*if(u1_col != u1_col_zero_flag)
  457|       |         u1_init = 1;*/
  458|       |
  459|  16.5k|        if(u1_zero_pred_cond_f || ((i1_ref_frame0 == 0) && (u1_col_zero_flag == 1)))
  ------------------
  |  Branch (459:12): [True: 3.47k, False: 13.1k]
  |  Branch (459:36): [True: 12.8k, False: 224]
  |  Branch (459:60): [True: 5.62k, False: 7.27k]
  ------------------
  460|  9.10k|        {
  461|  9.10k|            pi2_final_mv0 = &i2_def_mv[0];
  462|  9.10k|            ui2_mask_fwd |= (u2_mask << u4_sub_mb_num);
  463|  9.10k|        }
  464|  7.49k|        else
  465|  7.49k|            pi2_final_mv0 = &i2_spat_pred_mv[0];
  466|       |
  467|  16.5k|        if(u1_zero_pred_cond_b || ((i1_ref_frame1 == 0) && (u1_col_zero_flag == 1)))
  ------------------
  |  Branch (467:12): [True: 2.96k, False: 13.6k]
  |  Branch (467:36): [True: 13.3k, False: 268]
  |  Branch (467:60): [True: 5.67k, False: 7.69k]
  ------------------
  468|  8.63k|        {
  469|  8.63k|            pi2_final_mv1 = &i2_def_mv[0];
  470|  8.63k|            ui2_mask_bwd |= (u2_mask << u4_sub_mb_num);
  471|  8.63k|        }
  472|  7.96k|        else
  473|  7.96k|            pi2_final_mv1 = &i2_spat_pred_mv[2];
  474|       |
  475|  16.5k|        if(ps_cur_mb_info->u1_Mux != 1)
  ------------------
  |  Branch (475:12): [True: 2.52k, False: 14.0k]
  ------------------
  476|  2.52k|        {
  477|       |            /*u1_sub_mb_x = u4_sub_mb_num & 0x03;
  478|       |             uc_sub_mb_y = (u4_sub_mb_num >> 2);*/
  479|  2.52k|            if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (479:16): [True: 1.57k, False: 950]
  ------------------
  480|  1.57k|            {
  481|       |
  482|  1.57k|                {
  483|  1.57k|                    pred_info_pkd_t *ps_pred_pkd;
  484|  1.57k|                   WORD8 i1_ref_idx= 0;
  485|       |
  486|  1.57k|                   ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  487|  1.57k|                ih264d_fill_pred_info(pi2_final_mv0,u1_mb_partw,u1_mb_parth,u4_sub_mb_num,i1_pred,
  488|  1.57k|                                ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  489|  1.57k|                                ps_pic_buff0->u1_pic_type);
  490|  1.57k|                ps_dec->u4_pred_info_pkd_idx++;
  491|  1.57k|                ps_cur_mb_info->u1_num_pred_parts++;
  492|       |
  493|       |
  494|  1.57k|                }
  495|       |
  496|  1.57k|            }
  497|       |
  498|  2.52k|            if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (498:16): [True: 2.30k, False: 217]
  ------------------
  499|  2.30k|            {
  500|  2.30k|                {
  501|  2.30k|                    pred_info_pkd_t *ps_pred_pkd;
  502|  2.30k|                   WORD8 i1_ref_idx= 0;
  503|       |
  504|  2.30k|                   ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  505|  2.30k|                ih264d_fill_pred_info(pi2_final_mv1,u1_mb_partw,u1_mb_parth,u4_sub_mb_num,i1_pred,
  506|  2.30k|                                ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  507|  2.30k|                                ps_pic_buff1->u1_pic_type);
  508|  2.30k|                ps_dec->u4_pred_info_pkd_idx++;
  509|  2.30k|                ps_cur_mb_info->u1_num_pred_parts++;
  510|       |
  511|       |
  512|  2.30k|                }
  513|  2.30k|            }
  514|  2.52k|        }
  515|       |
  516|       |        /* Replication optimisation */
  517|  16.5k|        s_temp_mv_pred.i2_mv[0] = pi2_final_mv0[0];
  518|  16.5k|        s_temp_mv_pred.i2_mv[1] = pi2_final_mv0[1];
  519|  16.5k|        s_temp_mv_pred.i2_mv[2] = pi2_final_mv1[0];
  520|  16.5k|        s_temp_mv_pred.i2_mv[3] = pi2_final_mv1[1];
  521|       |
  522|       |        /* Calculating colocated zero information */
  523|  16.5k|        {
  524|  16.5k|            WORD16 i2_mv_x = 0, i2_mv_y = 0;
  525|       |            /*************************************/
  526|       |            /* If(bit2 and bit3 set)             */
  527|       |            /* then                              */
  528|       |            /*  (bit0 and bit1) => submmbmode    */
  529|       |            /*  (bit2 and bit3) => mbmode        */
  530|       |            /* else                              */
  531|       |            /*  (bit0 and bit1) => mbmode        */
  532|       |            /*************************************/
  533|  16.5k|            UWORD8 u1_packed_mb_sub_mb_mode =
  534|  16.5k|                            sub_partition ? (s_mvdirect.i1_partitionsize[i]) : ((s_mvdirect.i1_partitionsize[i])
  ------------------
  |  Branch (534:29): [True: 13.6k, False: 2.93k]
  ------------------
  535|  2.93k|                                                            << 2);
  536|       |
  537|  16.5k|            if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (537:16): [True: 13.1k, False: 3.47k]
  ------------------
  538|  13.1k|            {
  539|  13.1k|                i2_mv_x = pi2_final_mv0[0];
  540|  13.1k|                i2_mv_y = pi2_final_mv0[1];
  541|  13.1k|            }
  542|  3.47k|            else
  543|  3.47k|            {
  544|  3.47k|                i2_mv_x = pi2_final_mv1[0];
  545|  3.47k|                i2_mv_y = pi2_final_mv1[1];
  546|  3.47k|            }
  547|       |
  548|  16.5k|            u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1)
  549|  16.5k|                            | ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1)
  ------------------
  |  |  100|  16.3k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 436, False: 15.8k]
  |  |  ------------------
  ------------------
  |  Branch (549:32): [True: 16.3k, False: 290]
  |  Branch (549:59): [True: 16.0k, False: 303]
  ------------------
  550|  16.0k|                                            && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|  16.0k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 246, False: 15.7k]
  |  |  ------------------
  ------------------
  |  Branch (550:48): [True: 15.8k, False: 203]
  ------------------
  551|  16.5k|            u1_colz |= (u1_packed_mb_sub_mb_mode << 4);
  552|  16.5k|        }
  553|  16.5k|        ps_mv = ps_mv_nmb_start + u4_sub_mb_num;
  554|  16.5k|        ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u4_sub_mb_num, u1_colz,
  555|  16.5k|                           u1_mb_parth, u1_mb_partw);
  556|  16.5k|    }
  557|  3.13k|    i = 0;
  558|  3.13k|    if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (558:8): [True: 2.34k, False: 791]
  ------------------
  559|  2.34k|        ps_cur_mb_info->u2_mask[i++] = ui2_mask_fwd;
  560|  3.13k|    if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (560:8): [True: 2.68k, False: 450]
  ------------------
  561|  2.68k|        ps_cur_mb_info->u2_mask[i] = ui2_mask_bwd;
  562|       |
  563|       |    /*if(u1_init)
  564|       |     H264_DEC_DEBUG_PRINT("hit\n");
  565|       |     else
  566|       |     H264_DEC_DEBUG_PRINT("miss\n");*/
  567|       |
  568|  3.13k|    return OK;
  ------------------
  |  |  114|  3.13k|#define OK        0
  ------------------
  569|   602k|}
ih264d_decode_temporal_direct:
  587|   292k|{
  588|   292k|    struct pic_buffer_t *ps_pic_buff0, *ps_pic_buff1, *ps_col_pic;
  589|   292k|    mv_pred_t *ps_mv, s_temp_mv_pred;
  590|   292k|    UWORD32 u4_sub_mb_num;
  591|   292k|    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  592|   292k|    WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
  593|   292k|    UWORD8 u1_mb_partw, u1_mb_parth;
  594|   292k|    UWORD8 i, partition_size, sub_partition;
  595|   292k|    UWORD32 *pui32_weight_ofsts = NULL;
  596|   292k|    directmv_t s_mvdirect;
  597|   292k|    const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
  598|   292k|    const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
  599|   292k|    WORD8 c_refFrm0, c_refFrm1;
  600|   292k|    UWORD8 u1_ref_idx0, u1_is_cur_mb_fld;
  601|   292k|    WORD32 pic0_poc, pic1_poc, cur_poc;
  602|   292k|    WORD32 ret = 0;
  603|       |
  604|   292k|    u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
  605|   292k|    ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][0];
  606|       |
  607|       |    /**********************************************************************/
  608|       |    /* Call the function which gets the number of partitions and          */
  609|       |    /* partition info of colocated Mb                                     */
  610|       |    /**********************************************************************/
  611|   292k|    ps_dec->pf_parse_mvdirect(ps_dec, ps_dec->ps_col_pic, &s_mvdirect, u1_wd_x,
  612|   292k|                           ps_dec->i4_submb_ofst, ps_cur_mb_info);
  613|   292k|    ps_col_pic = ps_dec->ps_col_pic;
  614|       |
  615|   601k|    for(i = 0; i < s_mvdirect.i1_num_partitions; i++)
  ------------------
  |  Branch (615:16): [True: 308k, False: 292k]
  ------------------
  616|   308k|    {
  617|   308k|        UWORD8 u1_colz;
  618|   308k|        partition_size = s_mvdirect.i1_partitionsize[i];
  619|   308k|        u4_sub_mb_num = s_mvdirect.i1_submb_num[i];
  620|   308k|        ps_mv = ps_col_pic->ps_mv + s_mvdirect.i4_mv_indices[i];
  621|       |
  622|       |        /* This should be removed to catch unitialized memory read */
  623|   308k|        u1_ref_idx0 = 0;
  624|       |
  625|   308k|        sub_partition = partition_size >> 2;
  626|   308k|        partition_size &= 0x3;
  627|   308k|        u1_mb_partw = pu1_mb_partw[partition_size];
  628|   308k|        u1_mb_parth = pu1_mb_parth[partition_size];
  629|   308k|        if(sub_partition != 0)
  ------------------
  |  Branch (629:12): [True: 15.7k, False: 293k]
  ------------------
  630|  15.7k|        {
  631|  15.7k|            u1_mb_partw >>= 1;
  632|  15.7k|            u1_mb_parth >>= 1;
  633|  15.7k|        }
  634|   308k|        c_refFrm0 = ps_mv->i1_ref_frame[0];
  635|   308k|        c_refFrm1 = ps_mv->i1_ref_frame[1];
  636|       |
  637|   308k|        if((c_refFrm0 == -1) && (c_refFrm1 == -1))
  ------------------
  |  Branch (637:12): [True: 3.80k, False: 305k]
  |  Branch (637:33): [True: 352, False: 3.45k]
  ------------------
  638|    352|        {
  639|    352|            u1_ref_idx0 = 0;
  640|    352|            ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][0];
  641|    352|            if(u1_mbaff && u1_is_cur_mb_fld)
  ------------------
  |  Branch (641:16): [True: 0, False: 352]
  |  Branch (641:28): [True: 0, False: 0]
  ------------------
  642|      0|            {
  643|      0|                if(ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (643:20): [True: 0, False: 0]
  ------------------
  644|      0|                {
  645|      0|                    pic0_poc = ps_pic_buff0->i4_top_field_order_cnt;
  646|      0|                    pic1_poc = ps_pic_buff1->i4_top_field_order_cnt;
  647|      0|                    cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
  648|      0|                }
  649|      0|                else
  650|      0|                {
  651|      0|                    pic1_poc = ps_pic_buff1->i4_bottom_field_order_cnt;
  652|      0|                    cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
  653|      0|                    ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS];
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  654|      0|                    pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt;
  655|      0|                    ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][MAX_REF_BUFS];
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  656|      0|                }
  657|      0|            }
  658|    352|            else
  659|    352|            {
  660|    352|                pic0_poc = ps_pic_buff0->i4_avg_poc;
  661|    352|                pic1_poc = ps_pic_buff1->i4_avg_poc;
  662|    352|                cur_poc = ps_dec->ps_cur_pic->i4_poc;
  663|    352|            }
  664|    352|        }
  665|   308k|        else
  666|   308k|        {
  667|   308k|            UWORD8 uc_i, u1_num_frw_ref_pics;
  668|   308k|            UWORD8 buf_id, u1_pic_type;
  669|   308k|            buf_id = ps_mv->u1_col_ref_pic_idx;
  670|   308k|            u1_pic_type = ps_mv->u1_pic_type;
  671|   308k|            if(ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (671:16): [True: 0, False: 308k]
  ------------------
  672|      0|            {
  673|      0|                if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD)
  ------------------
  |  |   48|      0|#define FRM_TO_FLD    1
  ------------------
  |  Branch (673:20): [True: 0, False: 0]
  ------------------
  674|      0|                {
  675|      0|                    u1_pic_type = TOP_FLD;
  ------------------
  |  |  353|      0|#define TOP_FLD         0x01
  ------------------
  676|      0|                    if(ps_dec->ps_cur_slice->u1_bottom_field_flag)
  ------------------
  |  Branch (676:24): [True: 0, False: 0]
  ------------------
  677|      0|                        u1_pic_type = BOT_FLD;
  ------------------
  |  |  354|      0|#define BOT_FLD         0x02
  ------------------
  678|      0|                }
  679|      0|            }
  680|   308k|            u1_num_frw_ref_pics =
  681|   308k|                            ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
  682|       |
  683|   437k|            for(uc_i = 0; uc_i < u1_num_frw_ref_pics; uc_i++)
  ------------------
  |  Branch (683:27): [True: 400k, False: 37.0k]
  ------------------
  684|   400k|            {
  685|   400k|                if(ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (685:20): [True: 0, False: 400k]
  ------------------
  686|      0|                {
  687|      0|                    if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_mv_buf_id == buf_id)
  ------------------
  |  Branch (687:24): [True: 0, False: 0]
  ------------------
  688|      0|                    {
  689|      0|                        if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_pic_type
  ------------------
  |  Branch (689:28): [True: 0, False: 0]
  ------------------
  690|      0|                                        == u1_pic_type)
  691|      0|                        {
  692|      0|                            u1_ref_idx0 = uc_i;
  693|      0|                            break;
  694|      0|                        }
  695|      0|                    }
  696|      0|                }
  697|   400k|                else
  698|   400k|                {
  699|   400k|                    if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_mv_buf_id == buf_id)
  ------------------
  |  Branch (699:24): [True: 271k, False: 129k]
  ------------------
  700|   271k|                    {
  701|   271k|                        u1_ref_idx0 = uc_i;
  702|   271k|                        break;
  703|   271k|                    }
  704|   400k|                }
  705|   400k|            }
  706|       |
  707|   308k|            ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u1_ref_idx0];
  708|   308k|            ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][0];
  709|       |
  710|   308k|            if(u1_mbaff && u1_is_cur_mb_fld)
  ------------------
  |  Branch (710:16): [True: 0, False: 308k]
  |  Branch (710:28): [True: 0, False: 0]
  ------------------
  711|      0|            {
  712|      0|                pic0_poc = ps_pic_buff0->i4_top_field_order_cnt;
  713|      0|                u1_ref_idx0 <<= 1;
  714|      0|                if(s_mvdirect.u1_vert_mv_scale == ONE_TO_ONE)
  ------------------
  |  |   47|      0|#define ONE_TO_ONE    0
  ------------------
  |  Branch (714:20): [True: 0, False: 0]
  ------------------
  715|      0|                {
  716|      0|                    if(u1_pic_type == BOT_FLD)
  ------------------
  |  |  354|      0|#define BOT_FLD         0x02
  ------------------
  |  Branch (716:24): [True: 0, False: 0]
  ------------------
  717|      0|                    {
  718|      0|                        pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt;
  719|      0|                        ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][(u1_ref_idx0
  720|      0|                                        >> 1) + MAX_REF_BUFS];
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  721|      0|                        if(ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (721:28): [True: 0, False: 0]
  ------------------
  722|      0|                            u1_ref_idx0++;
  723|      0|                    }
  724|      0|                    else
  725|      0|                    {
  726|      0|                        if(1 - ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (726:28): [True: 0, False: 0]
  ------------------
  727|      0|                            u1_ref_idx0++;
  728|      0|                    }
  729|      0|                }
  730|      0|                if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD)
  ------------------
  |  |   48|      0|#define FRM_TO_FLD    1
  ------------------
  |  Branch (730:20): [True: 0, False: 0]
  ------------------
  731|      0|                {
  732|      0|                    if(1 - ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (732:24): [True: 0, False: 0]
  ------------------
  733|      0|                    {
  734|      0|                        pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt;
  735|      0|                        ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][(u1_ref_idx0
  736|      0|                                        >> 1) + MAX_REF_BUFS];
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  737|      0|                    }
  738|      0|                }
  739|      0|                if(ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (739:20): [True: 0, False: 0]
  ------------------
  740|      0|                {
  741|      0|                    pic1_poc = ps_pic_buff1->i4_top_field_order_cnt;
  742|      0|                    cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
  743|      0|                }
  744|      0|                else
  745|      0|                {
  746|      0|                    pic1_poc = ps_pic_buff1->i4_bottom_field_order_cnt;
  747|      0|                    cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
  748|      0|                    ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS];
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  749|      0|                }
  750|      0|            }
  751|   308k|            else
  752|   308k|            {
  753|   308k|                pic0_poc = ps_pic_buff0->i4_avg_poc;
  754|   308k|                pic1_poc = ps_pic_buff1->i4_avg_poc;
  755|   308k|                cur_poc = ps_dec->ps_cur_pic->i4_poc;
  756|   308k|            }
  757|   308k|        }
  758|   308k|        {
  759|   308k|            WORD16 i16_td;
  760|   308k|            WORD64 diff;
  761|   308k|            if(c_refFrm0 >= 0)
  ------------------
  |  Branch (761:16): [True: 305k, False: 3.80k]
  ------------------
  762|   305k|            {
  763|   305k|                i2_mv_x0 = ps_mv->i2_mv[0];
  764|   305k|                i2_mv_y0 = ps_mv->i2_mv[1];
  765|   305k|            }
  766|  3.80k|            else if(c_refFrm1 >= 0)
  ------------------
  |  Branch (766:21): [True: 3.45k, False: 352]
  ------------------
  767|  3.45k|            {
  768|  3.45k|                i2_mv_x0 = ps_mv->i2_mv[2];
  769|  3.45k|                i2_mv_y0 = ps_mv->i2_mv[3];
  770|  3.45k|            }
  771|    352|            else
  772|    352|            {
  773|    352|                i2_mv_x0 = 0;
  774|    352|                i2_mv_y0 = 0;
  775|    352|            }
  776|       |            /* If FRM_TO_FLD or FLD_TO_FRM scale the "y" component of the colocated Mv*/
  777|   308k|            if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD)
  ------------------
  |  |   48|   308k|#define FRM_TO_FLD    1
  ------------------
  |  Branch (777:16): [True: 0, False: 308k]
  ------------------
  778|      0|            {
  779|      0|                i2_mv_y0 /= 2;
  780|      0|            }
  781|   308k|            else if(s_mvdirect.u1_vert_mv_scale == FLD_TO_FRM)
  ------------------
  |  |   49|   308k|#define FLD_TO_FRM    2
  ------------------
  |  Branch (781:21): [True: 0, False: 308k]
  ------------------
  782|      0|            {
  783|      0|                i2_mv_y0 *= 2;
  784|      0|            }
  785|       |
  786|   308k|            diff = (WORD64)pic1_poc - pic0_poc;
  787|   308k|            i16_td = CLIP_S8(diff);
  ------------------
  |  |   59|   308k|#define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   308k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.61k, False: 304k]
  |  |  |  |  |  Branch (77:54): [True: 12.7k, False: 291k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  788|   308k|            if((ps_pic_buff0->u1_is_short == 0) || (i16_td == 0))
  ------------------
  |  Branch (788:16): [True: 72.4k, False: 236k]
  |  Branch (788:52): [True: 195k, False: 40.7k]
  ------------------
  789|   268k|            {
  790|   268k|                i2_mv_x1 = 0;
  791|   268k|                i2_mv_y1 = 0;
  792|   268k|            }
  793|  40.7k|            else
  794|  40.7k|            {
  795|  40.7k|                WORD16 i2_tb, i2_tx, i2_dist_scale_factor, i2_temp;
  796|       |
  797|  40.7k|                diff = (WORD64)cur_poc - pic0_poc;
  798|  40.7k|                i2_tb = CLIP_S8(diff);
  ------------------
  |  |   59|  40.7k|#define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  40.7k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.66k, False: 35.0k]
  |  |  |  |  |  Branch (77:54): [True: 13.0k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  799|       |
  800|  40.7k|                i2_tx = (16384 + ABS(SIGN_POW2_DIV(i16_td, 1))) / i16_td;
  ------------------
  |  |  100|  81.4k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 8.85k, False: 31.8k]
  |  |  |  Branch (100:27): [True: 10.3k, False: 30.3k]
  |  |  |  Branch (100:39): [True: 8.85k, False: 0]
  |  |  |  Branch (100:46): [True: 1.50k, False: 30.3k]
  |  |  ------------------
  ------------------
  801|  40.7k|                i2_dist_scale_factor = CLIP_S11(
  ------------------
  |  |   65|  40.7k|#define CLIP_S11(x) CLIP3(-1024, 1023, (x))
  |  |  ------------------
  |  |  |  |   77|  40.7k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.62k, False: 39.0k]
  |  |  |  |  |  Branch (77:54): [True: 2.21k, False: 36.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  802|  40.7k|                                            (((i2_tb * i2_tx) + 32) >> 6));
  803|  40.7k|                i2_temp = (i2_mv_x0 * i2_dist_scale_factor + 128) >> 8;
  804|  40.7k|                i2_mv_x1 = i2_temp - i2_mv_x0;
  805|  40.7k|                i2_mv_x0 = i2_temp;
  806|       |
  807|  40.7k|                i2_temp = (i2_mv_y0 * i2_dist_scale_factor + 128) >> 8;
  808|  40.7k|                i2_mv_y1 = i2_temp - i2_mv_y0;
  809|  40.7k|                i2_mv_y0 = i2_temp;
  810|  40.7k|            }
  811|   308k|            {
  812|   308k|                mv_pred_t *ps_mv;
  813|       |
  814|       |                /*u1_sub_mb_x = u4_sub_mb_num & 0x03;
  815|       |                 uc_sub_mb_y = u4_sub_mb_num >> 2;*/
  816|   308k|                if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
  ------------------
  |  Branch (816:20): [True: 135k, False: 173k]
  ------------------
  817|   135k|                {
  818|   135k|                    UWORD8 u1_idx =
  819|   135k|                                    u1_ref_idx0
  820|   135k|                                                    * ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
  821|   135k|                    UWORD8 u1_scale_ref = u1_mbaff && u1_is_cur_mb_fld;
  ------------------
  |  Branch (821:43): [True: 0, False: 135k]
  |  Branch (821:55): [True: 0, False: 0]
  ------------------
  822|   135k|                    if(u1_scale_ref)
  ------------------
  |  Branch (822:24): [True: 0, False: 135k]
  ------------------
  823|      0|                        u1_idx >>= 1;
  824|   135k|                    pui32_weight_ofsts = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
  825|   135k|                                    * X3(u1_idx)];
  ------------------
  |  |   92|   135k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  826|   135k|                    if(u1_scale_ref
  ------------------
  |  Branch (826:24): [True: 0, False: 135k]
  ------------------
  827|      0|                                    && (ps_dec->ps_cur_pps->u1_wted_bipred_idc
  ------------------
  |  Branch (827:40): [True: 0, False: 0]
  ------------------
  828|      0|                                                    == 2))
  829|      0|                    {
  830|      0|                        WORD16 i2_ref_idx;
  831|      0|                        i2_ref_idx = u1_ref_idx0;
  832|      0|                        i2_ref_idx *=
  833|      0|                                        (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
  834|      0|                                                        << 1);
  835|      0|                        if(!ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (835:28): [True: 0, False: 0]
  ------------------
  836|      0|                            i2_ref_idx +=
  837|      0|                                            (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
  838|      0|                                                            << 1)
  839|      0|                                                            * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
  840|      0|                                                                            << 1);
  841|      0|                        pui32_weight_ofsts =
  842|      0|                                        (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2
  843|      0|                                                        * X3(i2_ref_idx)];
  ------------------
  |  |   92|      0|#define X3(a)   (((a) << 1) + (a))
  ------------------
  844|      0|                    }
  845|   135k|                }
  846|   308k|                {
  847|   308k|                    pred_info_pkd_t *ps_pred_pkd;
  848|   308k|                   WORD16 i2_mv[2];
  849|   308k|                   WORD8 i1_ref_idx= 0;
  850|       |
  851|   308k|                   i2_mv[0] = i2_mv_x0;
  852|   308k|                   i2_mv[1] = i2_mv_y0;
  853|       |
  854|   308k|                   ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  855|   308k|                ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u4_sub_mb_num,PRED_L0 | PRED_L1,
  ------------------
  |  |  483|   308k|#define PRED_L0   1
  ------------------
                              ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u4_sub_mb_num,PRED_L0 | PRED_L1,
  ------------------
  |  |  484|   308k|#define PRED_L1   2
  ------------------
  856|   308k|                                ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  857|   308k|                                ps_pic_buff0->u1_pic_type);
  858|   308k|                ps_dec->u4_pred_info_pkd_idx++;
  859|   308k|                ps_cur_mb_info->u1_num_pred_parts++;
  860|       |
  861|       |
  862|   308k|                }
  863|   308k|                {
  864|   308k|                   pred_info_pkd_t *ps_pred_pkd;
  865|   308k|                   WORD16 i2_mv[2];
  866|   308k|                   WORD8 i1_ref_idx= 0;
  867|       |
  868|   308k|                   i2_mv[0] = i2_mv_x1;
  869|   308k|                   i2_mv[1] = i2_mv_y1;
  870|       |
  871|   308k|                   ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  872|   308k|                ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u4_sub_mb_num,PRED_L0 | PRED_L1,
  ------------------
  |  |  483|   308k|#define PRED_L0   1
  ------------------
                              ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u4_sub_mb_num,PRED_L0 | PRED_L1,
  ------------------
  |  |  484|   308k|#define PRED_L1   2
  ------------------
  873|   308k|                                ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
  874|   308k|                                ps_pic_buff1->u1_pic_type);
  875|   308k|                ps_dec->u4_pred_info_pkd_idx++;
  876|   308k|                ps_cur_mb_info->u1_num_pred_parts++;
  877|       |
  878|       |
  879|   308k|                }
  880|       |
  881|       |                /* Replication optimisation */
  882|   308k|                s_temp_mv_pred.i2_mv[0] = i2_mv_x0;
  883|   308k|                s_temp_mv_pred.i2_mv[1] = i2_mv_y0;
  884|   308k|                s_temp_mv_pred.i2_mv[2] = i2_mv_x1;
  885|   308k|                s_temp_mv_pred.i2_mv[3] = i2_mv_y1;
  886|   308k|                s_temp_mv_pred.i1_ref_frame[0] = u1_ref_idx0;
  887|   308k|                s_temp_mv_pred.i1_ref_frame[1] = 0;
  888|   308k|                s_temp_mv_pred.u1_col_ref_pic_idx = ps_pic_buff0->u1_mv_buf_id;
  889|   308k|                s_temp_mv_pred.u1_pic_type = ps_pic_buff0->u1_pic_type;
  890|   308k|                ps_mv = ps_dec->ps_mv_cur + (u4_mb_num << 4) + u4_sub_mb_num;
  891|       |
  892|   308k|                {
  893|   308k|                    WORD16 i2_mv_x = 0, i2_mv_y = 0;
  894|   308k|                    UWORD8 u1_packed_mb_sub_mb_mode =
  895|   308k|                                    sub_partition ? (s_mvdirect.i1_partitionsize[i]) : ((s_mvdirect.i1_partitionsize[i])
  ------------------
  |  Branch (895:37): [True: 15.7k, False: 293k]
  ------------------
  896|   293k|                                                                    << 2);
  897|       |
  898|   308k|                    if(c_refFrm0 >= 0)
  ------------------
  |  Branch (898:24): [True: 305k, False: 3.80k]
  ------------------
  899|   305k|                    {
  900|   305k|                        i2_mv_x = i2_mv_x0;
  901|   305k|                        i2_mv_y = i2_mv_y0;
  902|   305k|                    }
  903|  3.80k|                    else
  904|  3.80k|                    {
  905|  3.80k|                        i2_mv_x = i2_mv_x1;
  906|  3.80k|                        i2_mv_y = i2_mv_y1;
  907|  3.80k|                    }
  908|       |
  909|   308k|                    u1_colz =
  910|   308k|                                    (ps_cur_mb_info->u1_mb_field_decodingflag << 1)
  911|   308k|                                                    | ((u1_ref_idx0 == 0)
  ------------------
  |  Branch (911:56): [True: 247k, False: 61.6k]
  ------------------
  912|   247k|                                                                    && (ABS(i2_mv_x)
  ------------------
  |  |  100|   247k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 5.13k, False: 242k]
  |  |  ------------------
  ------------------
  |  Branch (912:72): [True: 232k, False: 14.2k]
  ------------------
  913|   247k|                                                                                    <= 1)
  914|   232k|                                                                    && (ABS(i2_mv_y)
  ------------------
  |  |  100|   232k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 2.99k, False: 229k]
  |  |  ------------------
  ------------------
  |  Branch (914:72): [True: 225k, False: 7.14k]
  ------------------
  915|   232k|                                                                                    <= 1));
  916|   308k|                    u1_colz |= (u1_packed_mb_sub_mb_mode << 4);
  917|   308k|                }
  918|   308k|                ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u4_sub_mb_num,
  919|   308k|                                   u1_colz, u1_mb_parth, u1_mb_partw);
  920|   308k|            }
  921|   308k|        }
  922|   308k|    }
  923|       |    /* return value set to UWORD8 to make it homogeneous  */
  924|       |    /* with decodespatialdirect                           */
  925|   292k|    return OK;
  ------------------
  |  |  114|   292k|#define OK        0
  ------------------
  926|   292k|}
ih264d_init_ref_idx_lx_b:
 1227|  42.4k|{
 1228|  42.4k|    struct pic_buffer_t *ps_ref_pic_buf_lx;
 1229|  42.4k|    dpb_manager_t *ps_dpb_mgr;
 1230|  42.4k|    struct dpb_info_t *ps_next_dpb;
 1231|  42.4k|    WORD32 i_cur_poc, i_max_st_poc, i_min_st_poc, i_ref_poc, i_temp_poc;
 1232|  42.4k|    WORD8 i, j;
 1233|  42.4k|    UWORD8 u1_max_lt_index, u1_min_lt_index;
 1234|  42.4k|    UWORD32 u4_lt_index;
 1235|  42.4k|    WORD32 i_cur_idx;
 1236|  42.4k|    UWORD8 u1_field_pic_flag;
 1237|  42.4k|    dec_slice_params_t *ps_cur_slice;
 1238|  42.4k|    UWORD8 u1_L0, u1_L1;
 1239|  42.4k|    UWORD8 u1_num_short_term_bufs;
 1240|  42.4k|    UWORD8 u1_max_ref_idx_l0, u1_max_ref_idx_l1;
 1241|  42.4k|    struct pic_buffer_t *aps_st_pic_bufs[2 * MAX_REF_BUFS] = {NULL};
 1242|  42.4k|    ps_cur_slice = ps_dec->ps_cur_slice;
 1243|  42.4k|    u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
 1244|  42.4k|    u1_max_ref_idx_l0 = ps_cur_slice->u1_num_ref_idx_lx_active[0]
 1245|  42.4k|                    << u1_field_pic_flag;
 1246|  42.4k|    u1_max_ref_idx_l1 = ps_cur_slice->u1_num_ref_idx_lx_active[1]
 1247|  42.4k|                    << u1_field_pic_flag;
 1248|       |
 1249|  42.4k|    ps_dpb_mgr = ps_dec->ps_dpb_mgr;
 1250|       |    /* Get the current POC */
 1251|  42.4k|    i_cur_poc = ps_dec->ps_cur_pic->i4_poc;
 1252|       |
 1253|       |    /* Get MaxStPOC,MinStPOC,MaxLt,MinLt */
 1254|  42.4k|    i_max_st_poc = i_cur_poc;
 1255|  42.4k|    i_min_st_poc = i_cur_poc;
 1256|  42.4k|    u1_max_lt_index = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  42.4k|#define MAX_REF_BUFS    32
  ------------------
 1257|  42.4k|    u1_min_lt_index = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  42.4k|#define MAX_REF_BUFS    32
  ------------------
 1258|       |    /* Start from ST head */
 1259|  42.4k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
 1260|  78.0k|    for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
  ------------------
  |  Branch (1260:16): [True: 35.5k, False: 42.4k]
  ------------------
 1261|  35.5k|    {
 1262|  35.5k|        i_ref_poc = ps_next_dpb->ps_pic_buf->i4_poc;
 1263|  35.5k|        if(i_ref_poc < i_cur_poc)
  ------------------
  |  Branch (1263:12): [True: 10.5k, False: 25.0k]
  ------------------
 1264|  10.5k|        {
 1265|       |            /* RefPic Buf POC is before Current POC in display order */
 1266|  10.5k|            i_min_st_poc = MIN(i_min_st_poc, i_ref_poc);
  ------------------
  |  |   61|  10.5k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 1.62k, False: 8.87k]
  |  |  ------------------
  ------------------
 1267|  10.5k|        }
 1268|  25.0k|        else
 1269|  25.0k|        {
 1270|       |            /* RefPic Buf POC is after Current POC in display order */
 1271|  25.0k|            i_max_st_poc = MAX(i_max_st_poc, i_ref_poc);
  ------------------
  |  |   60|  25.0k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 1.73k, False: 23.3k]
  |  |  ------------------
  ------------------
 1272|  25.0k|        }
 1273|       |
 1274|       |        /* Chase the next link */
 1275|  35.5k|        ps_next_dpb = ps_next_dpb->ps_prev_short;
 1276|  35.5k|    }
 1277|       |
 1278|       |    /* Sort ST ref pocs in ascending order */
 1279|  42.4k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
 1280|  78.0k|    for (j = 0; j < ps_dpb_mgr->u1_num_st_ref_bufs; j++)
  ------------------
  |  Branch (1280:17): [True: 35.5k, False: 42.4k]
  ------------------
 1281|  35.5k|    {
 1282|  35.5k|        aps_st_pic_bufs[j] = ps_next_dpb->ps_pic_buf;
 1283|  35.5k|        ps_next_dpb = ps_next_dpb->ps_prev_short;
 1284|  35.5k|    }
 1285|  42.4k|    qsort(aps_st_pic_bufs, ps_dpb_mgr->u1_num_st_ref_bufs,
 1286|  42.4k|        sizeof(aps_st_pic_bufs[0]), poc_compare);
 1287|       |
 1288|       |    /* Start from LT head */
 1289|  42.4k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
 1290|  42.4k|    if(ps_next_dpb)
  ------------------
  |  Branch (1290:8): [True: 4.60k, False: 37.8k]
  ------------------
 1291|  4.60k|    {
 1292|  4.60k|        u1_max_lt_index = ps_next_dpb->u1_lt_idx;
 1293|  4.60k|        u1_min_lt_index = ps_next_dpb->u1_lt_idx;
 1294|  4.60k|    }
 1295|  47.8k|    for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (1295:16): [True: 5.40k, False: 42.4k]
  ------------------
 1296|  5.40k|    {
 1297|  5.40k|        u4_lt_index = ps_next_dpb->u1_lt_idx;
 1298|  5.40k|        u1_max_lt_index = (UWORD8)(MAX(u1_max_lt_index, u4_lt_index));
  ------------------
  |  |   60|  5.40k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 5.40k]
  |  |  ------------------
  ------------------
 1299|  5.40k|        u1_min_lt_index = (UWORD8)(MIN(u1_min_lt_index, u4_lt_index));
  ------------------
  |  |   61|  5.40k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 808, False: 4.60k]
  |  |  ------------------
  ------------------
 1300|       |
 1301|       |        /* Chase the next link */
 1302|  5.40k|        ps_next_dpb = ps_next_dpb->ps_prev_long;
 1303|  5.40k|    }
 1304|       |
 1305|       |    /* 1. Initialize refIdxL0 */
 1306|  42.4k|    u1_L0 = 0;
 1307|  42.4k|    i_temp_poc = i_cur_poc;
 1308|  42.4k|    if(u1_field_pic_flag)
  ------------------
  |  Branch (1308:8): [True: 0, False: 42.4k]
  ------------------
 1309|      0|    {
 1310|      0|        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
 1311|      0|        ps_ref_pic_buf_lx += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1312|      0|    }
 1313|  42.4k|    else
 1314|  42.4k|    {
 1315|  42.4k|        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
 1316|       |        /* Avoid integer overflow while decrementing by one */
 1317|  42.4k|        if (i_temp_poc > INT32_MIN)
  ------------------
  |  Branch (1317:13): [True: 42.3k, False: 69]
  ------------------
 1318|  42.3k|            i_temp_poc--;
 1319|  42.4k|    }
 1320|       |
 1321|  42.4k|    i_cur_idx = -1;
 1322|  78.0k|    for(j = 0; j < ps_dpb_mgr->u1_num_st_ref_bufs; j++)
  ------------------
  |  Branch (1322:16): [True: 35.5k, False: 42.4k]
  ------------------
 1323|  35.5k|    {
 1324|  35.5k|        if (NULL == aps_st_pic_bufs[j])
  ------------------
  |  Branch (1324:13): [True: 0, False: 35.5k]
  ------------------
 1325|      0|        {
 1326|      0|            break;
 1327|      0|        }
 1328|  35.5k|        if (aps_st_pic_bufs[j]->i4_poc <= i_temp_poc)
  ------------------
  |  Branch (1328:13): [True: 10.5k, False: 25.0k]
  ------------------
 1329|  10.5k|        {
 1330|  10.5k|            i_cur_idx = j;
 1331|  10.5k|        }
 1332|  35.5k|    }
 1333|       |    /* Arrange all short term buffers in output order as given by POC */
 1334|       |    /* 1.1 Arrange POC's less than CurrPOC in the descending POC order starting
 1335|       |     from (CurrPOC - 1)*/
 1336|  52.9k|    for(j = i_cur_idx; j >= 0; j--)
  ------------------
  |  Branch (1336:24): [True: 10.5k, False: 42.4k]
  ------------------
 1337|  10.5k|    {
 1338|  10.5k|        if(aps_st_pic_bufs[j])
  ------------------
  |  Branch (1338:12): [True: 10.5k, False: 0]
  ------------------
 1339|  10.5k|        {
 1340|       |            /* Copy info in pic buffer */
 1341|  10.5k|            ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1342|  10.5k|                                               aps_st_pic_bufs[j]);
 1343|  10.5k|            ps_ref_pic_buf_lx++;
 1344|  10.5k|            u1_L0++;
 1345|  10.5k|        }
 1346|  10.5k|    }
 1347|       |
 1348|       |    /* 1.2. Arrange POC's more than CurrPOC in the ascending POC order starting
 1349|       |     from (CurrPOC + 1)*/
 1350|  67.5k|    for(j = i_cur_idx + 1; j < ps_dpb_mgr->u1_num_st_ref_bufs; j++)
  ------------------
  |  Branch (1350:28): [True: 25.0k, False: 42.4k]
  ------------------
 1351|  25.0k|    {
 1352|  25.0k|        if(aps_st_pic_bufs[j])
  ------------------
  |  Branch (1352:12): [True: 25.0k, False: 0]
  ------------------
 1353|  25.0k|        {
 1354|  25.0k|            ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1355|  25.0k|                                               aps_st_pic_bufs[j]);
 1356|  25.0k|            ps_ref_pic_buf_lx++;
 1357|  25.0k|            u1_L0++;
 1358|  25.0k|        }
 1359|  25.0k|    }
 1360|       |
 1361|       |    /* 1.3 Arrange all Long term buffers in ascending order, in LongtermIndex */
 1362|       |    /* Start from ST head */
 1363|       |
 1364|  42.4k|    u1_num_short_term_bufs = u1_L0;
 1365|  86.4k|    for(u4_lt_index = u1_min_lt_index; u4_lt_index <= u1_max_lt_index; u4_lt_index++)
  ------------------
  |  Branch (1365:40): [True: 43.9k, False: 42.4k]
  ------------------
 1366|  43.9k|    {
 1367|  43.9k|        ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
 1368|  47.0k|        for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (1368:20): [True: 8.44k, False: 38.5k]
  ------------------
 1369|  8.44k|        {
 1370|  8.44k|            if(ps_next_dpb->u1_lt_idx == u4_lt_index)
  ------------------
  |  Branch (1370:16): [True: 5.40k, False: 3.03k]
  ------------------
 1371|  5.40k|            {
 1372|  5.40k|                ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1373|  5.40k|                                                   ps_next_dpb->ps_pic_buf);
 1374|  5.40k|                ps_ref_pic_buf_lx->u1_long_term_pic_num =
 1375|  5.40k|                                ps_ref_pic_buf_lx->u1_long_term_frm_idx;
 1376|       |
 1377|  5.40k|                ps_ref_pic_buf_lx++;
 1378|  5.40k|                u1_L0++;
 1379|  5.40k|                break;
 1380|  5.40k|            }
 1381|  3.03k|            ps_next_dpb = ps_next_dpb->ps_prev_long;
 1382|  3.03k|        }
 1383|  43.9k|    }
 1384|       |
 1385|  42.4k|    if(u1_field_pic_flag)
  ------------------
  |  Branch (1385:8): [True: 0, False: 42.4k]
  ------------------
 1386|      0|    {
 1387|       |        /* Initialize the rest of the entries in the */
 1388|       |        /* reference list to handle of errors        */
 1389|      0|        {
 1390|      0|            UWORD8 u1_i;
 1391|      0|            pic_buffer_t ref_pic;
 1392|       |
 1393|      0|            ref_pic = *(ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS);
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1394|       |
 1395|      0|            if(NULL == ref_pic.pu1_buf1)
  ------------------
  |  Branch (1395:16): [True: 0, False: 0]
  ------------------
 1396|      0|            {
 1397|      0|                ref_pic = *ps_dec->ps_cur_pic;
 1398|      0|            }
 1399|      0|            for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
  ------------------
  |  Branch (1399:31): [True: 0, False: 0]
  ------------------
 1400|      0|            {
 1401|      0|                *ps_ref_pic_buf_lx = ref_pic;
 1402|      0|                ps_ref_pic_buf_lx++;
 1403|      0|            }
 1404|      0|        }
 1405|      0|        ih264d_convert_frm_to_fld_list(
 1406|      0|                        ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS, &u1_L0,
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1407|      0|                        ps_dec, u1_num_short_term_bufs);
 1408|       |
 1409|      0|        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0] + u1_L0;
 1410|      0|    }
 1411|       |
 1412|  42.4k|    ps_dec->ps_cur_slice->u1_initial_list_size[0] = u1_L0;
 1413|       |
 1414|       |    /* Initialize the rest of the entries in the */
 1415|       |    /* reference list to handle of errors        */
 1416|  42.4k|    {
 1417|  42.4k|        UWORD8 u1_i;
 1418|  42.4k|        pic_buffer_t ref_pic;
 1419|       |
 1420|  42.4k|        ref_pic = *(ps_dpb_mgr->ps_init_dpb[0][0]);
 1421|       |
 1422|  42.4k|        if(NULL == ref_pic.pu1_buf1)
  ------------------
  |  Branch (1422:12): [True: 0, False: 42.4k]
  ------------------
 1423|      0|        {
 1424|      0|            ref_pic = *ps_dec->ps_cur_pic;
 1425|      0|        }
 1426|   105k|        for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
  ------------------
  |  Branch (1426:27): [True: 63.1k, False: 42.4k]
  ------------------
 1427|  63.1k|        {
 1428|  63.1k|            *ps_ref_pic_buf_lx = ref_pic;
 1429|  63.1k|            ps_ref_pic_buf_lx++;
 1430|  63.1k|        }
 1431|  42.4k|    }
 1432|  42.4k|    {
 1433|       |        /* 2. Initialize refIdxL1 */
 1434|  42.4k|        u1_L1 = 0;
 1435|  42.4k|        if(u1_field_pic_flag)
  ------------------
  |  Branch (1435:12): [True: 0, False: 42.4k]
  ------------------
 1436|      0|        {
 1437|      0|            ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1438|      0|        }
 1439|  42.4k|        else
 1440|  42.4k|        {
 1441|  42.4k|            ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0];
 1442|  42.4k|        }
 1443|       |
 1444|       |        /* 2.1. Arrange POC's more than CurrPOC in the ascending POC order starting
 1445|       |         from (CurrPOC + 1)*/
 1446|  67.5k|        for(j = i_cur_idx + 1; j < ps_dpb_mgr->u1_num_st_ref_bufs; j++)
  ------------------
  |  Branch (1446:32): [True: 25.0k, False: 42.4k]
  ------------------
 1447|  25.0k|        {
 1448|  25.0k|            if(aps_st_pic_bufs[j])
  ------------------
  |  Branch (1448:16): [True: 25.0k, False: 0]
  ------------------
 1449|  25.0k|            {
 1450|       |                /* Start from ST head */
 1451|  25.0k|                ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1452|  25.0k|                                                   aps_st_pic_bufs[j]);
 1453|  25.0k|                ps_ref_pic_buf_lx++;
 1454|  25.0k|                u1_L1++;
 1455|  25.0k|            }
 1456|  25.0k|        }
 1457|       |
 1458|       |        /* Arrange all short term buffers in output order as given by POC */
 1459|       |        /* 2.2 Arrange POC's less than CurrPOC in the descending POC order starting
 1460|       |         from (CurrPOC - 1)*/
 1461|  52.9k|        for(j = i_cur_idx; j >= 0; j--)
  ------------------
  |  Branch (1461:28): [True: 10.5k, False: 42.4k]
  ------------------
 1462|  10.5k|        {
 1463|  10.5k|            if(aps_st_pic_bufs[j])
  ------------------
  |  Branch (1463:16): [True: 10.5k, False: 0]
  ------------------
 1464|  10.5k|            {
 1465|  10.5k|                ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1466|  10.5k|                                                   aps_st_pic_bufs[j]);
 1467|  10.5k|                ps_ref_pic_buf_lx++;
 1468|  10.5k|                u1_L1++;
 1469|  10.5k|            }
 1470|  10.5k|        }
 1471|       |
 1472|       |        /* 2.3 Arrange all Long term buffers in ascending order, in LongtermIndex */
 1473|       |        /* Start from ST head */
 1474|  42.4k|        u1_num_short_term_bufs = u1_L1;
 1475|       |
 1476|  86.4k|        for(u4_lt_index = u1_min_lt_index; u4_lt_index <= u1_max_lt_index;
  ------------------
  |  Branch (1476:44): [True: 43.9k, False: 42.4k]
  ------------------
 1477|  43.9k|                        u4_lt_index++)
 1478|  43.9k|        {
 1479|  43.9k|            ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
 1480|  47.0k|            for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (1480:24): [True: 8.44k, False: 38.5k]
  ------------------
 1481|  8.44k|            {
 1482|  8.44k|                if(ps_next_dpb->u1_lt_idx == u4_lt_index)
  ------------------
  |  Branch (1482:20): [True: 5.40k, False: 3.03k]
  ------------------
 1483|  5.40k|                {
 1484|  5.40k|                    ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1485|  5.40k|                                                       ps_next_dpb->ps_pic_buf);
 1486|  5.40k|                    ps_ref_pic_buf_lx->u1_long_term_pic_num =
 1487|  5.40k|                                    ps_ref_pic_buf_lx->u1_long_term_frm_idx;
 1488|  5.40k|                    ps_ref_pic_buf_lx++;
 1489|  5.40k|                    u1_L1++;
 1490|  5.40k|                    break;
 1491|  5.40k|                }
 1492|  3.03k|                ps_next_dpb = ps_next_dpb->ps_prev_long;
 1493|  3.03k|            }
 1494|  43.9k|        }
 1495|       |
 1496|  42.4k|        if(u1_field_pic_flag)
  ------------------
  |  Branch (1496:12): [True: 0, False: 42.4k]
  ------------------
 1497|      0|        {
 1498|       |            /* Initialize the rest of the entries in the */
 1499|       |            /* reference list to handle of errors        */
 1500|      0|            {
 1501|      0|                UWORD8 u1_i;
 1502|      0|                pic_buffer_t ref_pic;
 1503|       |
 1504|      0|                ref_pic = *(ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS);
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1505|       |
 1506|      0|                if(NULL == ref_pic.pu1_buf1)
  ------------------
  |  Branch (1506:20): [True: 0, False: 0]
  ------------------
 1507|      0|                {
 1508|      0|                    ref_pic = *ps_dec->ps_cur_pic;
 1509|      0|                }
 1510|      0|                for(u1_i = u1_L1; u1_i < u1_max_ref_idx_l1; u1_i++)
  ------------------
  |  Branch (1510:35): [True: 0, False: 0]
  ------------------
 1511|      0|                {
 1512|      0|                    *ps_ref_pic_buf_lx = ref_pic;
 1513|      0|                    ps_ref_pic_buf_lx++;
 1514|      0|                }
 1515|      0|            }
 1516|       |
 1517|      0|            ih264d_convert_frm_to_fld_list(
 1518|      0|                            ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS,
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1519|      0|                            &u1_L1, ps_dec, u1_num_short_term_bufs);
 1520|      0|            ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0] + u1_L1;
 1521|      0|        }
 1522|       |
 1523|  42.4k|        ps_dec->ps_cur_slice->u1_initial_list_size[1] = u1_L1;
 1524|       |
 1525|       |        /* Initialize the rest of the entries in the */
 1526|       |        /* reference list to handle of errors        */
 1527|  42.4k|        {
 1528|  42.4k|            UWORD8 u1_i;
 1529|  42.4k|            pic_buffer_t ref_pic;
 1530|       |
 1531|  42.4k|            ref_pic = *(ps_dpb_mgr->ps_init_dpb[0][0]);
 1532|       |
 1533|  42.4k|            if(NULL == ref_pic.pu1_buf1)
  ------------------
  |  Branch (1533:16): [True: 0, False: 42.4k]
  ------------------
 1534|      0|            {
 1535|      0|                ref_pic = *ps_dec->ps_cur_pic;
 1536|      0|            }
 1537|   101k|            for(u1_i = u1_L1; u1_i < u1_max_ref_idx_l1; u1_i++)
  ------------------
  |  Branch (1537:31): [True: 59.3k, False: 42.4k]
  ------------------
 1538|  59.3k|            {
 1539|  59.3k|                *ps_ref_pic_buf_lx = ref_pic;
 1540|  59.3k|                ps_ref_pic_buf_lx++;
 1541|  59.3k|            }
 1542|  42.4k|        }
 1543|       |
 1544|       |        /* If list0 and list 1 ebtries are same then swap the 0th and 1st entry */
 1545|       |        /* of list 1                                                            */
 1546|  42.4k|        {
 1547|  42.4k|            struct pic_buffer_t *ps_ref_pic1_buf_l0, *ps_ref_pic1_buf_l1;
 1548|  42.4k|            struct pic_buffer_t s_ref_pic1_buf_temp;
 1549|       |
 1550|  42.4k|            ps_ref_pic1_buf_l0 = ps_dpb_mgr->ps_init_dpb[0][0];
 1551|  42.4k|            ps_ref_pic1_buf_l1 = ps_dpb_mgr->ps_init_dpb[1][0];
 1552|       |
 1553|  42.4k|            if((u1_L0 == u1_L1) && (u1_L0 > 1))
  ------------------
  |  Branch (1553:16): [True: 42.4k, False: 0]
  |  Branch (1553:36): [True: 5.30k, False: 37.1k]
  ------------------
 1554|  5.30k|            {
 1555|  5.30k|                WORD32 i_index, i_swap;
 1556|       |
 1557|  5.30k|                i_swap = 1;
 1558|       |
 1559|  15.3k|                for(i_index = 0; i_index < u1_L0; i_index++)
  ------------------
  |  Branch (1559:34): [True: 11.7k, False: 3.61k]
  ------------------
 1560|  11.7k|                {
 1561|  11.7k|                    if((ps_ref_pic1_buf_l0[i_index]).pu1_buf1
  ------------------
  |  Branch (1561:24): [True: 1.69k, False: 10.0k]
  ------------------
 1562|  11.7k|                                    != (ps_ref_pic1_buf_l1[i_index]).pu1_buf1)
 1563|  1.69k|                    {
 1564|  1.69k|                        i_swap = 0;
 1565|  1.69k|                        break;
 1566|  1.69k|                    }
 1567|  11.7k|                }
 1568|  5.30k|                if(1 == i_swap)
  ------------------
  |  Branch (1568:20): [True: 3.61k, False: 1.69k]
  ------------------
 1569|  3.61k|                {
 1570|  3.61k|                    memcpy(&s_ref_pic1_buf_temp, &ps_ref_pic1_buf_l1[1],
 1571|  3.61k|                           sizeof(struct pic_buffer_t));
 1572|  3.61k|                    memcpy(&ps_ref_pic1_buf_l1[1], &ps_ref_pic1_buf_l1[0],
 1573|  3.61k|                           sizeof(struct pic_buffer_t));
 1574|  3.61k|                    memcpy(&ps_ref_pic1_buf_l1[0], &s_ref_pic1_buf_temp,
 1575|  3.61k|                           sizeof(struct pic_buffer_t));
 1576|  3.61k|                }
 1577|  5.30k|            }
 1578|  42.4k|        }
 1579|  42.4k|    }
 1580|  42.4k|}
ih264d_one_to_one:
 1604|   895k|{
 1605|   895k|    UWORD8 *pu1_col_zero_flag_start, u1_col_mb_pred_mode, u1_num_blks, u1_sub_mb_num;
 1606|   895k|    UWORD8 u1_init_colzero_flag;
 1607|   895k|    UNUSED(ps_cur_mb_info);
  ------------------
  |  |   45|   895k|#define UNUSED(x) ((void)(x))
  ------------------
 1608|   895k|    pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst;
 1609|   895k|    u1_col_mb_pred_mode = pu1_col_zero_flag_start[ps_dec->u1_sub_mb_num];
 1610|   895k|    u1_init_colzero_flag = u1_col_mb_pred_mode & 1;
 1611|   895k|    u1_col_mb_pred_mode >>= 6;
 1612|   895k|    ps_direct->u1_vert_mv_scale = ONE_TO_ONE;
  ------------------
  |  |   47|   895k|#define ONE_TO_ONE    0
  ------------------
 1613|   895k|    ps_direct->u1_col_zeroflag_change = 0;
 1614|       |
 1615|   895k|    if(u1_wd_x == MB_SIZE)
  ------------------
  |  |  554|   895k|#define MB_SIZE             16
  ------------------
  |  Branch (1615:8): [True: 881k, False: 13.8k]
  ------------------
 1616|   881k|    {
 1617|   881k|        ps_dec->u1_currB_type = (!!u1_col_mb_pred_mode);
 1618|   881k|        if(u1_col_mb_pred_mode == PRED_16x16)
  ------------------
  |  |  450|   881k|#define PRED_16x16  0
  ------------------
  |  Branch (1618:12): [True: 862k, False: 18.7k]
  ------------------
 1619|   862k|        {
 1620|   862k|            ps_direct->i1_num_partitions = 1;
 1621|   862k|            ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
 1622|   862k|            ps_direct->i1_submb_num[0] = 0;
 1623|   862k|            ps_direct->i1_partitionsize[0] = PRED_16x16;
  ------------------
  |  |  450|   862k|#define PRED_16x16  0
  ------------------
 1624|       |
 1625|   862k|            return;
 1626|   862k|        }
 1627|  18.7k|        else if(u1_col_mb_pred_mode < PRED_8x8)
  ------------------
  |  |  453|  18.7k|#define PRED_8x8    3
  ------------------
  |  Branch (1627:17): [True: 13.6k, False: 5.03k]
  ------------------
 1628|  13.6k|        {
 1629|  13.6k|            ps_direct->i1_num_partitions = 2;
 1630|  13.6k|            ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
 1631|  13.6k|            ps_direct->i1_submb_num[0] = 0;
 1632|  13.6k|            ps_direct->i1_partitionsize[0] = u1_col_mb_pred_mode;
 1633|  13.6k|            u1_sub_mb_num = (u1_col_mb_pred_mode == PRED_16x8) ? 8 : 2;
  ------------------
  |  |  451|  13.6k|#define PRED_16x8   1
  ------------------
  |  Branch (1633:29): [True: 8.09k, False: 5.59k]
  ------------------
 1634|  13.6k|            ps_direct->i1_submb_num[1] = u1_sub_mb_num;
 1635|  13.6k|            ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst
 1636|  13.6k|                            + ps_direct->i1_submb_num[1];
 1637|  13.6k|            ps_direct->i1_partitionsize[1] = u1_col_mb_pred_mode;
 1638|  13.6k|            if((pu1_col_zero_flag_start[u1_sub_mb_num] & 1) != u1_init_colzero_flag)
  ------------------
  |  Branch (1638:16): [True: 4.34k, False: 9.34k]
  ------------------
 1639|  4.34k|                ps_direct->u1_col_zeroflag_change = 1;
 1640|  13.6k|            return;
 1641|  13.6k|        }
 1642|  5.03k|        else
 1643|  5.03k|        {
 1644|  5.03k|            u1_num_blks = 4;
 1645|  5.03k|        }
 1646|   881k|    }
 1647|  13.8k|    else
 1648|  13.8k|    {
 1649|  13.8k|        u1_num_blks = 1;
 1650|  13.8k|    }
 1651|       |
 1652|  18.8k|    {
 1653|  18.8k|        const UWORD8 *pu1_top_lt_mb_part_idx;
 1654|  18.8k|        UWORD8 u1_col_sub_mb_pred_mode, uc_blk, u1_sub_blk, u1_submb_col = 0;
 1655|  18.8k|        UWORD8 u1_num_sub_blks, uc_direct8x8inf, *pu1_col_zero_flag, u1_sub_mb_num;
 1656|  18.8k|        const UWORD8 *pu1_num_sub_mb_part =
 1657|  18.8k|                        (const UWORD8 *)gau1_ih264d_num_submb_part;
 1658|  18.8k|        UWORD8 i1_num_partitions = 0, partition_size;
 1659|  18.8k|        WORD32 mv_index;
 1660|  18.8k|        const UWORD8 *pu1_top_lt_sub_mb_idx = gau1_ih264d_submb_indx_mod_sp_drct;
 1661|       |
 1662|  18.8k|        u1_sub_mb_num = ps_dec->u1_sub_mb_num;
 1663|  18.8k|        uc_direct8x8inf = ps_dec->ps_cur_slice->u1_direct_8x8_inference_flag;
 1664|  18.8k|        pu1_top_lt_mb_part_idx = gau1_ih264d_top_left_mb_part_indx_mod
 1665|  18.8k|                        + (PRED_8x8 << 1) + 1;
  ------------------
  |  |  453|  18.8k|#define PRED_8x8    3
  ------------------
 1666|       |
 1667|  52.8k|        for(uc_blk = 0; uc_blk < u1_num_blks; uc_blk++)
  ------------------
  |  Branch (1667:25): [True: 33.9k, False: 18.8k]
  ------------------
 1668|  33.9k|        {
 1669|  33.9k|            partition_size = PRED_8x8;
  ------------------
  |  |  453|  33.9k|#define PRED_8x8    3
  ------------------
 1670|  33.9k|            pu1_top_lt_sub_mb_idx = gau1_ih264d_submb_indx_mod_sp_drct;
 1671|  33.9k|            if(uc_direct8x8inf == 1)
  ------------------
  |  Branch (1671:16): [True: 8.21k, False: 25.7k]
  ------------------
 1672|  8.21k|            {
 1673|  8.21k|                u1_submb_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
 1674|  8.21k|                mv_index = u2_sub_mb_ofst + u1_submb_col;
 1675|  8.21k|                u1_num_sub_blks = 1;
 1676|  8.21k|            }
 1677|  25.7k|            else
 1678|  25.7k|            {
 1679|       |                /* colMbPart is either 8x8, 8x4, 4x8, 4x4 */
 1680|  25.7k|                pu1_col_zero_flag = pu1_col_zero_flag_start + u1_sub_mb_num;
 1681|  25.7k|                u1_col_sub_mb_pred_mode = *pu1_col_zero_flag;
 1682|  25.7k|                u1_col_sub_mb_pred_mode = (u1_col_sub_mb_pred_mode & 0x30) >> 4;
 1683|  25.7k|                partition_size = (UWORD8)((u1_col_sub_mb_pred_mode)
 1684|  25.7k|                                | (PRED_8x8 << 2));
  ------------------
  |  |  453|  25.7k|#define PRED_8x8    3
  ------------------
 1685|  25.7k|                mv_index = u2_sub_mb_ofst + u1_sub_mb_num;
 1686|  25.7k|                pu1_top_lt_sub_mb_idx += (u1_col_sub_mb_pred_mode << 1);
 1687|  25.7k|                u1_num_sub_blks = pu1_num_sub_mb_part[u1_col_sub_mb_pred_mode];
 1688|       |
 1689|  25.7k|            }
 1690|       |
 1691|  78.5k|            for(u1_sub_blk = 0; u1_sub_blk < u1_num_sub_blks;
  ------------------
  |  Branch (1691:33): [True: 44.5k, False: 33.9k]
  ------------------
 1692|  44.5k|                            u1_sub_blk++, pu1_top_lt_sub_mb_idx++)
 1693|  44.5k|            {
 1694|  44.5k|                u1_sub_mb_num += *pu1_top_lt_sub_mb_idx;
 1695|  44.5k|                mv_index += *pu1_top_lt_sub_mb_idx;
 1696|  44.5k|                ps_direct->i4_mv_indices[i1_num_partitions] = mv_index;
 1697|  44.5k|                ps_direct->i1_submb_num[i1_num_partitions] = u1_sub_mb_num;
 1698|  44.5k|                ps_direct->i1_partitionsize[i1_num_partitions] = partition_size;
 1699|  44.5k|                i1_num_partitions++;
 1700|  44.5k|                if(!uc_direct8x8inf)
  ------------------
  |  Branch (1700:20): [True: 36.3k, False: 8.21k]
  ------------------
 1701|  36.3k|                    u1_submb_col = u1_sub_mb_num;
 1702|  44.5k|                if((pu1_col_zero_flag_start[u1_submb_col] & 1)
  ------------------
  |  Branch (1702:20): [True: 9.20k, False: 35.3k]
  ------------------
 1703|  44.5k|                                != u1_init_colzero_flag)
 1704|  9.20k|                    ps_direct->u1_col_zeroflag_change = 1;
 1705|  44.5k|            }
 1706|  33.9k|            u1_sub_mb_num = *pu1_top_lt_mb_part_idx++;
 1707|  33.9k|        }
 1708|  18.8k|        ps_direct->i1_num_partitions = i1_num_partitions;
 1709|  18.8k|    }
 1710|  18.8k|}
ih264d_cal_col_pic:
 1939|  30.2k|{
 1940|  30.2k|    struct pic_buffer_t* ps_col_pic = ps_dec->ps_col_pic;
 1941|  30.2k|    UWORD8 uc_curpictype, uc_colpictype;
 1942|  30.2k|    ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][0];
 1943|  30.2k|    uc_curpictype = (ps_dec->ps_cur_pic->u1_picturetype & 0x7);
 1944|  30.2k|    uc_colpictype = (ps_col_pic->u1_picturetype & 0x7);
 1945|  30.2k|    if(uc_curpictype == FRM_PIC)
  ------------------
  |  |  352|  30.2k|#define FRM_PIC         0x00
  ------------------
  |  Branch (1945:8): [True: 30.2k, False: 0]
  ------------------
 1946|  30.2k|    {
 1947|  30.2k|        if(uc_colpictype == FRM_PIC)
  ------------------
  |  |  352|  30.2k|#define FRM_PIC         0x00
  ------------------
  |  Branch (1947:12): [True: 30.2k, False: 0]
  ------------------
 1948|  30.2k|            ps_dec->pf_parse_mvdirect = ih264d_one_to_one;
 1949|      0|        else if(uc_colpictype == COMP_FLD_PAIR)
  ------------------
  |  |  355|      0|#define COMP_FLD_PAIR   0x03 /* TOP_FLD | BOT_FLD */
  ------------------
  |  Branch (1949:17): [True: 0, False: 0]
  ------------------
 1950|      0|        {
 1951|      0|            ps_dec->pf_parse_mvdirect = ih264d_fld_to_frm;
 1952|      0|            if(ps_col_pic->i4_top_field_order_cnt
  ------------------
  |  Branch (1952:16): [True: 0, False: 0]
  ------------------
 1953|      0|                            >= ps_col_pic->i4_bottom_field_order_cnt)
 1954|      0|            {
 1955|      0|                struct pic_buffer_t* ps_tempPic = ps_col_pic;
 1956|      0|                UWORD32 ui_half_num_of_sub_mbs = ((ps_dec->u2_pic_ht
 1957|      0|                                * ps_dec->u2_pic_wd) >> 5);
 1958|      0|                ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS];
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1959|       |                /* memcpy ps_tempPic to ps_col_pic */
 1960|      0|                *ps_col_pic = *ps_tempPic;
 1961|      0|                ps_col_pic->pu1_buf1 = ps_tempPic->pu1_buf1
 1962|      0|                                + ps_tempPic->u2_frm_wd_y;
 1963|      0|                ps_col_pic->pu1_buf2 = ps_tempPic->pu1_buf2
 1964|      0|                                + ps_tempPic->u2_frm_wd_uv;
 1965|      0|                ps_col_pic->pu1_buf3 = ps_tempPic->pu1_buf3
 1966|      0|                                + ps_tempPic->u2_frm_wd_uv;
 1967|      0|                ps_col_pic->pu1_col_zero_flag = ps_tempPic->pu1_col_zero_flag
 1968|      0|                                + ui_half_num_of_sub_mbs;
 1969|      0|                ps_col_pic->ps_mv = ps_tempPic->ps_mv + ui_half_num_of_sub_mbs;
 1970|       |
 1971|       |
 1972|      0|                ps_col_pic->u1_pic_type = 0;/*complementary reference field pair-refering as frame */
 1973|       |
 1974|       |
 1975|       |
 1976|      0|            }
 1977|      0|        }
 1978|      0|        else
 1979|      0|        {
 1980|      0|            UWORD32 i4_error_code;
 1981|      0|            i4_error_code = ERROR_DBP_MANAGER_T;
 1982|       |//          i4_error_code |= 1<<IVD_CORRUPTEDDATA;
 1983|      0|            return i4_error_code;
 1984|      0|        }
 1985|  30.2k|    }
 1986|      0|    else if(uc_curpictype == AFRM_PIC)
  ------------------
  |  |  356|      0|#define AFRM_PIC        0x04
  ------------------
  |  Branch (1986:13): [True: 0, False: 0]
  ------------------
 1987|      0|    {
 1988|      0|        ps_dec->pf_parse_mvdirect = ih264d_fld_to_mbaff;
 1989|      0|    }
 1990|      0|    else /* must be a field*/
 1991|      0|    {
 1992|      0|        if(uc_colpictype == FRM_PIC)
  ------------------
  |  |  352|      0|#define FRM_PIC         0x00
  ------------------
  |  Branch (1992:12): [True: 0, False: 0]
  ------------------
 1993|      0|            ps_dec->pf_parse_mvdirect = ih264d_frm_to_fld;
 1994|      0|        else if(uc_colpictype == AFRM_PIC)
  ------------------
  |  |  356|      0|#define AFRM_PIC        0x04
  ------------------
  |  Branch (1994:17): [True: 0, False: 0]
  ------------------
 1995|      0|            ps_dec->pf_parse_mvdirect = ih264d_mbaff_to_fld;
 1996|      0|        else
 1997|      0|            ps_dec->pf_parse_mvdirect = ih264d_one_to_one;
 1998|      0|    }
 1999|  30.2k|    ps_dec->ps_col_pic = ps_col_pic;
 2000|  30.2k|    return OK;
  ------------------
  |  |  114|  30.2k|#define OK        0
  ------------------
 2001|  30.2k|}
ih264d_process_bslice.c:poc_compare:
 1198|  14.8k|{
 1199|  14.8k|    struct pic_buffer_t *ps_pic1 = *(struct pic_buffer_t **) pv_pic1;
 1200|  14.8k|    struct pic_buffer_t *ps_pic2 = *(struct pic_buffer_t **) pv_pic2;
 1201|  14.8k|    if (ps_pic1->i4_poc < ps_pic2->i4_poc)
  ------------------
  |  Branch (1201:9): [True: 5.95k, False: 8.90k]
  ------------------
 1202|  5.95k|    {
 1203|  5.95k|        return -1;
 1204|  5.95k|    }
 1205|  8.90k|    else if (ps_pic1->i4_poc > ps_pic2->i4_poc)
  ------------------
  |  Branch (1205:14): [True: 7.58k, False: 1.32k]
  ------------------
 1206|  7.58k|    {
 1207|  7.58k|        return 1;
 1208|  7.58k|    }
 1209|  1.32k|    else
 1210|  1.32k|    {
 1211|  1.32k|        return 0;
 1212|  1.32k|    }
 1213|  14.8k|}

ih264d_read_intra_pred_modes:
  109|  18.9k|{
  110|  18.9k|    WORD32 i4x4_luma_blk_idx = 0, i8x8_luma_blk_idx = 0;
  111|       |
  112|  18.9k|    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
  113|       |
  114|  18.9k|    if(!u4_trans_form8x8)
  ------------------
  |  Branch (114:8): [True: 7.37k, False: 11.5k]
  ------------------
  115|  7.37k|    {
  116|   125k|        for(i4x4_luma_blk_idx = 0; i4x4_luma_blk_idx < 16; ++i4x4_luma_blk_idx)
  ------------------
  |  Branch (116:36): [True: 117k, False: 7.37k]
  ------------------
  117|   117k|        {
  118|   117k|            UWORD32 u4_temp;
  119|   117k|            SWITCHOFFTRACE;
  120|       |
  121|   117k|            GETBIT(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer);
  ------------------
  |  |  105|   117k|#define   GETBIT(u4_code, u4_offset, pu4_bitstream)                         \
  |  |  106|   117k|{                                                                           \
  |  |  107|   117k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  108|   117k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  109|   117k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  110|   117k|    u4_code = pu4_buf[u4_word_off] << u4_bit_off;                           \
  |  |  111|   117k|    (u4_offset)++;                                                          \
  |  |  112|   117k|    u4_code = (u4_code >> 31);                                              \
  |  |  113|   117k|}
  ------------------
  122|   117k|            *pu1_prev_intra4x4_pred_mode_flag = (UWORD8)u4_temp;
  123|   117k|            if(!(*pu1_prev_intra4x4_pred_mode_flag))
  ------------------
  |  Branch (123:16): [True: 65.7k, False: 52.2k]
  ------------------
  124|  65.7k|            {
  125|  65.7k|                GETBITS(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, 3);
  ------------------
  |  |  120|  65.7k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  65.7k|{                                                                           \
  |  |  122|  65.7k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  65.7k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  65.7k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  65.7k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  65.7k|                                                                            \
  |  |  127|  65.7k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 64.7k, False: 1.01k]
  |  |  ------------------
  |  |  128|  65.7k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  64.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  65.7k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  65.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  65.7k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  65.7k|}                                                                           \
  ------------------
  126|       |
  127|  65.7k|                *(pu1_rem_intra4x4_pred_mode) = (UWORD8)u4_temp;
  128|  65.7k|            }
  129|       |
  130|   117k|            pu1_prev_intra4x4_pred_mode_flag++;
  131|   117k|            pu1_rem_intra4x4_pred_mode++;
  132|   117k|        }
  133|  7.37k|    }
  134|  11.5k|    else
  135|  11.5k|    {
  136|       |        /**********************************************************************/
  137|       |        /* prev_intra4x4_pred_modes to be interpreted as                      */
  138|       |        /* prev_intra8x8_pred_modes in case of transform 8x8                  */
  139|       |        /**********************************************************************/
  140|  57.8k|        for(i8x8_luma_blk_idx = 0; i8x8_luma_blk_idx < 4; i8x8_luma_blk_idx++)
  ------------------
  |  Branch (140:36): [True: 46.3k, False: 11.5k]
  ------------------
  141|  46.3k|        {
  142|  46.3k|            UWORD32 u4_temp;
  143|  46.3k|            GETBIT(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer);
  ------------------
  |  |  105|  46.3k|#define   GETBIT(u4_code, u4_offset, pu4_bitstream)                         \
  |  |  106|  46.3k|{                                                                           \
  |  |  107|  46.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  108|  46.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  109|  46.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  110|  46.3k|    u4_code = pu4_buf[u4_word_off] << u4_bit_off;                           \
  |  |  111|  46.3k|    (u4_offset)++;                                                          \
  |  |  112|  46.3k|    u4_code = (u4_code >> 31);                                              \
  |  |  113|  46.3k|}
  ------------------
  144|  46.3k|            *pu1_prev_intra4x4_pred_mode_flag = (UWORD8)u4_temp;
  145|  46.3k|            if(!(*pu1_prev_intra4x4_pred_mode_flag))
  ------------------
  |  Branch (145:16): [True: 9.31k, False: 36.9k]
  ------------------
  146|  9.31k|            {
  147|  9.31k|                GETBITS(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, 3);
  ------------------
  |  |  120|  9.31k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  9.31k|{                                                                           \
  |  |  122|  9.31k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  9.31k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  9.31k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  9.31k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  9.31k|                                                                            \
  |  |  127|  9.31k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 8.68k, False: 631]
  |  |  ------------------
  |  |  128|  9.31k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  8.68k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  9.31k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  9.31k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  9.31k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  9.31k|}                                                                           \
  ------------------
  148|       |
  149|  9.31k|                (*pu1_rem_intra4x4_pred_mode) = (UWORD8)u4_temp;
  150|  9.31k|            }
  151|  46.3k|            pu1_prev_intra4x4_pred_mode_flag++;
  152|  46.3k|            pu1_rem_intra4x4_pred_mode++;
  153|  46.3k|        }
  154|  11.5k|    }
  155|  18.9k|    return (0);
  156|  18.9k|}
ih264d_unpack_coeff4x4_4x4blk:
  160|   451k|{
  161|   451k|    tu_sblk4x4_coeff_data_t *ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_proc_tu_coeff_data;
  162|   451k|    UWORD16 u2_sig_coeff_map = ps_tu_4x4->u2_sig_coeff_map;
  163|   451k|    WORD32 idx = 0;
  164|   451k|    WORD16 *pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
  165|   451k|    WORD32 dc_only_flag = 0;
  166|   451k|    WORD32 num_coeff = 0;
  167|       |
  168|   451k|    PROFILE_DISABLE_UNPACK_LUMA()
  ------------------
  |  |  112|   451k|#define PROFILE_DISABLE_UNPACK_LUMA() ;
  ------------------
  169|  1.57M|    while(u2_sig_coeff_map)
  ------------------
  |  Branch (169:11): [True: 1.12M, False: 451k]
  ------------------
  170|  1.12M|    {
  171|  1.12M|        idx = CLZ(u2_sig_coeff_map);
  172|       |
  173|  1.12M|        idx = 31 - idx;
  174|  1.12M|        RESET_BIT(u2_sig_coeff_map,idx);
  ------------------
  |  |  105|  1.12M|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  ------------------
  175|       |
  176|  1.12M|        idx = pu1_inv_scan[idx];
  177|  1.12M|        pi2_out_coeff_data[idx] = *pi2_coeff_data++;
  178|  1.12M|        num_coeff++;
  179|  1.12M|    }
  180|       |
  181|   451k|    if((num_coeff == 1) && (idx == 0))
  ------------------
  |  Branch (181:8): [True: 202k, False: 248k]
  |  Branch (181:28): [True: 59.5k, False: 143k]
  ------------------
  182|  59.5k|    {
  183|  59.5k|        dc_only_flag = 1;
  184|  59.5k|    }
  185|       |
  186|   451k|    {
  187|   451k|        WORD32 offset;
  188|   451k|        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
  189|   451k|        offset = ALIGN4(offset);
  ------------------
  |  |   52|   451k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
  190|   451k|        ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + offset);
  191|   451k|    }
  192|       |
  193|   451k|    return dc_only_flag;
  194|   451k|}
ih264d_unpack_coeff4x4_8x8blk:
  200|   282k|{
  201|   282k|    UWORD8 *pu1_inv_scan;
  202|   282k|    UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
  203|   282k|    UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
  204|   282k|    UWORD32 u4_luma_dc_only_csbp = 0;
  205|   282k|    WORD32 dc_only_flag = 0;
  206|       |
  207|   282k|    PROFILE_DISABLE_UNPACK_LUMA()
  ------------------
  |  |  112|   282k|#define PROFILE_DISABLE_UNPACK_LUMA() ;
  ------------------
  208|   282k|    if(u1_field_coding_flag || u1_mb_field_decoding_flag)
  ------------------
  |  Branch (208:8): [True: 0, False: 282k]
  |  Branch (208:32): [True: 0, False: 282k]
  ------------------
  209|      0|    {
  210|      0|        pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld;
  211|      0|    }
  212|   282k|    else
  213|   282k|    {
  214|   282k|        pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan;
  215|   282k|    }
  216|       |
  217|       |    // sub 0
  218|   282k|    if(ui2_luma_csbp & 0x1)
  ------------------
  |  Branch (218:8): [True: 89.1k, False: 192k]
  ------------------
  219|  89.1k|    {
  220|  89.1k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  221|  89.1k|        dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  222|  89.1k|                                      pi2_out_coeff_data,
  223|  89.1k|                                      pu1_inv_scan);
  224|       |
  225|  89.1k|        INSERT_BIT(u4_luma_dc_only_csbp, 0, dc_only_flag);
  ------------------
  |  |  109|  89.1k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  89.1k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  226|  89.1k|    }
  227|       |
  228|   282k|    pi2_out_coeff_data += 16;
  229|       |    // sub 1
  230|   282k|    if(ui2_luma_csbp & 0x2)
  ------------------
  |  Branch (230:8): [True: 92.4k, False: 189k]
  ------------------
  231|  92.4k|    {
  232|  92.4k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  233|  92.4k|        dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  234|  92.4k|                                      pi2_out_coeff_data,
  235|  92.4k|                                      pu1_inv_scan);
  236|  92.4k|        INSERT_BIT(u4_luma_dc_only_csbp, 1, dc_only_flag);
  ------------------
  |  |  109|  92.4k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  92.4k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  237|  92.4k|    }
  238|       |
  239|   282k|    pi2_out_coeff_data += 16 + 32;
  240|       |    // sub 2
  241|   282k|    if(ui2_luma_csbp & 0x10)
  ------------------
  |  Branch (241:8): [True: 90.7k, False: 191k]
  ------------------
  242|  90.7k|    {
  243|  90.7k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  244|  90.7k|        dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  245|  90.7k|                                      pi2_out_coeff_data,
  246|  90.7k|                                      pu1_inv_scan);
  247|  90.7k|        INSERT_BIT(u4_luma_dc_only_csbp, 4, dc_only_flag);
  ------------------
  |  |  109|  90.7k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  90.7k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  248|  90.7k|    }
  249|       |
  250|   282k|    pi2_out_coeff_data += 16;
  251|       |    // sub 3
  252|   282k|    if(ui2_luma_csbp & 0x20)
  ------------------
  |  Branch (252:8): [True: 87.3k, False: 194k]
  ------------------
  253|  87.3k|    {
  254|  87.3k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  255|  87.3k|        dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  256|  87.3k|                                      pi2_out_coeff_data,
  257|  87.3k|                                      pu1_inv_scan);
  258|  87.3k|        INSERT_BIT(u4_luma_dc_only_csbp, 5, dc_only_flag);
  ------------------
  |  |  109|  87.3k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  87.3k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  259|  87.3k|    }
  260|   282k|    return u4_luma_dc_only_csbp;
  261|   282k|}
ih264d_unpack_coeff8x8_8x8blk_cavlc:
  266|  43.1k|{
  267|  43.1k|    UWORD8 *pu1_inv_scan;
  268|  43.1k|    UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
  269|  43.1k|    UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
  270|  43.1k|    WORD32 dc_only_flag = 0;
  271|       |
  272|  43.1k|    PROFILE_DISABLE_UNPACK_LUMA()
  ------------------
  |  |  112|  43.1k|#define PROFILE_DISABLE_UNPACK_LUMA() ;
  ------------------
  273|  43.1k|    if(ui2_luma_csbp & 0x33)
  ------------------
  |  Branch (273:8): [True: 23.7k, False: 19.4k]
  ------------------
  274|  23.7k|    {
  275|  23.7k|        memset(pi2_out_coeff_data,0,64*sizeof(WORD16));
  276|  23.7k|    }
  277|       |
  278|  43.1k|    if(!u1_mb_field_decoding_flag)
  ------------------
  |  Branch (278:8): [True: 43.1k, False: 0]
  ------------------
  279|  43.1k|    {
  280|  43.1k|        pu1_inv_scan =
  281|  43.1k|                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
  282|  43.1k|    }
  283|      0|    else
  284|      0|    {
  285|      0|        pu1_inv_scan =
  286|      0|                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0];
  287|      0|    }
  288|       |    // sub 0
  289|  43.1k|    if(ui2_luma_csbp & 0x1)
  ------------------
  |  Branch (289:8): [True: 7.98k, False: 35.2k]
  ------------------
  290|  7.98k|    {
  291|  7.98k|        dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  292|  7.98k|                                      pi2_out_coeff_data,
  293|  7.98k|                                      pu1_inv_scan);
  294|  7.98k|    }
  295|       |
  296|  43.1k|    if(!u1_mb_field_decoding_flag)
  ------------------
  |  Branch (296:8): [True: 43.1k, False: 0]
  ------------------
  297|  43.1k|    {
  298|  43.1k|        pu1_inv_scan =
  299|  43.1k|                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
  300|  43.1k|    }
  301|      0|    else
  302|      0|    {
  303|      0|        pu1_inv_scan =
  304|      0|                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1];
  305|      0|    }
  306|       |    // sub 1
  307|  43.1k|    if(ui2_luma_csbp & 0x2)
  ------------------
  |  Branch (307:8): [True: 8.90k, False: 34.2k]
  ------------------
  308|  8.90k|    {
  309|  8.90k|        dc_only_flag = 0;
  310|  8.90k|        ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  311|  8.90k|                                      pi2_out_coeff_data,
  312|  8.90k|                                      pu1_inv_scan);
  313|  8.90k|    }
  314|       |
  315|  43.1k|    if(!u1_mb_field_decoding_flag)
  ------------------
  |  Branch (315:8): [True: 43.1k, False: 0]
  ------------------
  316|  43.1k|    {
  317|  43.1k|        pu1_inv_scan =
  318|  43.1k|                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
  319|  43.1k|    }
  320|      0|    else
  321|      0|    {
  322|      0|        pu1_inv_scan =
  323|      0|                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2];
  324|      0|    }
  325|       |    // sub 2
  326|  43.1k|    if(ui2_luma_csbp & 0x10)
  ------------------
  |  Branch (326:8): [True: 9.47k, False: 33.7k]
  ------------------
  327|  9.47k|    {
  328|  9.47k|        dc_only_flag = 0;
  329|  9.47k|        ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  330|  9.47k|                                      pi2_out_coeff_data,
  331|  9.47k|                                      pu1_inv_scan);
  332|  9.47k|    }
  333|       |
  334|  43.1k|    if(!u1_mb_field_decoding_flag)
  ------------------
  |  Branch (334:8): [True: 43.1k, False: 0]
  ------------------
  335|  43.1k|    {
  336|  43.1k|        pu1_inv_scan =
  337|  43.1k|                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
  338|  43.1k|    }
  339|      0|    else
  340|      0|    {
  341|      0|        pu1_inv_scan =
  342|      0|                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3];
  343|      0|    }
  344|       |    // sub 3
  345|  43.1k|    if(ui2_luma_csbp & 0x20)
  ------------------
  |  Branch (345:8): [True: 9.15k, False: 34.0k]
  ------------------
  346|  9.15k|    {
  347|  9.15k|        dc_only_flag = 0;
  348|  9.15k|        ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  349|  9.15k|                                      pi2_out_coeff_data,
  350|  9.15k|                                      pu1_inv_scan);
  351|  9.15k|    }
  352|  43.1k|    return dc_only_flag;
  353|  43.1k|}
ih264d_unpack_coeff4x4_8x8blk_chroma:
  358|  60.3k|{
  359|  60.3k|    UWORD8 *pu1_inv_scan;
  360|  60.3k|    UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
  361|  60.3k|    UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
  362|       |
  363|  60.3k|    PROFILE_DISABLE_UNPACK_CHROMA()
  ------------------
  |  |  113|  60.3k|#define PROFILE_DISABLE_UNPACK_CHROMA() ;
  ------------------
  364|  60.3k|    if(u1_field_coding_flag || u1_mb_field_decoding_flag)
  ------------------
  |  Branch (364:8): [True: 0, False: 60.3k]
  |  Branch (364:32): [True: 0, False: 60.3k]
  ------------------
  365|      0|    {
  366|      0|        pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld;
  367|      0|    }
  368|  60.3k|    else
  369|  60.3k|    {
  370|  60.3k|        pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan;
  371|  60.3k|    }
  372|       |
  373|  60.3k|    if(ui2_chroma_csbp & 0x1)
  ------------------
  |  Branch (373:8): [True: 13.1k, False: 47.1k]
  ------------------
  374|  13.1k|    {
  375|  13.1k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  376|  13.1k|        ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  377|  13.1k|                                      pi2_out_coeff_data,
  378|  13.1k|                                      pu1_inv_scan);
  379|  13.1k|    }
  380|  60.3k|    pi2_out_coeff_data += 16;
  381|  60.3k|    if(ui2_chroma_csbp & 0x2)
  ------------------
  |  Branch (381:8): [True: 13.4k, False: 46.9k]
  ------------------
  382|  13.4k|    {
  383|  13.4k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  384|  13.4k|        ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  385|  13.4k|                                      pi2_out_coeff_data,
  386|  13.4k|                                      pu1_inv_scan);
  387|  13.4k|    }
  388|       |
  389|  60.3k|    pi2_out_coeff_data += 16;
  390|  60.3k|    if(ui2_chroma_csbp & 0x4)
  ------------------
  |  Branch (390:8): [True: 14.4k, False: 45.8k]
  ------------------
  391|  14.4k|    {
  392|  14.4k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  393|  14.4k|        ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  394|  14.4k|                                      pi2_out_coeff_data,
  395|  14.4k|                                      pu1_inv_scan);
  396|  14.4k|    }
  397|       |
  398|  60.3k|    pi2_out_coeff_data += 16;
  399|  60.3k|    if(ui2_chroma_csbp & 0x8)
  ------------------
  |  Branch (399:8): [True: 14.8k, False: 45.4k]
  ------------------
  400|  14.8k|    {
  401|  14.8k|        memset(pi2_out_coeff_data,0,16*sizeof(WORD16));
  402|  14.8k|        ih264d_unpack_coeff4x4_4x4blk(ps_dec,
  403|  14.8k|                                      pi2_out_coeff_data,
  404|  14.8k|                                      pu1_inv_scan);
  405|  14.8k|    }
  406|  60.3k|}
ih264d_unpack_luma_coeff4x4_mb:
  410|   708k|{
  411|   708k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  412|   708k|    UWORD16 ui2_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
  413|   708k|    UWORD8 *pu1_inv_scan = ps_dec->pu1_inv_scan;
  414|   708k|    WORD16 *pi2_coeff_data = ps_dec->pi2_coeff_data;
  415|       |
  416|   708k|    PROFILE_DISABLE_UNPACK_LUMA()
  ------------------
  |  |  112|   708k|#define PROFILE_DISABLE_UNPACK_LUMA() ;
  ------------------
  417|   708k|    if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (417:8): [True: 692k, False: 16.0k]
  ------------------
  418|   692k|    {
  419|   692k|        UWORD32 u4_luma_dc_only_csbp = 0;
  420|   692k|        UWORD32 u4_temp = 0;
  421|   692k|        WORD16* pi2_dc_val = NULL;
  422|       |        /*
  423|       |         * Reserve the pointer to dc vals. The dc vals will be copied
  424|       |         * after unpacking of ac vals since memset to 0 inside.
  425|       |         */
  426|   692k|        if(intra_flag && (u1_mb_type != I_4x4_MB))
  ------------------
  |  |  417|   110k|#define I_4x4_MB    0
  ------------------
  |  Branch (426:12): [True: 110k, False: 581k]
  |  Branch (426:26): [True: 89.8k, False: 21.1k]
  ------------------
  427|  89.8k|        {
  428|  89.8k|            if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0))
  ------------------
  |  |   54|  89.8k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 37.3k, False: 52.5k]
  |  |  ------------------
  ------------------
  429|  37.3k|            {
  430|  37.3k|                pi2_dc_val = (WORD16 *)ps_dec->pv_proc_tu_coeff_data;
  431|       |
  432|  37.3k|                ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val + 16);
  433|  37.3k|            }
  434|  89.8k|        }
  435|       |
  436|   692k|        if(ui2_luma_csbp)
  ------------------
  |  Branch (436:12): [True: 70.5k, False: 621k]
  ------------------
  437|  70.5k|        {
  438|  70.5k|            pi2_coeff_data = ps_dec->pi2_coeff_data;
  439|  70.5k|            u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
  440|  70.5k|                                          ps_cur_mb_info,
  441|  70.5k|                                          ui2_luma_csbp,
  442|  70.5k|                                          pi2_coeff_data);
  443|  70.5k|            u4_luma_dc_only_csbp = u4_temp;
  444|       |
  445|  70.5k|            pi2_coeff_data += 32;
  446|       |
  447|  70.5k|            ui2_luma_csbp = ui2_luma_csbp >> 2;
  448|  70.5k|            u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
  449|  70.5k|                                          ps_cur_mb_info,
  450|  70.5k|                                          ui2_luma_csbp,
  451|  70.5k|                                          pi2_coeff_data);
  452|       |
  453|  70.5k|            u4_luma_dc_only_csbp |= (u4_temp << 2);
  454|       |
  455|  70.5k|            pi2_coeff_data += 32 + 64;
  456|       |
  457|  70.5k|            ui2_luma_csbp = ui2_luma_csbp >> 6;
  458|  70.5k|            u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
  459|  70.5k|                                          ps_cur_mb_info,
  460|  70.5k|                                          ui2_luma_csbp,
  461|  70.5k|                                          pi2_coeff_data);
  462|       |
  463|  70.5k|            u4_luma_dc_only_csbp |= (u4_temp << 8);
  464|       |
  465|  70.5k|            pi2_coeff_data += 32;
  466|       |
  467|  70.5k|            ui2_luma_csbp = ui2_luma_csbp >> 2;
  468|  70.5k|            u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
  469|  70.5k|                                          ps_cur_mb_info,
  470|  70.5k|                                          ui2_luma_csbp,
  471|  70.5k|                                          pi2_coeff_data);
  472|  70.5k|            u4_luma_dc_only_csbp |= (u4_temp << 10);
  473|  70.5k|        }
  474|       |
  475|   692k|        if(pi2_dc_val != NULL)
  ------------------
  |  Branch (475:12): [True: 37.3k, False: 654k]
  ------------------
  476|  37.3k|        {
  477|  37.3k|            WORD32 i;
  478|  37.3k|            pi2_coeff_data = ps_dec->pi2_coeff_data;
  479|   186k|            for(i = 0; i < 4; i++)
  ------------------
  |  Branch (479:24): [True: 149k, False: 37.3k]
  ------------------
  480|   149k|            {
  481|   149k|                pi2_coeff_data[0] = pi2_dc_val[0];
  482|   149k|                pi2_coeff_data[4 * 16] = pi2_dc_val[4];
  483|   149k|                pi2_coeff_data[8 * 16] = pi2_dc_val[8];
  484|   149k|                pi2_coeff_data[12 * 16] = pi2_dc_val[12];
  485|       |
  486|   149k|                pi2_dc_val++; /* Point to next column */
  487|   149k|                pi2_coeff_data += 16;
  488|   149k|            }
  489|  37.3k|            u4_luma_dc_only_csbp = ps_cur_mb_info->u2_luma_csbp ^ 0xFFFF;
  490|  37.3k|        }
  491|   692k|        return u4_luma_dc_only_csbp;
  492|   692k|    }
  493|  16.0k|    else
  494|  16.0k|    {
  495|  16.0k|        UWORD32 u4_luma_dc_only_cbp = 0;
  496|  16.0k|        WORD32 dc_only_flag;
  497|  16.0k|        if(ui2_luma_csbp)
  ------------------
  |  Branch (497:12): [True: 10.7k, False: 5.25k]
  ------------------
  498|  10.7k|        {
  499|  10.7k|            pi2_coeff_data = ps_dec->pi2_coeff_data;
  500|  10.7k|            dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
  501|  10.7k|                                          ps_cur_mb_info,
  502|  10.7k|                                          ui2_luma_csbp,
  503|  10.7k|                                          pi2_coeff_data);
  504|  10.7k|            INSERT_BIT(u4_luma_dc_only_cbp, 0, dc_only_flag);
  ------------------
  |  |  109|  10.7k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  10.7k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  505|       |
  506|  10.7k|            pi2_coeff_data += 64;
  507|       |
  508|  10.7k|            ui2_luma_csbp = ui2_luma_csbp >> 2;
  509|  10.7k|            dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
  510|  10.7k|                                          ps_cur_mb_info,
  511|  10.7k|                                          ui2_luma_csbp,
  512|  10.7k|                                          pi2_coeff_data);
  513|       |
  514|  10.7k|            INSERT_BIT(u4_luma_dc_only_cbp, 1, dc_only_flag);
  ------------------
  |  |  109|  10.7k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  10.7k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  515|       |
  516|  10.7k|            pi2_coeff_data += 64;
  517|       |
  518|  10.7k|            ui2_luma_csbp = ui2_luma_csbp >> 6;
  519|  10.7k|            dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
  520|  10.7k|                                          ps_cur_mb_info,
  521|  10.7k|                                          ui2_luma_csbp,
  522|  10.7k|                                          pi2_coeff_data);
  523|       |
  524|  10.7k|            INSERT_BIT(u4_luma_dc_only_cbp, 2, dc_only_flag);
  ------------------
  |  |  109|  10.7k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  10.7k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  525|       |
  526|  10.7k|            pi2_coeff_data += 64;
  527|  10.7k|            ui2_luma_csbp = ui2_luma_csbp >> 2;
  528|  10.7k|            dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
  529|  10.7k|                                          ps_cur_mb_info,
  530|  10.7k|                                          ui2_luma_csbp,
  531|  10.7k|                                          pi2_coeff_data);
  532|  10.7k|            INSERT_BIT(u4_luma_dc_only_cbp, 3, dc_only_flag);
  ------------------
  |  |  109|  10.7k|#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | (bit << pos); }
  |  |  ------------------
  |  |  |  |  105|  10.7k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  |  |  ------------------
  ------------------
  533|  10.7k|        }
  534|  16.0k|        return u4_luma_dc_only_cbp;
  535|  16.0k|    }
  536|       |
  537|   708k|}
ih264d_unpack_chroma_coeff4x4_mb:
  541|   755k|{
  542|   755k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  543|   755k|    UWORD16 ui2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp;
  544|   755k|    UWORD8 *pu1_inv_scan = ps_dec->pu1_inv_scan;
  545|   755k|    WORD16 *pi2_coeff_data = ps_dec->pi2_coeff_data;
  546|   755k|    WORD32 i;
  547|   755k|    WORD16 *pi2_dc_val_u = NULL;
  548|   755k|    WORD16 *pi2_dc_val_v = NULL;
  549|       |
  550|   755k|    PROFILE_DISABLE_UNPACK_CHROMA()
  ------------------
  |  |  113|   755k|#define PROFILE_DISABLE_UNPACK_CHROMA() ;
  ------------------
  551|   755k|    if((ps_cur_mb_info->u1_cbp >> 4) == CBPC_ALLZERO)
  ------------------
  |  |  507|   755k|#define CBPC_ALLZERO    0
  ------------------
  |  Branch (551:8): [True: 664k, False: 91.4k]
  ------------------
  552|   664k|        return;
  553|       |
  554|       |    /*
  555|       |     * Reserve the pointers to dc vals. The dc vals will be copied
  556|       |     * after unpacking of ac vals since memset to 0 inside.
  557|       |     */
  558|  91.4k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,1))
  ------------------
  |  |   54|  91.4k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 61.3k, False: 30.0k]
  |  |  ------------------
  ------------------
  559|  61.3k|    {
  560|  61.3k|        pi2_dc_val_u = (WORD16 *)ps_dec->pv_proc_tu_coeff_data;
  561|       |
  562|  61.3k|        ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val_u + 4);
  563|  61.3k|    }
  564|  91.4k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,2))
  ------------------
  |  |   54|  91.4k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 59.5k, False: 31.8k]
  |  |  ------------------
  ------------------
  565|  59.5k|    {
  566|  59.5k|        pi2_dc_val_v = (WORD16 *)ps_dec->pv_proc_tu_coeff_data;
  567|       |
  568|  59.5k|        ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val_v + 4);
  569|  59.5k|    }
  570|       |
  571|  91.4k|    if((ps_cur_mb_info->u1_cbp >> 4) == CBPC_NONZERO)
  ------------------
  |  |  509|  91.4k|#define CBPC_NONZERO    2
  ------------------
  |  Branch (571:8): [True: 30.1k, False: 61.2k]
  ------------------
  572|  30.1k|    {
  573|  30.1k|        pi2_coeff_data = ps_dec->pi2_coeff_data;
  574|  30.1k|        ih264d_unpack_coeff4x4_8x8blk_chroma(ps_dec,
  575|  30.1k|                                             ps_cur_mb_info,
  576|  30.1k|                                             ui2_chroma_csbp,
  577|  30.1k|                                             pi2_coeff_data);
  578|       |
  579|  30.1k|        pi2_coeff_data += 64;
  580|  30.1k|        ui2_chroma_csbp = ui2_chroma_csbp >> 4;
  581|  30.1k|        ih264d_unpack_coeff4x4_8x8blk_chroma(ps_dec,
  582|  30.1k|                                             ps_cur_mb_info,
  583|  30.1k|                                             ui2_chroma_csbp,
  584|  30.1k|                                             pi2_coeff_data);
  585|       |
  586|  30.1k|    }
  587|       |
  588|  91.4k|    pi2_coeff_data = ps_dec->pi2_coeff_data;
  589|  91.4k|    if(pi2_dc_val_u != NULL)
  ------------------
  |  Branch (589:8): [True: 61.3k, False: 30.0k]
  ------------------
  590|  61.3k|    {
  591|  61.3k|        pi2_coeff_data[0] = *pi2_dc_val_u++;
  592|  61.3k|        pi2_coeff_data[1 * 16] = *pi2_dc_val_u++;
  593|  61.3k|        pi2_coeff_data[2 * 16] = *pi2_dc_val_u++;
  594|  61.3k|        pi2_coeff_data[3 * 16] = *pi2_dc_val_u++;
  595|  61.3k|    }
  596|  30.0k|    else
  597|  30.0k|    {
  598|  30.0k|        pi2_coeff_data[0] = 0;
  599|  30.0k|        pi2_coeff_data[1 * 16] = 0;
  600|  30.0k|        pi2_coeff_data[2 * 16] = 0;
  601|  30.0k|        pi2_coeff_data[3 * 16] = 0;
  602|  30.0k|    }
  603|  91.4k|    pi2_coeff_data += 64;
  604|  91.4k|    if(pi2_dc_val_v != NULL)
  ------------------
  |  Branch (604:8): [True: 59.5k, False: 31.8k]
  ------------------
  605|  59.5k|    {
  606|  59.5k|        pi2_coeff_data[0] = *pi2_dc_val_v++;
  607|  59.5k|        pi2_coeff_data[1 * 16] = *pi2_dc_val_v++;
  608|  59.5k|        pi2_coeff_data[2 * 16] = *pi2_dc_val_v++;
  609|  59.5k|        pi2_coeff_data[3 * 16] = *pi2_dc_val_v++;
  610|  59.5k|    }
  611|  31.8k|    else
  612|  31.8k|    {
  613|  31.8k|        pi2_coeff_data[0] = 0;
  614|  31.8k|        pi2_coeff_data[1 * 16] = 0;
  615|  31.8k|        pi2_coeff_data[2 * 16] = 0;
  616|  31.8k|        pi2_coeff_data[3 * 16] = 0;
  617|  31.8k|    }
  618|  91.4k|}
ih264d_unpack_luma_coeff8x8_mb:
  621|  47.7k|{
  622|  47.7k|    WORD32 blk_8x8_cnt;
  623|  47.7k|    WORD16 *pi2_out_coeff_data = ps_dec->pi2_coeff_data;
  624|  47.7k|    UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
  625|  47.7k|    UWORD8 *pu1_inv_scan;
  626|  47.7k|    UWORD32 u4_luma_dc_only_cbp = 0;
  627|       |
  628|  47.7k|    PROFILE_DISABLE_UNPACK_LUMA()
  ------------------
  |  |  112|  47.7k|#define PROFILE_DISABLE_UNPACK_LUMA() ;
  ------------------
  629|  47.7k|    if(!u1_field_coding_flag)
  ------------------
  |  Branch (629:8): [True: 47.7k, False: 0]
  ------------------
  630|  47.7k|    {
  631|       |        /*******************************************************************/
  632|       |        /* initializing inverse scan  matrices                             */
  633|       |        /*******************************************************************/
  634|  47.7k|        pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_prog8x8_cabac;
  635|  47.7k|    }
  636|      0|    else
  637|      0|    {
  638|       |        /*******************************************************************/
  639|       |        /* initializing inverse scan  matrices                             */
  640|       |        /*******************************************************************/
  641|      0|        pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_int8x8_cabac;
  642|      0|    }
  643|       |
  644|   238k|    for(blk_8x8_cnt = 0; blk_8x8_cnt < 4; blk_8x8_cnt++)
  ------------------
  |  Branch (644:26): [True: 190k, False: 47.7k]
  ------------------
  645|   190k|    {
  646|   190k|        if(CHECKBIT(ps_cur_mb_info->u1_cbp, blk_8x8_cnt))
  ------------------
  |  |   54|   190k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 111k, False: 78.8k]
  |  |  ------------------
  ------------------
  647|   111k|        {
  648|   111k|            tu_blk8x8_coeff_data_t *ps_tu_8x8 = (tu_blk8x8_coeff_data_t *)ps_dec->pv_proc_tu_coeff_data;
  649|   111k|            UWORD32 u4_sig_coeff_map;
  650|   111k|            WORD32 idx = 0;
  651|   111k|            WORD16 *pi2_coeff_data = &ps_tu_8x8->ai2_level[0];
  652|   111k|            WORD32 num_coeff = 0;
  653|       |
  654|       |            /* memset 64 coefficient to zero */
  655|   111k|            memset(pi2_out_coeff_data,0,64*sizeof(WORD16));
  656|       |
  657|   111k|            u4_sig_coeff_map = ps_tu_8x8->au4_sig_coeff_map[1];
  658|       |
  659|   176k|            while(u4_sig_coeff_map)
  ------------------
  |  Branch (659:19): [True: 64.2k, False: 111k]
  ------------------
  660|  64.2k|            {
  661|  64.2k|                idx = CLZ(u4_sig_coeff_map);
  662|       |
  663|  64.2k|                idx = 31 - idx;
  664|  64.2k|                RESET_BIT(u4_sig_coeff_map,idx);
  ------------------
  |  |  105|  64.2k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  ------------------
  665|       |
  666|  64.2k|                idx = pu1_inv_scan[idx + 32];
  667|  64.2k|                pi2_out_coeff_data[idx] = *pi2_coeff_data++;
  668|  64.2k|                num_coeff++;
  669|  64.2k|            }
  670|       |
  671|   111k|            u4_sig_coeff_map = ps_tu_8x8->au4_sig_coeff_map[0];
  672|   361k|            while(u4_sig_coeff_map)
  ------------------
  |  Branch (672:19): [True: 249k, False: 111k]
  ------------------
  673|   249k|            {
  674|   249k|                idx = CLZ(u4_sig_coeff_map);
  675|       |
  676|   249k|                idx = 31 - idx;
  677|   249k|                RESET_BIT(u4_sig_coeff_map,idx);
  ------------------
  |  |  105|   249k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  ------------------
  678|       |
  679|   249k|                idx = pu1_inv_scan[idx];
  680|   249k|                pi2_out_coeff_data[idx] = *pi2_coeff_data++;
  681|   249k|                num_coeff++;
  682|   249k|            }
  683|       |
  684|   111k|            if((num_coeff == 1) && (idx == 0))
  ------------------
  |  Branch (684:16): [True: 37.5k, False: 74.3k]
  |  Branch (684:36): [True: 26.0k, False: 11.5k]
  ------------------
  685|  26.0k|            {
  686|  26.0k|                SET_BIT(u4_luma_dc_only_cbp,blk_8x8_cnt);
  ------------------
  |  |  106|  26.0k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  687|  26.0k|            }
  688|       |
  689|       |
  690|   111k|            {
  691|   111k|                WORD32 offset;
  692|   111k|                offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_8x8;
  693|   111k|                offset = ALIGN4(offset);
  ------------------
  |  |   52|   111k|#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)
  ------------------
  694|   111k|                ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + offset);
  695|   111k|            }
  696|   111k|        }
  697|   190k|        pi2_out_coeff_data += 64;
  698|   190k|    }
  699|       |
  700|  47.7k|    return u4_luma_dc_only_cbp;
  701|  47.7k|}
ih264d_process_intra_mb:
  719|   143k|{
  720|   143k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  721|   143k|    UWORD8 uc_temp = ps_cur_mb_info->u1_mb_ngbr_availablity;
  722|   143k|    UWORD8 u1_top_available = BOOLEAN(uc_temp & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|   143k|#define BOOLEAN(x) (!!(x))
  ------------------
  723|   143k|    UWORD8 u1_left_available = BOOLEAN(uc_temp & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|   143k|#define BOOLEAN(x) (!!(x))
  ------------------
  724|   143k|    UWORD8 u1_use_top_right_mb = BOOLEAN(uc_temp & TOP_RIGHT_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|   143k|#define BOOLEAN(x) (!!(x))
  ------------------
  725|   143k|    UWORD8 u1_use_top_left_mb = BOOLEAN(uc_temp & TOP_LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|   143k|#define BOOLEAN(x) (!!(x))
  ------------------
  726|   143k|    UWORD8 uc_useTopMB = u1_top_available;
  727|   143k|    UWORD16 u2_use_left_mb = u1_left_available;
  728|   143k|    UWORD16 u2_use_left_mb_pack;
  729|   143k|    UWORD8 *pu1_luma_pred_buffer;
  730|       |    /* CHANGED CODE */
  731|   143k|    UWORD8 *pu1_luma_rec_buffer;
  732|   143k|    UWORD8 *puc_top;
  733|       |
  734|   143k|    mb_neigbour_params_t *ps_left_mb;
  735|   143k|    mb_neigbour_params_t *ps_top_mb;
  736|   143k|    mb_neigbour_params_t *ps_top_right_mb;
  737|   143k|    mb_neigbour_params_t *ps_curmb;
  738|       |
  739|   143k|    UWORD16 u2_mbx = ps_cur_mb_info->u2_mbx;
  740|   143k|    UWORD32 ui_pred_width, ui_rec_width;
  741|   143k|    WORD16 *pi2_y_coeff;
  742|   143k|    UWORD8 u1_mbaff, u1_topmb, u1_mb_field_decoding_flag;
  743|   143k|    UWORD32 u4_num_pmbair;
  744|   143k|    UWORD16 ui2_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
  745|   143k|    UWORD8 *pu1_yleft, *pu1_ytop_left;
  746|       |    /* Chroma variables*/
  747|   143k|    UWORD8 *pu1_top_u;
  748|   143k|    UWORD8 *pu1_uleft;
  749|   143k|    UWORD8 *pu1_u_top_left;
  750|       |    /* CHANGED CODE */
  751|   143k|    UWORD8 *pu1_mb_cb_rei1_buffer, *pu1_mb_cr_rei1_buffer;
  752|   143k|    UWORD32 u4_recwidth_cr;
  753|       |    /* CHANGED CODE */
  754|   143k|    tfr_ctxt_t *ps_frame_buf = ps_dec->ps_frame_buf_ip_recon;
  755|   143k|    UWORD32 u4_luma_dc_only_csbp = 0;
  756|   143k|    UWORD32 u4_luma_dc_only_cbp = 0;
  757|       |
  758|   143k|    UWORD8 *pu1_prev_intra4x4_pred_mode_data = (UWORD8 *)ps_dec->pv_proc_tu_coeff_data;                 //Pointer to keep track of intra4x4_pred_mode data in pv_proc_tu_coeff_data buffer
  759|   143k|    u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  760|   143k|    u1_topmb = ps_cur_mb_info->u1_topmb;
  761|   143k|    u4_num_pmbair = (u4_mb_num >> u1_mbaff);
  762|       |
  763|       |
  764|       |    /*--------------------------------------------------------------------*/
  765|       |    /* Find the current MB's mb params                                    */
  766|       |    /*--------------------------------------------------------------------*/
  767|   143k|    u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
  768|       |
  769|   143k|    ps_curmb = ps_cur_mb_info->ps_curmb;
  770|   143k|    ps_top_mb = ps_cur_mb_info->ps_top_mb;
  771|   143k|    ps_left_mb = ps_cur_mb_info->ps_left_mb;
  772|   143k|    ps_top_right_mb = ps_cur_mb_info->ps_top_right_mb;
  773|       |
  774|       |    /*--------------------------------------------------------------------*/
  775|       |    /* Check whether neighbouring MB is Inter MB and                      */
  776|       |    /* constrained intra pred is 1.                                       */
  777|       |    /*--------------------------------------------------------------------*/
  778|   143k|    u2_use_left_mb_pack = (u2_use_left_mb << 8) + u2_use_left_mb;
  779|       |
  780|   143k|    if(ps_dec->ps_cur_pps->u1_constrained_intra_pred_flag)
  ------------------
  |  Branch (780:8): [True: 43.8k, False: 99.3k]
  ------------------
  781|  43.8k|    {
  782|  43.8k|        UWORD8 u1_left = (UWORD8)u2_use_left_mb;
  783|       |
  784|  43.8k|        uc_useTopMB = uc_useTopMB
  ------------------
  |  Branch (784:23): [True: 35.1k, False: 8.67k]
  ------------------
  785|  35.1k|                        && ((ps_top_mb->u1_mb_type != P_MB)
  ------------------
  |  |  419|  35.1k|#define P_MB        2
  ------------------
  |  Branch (785:29): [True: 25.8k, False: 9.28k]
  ------------------
  786|  25.8k|                                        && (ps_top_mb->u1_mb_type != B_MB));
  ------------------
  |  |  420|  25.8k|#define B_MB        3
  ------------------
  |  Branch (786:44): [True: 25.3k, False: 483]
  ------------------
  787|  43.8k|        u2_use_left_mb = u2_use_left_mb
  ------------------
  |  Branch (787:26): [True: 29.3k, False: 14.4k]
  ------------------
  788|  29.3k|                        && ((ps_left_mb->u1_mb_type != P_MB)
  ------------------
  |  |  419|  29.3k|#define P_MB        2
  ------------------
  |  Branch (788:29): [True: 22.7k, False: 6.59k]
  ------------------
  789|  22.7k|                                        && (ps_left_mb->u1_mb_type != B_MB));
  ------------------
  |  |  420|  22.7k|#define B_MB        3
  ------------------
  |  Branch (789:44): [True: 22.2k, False: 449]
  ------------------
  790|       |
  791|  43.8k|        u2_use_left_mb_pack = (u2_use_left_mb << 8) + u2_use_left_mb;
  792|  43.8k|        if(u1_mbaff)
  ------------------
  |  Branch (792:12): [True: 0, False: 43.8k]
  ------------------
  793|      0|        {
  794|      0|            if(u1_mb_field_decoding_flag ^ ps_left_mb->u1_mb_fld)
  ------------------
  |  Branch (794:16): [True: 0, False: 0]
  ------------------
  795|      0|            {
  796|      0|                u1_left = u1_left
  ------------------
  |  Branch (796:27): [True: 0, False: 0]
  ------------------
  797|      0|                                && (((ps_left_mb + 1)->u1_mb_type != P_MB)
  ------------------
  |  |  419|      0|#define P_MB        2
  ------------------
  |  Branch (797:37): [True: 0, False: 0]
  ------------------
  798|      0|                                                && ((ps_left_mb + 1)->u1_mb_type
  ------------------
  |  Branch (798:52): [True: 0, False: 0]
  ------------------
  799|      0|                                                                != B_MB));
  ------------------
  |  |  420|      0|#define B_MB        3
  ------------------
  800|      0|                u2_use_left_mb = u2_use_left_mb && u1_left;
  ------------------
  |  Branch (800:34): [True: 0, False: 0]
  |  Branch (800:52): [True: 0, False: 0]
  ------------------
  801|      0|                if(u1_mb_field_decoding_flag)
  ------------------
  |  Branch (801:20): [True: 0, False: 0]
  ------------------
  802|      0|                    u2_use_left_mb_pack = (u1_left << 8)
  803|      0|                                    + (u2_use_left_mb_pack & 0xff);
  804|      0|                else
  805|      0|                    u2_use_left_mb_pack = (u2_use_left_mb << 8)
  806|      0|                                    + (u2_use_left_mb);
  807|      0|            }
  808|      0|        }
  809|  43.8k|        u1_use_top_right_mb =
  810|  43.8k|                        u1_use_top_right_mb
  ------------------
  |  Branch (810:25): [True: 25.3k, False: 18.5k]
  ------------------
  811|  25.3k|                                        && ((ps_top_right_mb->u1_mb_type != P_MB)
  ------------------
  |  |  419|  25.3k|#define P_MB        2
  ------------------
  |  Branch (811:45): [True: 20.9k, False: 4.37k]
  ------------------
  812|  20.9k|                                                        && (ps_top_right_mb->u1_mb_type
  ------------------
  |  Branch (812:60): [True: 20.5k, False: 351]
  ------------------
  813|  20.9k|                                                                        != B_MB));
  ------------------
  |  |  420|  20.9k|#define B_MB        3
  ------------------
  814|       |
  815|  43.8k|        u1_use_top_left_mb =
  816|  43.8k|                        u1_use_top_left_mb
  ------------------
  |  Branch (816:25): [True: 24.4k, False: 19.3k]
  ------------------
  817|  24.4k|                                        && ((ps_cur_mb_info->u1_topleft_mbtype != P_MB)
  ------------------
  |  |  419|  24.4k|#define P_MB        2
  ------------------
  |  Branch (817:45): [True: 19.8k, False: 4.55k]
  ------------------
  818|  19.8k|                                                        && (ps_cur_mb_info->u1_topleft_mbtype
  ------------------
  |  Branch (818:60): [True: 19.4k, False: 434]
  ------------------
  819|  19.8k|                                                                        != B_MB));
  ------------------
  |  |  420|  19.8k|#define B_MB        3
  ------------------
  820|  43.8k|    }
  821|       |
  822|       |    /*********************Common pointer calculations *************************/
  823|       |    /* CHANGED CODE */
  824|   143k|    pu1_luma_pred_buffer = ps_dec->pu1_y;
  825|   143k|    pu1_luma_rec_buffer = ps_frame_buf->pu1_dest_y + (u4_num_pmbair << 4);
  826|   143k|    pu1_mb_cb_rei1_buffer = ps_frame_buf->pu1_dest_u
  827|   143k|                    + (u4_num_pmbair << 3) * YUV420SP_FACTOR;
  ------------------
  |  |  119|   143k|#define YUV420SP_FACTOR 2
  ------------------
  828|   143k|    pu1_mb_cr_rei1_buffer = ps_frame_buf->pu1_dest_v + (u4_num_pmbair << 3);
  829|   143k|    ui_pred_width = MB_SIZE;
  ------------------
  |  |  554|   143k|#define MB_SIZE             16
  ------------------
  830|   143k|    ui_rec_width = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
  831|   143k|    u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
  832|       |    /************* Current and top luma pointer *****************/
  833|       |
  834|   143k|    if(u1_mbaff)
  ------------------
  |  Branch (834:8): [True: 0, False: 143k]
  ------------------
  835|      0|    {
  836|      0|        if(u1_topmb == 0)
  ------------------
  |  Branch (836:12): [True: 0, False: 0]
  ------------------
  837|      0|        {
  838|      0|            pu1_luma_rec_buffer += (
  839|      0|                            u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (839:29): [True: 0, False: 0]
  ------------------
  840|      0|                                            (ui_rec_width >> 1) :
  841|      0|                                            (ui_rec_width << 4));
  842|      0|            pu1_mb_cb_rei1_buffer += (
  843|      0|                            u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (843:29): [True: 0, False: 0]
  ------------------
  844|      0|                                            (u4_recwidth_cr >> 1) :
  845|      0|                                            (u4_recwidth_cr << 3));
  846|      0|            pu1_mb_cr_rei1_buffer += (
  847|      0|                            u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (847:29): [True: 0, False: 0]
  ------------------
  848|      0|                                            (u4_recwidth_cr >> 1) :
  849|      0|                                            (u4_recwidth_cr << 3));
  850|      0|        }
  851|      0|    }
  852|       |
  853|       |    /* CHANGED CODE */
  854|   143k|    if(ps_dec->u4_use_intrapred_line_copy == 1)
  ------------------
  |  Branch (854:8): [True: 143k, False: 0]
  ------------------
  855|   143k|    {
  856|   143k|        puc_top = ps_dec->pu1_prev_y_intra_pred_line + (ps_cur_mb_info->u2_mbx << 4);
  857|   143k|        pu1_top_u = ps_dec->pu1_prev_u_intra_pred_line
  858|   143k|                        + (ps_cur_mb_info->u2_mbx << 3) * YUV420SP_FACTOR;
  ------------------
  |  |  119|   143k|#define YUV420SP_FACTOR 2
  ------------------
  859|   143k|    }
  860|      0|    else
  861|      0|    {
  862|      0|        puc_top = pu1_luma_rec_buffer - ui_rec_width;
  863|      0|        pu1_top_u = pu1_mb_cb_rei1_buffer - u4_recwidth_cr;
  864|      0|    }
  865|       |    /* CHANGED CODE */
  866|       |
  867|       |    /************* Left pointer *****************/
  868|   143k|    pu1_yleft = pu1_luma_rec_buffer - 1;
  869|   143k|    pu1_uleft = pu1_mb_cb_rei1_buffer - 1 * YUV420SP_FACTOR;
  ------------------
  |  |  119|   143k|#define YUV420SP_FACTOR 2
  ------------------
  870|       |
  871|       |    /**************Top Left pointer calculation**********/
  872|   143k|    pu1_ytop_left = puc_top - 1;
  873|   143k|    pu1_u_top_left = pu1_top_u - 1 * YUV420SP_FACTOR;
  ------------------
  |  |  119|   143k|#define YUV420SP_FACTOR 2
  ------------------
  874|       |
  875|       |    /* CHANGED CODE */
  876|   143k|    PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|   143k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
  877|   143k|    {
  878|   143k|        pu1_prev_intra4x4_pred_mode_data = (UWORD8 *)ps_dec->pv_proc_tu_coeff_data;
  879|   143k|        if(u1_mb_type == I_4x4_MB && ps_cur_mb_info->u1_tran_form8x8 == 0)
  ------------------
  |  |  417|   286k|#define I_4x4_MB    0
  ------------------
  |  Branch (879:12): [True: 53.2k, False: 89.9k]
  |  Branch (879:38): [True: 21.1k, False: 32.1k]
  ------------------
  880|  21.1k|        {
  881|  21.1k|            ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + 32);
  882|       |
  883|  21.1k|        }
  884|   122k|        else if (u1_mb_type == I_4x4_MB && ps_cur_mb_info->u1_tran_form8x8 == 1)
  ------------------
  |  |  417|   244k|#define I_4x4_MB    0
  ------------------
  |  Branch (884:18): [True: 32.1k, False: 89.9k]
  |  Branch (884:44): [True: 32.1k, False: 0]
  ------------------
  885|  32.1k|        {
  886|  32.1k|            ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + 8);
  887|  32.1k|        }
  888|   143k|    }
  889|   143k|    if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (889:8): [True: 110k, False: 32.1k]
  ------------------
  890|   110k|    {
  891|   110k|        u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
  892|   110k|                                       ps_cur_mb_info,
  893|   110k|                                       1);
  894|   110k|    }
  895|  32.1k|    else
  896|  32.1k|    {
  897|  32.1k|        if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (897:12): [True: 10.1k, False: 22.0k]
  ------------------
  898|  10.1k|        {
  899|  10.1k|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
  900|  10.1k|                                           ps_cur_mb_info,
  901|  10.1k|                                           1);
  902|  10.1k|        }
  903|  22.0k|        else
  904|  22.0k|        {
  905|  22.0k|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec,
  906|  22.0k|                                           ps_cur_mb_info);
  907|  22.0k|        }
  908|  32.1k|    }
  909|       |
  910|   143k|    pi2_y_coeff = ps_dec->pi2_coeff_data;
  911|       |
  912|   143k|    if(u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|   143k|#define I_4x4_MB    0
  ------------------
  |  Branch (912:8): [True: 89.9k, False: 53.2k]
  ------------------
  913|  89.9k|    {
  914|  89.9k|        UWORD8 u1_intrapred_mode = MB_TYPE_TO_INTRA_16x16_MODE(u1_mb_type);
  ------------------
  |  |   45|  89.9k|#define MB_TYPE_TO_INTRA_16x16_MODE(x) ((x - 1) & 0x03)
  ------------------
  915|       |        /*--------------------------------------------------------------------*/
  916|       |        /* 16x16 IntraPrediction                                              */
  917|       |        /*--------------------------------------------------------------------*/
  918|  89.9k|        {
  919|  89.9k|            UWORD8 u1_packed_modes = (u1_top_available << 1)
  920|  89.9k|                            + u1_left_available;
  921|  89.9k|            UWORD8 u1_err_code =
  922|  89.9k|                            (u1_intrapred_mode & 1) ?
  ------------------
  |  Branch (922:29): [True: 32.5k, False: 57.4k]
  ------------------
  923|  32.5k|                                            u1_intrapred_mode :
  924|  89.9k|                                            (u1_intrapred_mode ^ 2);
  925|       |
  926|  89.9k|            if((u1_err_code & u1_packed_modes) ^ u1_err_code)
  ------------------
  |  Branch (926:16): [True: 12.4k, False: 77.5k]
  ------------------
  927|  12.4k|            {
  928|  12.4k|                u1_intrapred_mode = 0;
  929|  12.4k|                ps_dec->i4_error_code = ERROR_INTRAPRED;
  930|  12.4k|            }
  931|  89.9k|        }
  932|  89.9k|        {
  933|       |            /* Align the size to multiple of 8, so that SIMD functions
  934|       |               can read 64 bits at a time. Only 33 bytes are actaully used */
  935|  89.9k|            UWORD8 au1_ngbr_pels[40];
  936|       |            /* Get neighbour pixels */
  937|       |            /* left pels */
  938|  89.9k|            if(u2_use_left_mb)
  ------------------
  |  Branch (938:16): [True: 64.8k, False: 25.1k]
  ------------------
  939|  64.8k|            {
  940|  64.8k|                WORD32 i;
  941|  1.10M|                for(i = 0; i < 16; i++)
  ------------------
  |  Branch (941:28): [True: 1.03M, False: 64.8k]
  ------------------
  942|  1.03M|                    au1_ngbr_pels[16 - 1 - i] = pu1_yleft[i * ui_rec_width];
  943|  64.8k|            }
  944|  25.1k|            else
  945|  25.1k|            {
  946|  25.1k|                memset(au1_ngbr_pels, 0, 16);
  947|  25.1k|            }
  948|       |
  949|       |            /* top left pels */
  950|  89.9k|            au1_ngbr_pels[16] = *pu1_ytop_left;
  951|       |
  952|       |            /* top pels */
  953|  89.9k|            if(uc_useTopMB)
  ------------------
  |  Branch (953:16): [True: 66.5k, False: 23.4k]
  ------------------
  954|  66.5k|            {
  955|  66.5k|                memcpy(au1_ngbr_pels + 16 + 1, puc_top, 16);
  956|  66.5k|            }
  957|  23.4k|            else
  958|  23.4k|            {
  959|  23.4k|                memset(au1_ngbr_pels + 16 + 1, 0, 16);
  960|  23.4k|            }
  961|  89.9k|            PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|  89.9k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
  962|  89.9k|            ps_dec->apf_intra_pred_luma_16x16[u1_intrapred_mode](
  963|  89.9k|                            au1_ngbr_pels, pu1_luma_rec_buffer, 1, ui_rec_width,
  964|  89.9k|                            ((uc_useTopMB << 2) | u2_use_left_mb));
  965|  89.9k|        }
  966|  89.9k|        {
  967|  89.9k|            UWORD32 i;
  968|  89.9k|            WORD16 ai2_tmp[16];
  969|  1.52M|            for(i = 0; i < 16; i++)
  ------------------
  |  Branch (969:24): [True: 1.43M, False: 89.9k]
  ------------------
  970|  1.43M|            {
  971|  1.43M|                WORD16 *pi2_level = pi2_y_coeff + (i << 4);
  972|  1.43M|                UWORD8 *pu1_pred_sblk = pu1_luma_rec_buffer
  973|  1.43M|                                + ((i & 0x3) * BLK_SIZE)
  ------------------
  |  |  556|  1.43M|#define BLK_SIZE             4
  ------------------
  974|  1.43M|                                + (i >> 2) * (ui_rec_width << 2);
  975|  1.43M|                PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  1.43M|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  976|  1.43M|                {
  977|  1.43M|                    if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i))
  ------------------
  |  |   54|  1.43M|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 10.6k, False: 1.42M]
  |  |  ------------------
  ------------------
  978|  10.6k|                    {
  979|  10.6k|                        ps_dec->pf_iquant_itrans_recon_luma_4x4(
  980|  10.6k|                                        pi2_level,
  981|  10.6k|                                        pu1_pred_sblk,
  982|  10.6k|                                        pu1_pred_sblk,
  983|  10.6k|                                        ui_rec_width,
  984|  10.6k|                                        ui_rec_width,
  985|  10.6k|                                        gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
  986|  10.6k|                                        (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
  987|  10.6k|                                        ps_cur_mb_info->u1_qp_div6, ai2_tmp, 1,
  988|  10.6k|                                        pi2_level);
  989|  10.6k|                    }
  990|  1.42M|                    else if((CHECKBIT(u4_luma_dc_only_csbp, i)) && pi2_level[0] != 0)
  ------------------
  |  |   54|  1.42M|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
  |  Branch (990:29): [True: 589k, False: 839k]
  |  Branch (990:68): [True: 537k, False: 51.3k]
  ------------------
  991|   537k|                    {
  992|   537k|                        ps_dec->pf_iquant_itrans_recon_luma_4x4_dc(
  993|   537k|                                        pi2_level,
  994|   537k|                                        pu1_pred_sblk,
  995|   537k|                                        pu1_pred_sblk,
  996|   537k|                                        ui_rec_width,
  997|   537k|                                        ui_rec_width,
  998|   537k|                                        gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
  999|   537k|                                        (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
 1000|   537k|                                        ps_cur_mb_info->u1_qp_div6, ai2_tmp, 1,
 1001|   537k|                                        pi2_level);
 1002|   537k|                    }
 1003|  1.43M|                }
 1004|  1.43M|            }
 1005|  89.9k|        }
 1006|  89.9k|    }
 1007|  53.2k|    else if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (1007:13): [True: 21.1k, False: 32.1k]
  ------------------
 1008|  21.1k|    {
 1009|  21.1k|        UWORD8 u1_is_left_sub_block, u1_is_top_sub_block = uc_useTopMB;
 1010|  21.1k|        UWORD8 u1_sub_blk_x, u1_sub_blk_y, u1_sub_mb_num;
 1011|  21.1k|        WORD8 i1_top_pred_mode;
 1012|  21.1k|        WORD8 i1_left_pred_mode;
 1013|  21.1k|        UWORD8 *pu1_top, *pu1_left, *pu1_top_left, *pu1_top_right;
 1014|  21.1k|        WORD8 *pi1_cur_pred_mode, *pi1_left_pred_mode, *pc_topPredMode;
 1015|  21.1k|        UWORD16 ui2_left_pred_buf_width = 0xffff;
 1016|  21.1k|        WORD8 i1_intra_pred;
 1017|  21.1k|        UWORD8 *pu1_prev_intra4x4_pred_mode_flag = pu1_prev_intra4x4_pred_mode_data;
 1018|  21.1k|        UWORD8 *pu1_rem_intra4x4_pred_mode = pu1_prev_intra4x4_pred_mode_data + 16;
 1019|  21.1k|        WORD16 *pi2_y_coeff1;
 1020|  21.1k|        UWORD8 u1_cur_sub_block;
 1021|  21.1k|        UWORD16 ui2_top_rt_mask;
 1022|       |
 1023|       |        /*--------------------------------------------------------------------*/
 1024|       |        /* 4x4 IntraPrediction                                                */
 1025|       |        /*--------------------------------------------------------------------*/
 1026|       |        /* Calculation of Top Right subblock mask                             */
 1027|       |        /*                                                                    */
 1028|       |        /* (a) Set it to default mask                                         */
 1029|       |        /*     [It has 0 for sublocks which will never have top-right sub block] */
 1030|       |        /*                                                                    */
 1031|       |        /* (b) If top MB is not available                                     */
 1032|       |        /*      Clear the bits of the first row sub blocks                    */
 1033|       |        /*                                                                    */
 1034|       |        /* (c) Set/Clear bit for top-right sublock of MB                      */
 1035|       |        /*      [5 sub-block in decoding order] based on TOP RIGHT MB availablity */
 1036|       |        /*--------------------------------------------------------------------*/
 1037|       |
 1038|  21.1k|        pu1_top = puc_top;
 1039|       |
 1040|  21.1k|        ui2_top_rt_mask = (u1_use_top_right_mb << 3) | (0x5750);
 1041|  21.1k|        if(uc_useTopMB)
  ------------------
  |  Branch (1041:12): [True: 14.7k, False: 6.39k]
  ------------------
 1042|  14.7k|            ui2_top_rt_mask |= 0x7;
 1043|       |
 1044|       |        /*Top Related initialisations*/
 1045|       |
 1046|       |
 1047|  21.1k|        pi1_cur_pred_mode = ps_cur_mb_info->ps_curmb->pi1_intrapredmodes;
 1048|  21.1k|        pc_topPredMode = ps_cur_mb_info->ps_top_mb->pi1_intrapredmodes;
 1049|       |        /*--------------------------------------
 1050|       |         if(u1_mbaff)
 1051|       |         {
 1052|       |
 1053|       |         pi1_cur_pred_mode += (u2_mbx << 2);
 1054|       |         pc_topPredMode = pi1_cur_pred_mode + ps_cur_mb_info->i1_offset;
 1055|       |         pi1_cur_pred_mode += (u1_topmb) ? 0: 4;
 1056|       |         }*/
 1057|       |
 1058|  21.1k|        if(u1_top_available)
  ------------------
  |  Branch (1058:12): [True: 15.8k, False: 5.22k]
  ------------------
 1059|  15.8k|        {
 1060|  15.8k|            if(ps_top_mb->u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  15.8k|#define I_4x4_MB    0
  ------------------
  |  Branch (1060:16): [True: 12.6k, False: 3.20k]
  ------------------
 1061|  12.6k|                *(WORD32*)pi1_cur_pred_mode = *(WORD32*)pc_topPredMode;
 1062|  3.20k|            else
 1063|  3.20k|                *(WORD32*)pi1_cur_pred_mode =
 1064|  3.20k|                                (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID;
  ------------------
  |  |  434|  2.03k|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
                                              (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID;
  ------------------
  |  |  433|  1.16k|#define NOT_VALID -1
  ------------------
  |  Branch (1064:33): [True: 2.03k, False: 1.16k]
  ------------------
 1065|  15.8k|        }
 1066|  5.22k|        else
 1067|  5.22k|            *(WORD32*)pi1_cur_pred_mode = NOT_VALID;
  ------------------
  |  |  433|  5.22k|#define NOT_VALID -1
  ------------------
 1068|       |        /* CHANGED CODE */
 1069|       |
 1070|       |        /* CHANGED CODE */
 1071|       |
 1072|       |        /*Left Related initialisations*/
 1073|  21.1k|        pi1_left_pred_mode = ps_dec->pi1_left_pred_mode;
 1074|  21.1k|        if(!u1_mbaff)
  ------------------
  |  Branch (1074:12): [True: 21.1k, False: 0]
  ------------------
 1075|  21.1k|        {
 1076|       |
 1077|  21.1k|            if(u1_left_available)
  ------------------
  |  Branch (1077:16): [True: 13.7k, False: 7.31k]
  ------------------
 1078|  13.7k|            {
 1079|       |
 1080|  13.7k|                if(ps_left_mb->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|  13.7k|#define I_4x4_MB    0
  ------------------
  |  Branch (1080:20): [True: 2.44k, False: 11.3k]
  ------------------
 1081|  2.44k|                    *(WORD32*)pi1_left_pred_mode =
 1082|  2.44k|                                    (u2_use_left_mb_pack) ?
  ------------------
  |  Branch (1082:37): [True: 1.56k, False: 885]
  ------------------
 1083|  1.56k|                                    DC_DC_DC_DC :
  ------------------
  |  |  434|  1.56k|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1084|  2.44k|                                                            NOT_VALID;
  ------------------
  |  |  433|    885|#define NOT_VALID -1
  ------------------
 1085|       |
 1086|  13.7k|            }
 1087|  7.31k|            else
 1088|  7.31k|            {
 1089|       |
 1090|  7.31k|                *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|  7.31k|#define NOT_VALID -1
  ------------------
 1091|  7.31k|            }
 1092|       |
 1093|  21.1k|        }
 1094|      0|        else
 1095|      0|        {
 1096|      0|            UWORD8 u1_curMbfld = ps_cur_mb_info->u1_mb_field_decodingflag;
 1097|      0|            UWORD8 u1_leftMbfld = ps_left_mb->u1_mb_fld;
 1098|       |
 1099|      0|            if(u1_curMbfld ^ u1_leftMbfld)
  ------------------
  |  Branch (1099:16): [True: 0, False: 0]
  ------------------
 1100|      0|            {
 1101|       |
 1102|      0|                if(u1_topmb
  ------------------
  |  Branch (1102:20): [True: 0, False: 0]
  ------------------
 1103|      0|                                | ((u1_topmb == 0)
  ------------------
  |  Branch (1103:36): [True: 0, False: 0]
  ------------------
 1104|      0|                                                && ((ps_curmb - 1)->u1_mb_type
  ------------------
  |  Branch (1104:52): [True: 0, False: 0]
  ------------------
 1105|      0|                                                                != I_4x4_MB)))
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
 1106|      0|                {
 1107|      0|                    if(u1_left_available)
  ------------------
  |  Branch (1107:24): [True: 0, False: 0]
  ------------------
 1108|      0|                    {
 1109|      0|                        if(ps_left_mb->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
  |  Branch (1109:28): [True: 0, False: 0]
  ------------------
 1110|      0|                        {
 1111|      0|                            if(CHECKBIT(u2_use_left_mb_pack,0) == 0)
  ------------------
  |  |   54|      0|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
  |  Branch (1111:32): [True: 0, False: 0]
  ------------------
 1112|      0|                                *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1113|      0|                            else
 1114|      0|                                *(WORD32*)pi1_left_pred_mode = DC_DC_DC_DC;
  ------------------
  |  |  434|      0|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1115|      0|                        }
 1116|      0|                    }
 1117|      0|                    else
 1118|      0|                        *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1119|       |
 1120|      0|                    if(u1_curMbfld)
  ------------------
  |  Branch (1120:24): [True: 0, False: 0]
  ------------------
 1121|      0|                    {
 1122|      0|                        if(u1_left_available)
  ------------------
  |  Branch (1122:28): [True: 0, False: 0]
  ------------------
 1123|      0|                        {
 1124|      0|                            if((ps_left_mb + 1)->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
  |  Branch (1124:32): [True: 0, False: 0]
  ------------------
 1125|      0|                            {
 1126|      0|                                if(u2_use_left_mb_pack >> 8)
  ------------------
  |  Branch (1126:36): [True: 0, False: 0]
  ------------------
 1127|      0|                                    *(WORD32*)(pi1_left_pred_mode + 4) =
 1128|      0|                                                    DC_DC_DC_DC;
  ------------------
  |  |  434|      0|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1129|      0|                                else
 1130|      0|                                    *(WORD32*)(pi1_left_pred_mode + 4) =
 1131|      0|                                                    NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1132|      0|                            }
 1133|      0|                        }
 1134|      0|                        else
 1135|      0|                            *(WORD32*)(pi1_left_pred_mode + 4) = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1136|      0|                        pi1_left_pred_mode[1] = pi1_left_pred_mode[2];
 1137|      0|                        pi1_left_pred_mode[2] = pi1_left_pred_mode[4];
 1138|      0|                        pi1_left_pred_mode[3] = pi1_left_pred_mode[6];
 1139|      0|                        *(WORD32*)(pi1_left_pred_mode + 4) =
 1140|      0|                                        *(WORD32*)pi1_left_pred_mode;
 1141|      0|                    }
 1142|      0|                    else
 1143|      0|                    {
 1144|       |
 1145|      0|                        pi1_left_pred_mode[7] = pi1_left_pred_mode[3];
 1146|      0|                        pi1_left_pred_mode[6] = pi1_left_pred_mode[3];
 1147|      0|                        pi1_left_pred_mode[5] = pi1_left_pred_mode[2];
 1148|      0|                        pi1_left_pred_mode[4] = pi1_left_pred_mode[2];
 1149|      0|                        pi1_left_pred_mode[3] = pi1_left_pred_mode[1];
 1150|      0|                        pi1_left_pred_mode[2] = pi1_left_pred_mode[1];
 1151|      0|                        pi1_left_pred_mode[1] = pi1_left_pred_mode[0];
 1152|      0|                    }
 1153|      0|                }
 1154|      0|                pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
  ------------------
  |  Branch (1154:39): [True: 0, False: 0]
  ------------------
 1155|      0|            }
 1156|      0|            else
 1157|      0|            {
 1158|       |
 1159|      0|                pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
  ------------------
  |  Branch (1159:39): [True: 0, False: 0]
  ------------------
 1160|      0|                if(u1_left_available)
  ------------------
  |  Branch (1160:20): [True: 0, False: 0]
  ------------------
 1161|      0|                {
 1162|       |
 1163|      0|                    if(ps_left_mb->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
  |  Branch (1163:24): [True: 0, False: 0]
  ------------------
 1164|      0|                        *(WORD32*)pi1_left_pred_mode =
 1165|      0|                                        (u2_use_left_mb_pack) ?
  ------------------
  |  Branch (1165:41): [True: 0, False: 0]
  ------------------
 1166|      0|                                        DC_DC_DC_DC :
  ------------------
  |  |  434|      0|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1167|      0|                                                                NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1168|      0|                }
 1169|      0|                else
 1170|      0|                    *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1171|      0|            }
 1172|      0|        }
 1173|       |        /* One time pointer initialisations*/
 1174|  21.1k|        pi2_y_coeff1 = pi2_y_coeff;
 1175|  21.1k|        pu1_top_left = pu1_ytop_left;
 1176|       |
 1177|       |        /* Scan the sub-blocks in Raster Scan Order */
 1178|   358k|        for(u1_sub_mb_num = 0; u1_sub_mb_num < 16; u1_sub_mb_num++)
  ------------------
  |  Branch (1178:32): [True: 337k, False: 21.1k]
  ------------------
 1179|   337k|        {
 1180|       |            /* Align the size to multiple of 8, so that SIMD functions
 1181|       |               can read 64 bits at a time. Only 13 bytes are actaully used */
 1182|   337k|            UWORD8 au1_ngbr_pels[16];
 1183|       |
 1184|   337k|            u1_sub_blk_x = u1_sub_mb_num & 0x3;
 1185|   337k|            u1_sub_blk_y = u1_sub_mb_num >> 2;
 1186|   337k|            i1_top_pred_mode = pi1_cur_pred_mode[u1_sub_blk_x];
 1187|   337k|            i1_left_pred_mode = pi1_left_pred_mode[u1_sub_blk_y];
 1188|   337k|            u1_use_top_right_mb = (!!CHECKBIT(ui2_top_rt_mask, u1_sub_mb_num));
  ------------------
  |  |   54|   337k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
 1189|       |
 1190|       |            /*********** left subblock availability**********/
 1191|   337k|            if(u1_sub_blk_x)
  ------------------
  |  Branch (1191:16): [True: 253k, False: 84.4k]
  ------------------
 1192|   253k|                u1_is_left_sub_block = 1;
 1193|  84.4k|            else
 1194|  84.4k|                u1_is_left_sub_block =
 1195|  84.4k|                                (u1_sub_blk_y < 2) ?
  ------------------
  |  Branch (1195:33): [True: 42.2k, False: 42.2k]
  ------------------
 1196|  42.2k|                                                (CHECKBIT(u2_use_left_mb_pack,
  ------------------
  |  |   54|  42.2k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
 1197|  42.2k|                                                          0)) :
 1198|  84.4k|                                                (u2_use_left_mb_pack >> 8);
 1199|       |
 1200|       |            /* CHANGED CODE */
 1201|   337k|            if(u1_sub_blk_y)
  ------------------
  |  Branch (1201:16): [True: 253k, False: 84.4k]
  ------------------
 1202|   253k|                u1_is_top_sub_block = 1;
 1203|       |
 1204|       |            /* CHANGED CODE */
 1205|       |            /***************** Top *********************/
 1206|   337k|            if(ps_dec->u4_use_intrapred_line_copy == 1)
  ------------------
  |  Branch (1206:16): [True: 337k, False: 0]
  ------------------
 1207|   337k|            {
 1208|       |
 1209|   337k|                if(u1_sub_blk_y)
  ------------------
  |  Branch (1209:20): [True: 253k, False: 84.4k]
  ------------------
 1210|   253k|                    pu1_top = pu1_luma_rec_buffer - ui_rec_width;
 1211|  84.4k|                else
 1212|  84.4k|                    pu1_top = puc_top + (u1_sub_blk_x << 2);
 1213|   337k|            }
 1214|      0|            else
 1215|      0|            {
 1216|      0|                pu1_top = pu1_luma_rec_buffer - ui_rec_width;
 1217|      0|            }
 1218|       |            /***************** Top Right *********************/
 1219|   337k|            pu1_top_right = pu1_top + 4;
 1220|       |            /***************** Top Left *********************/
 1221|   337k|            pu1_top_left = pu1_top - 1;
 1222|       |            /***************** Left *********************/
 1223|   337k|            pu1_left = pu1_luma_rec_buffer - 1;
 1224|       |            /* CHANGED CODE */
 1225|       |
 1226|       |            /*---------------------------------------------------------------*/
 1227|       |            /* Calculation of Intra prediction mode                          */
 1228|       |            /*---------------------------------------------------------------*/
 1229|   337k|            i1_intra_pred = ((i1_left_pred_mode < 0) | (i1_top_pred_mode < 0)) ?
  ------------------
  |  Branch (1229:29): [True: 54.6k, False: 282k]
  ------------------
 1230|   282k|                            DC : MIN(i1_left_pred_mode, i1_top_pred_mode);
  ------------------
  |  |  431|  54.6k|#define DC      2
  ------------------
                                          DC : MIN(i1_left_pred_mode, i1_top_pred_mode);
  ------------------
  |  |   61|   620k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 44.0k, False: 238k]
  |  |  ------------------
  ------------------
 1231|   337k|            {
 1232|   337k|                UWORD8 u1_packed_modes = (u1_is_top_sub_block << 1)
 1233|   337k|                                + u1_is_left_sub_block;
 1234|   337k|                UWORD8 *pu1_intra_err_codes =
 1235|   337k|                                (UWORD8 *)gau1_ih264d_intra_pred_err_code;
 1236|   337k|                UWORD8 uc_b2b0 = ((u1_sub_mb_num & 4) >> 1) | (u1_sub_mb_num & 1);
 1237|   337k|                UWORD8 uc_b3b1 = ((u1_sub_mb_num & 8) >> 2)
 1238|   337k|                                | ((u1_sub_mb_num & 2) >> 1);
 1239|       |
 1240|   337k|                u1_cur_sub_block = (uc_b3b1 << 2) + uc_b2b0;
 1241|   337k|                PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|   337k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
 1242|   337k|                if(!pu1_prev_intra4x4_pred_mode_flag[u1_cur_sub_block])
  ------------------
  |  Branch (1242:20): [True: 90.9k, False: 246k]
  ------------------
 1243|  90.9k|                {
 1244|  90.9k|                    i1_intra_pred =
 1245|  90.9k|                                    pu1_rem_intra4x4_pred_mode[u1_cur_sub_block]
 1246|  90.9k|                                                    + (pu1_rem_intra4x4_pred_mode[u1_cur_sub_block]
 1247|  90.9k|                                                                    >= i1_intra_pred);
 1248|  90.9k|                }
 1249|   337k|                i1_intra_pred = CLIP3(0, 8, i1_intra_pred);
  ------------------
  |  |   77|   337k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 474, False: 337k]
  |  |  |  Branch (77:54): [True: 330, False: 336k]
  |  |  ------------------
  ------------------
 1250|   337k|                {
 1251|   337k|                    UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred];
 1252|       |
 1253|   337k|                    if((u1_err_code & u1_packed_modes) ^ u1_err_code)
  ------------------
  |  Branch (1253:24): [True: 9.46k, False: 328k]
  ------------------
 1254|  9.46k|                     {
 1255|  9.46k|                        i1_intra_pred = 0;
 1256|  9.46k|                        ps_dec->i4_error_code = ERROR_INTRAPRED;
 1257|  9.46k|                     }
 1258|       |
 1259|   337k|                }
 1260|   337k|            }
 1261|   337k|            {
 1262|       |                /* Get neighbour pixels */
 1263|       |                /* left pels */
 1264|   337k|                if(u1_is_left_sub_block)
  ------------------
  |  Branch (1264:20): [True: 304k, False: 32.7k]
  ------------------
 1265|   304k|                {
 1266|   304k|                    WORD32 i;
 1267|  1.52M|                    for(i = 0; i < 4; i++)
  ------------------
  |  Branch (1267:32): [True: 1.21M, False: 304k]
  ------------------
 1268|  1.21M|                        au1_ngbr_pels[4 - 1 - i] = pu1_left[i * ui_rec_width];
 1269|   304k|                }
 1270|  32.7k|                else
 1271|  32.7k|                {
 1272|  32.7k|                    memset(au1_ngbr_pels, 0, 4);
 1273|  32.7k|                }
 1274|       |
 1275|       |                /* top left pels */
 1276|   337k|                au1_ngbr_pels[4] = *pu1_top_left;
 1277|       |
 1278|       |                /* top pels */
 1279|   337k|                if(u1_is_top_sub_block)
  ------------------
  |  Branch (1279:20): [True: 312k, False: 25.5k]
  ------------------
 1280|   312k|                {
 1281|   312k|                    memcpy(au1_ngbr_pels + 4 + 1, pu1_top, 4);
 1282|   312k|                }
 1283|  25.5k|                else
 1284|  25.5k|                {
 1285|  25.5k|                    memset(au1_ngbr_pels + 4 + 1, 0, 4);
 1286|  25.5k|                }
 1287|       |
 1288|       |                /* top right pels */
 1289|   337k|                if(u1_use_top_right_mb)
  ------------------
  |  Branch (1289:20): [True: 202k, False: 134k]
  ------------------
 1290|   202k|                {
 1291|   202k|                    memcpy(au1_ngbr_pels + 4 * 2 + 1, pu1_top_right, 4);
 1292|   202k|                }
 1293|   134k|                else if(u1_is_top_sub_block)
  ------------------
  |  Branch (1293:25): [True: 109k, False: 24.8k]
  ------------------
 1294|   109k|                {
 1295|   109k|                    memset(au1_ngbr_pels + 4 * 2 + 1, au1_ngbr_pels[4 * 2], 4);
 1296|   109k|                }
 1297|   337k|            }
 1298|   337k|            PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|   337k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
 1299|   337k|            ps_dec->apf_intra_pred_luma_4x4[i1_intra_pred](
 1300|   337k|                            au1_ngbr_pels, pu1_luma_rec_buffer, 1,
 1301|   337k|                            ui_rec_width,
 1302|   337k|                            ((u1_is_top_sub_block << 2) | u1_is_left_sub_block));
 1303|       |
 1304|       |            /* CHANGED CODE */
 1305|   337k|            if(CHECKBIT(ui2_luma_csbp, u1_sub_mb_num))
  ------------------
  |  |   54|   337k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 42.0k, False: 295k]
  |  |  ------------------
  ------------------
 1306|  42.0k|            {
 1307|  42.0k|                WORD16 ai2_tmp[16];
 1308|  42.0k|                PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  42.0k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 1309|  42.0k|                {
 1310|  42.0k|                    if(CHECKBIT(u4_luma_dc_only_csbp, u1_sub_mb_num))
  ------------------
  |  |   54|  42.0k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 8.74k, False: 33.3k]
  |  |  ------------------
  ------------------
 1311|  8.74k|                    {
 1312|  8.74k|                        ps_dec->pf_iquant_itrans_recon_luma_4x4_dc(
 1313|  8.74k|                                        pi2_y_coeff1,
 1314|  8.74k|                                        pu1_luma_rec_buffer,
 1315|  8.74k|                                        pu1_luma_rec_buffer,
 1316|  8.74k|                                        ui_rec_width,
 1317|  8.74k|                                        ui_rec_width,
 1318|  8.74k|                                        gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
 1319|  8.74k|                                        (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
 1320|  8.74k|                                        ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
 1321|  8.74k|                                        NULL);
 1322|  8.74k|                    }
 1323|  33.3k|                    else
 1324|  33.3k|                    {
 1325|  33.3k|                        ps_dec->pf_iquant_itrans_recon_luma_4x4(
 1326|  33.3k|                                        pi2_y_coeff1,
 1327|  33.3k|                                        pu1_luma_rec_buffer,
 1328|  33.3k|                                        pu1_luma_rec_buffer,
 1329|  33.3k|                                        ui_rec_width,
 1330|  33.3k|                                        ui_rec_width,
 1331|  33.3k|                                        gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
 1332|  33.3k|                                        (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
 1333|  33.3k|                                        ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
 1334|  33.3k|                                        NULL);
 1335|  33.3k|                    }
 1336|  42.0k|                }
 1337|       |
 1338|  42.0k|            }
 1339|       |
 1340|       |            /*---------------------------------------------------------------*/
 1341|       |            /* Update sub block number                                       */
 1342|       |            /*---------------------------------------------------------------*/
 1343|   337k|            pi2_y_coeff1 += 16;
 1344|   337k|            pu1_luma_rec_buffer +=
 1345|   337k|                            (u1_sub_blk_x == 3) ? (ui_rec_width << 2) - 12 : 4;
  ------------------
  |  Branch (1345:29): [True: 84.4k, False: 253k]
  ------------------
 1346|   337k|            pu1_luma_pred_buffer +=
 1347|   337k|                            (u1_sub_blk_x == 3) ? (ui_pred_width << 2) - 12 : 4;
  ------------------
  |  Branch (1347:29): [True: 84.4k, False: 253k]
  ------------------
 1348|       |            /* CHANGED CODE */
 1349|   337k|            pi1_cur_pred_mode[u1_sub_blk_x] = i1_intra_pred;
 1350|   337k|            pi1_left_pred_mode[u1_sub_blk_y] = i1_intra_pred;
 1351|   337k|        }
 1352|  21.1k|    }
 1353|  32.1k|    else if((u1_mb_type == I_4x4_MB) && (ps_cur_mb_info->u1_tran_form8x8 == 1))
  ------------------
  |  |  417|  32.1k|#define I_4x4_MB    0
  ------------------
  |  Branch (1353:13): [True: 32.1k, False: 0]
  |  Branch (1353:41): [True: 32.1k, False: 0]
  ------------------
 1354|  32.1k|    {
 1355|  32.1k|        UWORD8 u1_is_left_sub_block, u1_is_top_sub_block = uc_useTopMB;
 1356|  32.1k|        UWORD8 u1_sub_blk_x, u1_sub_blk_y;
 1357|  32.1k|        UWORD32 u4_sub_mb_num;
 1358|  32.1k|        WORD8 i1_top_pred_mode;
 1359|  32.1k|        WORD8 i1_left_pred_mode;
 1360|  32.1k|        UWORD8 *pu1_top, *pu1_left, *pu1_top_left;
 1361|  32.1k|        WORD8 *pi1_cur_pred_mode, *pi1_left_pred_mode, *pc_topPredMode;
 1362|  32.1k|        UWORD16 ui2_left_pred_buf_width = 0xffff;
 1363|  32.1k|        WORD8 i1_intra_pred;
 1364|  32.1k|        UWORD8 *pu1_prev_intra4x4_pred_mode_flag = pu1_prev_intra4x4_pred_mode_data;
 1365|  32.1k|        UWORD8 *pu1_rem_intra4x4_pred_mode = pu1_prev_intra4x4_pred_mode_data + 4;
 1366|  32.1k|        WORD16 *pi2_y_coeff1;
 1367|  32.1k|        UWORD16 ui2_top_rt_mask;
 1368|  32.1k|        UWORD32 u4_4x4_left_offset = 0;
 1369|       |
 1370|       |        /*--------------------------------------------------------------------*/
 1371|       |        /* 8x8 IntraPrediction                                                */
 1372|       |        /*--------------------------------------------------------------------*/
 1373|       |        /* Calculation of Top Right subblock mask                             */
 1374|       |        /*                                                                    */
 1375|       |        /* (a) Set it to default mask                                         */
 1376|       |        /* [It has 0 for subblocks which will never have top-right sub block] */
 1377|       |        /*                                                                    */
 1378|       |        /* (b) If top MB is not available                                     */
 1379|       |        /*      Clear the bits of the first row sub blocks                    */
 1380|       |        /*                                                                    */
 1381|       |        /* (c) Set/Clear bit for top-right subblock of MB                     */
 1382|       |        /* [5 sub-block in decoding order] based on TOP RIGHT MB availability */
 1383|       |        /*                                                                    */
 1384|       |        /* ui2_top_rt_mask: marks availability of top right(neighbour)         */
 1385|       |        /* in the 8x8 Block ordering                                          */
 1386|       |        /*                                                                    */
 1387|       |        /*      tr0   tr1                                                     */
 1388|       |        /*   0    1   tr3                                                     */
 1389|       |        /*   2    3                                                           */
 1390|       |        /*                                                                    */
 1391|       |        /*  Top rights for 0 is in top MB                                     */
 1392|       |        /*  top right of 1 will be in top right MB                            */
 1393|       |        /*  top right of 3 in right MB and hence not available                */
 1394|       |        /*  This corresponds to ui2_top_rt_mask  having default value 0x4      */
 1395|       |        /*--------------------------------------------------------------------*/
 1396|       |
 1397|  32.1k|        ui2_top_rt_mask = (u1_use_top_right_mb << 1) | (0x4);
 1398|       |
 1399|  32.1k|        if(uc_useTopMB)
  ------------------
  |  Branch (1399:12): [True: 19.5k, False: 12.5k]
  ------------------
 1400|  19.5k|        {
 1401|  19.5k|            ui2_top_rt_mask |= 0x1;
 1402|  19.5k|        }
 1403|       |
 1404|       |        /* Top Related initialisations */
 1405|  32.1k|        pi1_cur_pred_mode = ps_cur_mb_info->ps_curmb->pi1_intrapredmodes;
 1406|  32.1k|        pc_topPredMode = ps_cur_mb_info->ps_top_mb->pi1_intrapredmodes;
 1407|       |        /*
 1408|       |         if(u1_mbaff)
 1409|       |         {
 1410|       |         pi1_cur_pred_mode += (u2_mbx << 2);
 1411|       |         pc_topPredMode = pi1_cur_pred_mode + ps_cur_mb_info->i1_offset;
 1412|       |         pi1_cur_pred_mode += (u1_topmb) ? 0: 4;
 1413|       |         }
 1414|       |         */
 1415|  32.1k|        if(u1_top_available)
  ------------------
  |  Branch (1415:12): [True: 21.4k, False: 10.6k]
  ------------------
 1416|  21.4k|        {
 1417|  21.4k|            if(ps_top_mb->u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  21.4k|#define I_4x4_MB    0
  ------------------
  |  Branch (1417:16): [True: 16.2k, False: 5.21k]
  ------------------
 1418|  16.2k|            {
 1419|  16.2k|                *(WORD32*)pi1_cur_pred_mode = *(WORD32*)pc_topPredMode;
 1420|  16.2k|            }
 1421|  5.21k|            else
 1422|  5.21k|            {
 1423|  5.21k|                *(WORD32*)pi1_cur_pred_mode =
 1424|  5.21k|                                (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID;
  ------------------
  |  |  434|  3.26k|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
                                              (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID;
  ------------------
  |  |  433|  7.16k|#define NOT_VALID -1
  ------------------
  |  Branch (1424:33): [True: 3.26k, False: 1.94k]
  ------------------
 1425|  5.21k|            }
 1426|  21.4k|        }
 1427|  10.6k|        else
 1428|  10.6k|        {
 1429|  10.6k|            *(WORD32*)pi1_cur_pred_mode = NOT_VALID;
  ------------------
  |  |  433|  10.6k|#define NOT_VALID -1
  ------------------
 1430|  10.6k|        }
 1431|       |
 1432|  32.1k|        pu1_top = puc_top - 8;
 1433|       |
 1434|       |        /*Left Related initialisations*/
 1435|  32.1k|        pi1_left_pred_mode = ps_dec->pi1_left_pred_mode;
 1436|       |
 1437|  32.1k|        if(!u1_mbaff)
  ------------------
  |  Branch (1437:12): [True: 32.1k, False: 0]
  ------------------
 1438|  32.1k|        {
 1439|  32.1k|            if(u1_left_available)
  ------------------
  |  Branch (1439:16): [True: 25.4k, False: 6.61k]
  ------------------
 1440|  25.4k|            {
 1441|  25.4k|                if(ps_left_mb->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|  25.4k|#define I_4x4_MB    0
  ------------------
  |  Branch (1441:20): [True: 4.94k, False: 20.5k]
  ------------------
 1442|  4.94k|                {
 1443|  4.94k|                    *(WORD32*)pi1_left_pred_mode =
 1444|  4.94k|                                    (u2_use_left_mb_pack) ?
  ------------------
  |  Branch (1444:37): [True: 3.69k, False: 1.25k]
  ------------------
 1445|  3.69k|                                    DC_DC_DC_DC :
  ------------------
  |  |  434|  3.69k|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1446|  4.94k|                                                            NOT_VALID;
  ------------------
  |  |  433|  6.20k|#define NOT_VALID -1
  ------------------
 1447|  4.94k|                }
 1448|  25.4k|            }
 1449|  6.61k|            else
 1450|  6.61k|            {
 1451|  6.61k|                *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|  6.61k|#define NOT_VALID -1
  ------------------
 1452|  6.61k|            }
 1453|  32.1k|        }
 1454|      0|        else
 1455|      0|        {
 1456|      0|            UWORD8 u1_curMbfld = ps_cur_mb_info->u1_mb_field_decodingflag;
 1457|       |
 1458|      0|            UWORD8 u1_leftMbfld = ps_left_mb->u1_mb_fld;
 1459|       |
 1460|      0|            if((!u1_curMbfld) && (u1_leftMbfld))
  ------------------
  |  Branch (1460:16): [True: 0, False: 0]
  |  Branch (1460:34): [True: 0, False: 0]
  ------------------
 1461|      0|            {
 1462|      0|                u4_4x4_left_offset = 1;
 1463|      0|            }
 1464|       |
 1465|      0|            if(u1_curMbfld ^ u1_leftMbfld)
  ------------------
  |  Branch (1465:16): [True: 0, False: 0]
  ------------------
 1466|      0|            {
 1467|       |
 1468|      0|                if(u1_topmb
  ------------------
  |  Branch (1468:20): [True: 0, False: 0]
  ------------------
 1469|      0|                                | ((u1_topmb == 0)
  ------------------
  |  Branch (1469:36): [True: 0, False: 0]
  ------------------
 1470|      0|                                                && ((ps_curmb - 1)->u1_mb_type
  ------------------
  |  Branch (1470:52): [True: 0, False: 0]
  ------------------
 1471|      0|                                                                != I_4x4_MB)))
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
 1472|       |
 1473|      0|                {
 1474|      0|                    if(u1_left_available)
  ------------------
  |  Branch (1474:24): [True: 0, False: 0]
  ------------------
 1475|      0|                    {
 1476|      0|                        if(ps_left_mb->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
  |  Branch (1476:28): [True: 0, False: 0]
  ------------------
 1477|      0|                        {
 1478|      0|                            if(CHECKBIT(u2_use_left_mb_pack,0) == 0)
  ------------------
  |  |   54|      0|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
  |  Branch (1478:32): [True: 0, False: 0]
  ------------------
 1479|      0|                            {
 1480|      0|                                *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1481|      0|                            }
 1482|      0|                            else
 1483|      0|                            {
 1484|      0|                                *(WORD32*)pi1_left_pred_mode = DC_DC_DC_DC;
  ------------------
  |  |  434|      0|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1485|      0|                            }
 1486|      0|                        }
 1487|      0|                    }
 1488|      0|                    else
 1489|      0|                    {
 1490|      0|                        *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1491|      0|                    }
 1492|       |
 1493|      0|                    if(u1_curMbfld)
  ------------------
  |  Branch (1493:24): [True: 0, False: 0]
  ------------------
 1494|      0|                    {
 1495|      0|                        if(u1_left_available)
  ------------------
  |  Branch (1495:28): [True: 0, False: 0]
  ------------------
 1496|      0|                        {
 1497|      0|                            if((ps_left_mb + 1)->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
  |  Branch (1497:32): [True: 0, False: 0]
  ------------------
 1498|      0|                            {
 1499|      0|                                if(u2_use_left_mb_pack >> 8)
  ------------------
  |  Branch (1499:36): [True: 0, False: 0]
  ------------------
 1500|      0|                                {
 1501|      0|                                    *(WORD32*)(pi1_left_pred_mode + 4) =
 1502|      0|                                                    DC_DC_DC_DC;
  ------------------
  |  |  434|      0|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1503|      0|                                }
 1504|      0|                                else
 1505|      0|                                {
 1506|      0|                                    *(WORD32*)(pi1_left_pred_mode + 4) =
 1507|      0|                                                    NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1508|      0|                                }
 1509|      0|                            }
 1510|      0|                        }
 1511|      0|                        else
 1512|      0|                        {
 1513|      0|                            *(WORD32*)(pi1_left_pred_mode + 4) = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1514|      0|                        }
 1515|       |
 1516|      0|                        pi1_left_pred_mode[1] = pi1_left_pred_mode[2];
 1517|      0|                        pi1_left_pred_mode[2] = pi1_left_pred_mode[4];
 1518|      0|                        pi1_left_pred_mode[3] = pi1_left_pred_mode[6];
 1519|      0|                        *(WORD32*)(pi1_left_pred_mode + 4) =
 1520|      0|                                        *(WORD32*)pi1_left_pred_mode;
 1521|      0|                    }
 1522|      0|                    else
 1523|      0|                    {
 1524|      0|                        pi1_left_pred_mode[7] = pi1_left_pred_mode[3];
 1525|      0|                        pi1_left_pred_mode[6] = pi1_left_pred_mode[3];
 1526|      0|                        pi1_left_pred_mode[5] = pi1_left_pred_mode[2];
 1527|      0|                        pi1_left_pred_mode[4] = pi1_left_pred_mode[2];
 1528|      0|                        pi1_left_pred_mode[3] = pi1_left_pred_mode[1];
 1529|      0|                        pi1_left_pred_mode[2] = pi1_left_pred_mode[1];
 1530|      0|                        pi1_left_pred_mode[1] = pi1_left_pred_mode[0];
 1531|      0|                    }
 1532|      0|                }
 1533|      0|                pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
  ------------------
  |  Branch (1533:39): [True: 0, False: 0]
  ------------------
 1534|      0|            }
 1535|      0|            else
 1536|      0|            {
 1537|      0|                pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
  ------------------
  |  Branch (1537:39): [True: 0, False: 0]
  ------------------
 1538|       |
 1539|      0|                if(u1_left_available)
  ------------------
  |  Branch (1539:20): [True: 0, False: 0]
  ------------------
 1540|      0|                {
 1541|      0|                    if(ps_left_mb->u1_mb_type != I_4x4_MB)
  ------------------
  |  |  417|      0|#define I_4x4_MB    0
  ------------------
  |  Branch (1541:24): [True: 0, False: 0]
  ------------------
 1542|      0|                    {
 1543|      0|                        *(WORD32*)pi1_left_pred_mode =
 1544|      0|                                        (u2_use_left_mb_pack) ?
  ------------------
  |  Branch (1544:41): [True: 0, False: 0]
  ------------------
 1545|      0|                                        DC_DC_DC_DC :
  ------------------
  |  |  434|      0|#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
  ------------------
 1546|      0|                                                                NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1547|      0|                    }
 1548|      0|                }
 1549|      0|                else
 1550|      0|                {
 1551|      0|                    *(WORD32*)pi1_left_pred_mode = NOT_VALID;
  ------------------
  |  |  433|      0|#define NOT_VALID -1
  ------------------
 1552|      0|                }
 1553|      0|            }
 1554|      0|        }
 1555|       |
 1556|       |        /* One time pointer initialisations*/
 1557|  32.1k|        pi2_y_coeff1 = pi2_y_coeff;
 1558|       |
 1559|  32.1k|        if(u1_use_top_left_mb)
  ------------------
  |  Branch (1559:12): [True: 15.1k, False: 16.9k]
  ------------------
 1560|  15.1k|        {
 1561|  15.1k|            pu1_top_left = pu1_ytop_left;
 1562|  15.1k|        }
 1563|  16.9k|        else
 1564|  16.9k|        {
 1565|  16.9k|            pu1_top_left = NULL;
 1566|  16.9k|        }
 1567|       |
 1568|       |        /* Scan the sub-blocks in Raster Scan Order */
 1569|   160k|        for(u4_sub_mb_num = 0; u4_sub_mb_num < 4; u4_sub_mb_num++)
  ------------------
  |  Branch (1569:32): [True: 128k, False: 32.1k]
  ------------------
 1570|   128k|        {
 1571|   128k|            u1_sub_blk_x = (u4_sub_mb_num & 0x1);
 1572|   128k|            u1_sub_blk_y = (u4_sub_mb_num >> 1);
 1573|   128k|            i1_top_pred_mode = pi1_cur_pred_mode[u1_sub_blk_x << 1];
 1574|   128k|            i1_left_pred_mode = pi1_left_pred_mode[u1_sub_blk_y << 1];
 1575|       |
 1576|   128k|            if(2 == u4_sub_mb_num)
  ------------------
  |  Branch (1576:16): [True: 32.1k, False: 96.3k]
  ------------------
 1577|  32.1k|            {
 1578|  32.1k|                i1_left_pred_mode = pi1_left_pred_mode[(u1_sub_blk_y << 1)
 1579|  32.1k|                                + u4_4x4_left_offset];
 1580|  32.1k|            }
 1581|       |
 1582|   128k|            u1_use_top_right_mb = (!!CHECKBIT(ui2_top_rt_mask, u4_sub_mb_num));
  ------------------
  |  |   54|   128k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
 1583|       |
 1584|       |            /*********** left subblock availability**********/
 1585|   128k|            if(u1_sub_blk_x)
  ------------------
  |  Branch (1585:16): [True: 64.2k, False: 64.2k]
  ------------------
 1586|  64.2k|            {
 1587|  64.2k|                u1_is_left_sub_block = 1;
 1588|  64.2k|            }
 1589|  64.2k|            else
 1590|  64.2k|            {
 1591|  64.2k|                u1_is_left_sub_block =
 1592|  64.2k|                                (u1_sub_blk_y < 1) ?
  ------------------
  |  Branch (1592:33): [True: 32.1k, False: 32.1k]
  ------------------
 1593|  32.1k|                                                (CHECKBIT(u2_use_left_mb_pack,
  ------------------
  |  |   54|  32.1k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
 1594|  32.1k|                                                          0)) :
 1595|  64.2k|                                                (u2_use_left_mb_pack >> 8);
 1596|  64.2k|            }
 1597|       |
 1598|       |            /***************** Top *********************/
 1599|   128k|            if(u1_sub_blk_y)
  ------------------
  |  Branch (1599:16): [True: 64.2k, False: 64.2k]
  ------------------
 1600|  64.2k|            {
 1601|  64.2k|                u1_is_top_sub_block = 1;
 1602|       |                // sushant
 1603|  64.2k|                pu1_top = /*pu1_luma_pred_buffer*/pu1_luma_rec_buffer - ui_rec_width;
 1604|  64.2k|            }
 1605|  64.2k|            else
 1606|  64.2k|            {
 1607|  64.2k|                pu1_top += 8;
 1608|  64.2k|            }
 1609|       |
 1610|       |            /***************** Left *********************/
 1611|   128k|            if((u1_sub_blk_x) | (u4_num_pmbair != 0))
  ------------------
  |  Branch (1611:16): [True: 115k, False: 13.2k]
  ------------------
 1612|   115k|            {
 1613|       |                // sushant
 1614|   115k|                pu1_left = /*pu1_luma_pred_buffer*/pu1_luma_rec_buffer - 1;
 1615|   115k|                ui2_left_pred_buf_width = ui_rec_width;
 1616|   115k|            }
 1617|  13.2k|            else
 1618|  13.2k|            {
 1619|  13.2k|                pu1_left = pu1_yleft;
 1620|  13.2k|                pu1_yleft += (ui_rec_width << 3);
 1621|  13.2k|                ui2_left_pred_buf_width = ui_rec_width;
 1622|  13.2k|            }
 1623|       |
 1624|       |            /***************** Top Left *********************/
 1625|   128k|            if(u4_sub_mb_num)
  ------------------
  |  Branch (1625:16): [True: 96.3k, False: 32.1k]
  ------------------
 1626|  96.3k|            {
 1627|  96.3k|                pu1_top_left = (u1_sub_blk_x) ?
  ------------------
  |  Branch (1627:32): [True: 64.2k, False: 32.1k]
  ------------------
 1628|  64.2k|                                pu1_top - 1 : pu1_left - ui_rec_width;
 1629|       |
 1630|  96.3k|                if((u1_sub_blk_x && (!u1_is_top_sub_block))
  ------------------
  |  Branch (1630:21): [True: 64.2k, False: 32.1k]
  |  Branch (1630:37): [True: 12.5k, False: 51.6k]
  ------------------
 1631|  83.7k|                                || ((!u1_sub_blk_x) && (!u1_is_left_sub_block)))
  ------------------
  |  Branch (1631:37): [True: 32.1k, False: 51.6k]
  |  Branch (1631:56): [True: 7.87k, False: 24.2k]
  ------------------
 1632|  20.4k|                {
 1633|  20.4k|                    pu1_top_left = NULL;
 1634|  20.4k|                }
 1635|  96.3k|            }
 1636|       |
 1637|       |            /*---------------------------------------------------------------*/
 1638|       |            /* Calculation of Intra prediction mode                          */
 1639|       |            /*---------------------------------------------------------------*/
 1640|   128k|            i1_intra_pred = ((i1_left_pred_mode < 0) | (i1_top_pred_mode < 0)) ?
  ------------------
  |  Branch (1640:29): [True: 38.8k, False: 89.6k]
  ------------------
 1641|  89.6k|                            DC : MIN(i1_left_pred_mode, i1_top_pred_mode);
  ------------------
  |  |  431|  38.8k|#define DC      2
  ------------------
                                          DC : MIN(i1_left_pred_mode, i1_top_pred_mode);
  ------------------
  |  |   61|   218k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 19.0k, False: 70.5k]
  |  |  ------------------
  ------------------
 1642|   128k|            {
 1643|   128k|                UWORD8 u1_packed_modes = (u1_is_top_sub_block << 1)
 1644|   128k|                                + u1_is_left_sub_block;
 1645|   128k|                UWORD8 *pu1_intra_err_codes =
 1646|   128k|                                (UWORD8 *)gau1_ih264d_intra_pred_err_code;
 1647|       |
 1648|       |                /********************************************************************/
 1649|       |                /* Same intra4x4_pred_mode array is filled with intra4x4_pred_mode  */
 1650|       |                /* for a MB with 8x8 intrapredicition                               */
 1651|       |                /********************************************************************/
 1652|   128k|                PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|   128k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
 1653|   128k|                if(!pu1_prev_intra4x4_pred_mode_flag[u4_sub_mb_num])
  ------------------
  |  Branch (1653:20): [True: 31.6k, False: 96.7k]
  ------------------
 1654|  31.6k|                {
 1655|  31.6k|                    i1_intra_pred = pu1_rem_intra4x4_pred_mode[u4_sub_mb_num]
 1656|  31.6k|                                    + (pu1_rem_intra4x4_pred_mode[u4_sub_mb_num]
 1657|  31.6k|                                                    >= i1_intra_pred);
 1658|  31.6k|                }
 1659|   128k|                i1_intra_pred = CLIP3(0, 8, i1_intra_pred);
  ------------------
  |  |   77|   128k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 377, False: 128k]
  |  |  |  Branch (77:54): [True: 253, False: 127k]
  |  |  ------------------
  ------------------
 1660|   128k|                {
 1661|   128k|                    UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred];
 1662|       |
 1663|   128k|                    if((u1_err_code & u1_packed_modes) ^ u1_err_code)
  ------------------
  |  Branch (1663:24): [True: 4.41k, False: 124k]
  ------------------
 1664|  4.41k|                    {
 1665|  4.41k|                        i1_intra_pred = 0;
 1666|  4.41k|                        ps_dec->i4_error_code = ERROR_INTRAPRED;
 1667|  4.41k|                    }
 1668|   128k|                }
 1669|   128k|            }
 1670|       |
 1671|   128k|            {
 1672|       |                /* Align the size to multiple of 8, so that SIMD functions
 1673|       |                can read 64 bits at a time. Only 25 bytes are actaully used */
 1674|   128k|                UWORD8 au1_ngbr_pels[32] = {0};
 1675|   128k|                WORD32 ngbr_avail;
 1676|   128k|                ngbr_avail = u1_is_left_sub_block << 0;
 1677|   128k|                ngbr_avail |= u1_is_top_sub_block << 2;
 1678|       |
 1679|   128k|                if(pu1_top_left)
  ------------------
  |  Branch (1679:20): [True: 90.9k, False: 37.4k]
  ------------------
 1680|  90.9k|                    ngbr_avail |= 1 << 1;
 1681|       |
 1682|   128k|                ngbr_avail |= u1_use_top_right_mb << 3;
 1683|   128k|                PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|   128k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
 1684|   128k|                {
 1685|   128k|                    ps_dec->pf_intra_pred_ref_filtering(pu1_left, pu1_top_left,
 1686|   128k|                                                        pu1_top, au1_ngbr_pels,
 1687|   128k|                                                        ui2_left_pred_buf_width,
 1688|   128k|                                                        ngbr_avail);
 1689|       |
 1690|   128k|                    ps_dec->apf_intra_pred_luma_8x8[i1_intra_pred](
 1691|   128k|                                    au1_ngbr_pels, pu1_luma_rec_buffer, 1,
 1692|   128k|                                    ui_rec_width,
 1693|   128k|                                    ((u1_is_top_sub_block << 2) | u1_is_left_sub_block));
 1694|   128k|                }
 1695|   128k|            }
 1696|       |
 1697|       |            /* Inverse Transform and Reconstruction */
 1698|   128k|            if(CHECKBIT(ps_cur_mb_info->u1_cbp, u4_sub_mb_num))
  ------------------
  |  |   54|   128k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 75.6k, False: 52.7k]
  |  |  ------------------
  ------------------
 1699|  75.6k|            {
 1700|  75.6k|                WORD16 *pi2_scale_matrix_ptr;
 1701|  75.6k|                WORD16 ai2_tmp[64];
 1702|       |
 1703|  75.6k|                pi2_scale_matrix_ptr =
 1704|  75.6k|                                ps_dec->s_high_profile.i2_scalinglist8x8[0];
 1705|  75.6k|                PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  75.6k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 1706|  75.6k|                {
 1707|  75.6k|                    if(CHECKBIT(u4_luma_dc_only_cbp, u4_sub_mb_num))
  ------------------
  |  |   54|  75.6k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 19.0k, False: 56.6k]
  |  |  ------------------
  ------------------
 1708|  19.0k|                    {
 1709|  19.0k|                        ps_dec->pf_iquant_itrans_recon_luma_8x8_dc(
 1710|  19.0k|                                        pi2_y_coeff1,
 1711|  19.0k|                                        pu1_luma_rec_buffer,
 1712|  19.0k|                                        pu1_luma_rec_buffer,
 1713|  19.0k|                                        ui_rec_width,
 1714|  19.0k|                                        ui_rec_width,
 1715|  19.0k|                                        gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
 1716|  19.0k|                                        (UWORD16 *)pi2_scale_matrix_ptr,
 1717|  19.0k|                                        ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
 1718|  19.0k|                                        NULL);
 1719|  19.0k|                    }
 1720|  56.6k|                    else
 1721|  56.6k|                    {
 1722|  56.6k|                        ps_dec->pf_iquant_itrans_recon_luma_8x8(
 1723|  56.6k|                                        pi2_y_coeff1,
 1724|  56.6k|                                        pu1_luma_rec_buffer,
 1725|  56.6k|                                        pu1_luma_rec_buffer,
 1726|  56.6k|                                        ui_rec_width,
 1727|  56.6k|                                        ui_rec_width,
 1728|  56.6k|                                        gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
 1729|  56.6k|                                        (UWORD16 *)pi2_scale_matrix_ptr,
 1730|  56.6k|                                        ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
 1731|  56.6k|                                        NULL);
 1732|  56.6k|                    }
 1733|  75.6k|                }
 1734|       |
 1735|  75.6k|            }
 1736|       |
 1737|       |            /*---------------------------------------------------------------*/
 1738|       |            /* Update sub block number                                       */
 1739|       |            /*---------------------------------------------------------------*/
 1740|   128k|            pi2_y_coeff1 += 64;
 1741|       |
 1742|   128k|            pu1_luma_rec_buffer +=
 1743|   128k|                            (u1_sub_blk_x == 1) ?
  ------------------
  |  Branch (1743:29): [True: 64.2k, False: 64.2k]
  ------------------
 1744|  64.2k|                                            (ui_rec_width << 3) - (8 * 1) : 8;
 1745|       |
 1746|       |            /*---------------------------------------------------------------*/
 1747|       |            /* Pred mode filled in terms of 4x4 block so replicated in 2     */
 1748|       |            /* locations.                                                    */
 1749|       |            /*---------------------------------------------------------------*/
 1750|   128k|            pi1_cur_pred_mode[u1_sub_blk_x << 1] = i1_intra_pred;
 1751|   128k|            pi1_cur_pred_mode[(u1_sub_blk_x << 1) + 1] = i1_intra_pred;
 1752|   128k|            pi1_left_pred_mode[u1_sub_blk_y << 1] = i1_intra_pred;
 1753|   128k|            pi1_left_pred_mode[(u1_sub_blk_y << 1) + 1] = i1_intra_pred;
 1754|   128k|        }
 1755|  32.1k|    }
 1756|       |    /* Decode Chroma Block */
 1757|   143k|    ih264d_unpack_chroma_coeff4x4_mb(ps_dec,
 1758|   143k|                                     ps_cur_mb_info);
 1759|       |    /*--------------------------------------------------------------------*/
 1760|       |    /* Chroma Blocks decoding                                             */
 1761|       |    /*--------------------------------------------------------------------*/
 1762|   143k|    {
 1763|   143k|        UWORD8 u1_intra_chrom_pred_mode;
 1764|   143k|        UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4);
 1765|       |
 1766|       |        /*--------------------------------------------------------------------*/
 1767|       |        /* Perform Chroma intra prediction                                    */
 1768|       |        /*--------------------------------------------------------------------*/
 1769|       |
 1770|   143k|        u1_intra_chrom_pred_mode = CHROMA_TO_LUMA_INTRA_MODE(
  ------------------
  |  |   44|   143k|#define CHROMA_TO_LUMA_INTRA_MODE(x)   (x ^ ( (!(x & 0x01)) << 1))
  ------------------
 1771|   143k|                        ps_cur_mb_info->u1_chroma_pred_mode);
 1772|       |
 1773|   143k|        {
 1774|   143k|            UWORD8 u1_packed_modes = (u1_top_available << 1)
 1775|   143k|                            + u1_left_available;
 1776|   143k|            UWORD8 u1_err_code =
 1777|   143k|                            (u1_intra_chrom_pred_mode & 1) ?
  ------------------
  |  Branch (1777:29): [True: 48.5k, False: 94.5k]
  ------------------
 1778|  48.5k|                                            u1_intra_chrom_pred_mode :
 1779|   143k|                                            (u1_intra_chrom_pred_mode ^ 2);
 1780|   143k|            if((u1_err_code & u1_packed_modes) ^ u1_err_code)
  ------------------
  |  Branch (1780:16): [True: 10.9k, False: 132k]
  ------------------
 1781|  10.9k|            {
 1782|  10.9k|                u1_intra_chrom_pred_mode = 0;
 1783|  10.9k|                ps_dec->i4_error_code = ERROR_INTRAPRED;
 1784|  10.9k|            }
 1785|   143k|        }
 1786|       |
 1787|       |        /* CHANGED CODE */
 1788|   143k|        if(u1_chroma_cbp != CBPC_ALLZERO)
  ------------------
  |  |  507|   143k|#define CBPC_ALLZERO    0
  ------------------
  |  Branch (1788:12): [True: 42.4k, False: 100k]
  ------------------
 1789|  42.4k|        {
 1790|  42.4k|            UWORD16 u2_chroma_csbp =
 1791|  42.4k|                            (u1_chroma_cbp == CBPC_ACZERO) ?
  ------------------
  |  |  508|  42.4k|#define CBPC_ACZERO     1
  ------------------
  |  Branch (1791:29): [True: 30.3k, False: 12.0k]
  ------------------
 1792|  30.3k|                                            0 : ps_cur_mb_info->u2_chroma_csbp;
 1793|  42.4k|            UWORD32 u4_scale_u;
 1794|  42.4k|            UWORD32 u4_scale_v;
 1795|       |
 1796|  42.4k|            {
 1797|  42.4k|                UWORD16 au2_ngbr_pels[33];
 1798|  42.4k|                UWORD8 *pu1_ngbr_pels = (UWORD8 *)au2_ngbr_pels;
 1799|  42.4k|                UWORD16 *pu2_left_uv;
 1800|  42.4k|                UWORD16 *pu2_topleft_uv;
 1801|  42.4k|                WORD32 use_left1 = (u2_use_left_mb_pack & 0x0ff);
 1802|  42.4k|                WORD32 use_left2 = (u2_use_left_mb_pack & 0xff00) >> 8;
 1803|       |
 1804|  42.4k|                pu2_left_uv = (UWORD16 *)pu1_uleft;
 1805|  42.4k|                pu2_topleft_uv = (UWORD16 *)pu1_u_top_left;
 1806|       |                /* Get neighbour pixels */
 1807|       |                /* left pels */
 1808|  42.4k|                if(u2_use_left_mb_pack)
  ------------------
  |  Branch (1808:20): [True: 26.5k, False: 15.8k]
  ------------------
 1809|  26.5k|                {
 1810|  26.5k|                    WORD32 i;
 1811|  26.5k|                    if(use_left1)
  ------------------
  |  Branch (1811:24): [True: 26.5k, False: 0]
  ------------------
 1812|  26.5k|                    {
 1813|   132k|                        for(i = 0; i < 4; i++)
  ------------------
  |  Branch (1813:36): [True: 106k, False: 26.5k]
  ------------------
 1814|   106k|                            au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
 1815|   106k|                                            * u4_recwidth_cr / YUV420SP_FACTOR];
  ------------------
  |  |  119|   106k|#define YUV420SP_FACTOR 2
  ------------------
 1816|  26.5k|                    }
 1817|      0|                    else
 1818|      0|                    {
 1819|      0|                        memset(au2_ngbr_pels + 4, 0, 4 * sizeof(UWORD16));
 1820|      0|                    }
 1821|       |
 1822|  26.5k|                    if(use_left2)
  ------------------
  |  Branch (1822:24): [True: 26.5k, False: 0]
  ------------------
 1823|  26.5k|                    {
 1824|   132k|                        for(i = 4; i < 8; i++)
  ------------------
  |  Branch (1824:36): [True: 106k, False: 26.5k]
  ------------------
 1825|   106k|                            au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
 1826|   106k|                                            * u4_recwidth_cr / YUV420SP_FACTOR];
  ------------------
  |  |  119|   106k|#define YUV420SP_FACTOR 2
  ------------------
 1827|  26.5k|                    }
 1828|      0|                    else
 1829|      0|                    {
 1830|      0|                        memset(au2_ngbr_pels, 0, 4 * sizeof(UWORD16));
 1831|      0|                    }
 1832|  26.5k|                }
 1833|  15.8k|                else
 1834|  15.8k|                {
 1835|  15.8k|                    memset(au2_ngbr_pels, 0, 8 * sizeof(UWORD16));
 1836|  15.8k|                }
 1837|       |
 1838|       |                /* top left pels */
 1839|  42.4k|                au2_ngbr_pels[8] = *pu2_topleft_uv;
 1840|       |
 1841|       |                /* top pels */
 1842|  42.4k|                if(uc_useTopMB)
  ------------------
  |  Branch (1842:20): [True: 29.5k, False: 12.9k]
  ------------------
 1843|  29.5k|                {
 1844|  29.5k|                    memcpy(au2_ngbr_pels + 8 + 1, pu1_top_u,
 1845|  29.5k|                           8 * sizeof(UWORD16));
 1846|  29.5k|                }
 1847|  12.9k|                else
 1848|  12.9k|                {
 1849|  12.9k|                    memset(au2_ngbr_pels + 8 + 1, 0, 8 * sizeof(UWORD16));
 1850|  12.9k|                }
 1851|       |
 1852|  42.4k|                PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|  42.4k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
 1853|  42.4k|                ps_dec->apf_intra_pred_chroma[u1_intra_chrom_pred_mode](
 1854|  42.4k|                                pu1_ngbr_pels,
 1855|  42.4k|                                pu1_mb_cb_rei1_buffer,
 1856|  42.4k|                                1,
 1857|  42.4k|                                u4_recwidth_cr,
 1858|  42.4k|                                ((uc_useTopMB << 2) | (use_left2 << 4)
 1859|  42.4k|                                                | use_left1));
 1860|  42.4k|            }
 1861|  42.4k|            u4_scale_u = ps_cur_mb_info->u1_qpc_div6;
 1862|  42.4k|            u4_scale_v = ps_cur_mb_info->u1_qpcr_div6;
 1863|  42.4k|            pi2_y_coeff = ps_dec->pi2_coeff_data;
 1864|       |
 1865|  42.4k|            {
 1866|  42.4k|                UWORD32 i;
 1867|  42.4k|                WORD16 ai2_tmp[16];
 1868|   212k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (1868:28): [True: 169k, False: 42.4k]
  ------------------
 1869|   169k|                {
 1870|   169k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
 1871|   169k|                    UWORD8 *pu1_pred_sblk = pu1_mb_cb_rei1_buffer
 1872|   169k|                                    + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  556|   169k|#define BLK_SIZE             4
  ------------------
                                                  + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  119|   169k|#define YUV420SP_FACTOR 2
  ------------------
 1873|   169k|                                    + (i >> 1) * (u4_recwidth_cr << 2);
 1874|   169k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|   169k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 1875|   169k|                    {
 1876|   169k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|   169k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 9.47k, False: 160k]
  |  |  ------------------
  ------------------
 1877|  9.47k|                        {
 1878|  9.47k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4(
 1879|  9.47k|                                            pi2_level,
 1880|  9.47k|                                            pu1_pred_sblk,
 1881|  9.47k|                                            pu1_pred_sblk,
 1882|  9.47k|                                            u4_recwidth_cr,
 1883|  9.47k|                                            u4_recwidth_cr,
 1884|  9.47k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
 1885|  9.47k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[1],
 1886|  9.47k|                                            u4_scale_u, ai2_tmp, pi2_level);
 1887|  9.47k|                        }
 1888|   160k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (1888:33): [True: 103k, False: 56.3k]
  ------------------
 1889|   103k|                        {
 1890|   103k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
 1891|   103k|                                            pi2_level,
 1892|   103k|                                            pu1_pred_sblk,
 1893|   103k|                                            pu1_pred_sblk,
 1894|   103k|                                            u4_recwidth_cr,
 1895|   103k|                                            u4_recwidth_cr,
 1896|   103k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
 1897|   103k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[1],
 1898|   103k|                                            u4_scale_u, ai2_tmp, pi2_level);
 1899|   103k|                        }
 1900|   169k|                    }
 1901|       |
 1902|   169k|                }
 1903|  42.4k|            }
 1904|       |
 1905|  42.4k|            pi2_y_coeff += MB_CHROM_SIZE;
  ------------------
  |  |  564|  42.4k|#define MB_CHROM_SIZE                 64
  ------------------
 1906|  42.4k|            u2_chroma_csbp = u2_chroma_csbp >> 4;
 1907|  42.4k|            {
 1908|  42.4k|                UWORD32 i;
 1909|  42.4k|                WORD16 ai2_tmp[16];
 1910|   212k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (1910:28): [True: 169k, False: 42.4k]
  ------------------
 1911|   169k|                {
 1912|   169k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
 1913|   169k|                    UWORD8 *pu1_pred_sblk = pu1_mb_cb_rei1_buffer + 1
 1914|   169k|                                    + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  556|   169k|#define BLK_SIZE             4
  ------------------
                                                  + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  119|   169k|#define YUV420SP_FACTOR 2
  ------------------
 1915|   169k|                                    + (i >> 1) * (u4_recwidth_cr << 2);
 1916|   169k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|   169k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 1917|   169k|                    {
 1918|   169k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|   169k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 10.2k, False: 159k]
  |  |  ------------------
  ------------------
 1919|  10.2k|                        {
 1920|  10.2k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4(
 1921|  10.2k|                                            pi2_level,
 1922|  10.2k|                                            pu1_pred_sblk,
 1923|  10.2k|                                            pu1_pred_sblk,
 1924|  10.2k|                                            u4_recwidth_cr,
 1925|  10.2k|                                            u4_recwidth_cr,
 1926|  10.2k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
 1927|  10.2k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[2],
 1928|  10.2k|                                            u4_scale_v, ai2_tmp, pi2_level);
 1929|  10.2k|                        }
 1930|   159k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (1930:33): [True: 92.8k, False: 66.4k]
  ------------------
 1931|  92.8k|                        {
 1932|  92.8k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
 1933|  92.8k|                                            pi2_level,
 1934|  92.8k|                                            pu1_pred_sblk,
 1935|  92.8k|                                            pu1_pred_sblk,
 1936|  92.8k|                                            u4_recwidth_cr,
 1937|  92.8k|                                            u4_recwidth_cr,
 1938|  92.8k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
 1939|  92.8k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[2],
 1940|  92.8k|                                            u4_scale_v, ai2_tmp, pi2_level);
 1941|  92.8k|                        }
 1942|   169k|                    }
 1943|   169k|                }
 1944|  42.4k|            }
 1945|       |
 1946|  42.4k|        }
 1947|   100k|        else
 1948|   100k|        {
 1949|       |            /* If no inverse transform is needed, pass recon buffer pointer */
 1950|       |            /* to Intraprediction module instead of pred buffer pointer     */
 1951|   100k|            {
 1952|   100k|                UWORD16 au2_ngbr_pels[33];
 1953|   100k|                UWORD8 *pu1_ngbr_pels = (UWORD8 *)au2_ngbr_pels;
 1954|   100k|                UWORD16 *pu2_left_uv;
 1955|   100k|                UWORD16 *pu2_topleft_uv;
 1956|   100k|                WORD32 use_left1 = (u2_use_left_mb_pack & 0x0ff);
 1957|   100k|                WORD32 use_left2 = (u2_use_left_mb_pack & 0xff00) >> 8;
 1958|       |
 1959|   100k|                pu2_topleft_uv = (UWORD16 *)pu1_u_top_left;
 1960|   100k|                pu2_left_uv = (UWORD16 *)pu1_uleft;
 1961|       |
 1962|       |                /* Get neighbour pixels */
 1963|       |                /* left pels */
 1964|   100k|                if(u2_use_left_mb_pack)
  ------------------
  |  Branch (1964:20): [True: 75.4k, False: 25.3k]
  ------------------
 1965|  75.4k|                {
 1966|  75.4k|                    WORD32 i;
 1967|  75.4k|                    if(use_left1)
  ------------------
  |  Branch (1967:24): [True: 75.4k, False: 0]
  ------------------
 1968|  75.4k|                    {
 1969|   377k|                        for(i = 0; i < 4; i++)
  ------------------
  |  Branch (1969:36): [True: 301k, False: 75.4k]
  ------------------
 1970|   301k|                            au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
 1971|   301k|                                            * u4_recwidth_cr / YUV420SP_FACTOR];
  ------------------
  |  |  119|   301k|#define YUV420SP_FACTOR 2
  ------------------
 1972|  75.4k|                    }
 1973|      0|                    else
 1974|      0|                    {
 1975|      0|                        memset(au2_ngbr_pels + 4, 0, 4 * sizeof(UWORD16));
 1976|      0|                    }
 1977|       |
 1978|  75.4k|                    if(use_left2)
  ------------------
  |  Branch (1978:24): [True: 75.4k, False: 0]
  ------------------
 1979|  75.4k|                    {
 1980|   377k|                        for(i = 4; i < 8; i++)
  ------------------
  |  Branch (1980:36): [True: 301k, False: 75.4k]
  ------------------
 1981|   301k|                            au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
 1982|   301k|                                            * u4_recwidth_cr / YUV420SP_FACTOR];
  ------------------
  |  |  119|   301k|#define YUV420SP_FACTOR 2
  ------------------
 1983|  75.4k|                    }
 1984|      0|                    else
 1985|      0|                    {
 1986|      0|                        memset(au2_ngbr_pels, 0, 4 * sizeof(UWORD16));
 1987|      0|                    }
 1988|       |
 1989|  75.4k|                }
 1990|  25.3k|                else
 1991|  25.3k|                {
 1992|  25.3k|                    memset(au2_ngbr_pels, 0, 8 * sizeof(UWORD16));
 1993|  25.3k|                }
 1994|       |
 1995|       |                /* top left pels */
 1996|   100k|                au2_ngbr_pels[8] = *pu2_topleft_uv;
 1997|       |
 1998|       |                /* top pels */
 1999|   100k|                if(uc_useTopMB)
  ------------------
  |  Branch (1999:20): [True: 71.2k, False: 29.4k]
  ------------------
 2000|  71.2k|                {
 2001|  71.2k|                    memcpy(au2_ngbr_pels + 8 + 1, pu1_top_u,
 2002|  71.2k|                           8 * sizeof(UWORD16));
 2003|  71.2k|                }
 2004|  29.4k|                else
 2005|  29.4k|                {
 2006|  29.4k|                    memset(au2_ngbr_pels + 8 + 1, 0, 8 * sizeof(UWORD16));
 2007|  29.4k|                }
 2008|       |
 2009|   100k|                PROFILE_DISABLE_INTRA_PRED()
  ------------------
  |  |  105|   100k|#define PROFILE_DISABLE_INTRA_PRED() ;
  ------------------
 2010|   100k|                ps_dec->apf_intra_pred_chroma[u1_intra_chrom_pred_mode](
 2011|   100k|                                pu1_ngbr_pels,
 2012|   100k|                                pu1_mb_cb_rei1_buffer,
 2013|   100k|                                1,
 2014|   100k|                                u4_recwidth_cr,
 2015|   100k|                                ((uc_useTopMB << 2) | (use_left2 << 4)
 2016|   100k|                                                | use_left1));
 2017|   100k|            }
 2018|       |
 2019|   100k|        }
 2020|       |
 2021|   143k|    }
 2022|   143k|    return OK;
  ------------------
  |  |  114|   143k|#define OK        0
  ------------------
 2023|   143k|}

ih264d_insert_pic_in_ref_pic_listx:
   66|   147k|{
   67|   147k|    *ps_ref_pic_buf_lx = *ps_pic;
   68|   147k|}
ih264d_mv_pred_ref_tfr_nby2_pmb:
   73|   858k|{
   74|   858k|    parse_pmbarams_t * ps_mb_part_info;
   75|   858k|    parse_part_params_t * ps_part;
   76|   858k|    mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start;
   77|   858k|    UWORD32 i, j;
   78|   858k|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
   79|   858k|    dec_mb_info_t * ps_cur_mb_info;
   80|   858k|    WORD32 i2_mv_x, i2_mv_y;
   81|   858k|    WORD32 ret;
   82|       |
   83|   858k|    ps_dec->i4_submb_ofst -= (WORD32)(u4_num_mbs - u4_mb_idx) << 4;
   84|   858k|    ps_mb_part_info = ps_dec->ps_parse_mb_data; // + u4_mb_idx;
   85|   858k|    ps_part = ps_dec->ps_parse_part_params; // + u4_mb_idx;
   86|       |
   87|       |    /* N/2 Mb MvPred and Transfer Setup Loop */
   88|  6.65M|    for(i = u4_mb_idx; i < u4_num_mbs; i++, ps_mb_part_info++)
  ------------------
  |  Branch (88:24): [True: 5.79M, False: 858k]
  ------------------
   89|  5.79M|    {
   90|  5.79M|        UWORD32 u1_colz;
   91|  5.79M|        UWORD32 u1_field;
   92|  5.79M|        mv_pred_t s_mvPred;
   93|  5.79M|        mv_pred_t *ps_mv_pred = &s_mvPred;
   94|       |
   95|       |
   96|       |
   97|  5.79M|        *ps_mv_pred = ps_dec->s_default_mv_pred;
   98|       |
   99|  5.79M|        ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
  ------------------
  |  |  562|  5.79M|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  5.79M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  5.79M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
  100|       |
  101|       |        /* Restore the slice scratch MbX and MbY context */
  102|  5.79M|        ps_cur_mb_info = ps_dec->ps_nmb_info + i;
  103|  5.79M|        u1_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  104|       |
  105|       |
  106|       |
  107|  5.79M|        ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4);
  108|  5.79M|        ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
  109|  5.79M|        ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
  110|  5.79M|        ps_dec->u2_mv_2mb[i & 0x1] = 0;
  111|       |
  112|       |        /* Look for MV Prediction and Reference Transfer in Non-I Mbs */
  113|  5.79M|        if(!ps_mb_part_info->u4_isI_mb)
  ------------------
  |  Branch (113:12): [True: 5.77M, False: 26.0k]
  ------------------
  114|  5.77M|        {
  115|  5.77M|            UWORD32 u1_blk_no;
  116|  5.77M|            WORD32 i1_ref_idx, i1_ref_idx1;
  117|  5.77M|            UWORD32 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num;
  118|  5.77M|            UWORD32 u1_num_part, u1_num_ref, u1_wd, u1_ht;
  119|  5.77M|            UWORD32 *pu4_wt_offst, **ppu4_wt_ofst;
  120|  5.77M|            UWORD32 u1_scale_ref, u4_bot_mb;
  121|  5.77M|            WORD8 *pi1_ref_idx = ps_mb_part_info->i1_ref_idx[0];
  122|  5.77M|            pic_buffer_t *ps_ref_frame, **pps_ref_frame;
  123|  5.77M|            deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i;
  124|       |
  125|       |            /* MB Level initialisations */
  126|  5.77M|            ps_dec->u4_num_pmbair = i >> u1_mbaff;
  127|  5.77M|            ps_dec->u4_mb_idx_mv = i;
  128|  5.77M|            ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst;
  129|  5.77M|            pps_ref_frame = ps_dec->ps_ref_pic_buf_lx[0];
  130|       |            /* CHANGED CODE */
  131|  5.77M|            ps_mv_ntop_start = ps_mv_nmb_start
  132|  5.77M|                            - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
  133|       |
  134|  5.77M|            u1_num_part = ps_mb_part_info->u1_num_part;
  135|  5.77M|            ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1;
  136|  5.77M|            ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx;
  137|  5.77M|            ps_cur_mb_info->u1_num_pred_parts = 0;
  138|       |
  139|       |
  140|       |            /****************************************************/
  141|       |            /* weighted u4_ofst pointer calculations, this loop  */
  142|       |            /* runs maximum 4 times, even in direct cases       */
  143|       |            /****************************************************/
  144|  5.77M|            u1_scale_ref = u1_mbaff & u1_field;
  145|       |
  146|  5.77M|            u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb;
  147|  5.77M|            if(ps_dec->ps_cur_pps->u1_wted_pred_flag)
  ------------------
  |  Branch (147:16): [True: 3.32M, False: 2.44M]
  ------------------
  148|  3.32M|            {
  149|  3.32M|                u1_num_ref = MIN(u1_num_part, 4);
  ------------------
  |  |   61|  3.32M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 3.31M, False: 13.3k]
  |  |  ------------------
  ------------------
  150|  6.70M|                for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++)
  ------------------
  |  Branch (150:36): [True: 3.37M, False: 3.32M]
  ------------------
  151|  3.37M|                {
  152|  3.37M|                    i1_ref_idx = pi1_ref_idx[u1_blk_no];
  153|  3.37M|                    if(u1_scale_ref)
  ------------------
  |  Branch (153:24): [True: 0, False: 3.37M]
  ------------------
  154|      0|                        i1_ref_idx >>= 1;
  155|  3.37M|                    pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
  156|  3.37M|                                    * X3(i1_ref_idx)];
  ------------------
  |  |   92|  3.37M|#define X3(a)   (((a) << 1) + (a))
  ------------------
  157|  3.37M|                    ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
  158|  3.37M|                }
  159|  3.32M|            }
  160|  2.44M|            else
  161|  2.44M|            {
  162|  2.44M|                ppu4_wt_ofst[0] = NULL;
  163|  2.44M|                ppu4_wt_ofst[1] = NULL;
  164|  2.44M|                ppu4_wt_ofst[2] = NULL;
  165|  2.44M|                ppu4_wt_ofst[3] = NULL;
  166|  2.44M|            }
  167|       |
  168|       |            /**************************************************/
  169|       |            /* Loop on Partitions                             */
  170|       |            /**************************************************/
  171|  11.6M|            for(j = 0; j < u1_num_part; j++, ps_part++)
  ------------------
  |  Branch (171:24): [True: 5.92M, False: 5.77M]
  ------------------
  172|  5.92M|            {
  173|       |
  174|  5.92M|                u1_sub_mb_num = ps_part->u1_sub_mb_num;
  175|  5.92M|                ps_dec->u1_sub_mb_num = u1_sub_mb_num;
  176|       |
  177|  5.92M|                if(PART_NOT_DIRECT != ps_part->u1_is_direct)
  ------------------
  |  |  570|  5.92M|#define PART_NOT_DIRECT                0
  ------------------
  |  Branch (177:20): [True: 5.62M, False: 291k]
  ------------------
  178|  5.62M|                {
  179|       |                    /* Mb Skip Mode */
  180|       |                    /* Setting the default and other members of MvPred Structure */
  181|  5.62M|                    s_mvPred.i2_mv[2] = -1;
  182|  5.62M|                    s_mvPred.i2_mv[3] = -1;
  183|  5.62M|                    s_mvPred.i1_ref_frame[0] = 0;
  184|  5.62M|                    i1_ref_idx = (u1_scale_ref && u4_bot_mb) ? MAX_REF_BUFS : 0;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (184:35): [True: 0, False: 5.62M]
  |  Branch (184:51): [True: 0, False: 0]
  ------------------
  185|  5.62M|                    ps_ref_frame = pps_ref_frame[i1_ref_idx];
  186|  5.62M|                    s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
  187|  5.62M|                    s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
  188|  5.62M|                    pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[0];
  189|       |
  190|  5.62M|                    ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb_start,
  191|  5.62M|                                      ps_mv_ntop_start, &s_mvPred, 0, 4, 0, 1,
  192|  5.62M|                                      MB_SKIP);
  ------------------
  |  |  456|  5.62M|#define MB_SKIP     255
  ------------------
  193|       |
  194|       |
  195|       |
  196|       |
  197|       |
  198|       |
  199|  5.62M|                    {
  200|  5.62M|                        pred_info_pkd_t *ps_pred_pkd;
  201|  5.62M|                        ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  202|  5.62M|                    ih264d_fill_pred_info (s_mvPred.i2_mv,4,4,0,PRED_L0,ps_pred_pkd,ps_ref_frame->u1_pic_buf_id,
  ------------------
  |  |  483|  5.62M|#define PRED_L0   1
  ------------------
  203|  5.62M|                                           (i1_ref_idx >> u1_scale_ref),pu4_wt_offst,
  204|  5.62M|                                           ps_ref_frame->u1_pic_type);
  205|       |
  206|       |
  207|  5.62M|                    ps_dec->u4_pred_info_pkd_idx++;
  208|  5.62M|                    ps_cur_mb_info->u1_num_pred_parts++;
  209|  5.62M|                    }
  210|       |
  211|       |
  212|       |
  213|       |                    /* Storing colocated zero information */
  214|  5.62M|                    u1_colz = ((ABS(s_mvPred.i2_mv[0]) <= 1)
  ------------------
  |  |  100|  5.62M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 19.4k, False: 5.61M]
  |  |  ------------------
  ------------------
  |  Branch (214:32): [True: 5.60M, False: 21.9k]
  ------------------
  215|  5.60M|                                    && (ABS(s_mvPred.i2_mv[1]) <= 1))
  ------------------
  |  |  100|  5.60M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 1.38k, False: 5.60M]
  |  |  ------------------
  ------------------
  |  Branch (215:40): [True: 5.60M, False: 6.79k]
  ------------------
  216|  5.62M|                                    + (u1_field << 1);
  217|       |
  218|  5.62M|                    ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
  219|  5.62M|                                       u1_colz, 4, 4);
  220|  5.62M|                }
  221|   291k|                else
  222|   291k|                {
  223|   291k|                    u1_sub_mb_x = u1_sub_mb_num & 0x03;
  224|   291k|                    u1_sub_mb_y = u1_sub_mb_num >> 2;
  225|   291k|                    u1_blk_no =
  226|   291k|                                    (u1_num_part < 4) ?
  ------------------
  |  Branch (226:37): [True: 172k, False: 118k]
  ------------------
  227|   172k|                                                    j :
  228|   291k|                                                    (((u1_sub_mb_y >> 1) << 1)
  229|   118k|                                                                    + (u1_sub_mb_x
  230|   118k|                                                                                    >> 1));
  231|       |
  232|   291k|                    ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x;
  233|   291k|                    ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num;
  234|       |
  235|   291k|                    u1_wd = ps_part->u1_partwidth;
  236|   291k|                    u1_ht = ps_part->u1_partheight;
  237|       |
  238|       |                    /* Populate the colpic info and reference frames */
  239|   291k|                    i1_ref_idx = pi1_ref_idx[u1_blk_no];
  240|   291k|                    s_mvPred.i1_ref_frame[0] = i1_ref_idx;
  241|       |
  242|       |                    /********************************************************/
  243|       |                    /* Predict Mv                                           */
  244|       |                    /* Add Mv Residuals and store back                      */
  245|       |                    /********************************************************/
  246|   291k|                    ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop,
  247|   291k|                                      &s_mvPred, u1_sub_mb_num, u1_wd, 0, 1,
  248|   291k|                                      ps_cur_mb_info->u1_mb_mc_mode);
  249|   291k|                    i2_mv_x = ps_mv_nmb->i2_mv[0];
  250|   291k|                    i2_mv_y = ps_mv_nmb->i2_mv[1];
  251|   291k|                    i2_mv_x += s_mvPred.i2_mv[0];
  252|   291k|                    i2_mv_y += s_mvPred.i2_mv[1];
  253|   291k|                    s_mvPred.i2_mv[0] = i2_mv_x;
  254|   291k|                    s_mvPred.i2_mv[1] = i2_mv_y;
  255|       |
  256|       |                    /********************************************************/
  257|       |                    /* Transfer setup call                                  */
  258|       |                    /* convert RefIdx if it is MbAff                        */
  259|       |                    /* Pass Weight Offset and refFrame                      */
  260|       |                    /********************************************************/
  261|   291k|                    i1_ref_idx1 = i1_ref_idx >> u1_scale_ref;
  262|   291k|                    if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb))
  ------------------
  |  Branch (262:24): [True: 0, False: 291k]
  |  Branch (262:40): [True: 0, False: 0]
  ------------------
  263|      0|                        i1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  264|   291k|                    ps_ref_frame = pps_ref_frame[i1_ref_idx1];
  265|   291k|                    pu4_wt_offst = ppu4_wt_ofst[u1_blk_no];
  266|       |
  267|       |
  268|       |
  269|       |
  270|       |
  271|       |
  272|   291k|                    {
  273|   291k|                    pred_info_pkd_t *ps_pred_pkd;
  274|   291k|                    ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  275|   291k|                    ih264d_fill_pred_info (s_mvPred.i2_mv,u1_wd,u1_ht,u1_sub_mb_num,PRED_L0,ps_pred_pkd,
  ------------------
  |  |  483|   291k|#define PRED_L0   1
  ------------------
  276|   291k|                                           ps_ref_frame->u1_pic_buf_id,(i1_ref_idx >> u1_scale_ref),pu4_wt_offst,
  277|   291k|                                           ps_ref_frame->u1_pic_type);
  278|       |
  279|   291k|                    ps_dec->u4_pred_info_pkd_idx++;
  280|   291k|                    ps_cur_mb_info->u1_num_pred_parts++;
  281|   291k|                    }
  282|       |
  283|       |
  284|       |
  285|       |                    /* Fill colocated info in MvPred structure */
  286|   291k|                    s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
  287|   291k|                    s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
  288|       |
  289|       |                    /* Calculating colocated zero information */
  290|   291k|                    u1_colz =
  291|   291k|                                    (u1_field << 1)
  292|   291k|                                                    | ((i1_ref_idx == 0)
  ------------------
  |  Branch (292:56): [True: 215k, False: 75.8k]
  ------------------
  293|   215k|                                                                    && (ABS(i2_mv_x)
  ------------------
  |  |  100|   215k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 51.8k, False: 163k]
  |  |  ------------------
  ------------------
  |  Branch (293:72): [True: 121k, False: 93.4k]
  ------------------
  294|   215k|                                                                                    <= 1)
  295|   121k|                                                                    && (ABS(i2_mv_y)
  ------------------
  |  |  100|   121k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 18.4k, False: 103k]
  |  |  ------------------
  ------------------
  |  Branch (295:72): [True: 96.3k, False: 25.4k]
  ------------------
  296|   121k|                                                                                    <= 1));
  297|   291k|                    u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no];
  298|       |
  299|       |                    /* Replicate the motion vectors and colzero u4_flag  */
  300|       |                    /* for all sub-partitions                         */
  301|       |
  302|   291k|                    ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb,
  303|   291k|                                       u1_sub_mb_num, u1_colz, u1_ht,
  304|   291k|                                       u1_wd);
  305|   291k|                }
  306|  5.92M|            }
  307|       |
  308|  5.77M|        }
  309|  26.0k|        else
  310|  26.0k|        {
  311|       |            /* Storing colocated zero information */
  312|  26.0k|            ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
  313|  26.0k|                               (UWORD8)(u1_field << 1), 4, 4);
  314|       |
  315|  26.0k|        }
  316|       |        /*if num _cores is set to 3,compute bs will be done in another thread*/
  317|  5.79M|        if(ps_dec->u4_num_cores < 3)
  ------------------
  |  Branch (317:12): [True: 5.79M, False: 0]
  ------------------
  318|  5.79M|        {
  319|       |
  320|  5.79M|            if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (320:16): [True: 5.79M, False: 0]
  ------------------
  321|  5.79M|                ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
  322|  5.79M|                                     (UWORD16)(i >> u1_mbaff));
  323|  5.79M|        }
  324|  5.79M|    }
  325|       |
  326|       |
  327|       |
  328|   858k|    return OK;
  ------------------
  |  |  114|   858k|#define OK        0
  ------------------
  329|   858k|}
ih264d_decode_recon_tfr_nmb:
  338|   868k|{
  339|   868k|    WORD32 i,j;
  340|   868k|    UWORD32 u1_end_of_row_next;
  341|   868k|    dec_mb_info_t * ps_cur_mb_info;
  342|   868k|    UWORD32 u4_update_mbaff = 0;
  343|   868k|    WORD32 ret;
  344|   868k|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  345|   868k|    const UWORD32 u1_slice_type = ps_dec->ps_cur_slice->u1_slice_type;
  346|   868k|    const WORD32 u1_skip_th = (
  347|   868k|                    (u1_slice_type != I_SLICE) ?
  ------------------
  |  |  370|   868k|#define I_SLICE  2
  ------------------
  |  Branch (347:21): [True: 860k, False: 8.62k]
  ------------------
  348|   860k|                                    (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  480|   466k|#define B_8x8    22
  ------------------
                                                  (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  454|   394k|#define PRED_8x8R0  4
  ------------------
  |  Branch (348:38): [True: 466k, False: 394k]
  ------------------
  349|   868k|    const UWORD32 u1_ipcm_th = (
  350|   868k|                    (u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? 23 : 5) : 0);
  ------------------
  |  |  370|   868k|#define I_SLICE  2
  ------------------
  |  Branch (350:21): [True: 860k, False: 8.62k]
  |  Branch (350:51): [True: 466k, False: 394k]
  ------------------
  351|       |
  352|       |
  353|       |
  354|       |
  355|       |
  356|       |    /* N Mb MC Loop */
  357|  5.72M|    for(i = u4_mb_idx; i < u4_num_mbs; i++)
  ------------------
  |  Branch (357:24): [True: 4.85M, False: 868k]
  ------------------
  358|  4.85M|    {
  359|  4.85M|        ps_cur_mb_info = ps_dec->ps_nmb_info + i;
  360|  4.85M|        ps_dec->u4_dma_buf_idx = 0;
  361|  4.85M|        ps_dec->u4_pred_info_idx = 0;
  362|       |
  363|  4.85M|        if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (363:12): [True: 107k, False: 4.75M]
  ------------------
  364|   107k|        {
  365|   107k|            {
  366|   107k|                WORD32 pred_cnt = 0;
  367|   107k|                pred_info_pkd_t *ps_pred_pkd;
  368|   107k|                UWORD32 u4_pred_info_pkd_idx;
  369|   107k|                WORD8 i1_pred;
  370|       |
  371|   107k|                u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
  372|       |
  373|   367k|                while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (373:23): [True: 259k, False: 107k]
  ------------------
  374|   259k|                {
  375|       |
  376|   259k|                    ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
  377|       |
  378|   259k|                     ps_dec->p_form_mb_part_info(ps_pred_pkd,ps_dec,
  379|   259k|                                               ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff),
  380|   259k|                                         ps_cur_mb_info);
  381|   259k|                    u4_pred_info_pkd_idx++;
  382|   259k|                    pred_cnt++;
  383|   259k|                }
  384|   107k|            }
  385|       |
  386|   107k|            ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info);
  387|       |
  388|   107k|        }
  389|  4.75M|        else if(ps_cur_mb_info->u1_mb_type == MB_SKIP)
  ------------------
  |  |  456|  4.75M|#define MB_SKIP     255
  ------------------
  |  Branch (389:17): [True: 4.69M, False: 53.3k]
  ------------------
  390|  4.69M|        {
  391|  4.69M|            {
  392|  4.69M|                WORD32 pred_cnt = 0;
  393|  4.69M|                pred_info_pkd_t *ps_pred_pkd;
  394|  4.69M|                UWORD32 u4_pred_info_pkd_idx;
  395|  4.69M|                WORD8 i1_pred;
  396|       |
  397|  4.69M|                u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
  398|       |
  399|  9.65M|                while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (399:23): [True: 4.95M, False: 4.69M]
  ------------------
  400|  4.95M|                {
  401|       |
  402|  4.95M|                    ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
  403|       |
  404|  4.95M|                    ps_dec->p_form_mb_part_info(ps_pred_pkd,ps_dec,
  405|  4.95M|                                               ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff),
  406|  4.95M|                                         ps_cur_mb_info);
  407|       |
  408|  4.95M|                    u4_pred_info_pkd_idx++;
  409|  4.95M|                    pred_cnt++;
  410|  4.95M|                }
  411|  4.69M|            }
  412|       |            /* Decode MB skip */
  413|  4.69M|            ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info);
  414|       |
  415|  4.69M|        }
  416|       |
  417|  4.85M|     }
  418|       |
  419|       |
  420|       |    /* N Mb IQ IT RECON  Loop */
  421|  5.72M|    for(j = u4_mb_idx; j < i; j++)
  ------------------
  |  Branch (421:24): [True: 4.85M, False: 868k]
  ------------------
  422|  4.85M|    {
  423|  4.85M|        ps_cur_mb_info = ps_dec->ps_nmb_info + j;
  424|       |
  425|  4.85M|        if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (425:12): [True: 107k, False: 4.75M]
  ------------------
  426|   107k|        {
  427|   107k|            ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j);
  428|       |
  429|   107k|        }
  430|  4.75M|        else if(ps_cur_mb_info->u1_mb_type != MB_SKIP)
  ------------------
  |  |  456|  4.75M|#define MB_SKIP     255
  ------------------
  |  Branch (430:17): [True: 53.3k, False: 4.69M]
  ------------------
  431|  53.3k|        {
  432|  53.3k|            if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type)
  ------------------
  |  Branch (432:16): [True: 53.0k, False: 310]
  ------------------
  433|  53.0k|            {
  434|  53.0k|                ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1);
  435|  53.0k|                ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j);
  436|  53.0k|            }
  437|  53.3k|        }
  438|       |
  439|       |
  440|  4.85M|        if(ps_dec->u4_use_intrapred_line_copy)
  ------------------
  |  Branch (440:12): [True: 4.85M, False: 0]
  ------------------
  441|  4.85M|        {
  442|  4.85M|            ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j);
  443|  4.85M|        }
  444|       |
  445|  4.85M|    }
  446|       |
  447|       |    /*N MB deblocking*/
  448|   868k|    if(ps_dec->u4_nmb_deblk == 1)
  ------------------
  |  Branch (448:8): [True: 868k, False: 0]
  ------------------
  449|   868k|    {
  450|       |
  451|   868k|        UWORD32 u4_cur_mb, u4_right_mb;
  452|   868k|        UWORD32 u4_mb_x, u4_mb_y;
  453|   868k|        UWORD32 u4_wd_y, u4_wd_uv;
  454|   868k|        tfr_ctxt_t *ps_tfr_cxt = &(ps_dec->s_tran_addrecon);
  455|   868k|        UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
  456|   868k|        const WORD32 i4_cb_qp_idx_ofst =
  457|   868k|                       ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
  458|   868k|        const WORD32 i4_cr_qp_idx_ofst =
  459|   868k|                       ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
  460|       |
  461|   868k|        u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
  462|   868k|        u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
  463|       |
  464|       |
  465|   868k|        ps_cur_mb_info = ps_dec->ps_nmb_info + u4_mb_idx;
  466|       |
  467|   868k|        ps_dec->u4_deblk_mb_x = ps_cur_mb_info->u2_mbx;
  468|   868k|        ps_dec->u4_deblk_mb_y = ps_cur_mb_info->u2_mby;
  469|       |
  470|  5.72M|        for(j = u4_mb_idx; j < i; j++)
  ------------------
  |  Branch (470:28): [True: 4.85M, False: 868k]
  ------------------
  471|  4.85M|        {
  472|       |
  473|  4.85M|            ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt,
  474|  4.85M|                                       i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
  475|  4.85M|                                        u4_wd_y, u4_wd_uv);
  476|       |
  477|       |
  478|  4.85M|        }
  479|       |
  480|       |
  481|       |
  482|   868k|    }
  483|       |
  484|       |
  485|       |
  486|   868k|    if(u4_tfr_n_mb)
  ------------------
  |  Branch (486:8): [True: 868k, False: 0]
  ------------------
  487|   868k|    {
  488|       |        /****************************************************************/
  489|       |        /* Check for End Of Row in Next iteration                       */
  490|       |        /****************************************************************/
  491|   868k|        u1_end_of_row_next =
  492|   868k|                        u4_num_mbs_next
  ------------------
  |  Branch (492:25): [True: 9.24k, False: 859k]
  ------------------
  493|  9.24k|                                        && (u4_num_mbs_next
  ------------------
  |  Branch (493:44): [True: 9.24k, False: 0]
  ------------------
  494|  9.24k|                                                        <= (ps_dec->u4_recon_mb_grp
  495|  9.24k|                                                                        >> u1_mbaff));
  496|       |
  497|       |        /****************************************************************/
  498|       |        /* Transfer the Following things                                */
  499|       |        /* N-Mb DeblkParams Data    ( To Ext DeblkParams Buffer )       */
  500|       |        /* N-Mb Recon Data          ( To Ext Frame Buffer )             */
  501|       |        /* N-Mb Intrapredline Data  ( Updated Internally)               */
  502|       |        /* N-Mb MV Data             ( To Ext MV Buffer )                */
  503|       |        /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers)    */
  504|       |        /****************************************************************/
  505|   868k|        ih264d_transfer_mb_group_data(ps_dec, u4_num_mbs, u4_end_of_row,
  506|   868k|                                      u1_end_of_row_next);
  507|   868k|        ps_dec->u4_num_mbs_prev_nmb = u4_num_mbs;
  508|       |
  509|   868k|        ps_dec->u4_pred_info_idx = 0;
  510|   868k|        ps_dec->u4_dma_buf_idx = 0;
  511|       |
  512|       |
  513|   868k|    }
  514|   868k|    return OK;
  ------------------
  |  |  114|   868k|#define OK        0
  ------------------
  515|   868k|}
ih264d_process_inter_mb:
  532|   375k|{
  533|       |    /* CHANGED CODE */
  534|   375k|    UWORD8 *pu1_rec_y, *pu1_rec_u, *pu1_rec_v;
  535|       |
  536|       |    /*CHANGED CODE */
  537|   375k|    UWORD32 ui_rec_width, u4_recwidth_cr;
  538|   375k|    WORD16 *pi2_y_coeff;
  539|   375k|    UWORD32 u1_mb_field_decoding_flag;
  540|   375k|    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  541|   375k|    UWORD32 uc_botMb;
  542|   375k|    UWORD32 u4_num_pmbair;
  543|       |    /* CHANGED CODE */
  544|   375k|    tfr_ctxt_t *ps_frame_buf = ps_dec->ps_frame_buf_ip_recon;
  545|   375k|    UWORD32 u4_luma_dc_only_csbp = 0;
  546|   375k|    UWORD32 u4_luma_dc_only_cbp = 0;
  547|       |    /* CHANGED CODE */
  548|       |
  549|   375k|    uc_botMb = 1 - ps_cur_mb_info->u1_topmb;
  550|   375k|    u4_num_pmbair = (u4_mb_num >> u1_mbaff);
  551|   375k|    u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
  552|       |
  553|       |
  554|       |    /* CHANGED CODE */
  555|   375k|    pu1_rec_y = ps_frame_buf->pu1_dest_y + (u4_num_pmbair << 4);
  556|   375k|    pu1_rec_u =
  557|   375k|                    ps_frame_buf->pu1_dest_u
  558|   375k|                                    + (u4_num_pmbair << 3) * YUV420SP_FACTOR;
  ------------------
  |  |  119|   375k|#define YUV420SP_FACTOR 2
  ------------------
  559|   375k|    pu1_rec_v = ps_frame_buf->pu1_dest_v + (u4_num_pmbair << 3);
  560|   375k|    ui_rec_width = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
  561|   375k|    u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
  562|       |
  563|       |    /* CHANGED CODE */
  564|       |
  565|   375k|    if(u1_mbaff)
  ------------------
  |  Branch (565:8): [True: 0, False: 375k]
  ------------------
  566|      0|    {
  567|      0|        if(uc_botMb)
  ------------------
  |  Branch (567:12): [True: 0, False: 0]
  ------------------
  568|      0|        {
  569|      0|            pu1_rec_y += (u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (569:27): [True: 0, False: 0]
  ------------------
  570|      0|                            (ui_rec_width >> 1) : (ui_rec_width << 4));
  571|      0|            pu1_rec_u += (u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (571:27): [True: 0, False: 0]
  ------------------
  572|      0|                            (u4_recwidth_cr >> 1) : (u4_recwidth_cr << 3));
  573|      0|            pu1_rec_v += (u1_mb_field_decoding_flag ?
  ------------------
  |  Branch (573:27): [True: 0, False: 0]
  ------------------
  574|      0|                            (u4_recwidth_cr >> 1) : (u4_recwidth_cr << 3));
  575|      0|        }
  576|      0|    }
  577|       |
  578|   375k|    if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (578:8): [True: 367k, False: 8.58k]
  ------------------
  579|   367k|    {
  580|   367k|        u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
  581|   367k|                                       ps_cur_mb_info,
  582|   367k|                                       0);
  583|   367k|    }
  584|  8.58k|    else
  585|  8.58k|    {
  586|  8.58k|        if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (586:12): [True: 2.63k, False: 5.94k]
  ------------------
  587|  2.63k|        {
  588|  2.63k|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
  589|  2.63k|                                           ps_cur_mb_info,
  590|  2.63k|                                           0);
  591|  2.63k|        }
  592|  5.94k|        else
  593|  5.94k|        {
  594|  5.94k|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec,
  595|  5.94k|                                           ps_cur_mb_info);
  596|  5.94k|        }
  597|  8.58k|    }
  598|       |
  599|   375k|    pi2_y_coeff = ps_dec->pi2_coeff_data;
  600|       |    /* Inverse Transform and Reconstruction */
  601|   375k|    if(ps_cur_mb_info->u1_cbp & 0x0f)
  ------------------
  |  Branch (601:8): [True: 61.0k, False: 314k]
  ------------------
  602|  61.0k|    {
  603|       |        /* CHANGED CODE */
  604|  61.0k|        if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (604:12): [True: 52.4k, False: 8.58k]
  ------------------
  605|  52.4k|        {
  606|  52.4k|            UWORD32 i;
  607|  52.4k|            WORD16 ai2_tmp[16];
  608|   891k|            for(i = 0; i < 16; i++)
  ------------------
  |  Branch (608:24): [True: 838k, False: 52.4k]
  ------------------
  609|   838k|            {
  610|   838k|                if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i))
  ------------------
  |  |   54|   838k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 185k, False: 653k]
  |  |  ------------------
  ------------------
  611|   185k|                {
  612|   185k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
  613|   185k|                    UWORD8 *pu1_pred_sblk = pu1_rec_y + ((i & 0x3) * BLK_SIZE)
  ------------------
  |  |  556|   185k|#define BLK_SIZE             4
  ------------------
  614|   185k|                                    + (i >> 2) * (ui_rec_width << 2);
  615|   185k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|   185k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  616|   185k|                    {
  617|   185k|                        if(CHECKBIT(u4_luma_dc_only_csbp, i))
  ------------------
  |  |   54|   185k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 35.1k, False: 150k]
  |  |  ------------------
  ------------------
  618|  35.1k|                        {
  619|  35.1k|                            ps_dec->pf_iquant_itrans_recon_luma_4x4_dc(
  620|  35.1k|                                            pi2_level,
  621|  35.1k|                                            pu1_pred_sblk,
  622|  35.1k|                                            pu1_pred_sblk,
  623|  35.1k|                                            ui_rec_width,
  624|  35.1k|                                            ui_rec_width,
  625|  35.1k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
  626|  35.1k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[3],
  627|  35.1k|                                            ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
  628|  35.1k|                                            NULL);
  629|  35.1k|                        }
  630|   150k|                        else
  631|   150k|                        {
  632|   150k|                            ps_dec->pf_iquant_itrans_recon_luma_4x4(
  633|   150k|                                            pi2_level,
  634|   150k|                                            pu1_pred_sblk,
  635|   150k|                                            pu1_pred_sblk,
  636|   150k|                                            ui_rec_width,
  637|   150k|                                            ui_rec_width,
  638|   150k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
  639|   150k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[3],
  640|   150k|                                            ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
  641|   150k|                                            NULL);
  642|   150k|                        }
  643|   185k|                    }
  644|   185k|                }
  645|   838k|            }
  646|  52.4k|        }
  647|  8.58k|        else
  648|  8.58k|        {
  649|  8.58k|            WORD16 *pi2_scale_matrix_ptr;
  650|  8.58k|            WORD32 i;
  651|       |
  652|  8.58k|            pi2_scale_matrix_ptr =
  653|  8.58k|                            ps_dec->s_high_profile.i2_scalinglist8x8[1];
  654|       |
  655|  42.9k|            for(i = 0; i < 4; i++)
  ------------------
  |  Branch (655:24): [True: 34.3k, False: 8.58k]
  ------------------
  656|  34.3k|            {
  657|  34.3k|                WORD16 ai2_tmp[64];
  658|  34.3k|                WORD16 *pi16_levelBlock = pi2_y_coeff + (i << 6); /* move to the next 8x8 adding 64 */
  659|       |
  660|  34.3k|                UWORD8 *pu1_pred_sblk = pu1_rec_y + ((i & 0x1) * BLK8x8SIZE)
  ------------------
  |  |  555|  34.3k|#define BLK8x8SIZE          8
  ------------------
  661|  34.3k|                                + (i >> 1) * (ui_rec_width << 3);
  662|  34.3k|                if(CHECKBIT(ps_cur_mb_info->u1_cbp, i))
  ------------------
  |  |   54|  34.3k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 18.2k, False: 16.0k]
  |  |  ------------------
  ------------------
  663|  18.2k|                {
  664|  18.2k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  18.2k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  665|  18.2k|                    {
  666|  18.2k|                        if(CHECKBIT(u4_luma_dc_only_cbp, i))
  ------------------
  |  |   54|  18.2k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 883, False: 17.3k]
  |  |  ------------------
  ------------------
  667|    883|                        {
  668|    883|                            ps_dec->pf_iquant_itrans_recon_luma_8x8_dc(
  669|    883|                                            pi16_levelBlock,
  670|    883|                                            pu1_pred_sblk,
  671|    883|                                            pu1_pred_sblk,
  672|    883|                                            ui_rec_width,
  673|    883|                                            ui_rec_width,
  674|    883|                                            gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
  675|    883|                                            (UWORD16 *)pi2_scale_matrix_ptr,
  676|    883|                                            ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
  677|    883|                                            NULL);
  678|    883|                        }
  679|  17.3k|                        else
  680|  17.3k|                        {
  681|  17.3k|                            ps_dec->pf_iquant_itrans_recon_luma_8x8(
  682|  17.3k|                                            pi16_levelBlock,
  683|  17.3k|                                            pu1_pred_sblk,
  684|  17.3k|                                            pu1_pred_sblk,
  685|  17.3k|                                            ui_rec_width,
  686|  17.3k|                                            ui_rec_width,
  687|  17.3k|                                            gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
  688|  17.3k|                                            (UWORD16 *)pi2_scale_matrix_ptr,
  689|  17.3k|                                            ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
  690|  17.3k|                                            NULL);
  691|  17.3k|                        }
  692|  18.2k|                    }
  693|  18.2k|                }
  694|  34.3k|            }
  695|       |
  696|  8.58k|        }
  697|  61.0k|    }
  698|       |
  699|       |    /* Decode Chroma Block */
  700|   375k|    ih264d_unpack_chroma_coeff4x4_mb(ps_dec,
  701|   375k|                                     ps_cur_mb_info);
  702|       |    /*--------------------------------------------------------------------*/
  703|       |    /* Chroma Blocks decoding                                             */
  704|       |    /*--------------------------------------------------------------------*/
  705|   375k|    {
  706|   375k|        UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4);
  707|       |
  708|   375k|        if(u1_chroma_cbp != CBPC_ALLZERO)
  ------------------
  |  |  507|   375k|#define CBPC_ALLZERO    0
  ------------------
  |  Branch (708:12): [True: 26.4k, False: 349k]
  ------------------
  709|  26.4k|        {
  710|  26.4k|            UWORD32 u4_scale_u = ps_cur_mb_info->u1_qpc_div6;
  711|  26.4k|            UWORD32 u4_scale_v = ps_cur_mb_info->u1_qpcr_div6;
  712|  26.4k|            UWORD16 u2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp;
  713|       |
  714|  26.4k|            pi2_y_coeff = ps_dec->pi2_coeff_data;
  715|       |
  716|  26.4k|            {
  717|  26.4k|                UWORD32 i;
  718|  26.4k|                WORD16 ai2_tmp[16];
  719|   132k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (719:28): [True: 105k, False: 26.4k]
  ------------------
  720|   105k|                {
  721|   105k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
  722|   105k|                    UWORD8 *pu1_pred_sblk = pu1_rec_u
  723|   105k|                                    + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  556|   105k|#define BLK_SIZE             4
  ------------------
                                                  + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  119|   105k|#define YUV420SP_FACTOR 2
  ------------------
  724|   105k|                                    + (i >> 1) * (u4_recwidth_cr << 2);
  725|   105k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|   105k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  726|   105k|                    {
  727|   105k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|   105k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 6.22k, False: 99.6k]
  |  |  ------------------
  ------------------
  728|  6.22k|                        {
  729|  6.22k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4(
  730|  6.22k|                                            pi2_level,
  731|  6.22k|                                            pu1_pred_sblk,
  732|  6.22k|                                            pu1_pred_sblk,
  733|  6.22k|                                            u4_recwidth_cr,
  734|  6.22k|                                            u4_recwidth_cr,
  735|  6.22k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
  736|  6.22k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[4],
  737|  6.22k|                                            u4_scale_u, ai2_tmp, pi2_level);
  738|  6.22k|                        }
  739|  99.6k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (739:33): [True: 60.4k, False: 39.2k]
  ------------------
  740|  60.4k|                        {
  741|  60.4k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
  742|  60.4k|                                            pi2_level,
  743|  60.4k|                                            pu1_pred_sblk,
  744|  60.4k|                                            pu1_pred_sblk,
  745|  60.4k|                                            u4_recwidth_cr,
  746|  60.4k|                                            u4_recwidth_cr,
  747|  60.4k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
  748|  60.4k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[4],
  749|  60.4k|                                            u4_scale_u, ai2_tmp, pi2_level);
  750|  60.4k|                        }
  751|   105k|                    }
  752|   105k|                }
  753|  26.4k|            }
  754|       |
  755|  26.4k|            pi2_y_coeff += MB_CHROM_SIZE;
  ------------------
  |  |  564|  26.4k|#define MB_CHROM_SIZE                 64
  ------------------
  756|  26.4k|            u2_chroma_csbp >>= 4;
  757|       |
  758|  26.4k|            {
  759|  26.4k|                UWORD32 i;
  760|  26.4k|                WORD16 ai2_tmp[16];
  761|   132k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (761:28): [True: 105k, False: 26.4k]
  ------------------
  762|   105k|                {
  763|   105k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
  764|   105k|                    UWORD8 *pu1_pred_sblk = pu1_rec_u + 1
  765|   105k|                                    + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  556|   105k|#define BLK_SIZE             4
  ------------------
                                                  + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
  ------------------
  |  |  119|   105k|#define YUV420SP_FACTOR 2
  ------------------
  766|   105k|                                    + (i >> 1) * (u4_recwidth_cr << 2);
  767|   105k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|   105k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  768|   105k|                    {
  769|   105k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|   105k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 7.83k, False: 98.0k]
  |  |  ------------------
  ------------------
  770|  7.83k|                        {
  771|  7.83k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4(
  772|  7.83k|                                            pi2_level,
  773|  7.83k|                                            pu1_pred_sblk,
  774|  7.83k|                                            pu1_pred_sblk,
  775|  7.83k|                                            u4_recwidth_cr,
  776|  7.83k|                                            u4_recwidth_cr,
  777|  7.83k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
  778|  7.83k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[5],
  779|  7.83k|                                            u4_scale_v, ai2_tmp, pi2_level);
  780|  7.83k|                        }
  781|  98.0k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (781:33): [True: 52.5k, False: 45.4k]
  ------------------
  782|  52.5k|                        {
  783|  52.5k|                            ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
  784|  52.5k|                                            pi2_level,
  785|  52.5k|                                            pu1_pred_sblk,
  786|  52.5k|                                            pu1_pred_sblk,
  787|  52.5k|                                            u4_recwidth_cr,
  788|  52.5k|                                            u4_recwidth_cr,
  789|  52.5k|                                            gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
  790|  52.5k|                                            (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[5],
  791|  52.5k|                                            u4_scale_v, ai2_tmp, pi2_level);
  792|  52.5k|                        }
  793|   105k|                    }
  794|   105k|                }
  795|  26.4k|            }
  796|  26.4k|        }
  797|   375k|    }
  798|   375k|    return (0);
  799|   375k|}
ih264d_parse_pred_weight_table:
  815|  28.6k|{
  816|  28.6k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  817|  28.6k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  818|  28.6k|    WORD8 i, cont, lx;
  819|  28.6k|    UWORD8 uc_weight_flag;
  820|  28.6k|    UWORD32 *pui32_weight_offset_lx;
  821|  28.6k|    WORD16 c_weight, c_offset;
  822|  28.6k|    UWORD32 ui32_y_def_weight_ofst, ui32_cr_def_weight_ofst;
  823|  28.6k|    UWORD32 ui32_temp;
  824|  28.6k|    UWORD8 uc_luma_log2_weight_denom;
  825|  28.6k|    UWORD8 uc_chroma_log2_weight_denom;
  826|       |
  827|       |    /* Variables for error resilience checks */
  828|  28.6k|    UWORD32 u4_temp;
  829|  28.6k|    WORD32 i_temp;
  830|       |
  831|  28.6k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  832|  28.6k|    if(u4_temp > MAX_LOG2_WEIGHT_DENOM)
  ------------------
  |  |  608|  28.6k|#define MAX_LOG2_WEIGHT_DENOM       7
  ------------------
  |  Branch (832:8): [True: 6.66k, False: 21.9k]
  ------------------
  833|  6.66k|    {
  834|  6.66k|        return ERROR_PRED_WEIGHT_TABLE_T;
  835|  6.66k|    }
  836|  21.9k|    uc_luma_log2_weight_denom = u4_temp;
  837|  21.9k|    COPYTHECONTEXT("SH: luma_log2_weight_denom",uc_luma_log2_weight_denom);
  838|  21.9k|    ui32_y_def_weight_ofst = (1 << uc_luma_log2_weight_denom);
  839|       |
  840|  21.9k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  841|  21.9k|    if(u4_temp > MAX_LOG2_WEIGHT_DENOM)
  ------------------
  |  |  608|  21.9k|#define MAX_LOG2_WEIGHT_DENOM       7
  ------------------
  |  Branch (841:8): [True: 2.07k, False: 19.8k]
  ------------------
  842|  2.07k|    {
  843|  2.07k|        return ERROR_PRED_WEIGHT_TABLE_T;
  844|  2.07k|    }
  845|  19.8k|    uc_chroma_log2_weight_denom = u4_temp;
  846|  19.8k|    COPYTHECONTEXT("SH: chroma_log2_weight_denom",uc_chroma_log2_weight_denom);
  847|  19.8k|    ui32_cr_def_weight_ofst = (1 << uc_chroma_log2_weight_denom);
  848|       |
  849|  19.8k|    ps_cur_slice->u2_log2Y_crwd = uc_luma_log2_weight_denom
  850|  19.8k|                    | (uc_chroma_log2_weight_denom << 8);
  851|       |
  852|  19.8k|    cont = (ps_cur_slice->u1_slice_type == B_SLICE);
  ------------------
  |  |  369|  19.8k|#define B_SLICE  1
  ------------------
  853|  19.8k|    lx = 0;
  854|  19.8k|    do
  855|  22.9k|    {
  856|  55.4k|        for(i = 0; i < ps_cur_slice->u1_num_ref_idx_lx_active[lx]; i++)
  ------------------
  |  Branch (856:20): [True: 35.1k, False: 20.3k]
  ------------------
  857|  35.1k|        {
  858|  35.1k|            pui32_weight_offset_lx = ps_cur_slice->u4_wt_ofst_lx[lx][i];
  859|       |
  860|  35.1k|            uc_weight_flag = ih264d_get_bit_h264(ps_bitstrm);
  861|  35.1k|            pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  862|  35.1k|            COPYTHECONTEXT("SH: luma_weight_l0_flag",uc_weight_flag);
  863|  35.1k|            if(uc_weight_flag)
  ------------------
  |  Branch (863:16): [True: 17.3k, False: 17.7k]
  ------------------
  864|  17.3k|            {
  865|  17.3k|                i_temp = ih264d_sev(pu4_bitstrm_ofst,
  866|  17.3k|                                    pu4_bitstrm_buf);
  867|  17.3k|                if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  609|  17.3k|#define PRED_WEIGHT_MIN             (-128)
  ------------------
                              if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  610|  17.2k|#define PRED_WEIGHT_MAX             127
  ------------------
  |  Branch (867:20): [True: 106, False: 17.2k]
  |  Branch (867:50): [True: 223, False: 17.0k]
  ------------------
  868|    329|                    return ERROR_PRED_WEIGHT_TABLE_T;
  869|  17.0k|                c_weight = i_temp;
  870|  17.0k|                COPYTHECONTEXT("SH: luma_weight_l0",c_weight);
  871|       |
  872|  17.0k|                i_temp = ih264d_sev(pu4_bitstrm_ofst,
  873|  17.0k|                                    pu4_bitstrm_buf);
  874|  17.0k|                if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  609|  17.0k|#define PRED_WEIGHT_MIN             (-128)
  ------------------
                              if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  610|  16.8k|#define PRED_WEIGHT_MAX             127
  ------------------
  |  Branch (874:20): [True: 184, False: 16.8k]
  |  Branch (874:50): [True: 404, False: 16.4k]
  ------------------
  875|    588|                    return ERROR_PRED_WEIGHT_TABLE_T;
  876|  16.4k|                c_offset = i_temp;
  877|  16.4k|                COPYTHECONTEXT("SH: luma_offset_l0",c_offset);
  878|       |
  879|  16.4k|                ui32_temp = (c_offset << 16) | (c_weight & 0xFFFF);
  880|  16.4k|                pui32_weight_offset_lx[0] = ui32_temp;
  881|  16.4k|            }
  882|  17.7k|            else
  883|  17.7k|            {
  884|       |
  885|  17.7k|                pui32_weight_offset_lx[0] = ui32_y_def_weight_ofst;
  886|  17.7k|            }
  887|       |
  888|  34.1k|            {
  889|  34.1k|                WORD8 c_weightCb, c_weightCr, c_offsetCb, c_offsetCr;
  890|  34.1k|                uc_weight_flag = ih264d_get_bit_h264(ps_bitstrm);
  891|  34.1k|                pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  892|  34.1k|                COPYTHECONTEXT("SH: chroma_weight_l0_flag",uc_weight_flag);
  893|  34.1k|                if(uc_weight_flag)
  ------------------
  |  Branch (893:20): [True: 17.0k, False: 17.1k]
  ------------------
  894|  17.0k|                {
  895|  17.0k|                    i_temp = ih264d_sev(pu4_bitstrm_ofst,
  896|  17.0k|                                        pu4_bitstrm_buf);
  897|  17.0k|                    if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  609|  17.0k|#define PRED_WEIGHT_MIN             (-128)
  ------------------
                                  if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  610|  16.8k|#define PRED_WEIGHT_MAX             127
  ------------------
  |  Branch (897:24): [True: 121, False: 16.8k]
  |  Branch (897:54): [True: 302, False: 16.5k]
  ------------------
  898|    423|                        return ERROR_PRED_WEIGHT_TABLE_T;
  899|  16.5k|                    c_weightCb = i_temp;
  900|  16.5k|                    COPYTHECONTEXT("SH: chroma_weight_l0",c_weightCb);
  901|       |
  902|  16.5k|                    i_temp = ih264d_sev(pu4_bitstrm_ofst,
  903|  16.5k|                                        pu4_bitstrm_buf);
  904|  16.5k|                    if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  609|  16.5k|#define PRED_WEIGHT_MIN             (-128)
  ------------------
                                  if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  610|  16.4k|#define PRED_WEIGHT_MAX             127
  ------------------
  |  Branch (904:24): [True: 151, False: 16.4k]
  |  Branch (904:54): [True: 256, False: 16.1k]
  ------------------
  905|    407|                        return ERROR_PRED_WEIGHT_TABLE_T;
  906|  16.1k|                    c_offsetCb = i_temp;
  907|  16.1k|                    COPYTHECONTEXT("SH: chroma_weight_l0",c_offsetCb);
  908|       |
  909|  16.1k|                    ui32_temp = (c_offsetCb << 16) | (c_weightCb & 0xFFFF);
  910|  16.1k|                    pui32_weight_offset_lx[1] = ui32_temp;
  911|       |
  912|  16.1k|                    i_temp = ih264d_sev(pu4_bitstrm_ofst,
  913|  16.1k|                                        pu4_bitstrm_buf);
  914|  16.1k|                    if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  609|  16.1k|#define PRED_WEIGHT_MIN             (-128)
  ------------------
                                  if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  610|  16.0k|#define PRED_WEIGHT_MAX             127
  ------------------
  |  Branch (914:24): [True: 122, False: 16.0k]
  |  Branch (914:54): [True: 323, False: 15.7k]
  ------------------
  915|    445|                        return ERROR_PRED_WEIGHT_TABLE_T;
  916|  15.7k|                    c_weightCr = i_temp;
  917|  15.7k|                    COPYTHECONTEXT("SH: chroma_weight_l0",c_weightCr);
  918|       |
  919|  15.7k|                    i_temp = ih264d_sev(pu4_bitstrm_ofst,
  920|  15.7k|                                        pu4_bitstrm_buf);
  921|  15.7k|                    if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  609|  15.7k|#define PRED_WEIGHT_MIN             (-128)
  ------------------
                                  if((i_temp < PRED_WEIGHT_MIN) || (i_temp > PRED_WEIGHT_MAX))
  ------------------
  |  |  610|  15.6k|#define PRED_WEIGHT_MAX             127
  ------------------
  |  Branch (921:24): [True: 116, False: 15.6k]
  |  Branch (921:54): [True: 257, False: 15.3k]
  ------------------
  922|    373|                        return ERROR_PRED_WEIGHT_TABLE_T;
  923|  15.3k|                    c_offsetCr = i_temp;
  924|  15.3k|                    COPYTHECONTEXT("SH: chroma_weight_l0",c_offsetCr);
  925|       |
  926|  15.3k|                    ui32_temp = (c_offsetCr << 16) | (c_weightCr & 0xFFFF);
  927|  15.3k|                    pui32_weight_offset_lx[2] = ui32_temp;
  928|  15.3k|                }
  929|  17.1k|                else
  930|  17.1k|                {
  931|  17.1k|                    pui32_weight_offset_lx[1] = ui32_cr_def_weight_ofst;
  932|  17.1k|                    pui32_weight_offset_lx[2] = ui32_cr_def_weight_ofst;
  933|  17.1k|                }
  934|  34.1k|            }
  935|  34.1k|        }
  936|  20.3k|        lx++;
  937|  20.3k|    }
  938|  20.3k|    while(cont--);
  ------------------
  |  Branch (938:11): [True: 3.02k, False: 17.3k]
  ------------------
  939|       |
  940|  17.3k|    return OK;
  ------------------
  |  |  114|  17.3k|#define OK        0
  ------------------
  941|  19.8k|}
ih264d_init_ref_idx_lx_p:
  985|  77.5k|{
  986|  77.5k|    struct pic_buffer_t *ps_ref_pic_buf_lx;
  987|  77.5k|    dpb_manager_t *ps_dpb_mgr;
  988|  77.5k|    struct dpb_info_t *ps_next_dpb;
  989|  77.5k|    WORD8 i, j;
  990|  77.5k|    UWORD8 u1_max_lt_index, u1_min_lt_index;
  991|  77.5k|    UWORD32 u4_lt_index;
  992|  77.5k|    UWORD8 u1_field_pic_flag;
  993|  77.5k|    dec_slice_params_t *ps_cur_slice;
  994|  77.5k|    UWORD8 u1_L0;
  995|  77.5k|    WORD32 i4_cur_pic_num, i4_min_st_pic_num;
  996|  77.5k|    WORD32 i4_temp_pic_num, i4_ref_pic_num;
  997|  77.5k|    UWORD8 u1_num_short_term_bufs;
  998|  77.5k|    UWORD8 u1_max_ref_idx_l0;
  999|  77.5k|    struct pic_buffer_t *aps_st_pic_bufs[2 * MAX_REF_BUFS] = {NULL};
 1000|       |
 1001|  77.5k|    ps_cur_slice = ps_dec->ps_cur_slice;
 1002|  77.5k|    u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
 1003|  77.5k|    u1_max_ref_idx_l0 = ps_cur_slice->u1_num_ref_idx_lx_active[0]
 1004|  77.5k|                    << u1_field_pic_flag;
 1005|       |
 1006|  77.5k|    ps_dpb_mgr = ps_dec->ps_dpb_mgr;
 1007|       |    /* Get the current frame number */
 1008|  77.5k|    i4_cur_pic_num = ps_dec->ps_cur_pic->i4_pic_num;
 1009|       |
 1010|       |    /* Get Min pic_num,MinLt */
 1011|  77.5k|    i4_min_st_pic_num = i4_cur_pic_num;
 1012|  77.5k|    u1_max_lt_index = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  77.5k|#define MAX_REF_BUFS    32
  ------------------
 1013|  77.5k|    u1_min_lt_index = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  77.5k|#define MAX_REF_BUFS    32
  ------------------
 1014|       |
 1015|       |    /* Start from ST head */
 1016|  77.5k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
 1017|   131k|    for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
  ------------------
  |  Branch (1017:16): [True: 54.1k, False: 77.5k]
  ------------------
 1018|  54.1k|    {
 1019|  54.1k|        i4_ref_pic_num = ps_next_dpb->ps_pic_buf->i4_pic_num;
 1020|  54.1k|        if(i4_ref_pic_num < i4_cur_pic_num)
  ------------------
  |  Branch (1020:12): [True: 19.7k, False: 34.3k]
  ------------------
 1021|  19.7k|        {
 1022|       |            /* RefPic Buf pic_num is before Current pic_num in decode order */
 1023|  19.7k|            i4_min_st_pic_num = MIN(i4_min_st_pic_num, i4_ref_pic_num);
  ------------------
  |  |   61|  19.7k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 679, False: 19.1k]
  |  |  ------------------
  ------------------
 1024|  19.7k|        }
 1025|       |
 1026|       |        /* Chase the next link */
 1027|  54.1k|        ps_next_dpb = ps_next_dpb->ps_prev_short;
 1028|  54.1k|    }
 1029|       |
 1030|       |    /* Sort ST ref pocs in ascending order */
 1031|  77.5k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
 1032|   131k|    for (j = 0; j < ps_dpb_mgr->u1_num_st_ref_bufs; j++)
  ------------------
  |  Branch (1032:17): [True: 54.1k, False: 77.5k]
  ------------------
 1033|  54.1k|    {
 1034|  54.1k|        aps_st_pic_bufs[j] = ps_next_dpb->ps_pic_buf;
 1035|  54.1k|        ps_next_dpb = ps_next_dpb->ps_prev_short;
 1036|  54.1k|    }
 1037|  77.5k|    qsort(aps_st_pic_bufs, ps_dpb_mgr->u1_num_st_ref_bufs,
 1038|  77.5k|        sizeof(aps_st_pic_bufs[0]), pic_num_compare);
 1039|       |
 1040|       |    /* Start from LT head */
 1041|  77.5k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
 1042|  77.5k|    if(ps_next_dpb)
  ------------------
  |  Branch (1042:8): [True: 9.92k, False: 67.6k]
  ------------------
 1043|  9.92k|    {
 1044|  9.92k|        u1_max_lt_index = ps_next_dpb->u1_lt_idx;
 1045|  9.92k|        u1_min_lt_index = ps_next_dpb->u1_lt_idx;
 1046|       |
 1047|  20.8k|        for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (1047:20): [True: 10.9k, False: 9.92k]
  ------------------
 1048|  10.9k|        {
 1049|  10.9k|            u4_lt_index = ps_next_dpb->u1_lt_idx;
 1050|  10.9k|            u1_max_lt_index = (UWORD8)(MAX(u1_max_lt_index, u4_lt_index));
  ------------------
  |  |   60|  10.9k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 10.9k]
  |  |  ------------------
  ------------------
 1051|  10.9k|            u1_min_lt_index = (UWORD8)(MIN(u1_min_lt_index, u4_lt_index));
  ------------------
  |  |   61|  10.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 1.01k, False: 9.92k]
  |  |  ------------------
  ------------------
 1052|       |
 1053|       |            /* Chase the next link */
 1054|  10.9k|            ps_next_dpb = ps_next_dpb->ps_prev_long;
 1055|  10.9k|        }
 1056|  9.92k|    }
 1057|       |    /* 1. Initialize refIdxL0 */
 1058|  77.5k|    u1_L0 = 0;
 1059|  77.5k|    if(u1_field_pic_flag)
  ------------------
  |  Branch (1059:8): [True: 0, False: 77.5k]
  ------------------
 1060|      0|    {
 1061|      0|        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
 1062|      0|        ps_ref_pic_buf_lx += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1063|      0|        i4_temp_pic_num = i4_cur_pic_num;
 1064|      0|    }
 1065|  77.5k|    else
 1066|  77.5k|    {
 1067|  77.5k|        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
 1068|  77.5k|        i4_temp_pic_num = i4_cur_pic_num;
 1069|  77.5k|    }
 1070|       |    /* Arrange all short term buffers in output order as given by pic_num */
 1071|       |    /* Arrange pic_num's less than Curr pic_num in the descending pic_num */
 1072|       |    /* order starting from (Curr pic_num - 1)                             */
 1073|   131k|    for(j = ps_dpb_mgr->u1_num_st_ref_bufs - 1; j >= 0; j--)
  ------------------
  |  Branch (1073:49): [True: 54.1k, False: 77.5k]
  ------------------
 1074|  54.1k|    {
 1075|  54.1k|        if(aps_st_pic_bufs[j])
  ------------------
  |  Branch (1075:12): [True: 54.1k, False: 0]
  ------------------
 1076|  54.1k|        {
 1077|       |            /* Copy info in pic buffer */
 1078|  54.1k|            ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1079|  54.1k|                                               aps_st_pic_bufs[j]);
 1080|  54.1k|            ps_ref_pic_buf_lx++;
 1081|  54.1k|            u1_L0++;
 1082|  54.1k|        }
 1083|  54.1k|    }
 1084|       |
 1085|       |    /* Arrange all Long term buffers in ascending order, in LongtermIndex */
 1086|       |    /* Start from LT head */
 1087|  77.5k|    u1_num_short_term_bufs = u1_L0;
 1088|   159k|    for(u4_lt_index = u1_min_lt_index; u4_lt_index <= u1_max_lt_index;
  ------------------
  |  Branch (1088:40): [True: 81.7k, False: 77.5k]
  ------------------
 1089|  81.7k|                    u4_lt_index++)
 1090|  81.7k|    {
 1091|  81.7k|        ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
 1092|  92.1k|        for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
  ------------------
  |  Branch (1092:20): [True: 21.2k, False: 70.8k]
  ------------------
 1093|  21.2k|        {
 1094|  21.2k|            if(ps_next_dpb->u1_lt_idx == u4_lt_index)
  ------------------
  |  Branch (1094:16): [True: 10.9k, False: 10.3k]
  ------------------
 1095|  10.9k|            {
 1096|  10.9k|                ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
 1097|  10.9k|                                                   ps_next_dpb->ps_pic_buf);
 1098|       |
 1099|  10.9k|                ps_ref_pic_buf_lx->u1_long_term_pic_num =
 1100|  10.9k|                                ps_ref_pic_buf_lx->u1_long_term_frm_idx;
 1101|  10.9k|                ps_ref_pic_buf_lx++;
 1102|  10.9k|                u1_L0++;
 1103|  10.9k|                break;
 1104|  10.9k|            }
 1105|  10.3k|            ps_next_dpb = ps_next_dpb->ps_prev_long;
 1106|  10.3k|        }
 1107|  81.7k|    }
 1108|       |
 1109|  77.5k|    if(u1_field_pic_flag)
  ------------------
  |  Branch (1109:8): [True: 0, False: 77.5k]
  ------------------
 1110|      0|    {
 1111|       |        /* Initialize the rest of the entries in the */
 1112|       |        /* reference list to handle of errors        */
 1113|      0|        {
 1114|      0|            UWORD8 u1_i;
 1115|      0|            pic_buffer_t ref_pic;
 1116|       |
 1117|      0|            ref_pic = *(ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS);
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1118|       |
 1119|      0|            if(NULL == ref_pic.pu1_buf1)
  ------------------
  |  Branch (1119:16): [True: 0, False: 0]
  ------------------
 1120|      0|            {
 1121|      0|                ref_pic = *ps_dec->ps_cur_pic;
 1122|      0|            }
 1123|      0|            for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
  ------------------
  |  Branch (1123:31): [True: 0, False: 0]
  ------------------
 1124|      0|            {
 1125|      0|                *ps_ref_pic_buf_lx = ref_pic;
 1126|      0|                ps_ref_pic_buf_lx++;
 1127|      0|            }
 1128|      0|        }
 1129|       |
 1130|      0|        ih264d_convert_frm_to_fld_list(
 1131|      0|                        ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS, &u1_L0,
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
 1132|      0|                        ps_dec, u1_num_short_term_bufs);
 1133|       |
 1134|      0|        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0] + u1_L0;
 1135|      0|    }
 1136|       |
 1137|       |    /* Initialize the rest of the entries in the */
 1138|       |    /* reference list to handle of errors        */
 1139|  77.5k|    {
 1140|  77.5k|        UWORD8 u1_i;
 1141|  77.5k|        pic_buffer_t ref_pic;
 1142|       |
 1143|  77.5k|        ref_pic = *(ps_dpb_mgr->ps_init_dpb[0][0]);
 1144|       |
 1145|  77.5k|        if(NULL == ref_pic.pu1_buf1)
  ------------------
  |  Branch (1145:12): [True: 0, False: 77.5k]
  ------------------
 1146|      0|        {
 1147|      0|            ref_pic = *ps_dec->ps_cur_pic;
 1148|      0|        }
 1149|   178k|        for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
  ------------------
  |  Branch (1149:27): [True: 100k, False: 77.5k]
  ------------------
 1150|   100k|        {
 1151|   100k|            *ps_ref_pic_buf_lx = ref_pic;
 1152|   100k|            ps_ref_pic_buf_lx++;
 1153|   100k|        }
 1154|  77.5k|    }
 1155|  77.5k|    ps_dec->ps_cur_slice->u1_initial_list_size[0] = u1_L0;
 1156|  77.5k|}
ih264d_process_pslice.c:pic_num_compare:
  944|  4.34k|{
  945|  4.34k|    struct pic_buffer_t *ps_pic1 = *(struct pic_buffer_t **) pv_pic1;
  946|  4.34k|    struct pic_buffer_t *ps_pic2 = *(struct pic_buffer_t **) pv_pic2;
  947|  4.34k|    if (ps_pic1->i4_pic_num < ps_pic2->i4_pic_num)
  ------------------
  |  Branch (947:9): [True: 1.31k, False: 3.02k]
  ------------------
  948|  1.31k|    {
  949|  1.31k|        return -1;
  950|  1.31k|    }
  951|  3.02k|    else if (ps_pic1->i4_pic_num > ps_pic2->i4_pic_num)
  ------------------
  |  Branch (951:14): [True: 1.88k, False: 1.14k]
  ------------------
  952|  1.88k|    {
  953|  1.88k|        return 1;
  954|  1.88k|    }
  955|  1.14k|    else
  956|  1.14k|    {
  957|  1.14k|        return 0;
  958|  1.14k|    }
  959|  4.34k|}

ih264d_scaling_list:
   52|  19.4k|{
   53|  19.4k|    WORD32 i4_j, i4_delta_scale, i4_lastScale = 8, i4_nextScale = 8;
   54|  19.4k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
   55|  19.4k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
   56|       |
   57|  19.4k|    *pu1_use_default_scaling_matrix_flag = 0;
   58|       |
   59|   524k|    for(i4_j = 0; i4_j < i4_size_of_scalinglist; i4_j++)
  ------------------
  |  Branch (59:19): [True: 505k, False: 18.6k]
  ------------------
   60|   505k|    {
   61|   505k|        if(i4_nextScale != 0)
  ------------------
  |  Branch (61:12): [True: 214k, False: 290k]
  ------------------
   62|   214k|        {
   63|   214k|            i4_delta_scale = ih264d_sev(pu4_bitstrm_ofst,
   64|   214k|                                        pu4_bitstrm_buf);
   65|       |
   66|   214k|            if(i4_delta_scale < MIN_H264_DELTA_SCALE ||
  ------------------
  |  |  639|   429k|#define MIN_H264_DELTA_SCALE (-128)
  ------------------
  |  Branch (66:16): [True: 152, False: 214k]
  ------------------
   67|   214k|                        i4_delta_scale > MAX_H264_DELTA_SCALE)
  ------------------
  |  |  644|   214k|#define MAX_H264_DELTA_SCALE 127
  ------------------
  |  Branch (67:25): [True: 608, False: 214k]
  ------------------
   68|    760|            {
   69|    760|                return ERROR_INV_RANGE_QP_T;
   70|    760|            }
   71|   214k|            i4_nextScale = ((i4_lastScale + i4_delta_scale + 256) & 0xff);
   72|       |
   73|   214k|            *pu1_use_default_scaling_matrix_flag = ((i4_j == 0)
  ------------------
  |  Branch (73:53): [True: 19.3k, False: 194k]
  ------------------
   74|  19.3k|                            && (i4_nextScale == 0));
  ------------------
  |  Branch (74:32): [True: 227, False: 19.1k]
  ------------------
   75|       |
   76|   214k|        }
   77|   504k|        pi2_scaling_list[i4_j] =
   78|   504k|                        (i4_nextScale == 0) ? (i4_lastScale) : (i4_nextScale);
  ------------------
  |  Branch (78:25): [True: 304k, False: 199k]
  ------------------
   79|   504k|        i4_lastScale = pi2_scaling_list[i4_j];
   80|   504k|    }
   81|  18.6k|    return OK;
  ------------------
  |  |  114|  18.6k|#define OK        0
  ------------------
   82|  19.4k|}
ih264d_form_default_scaling_matrix:
   85|   123k|{
   86|       |
   87|       |    /*************************************************************************/
   88|       |    /* perform the inverse scanning for the frame and field scaling matrices */
   89|       |    /*************************************************************************/
   90|   123k|    {
   91|   123k|        UWORD8 *pu1_inv_scan;
   92|   123k|        WORD32 i4_i, i4_j;
   93|       |
   94|   123k|        pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan;
   95|       |
   96|       |        /* for all 4x4 matrices */
   97|   864k|        for(i4_i = 0; i4_i < 6; i4_i++)
  ------------------
  |  Branch (97:23): [True: 741k, False: 123k]
  ------------------
   98|   741k|        {
   99|  12.6M|            for(i4_j = 0; i4_j < 16; i4_j++)
  ------------------
  |  Branch (99:27): [True: 11.8M, False: 741k]
  ------------------
  100|  11.8M|            {
  101|  11.8M|                ps_dec->s_high_profile.i2_scalinglist4x4[i4_i][pu1_inv_scan[i4_j]] =
  102|  11.8M|                                16;
  103|       |
  104|  11.8M|            }
  105|   741k|        }
  106|       |
  107|       |        /* for all 8x8 matrices */
  108|   370k|        for(i4_i = 0; i4_i < 2; i4_i++)
  ------------------
  |  Branch (108:23): [True: 247k, False: 123k]
  ------------------
  109|   247k|        {
  110|  16.0M|            for(i4_j = 0; i4_j < 64; i4_j++)
  ------------------
  |  Branch (110:27): [True: 15.8M, False: 247k]
  ------------------
  111|  15.8M|            {
  112|  15.8M|                ps_dec->s_high_profile.i2_scalinglist8x8[i4_i][gau1_ih264d_inv_scan_prog8x8_cabac[i4_j]] =
  113|  15.8M|                                16;
  114|       |
  115|  15.8M|            }
  116|   247k|        }
  117|   123k|    }
  118|   123k|    return OK;
  ------------------
  |  |  114|   123k|#define OK        0
  ------------------
  119|   123k|}
ih264d_form_scaling_matrix_picture:
  124|  9.80k|{
  125|       |    /* default scaling matrices */
  126|  9.80k|    WORD32 i4_i;
  127|       |
  128|       |    /* check the SPS first */
  129|  9.80k|    if(ps_seq->i4_seq_scaling_matrix_present_flag)
  ------------------
  |  Branch (129:8): [True: 8.81k, False: 990]
  ------------------
  130|  8.81k|    {
  131|  79.3k|        for(i4_i = 0; i4_i < 8; i4_i++)
  ------------------
  |  Branch (131:23): [True: 70.5k, False: 8.81k]
  ------------------
  132|  70.5k|        {
  133|  70.5k|            if(i4_i < 6)
  ------------------
  |  Branch (133:16): [True: 52.8k, False: 17.6k]
  ------------------
  134|  52.8k|            {
  135|       |                /* fall-back rule A */
  136|  52.8k|                if(!ps_seq->u1_seq_scaling_list_present_flag[i4_i])
  ------------------
  |  Branch (136:20): [True: 29.5k, False: 23.3k]
  ------------------
  137|  29.5k|                {
  138|  29.5k|                    if((i4_i == 0) || (i4_i == 3))
  ------------------
  |  Branch (138:24): [True: 2.51k, False: 26.9k]
  |  Branch (138:39): [True: 7.82k, False: 19.1k]
  ------------------
  139|  10.3k|                    {
  140|  10.3k|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  141|  10.3k|                                        (i4_i == 0) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
  ------------------
  |  Branch (141:41): [True: 2.51k, False: 7.82k]
  ------------------
  142|  10.3k|                    }
  143|  19.1k|                    else
  144|  19.1k|                    {
  145|  19.1k|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  146|  19.1k|                                        ps_dec->s_high_profile.pi2_scale_mat[i4_i
  147|  19.1k|                                                        - 1];
  148|  19.1k|                    }
  149|  29.5k|                }
  150|  23.3k|                else
  151|  23.3k|                {
  152|  23.3k|                    if(ps_seq->u1_use_default_scaling_matrix_flag[i4_i])
  ------------------
  |  Branch (152:24): [True: 237, False: 23.1k]
  ------------------
  153|    237|                    {
  154|    237|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  155|    237|                                        (i4_i < 3) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
  ------------------
  |  Branch (155:41): [True: 216, False: 21]
  ------------------
  156|    237|                    }
  157|  23.1k|                    else
  158|  23.1k|                    {
  159|  23.1k|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  160|  23.1k|                                        ps_seq->i2_scalinglist4x4[i4_i];
  161|  23.1k|                    }
  162|  23.3k|                }
  163|       |
  164|  52.8k|            }
  165|  17.6k|            else
  166|  17.6k|            {
  167|       |                /* fall-back rule A */
  168|  17.6k|                if((!ps_seq->u1_seq_scaling_list_present_flag[i4_i])
  ------------------
  |  Branch (168:20): [True: 10.5k, False: 7.06k]
  ------------------
  169|  7.06k|                                || (ps_seq->u1_use_default_scaling_matrix_flag[i4_i]))
  ------------------
  |  Branch (169:36): [True: 204, False: 6.85k]
  ------------------
  170|  10.7k|                {
  171|  10.7k|                    ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  172|  10.7k|                                    (i4_i == 6) ? ((WORD16*)gai2_ih264d_default_intra8x8) : ((WORD16*)gai2_ih264d_default_inter8x8);
  ------------------
  |  Branch (172:37): [True: 2.47k, False: 8.29k]
  ------------------
  173|  10.7k|                }
  174|  6.85k|                else
  175|  6.85k|                {
  176|  6.85k|                    ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  177|  6.85k|                                    ps_seq->i2_scalinglist8x8[i4_i - 6];
  178|  6.85k|                }
  179|  17.6k|            }
  180|  70.5k|        }
  181|  8.81k|    }
  182|       |
  183|       |    /* checking for the PPS */
  184|       |
  185|  9.80k|    if(ps_pic->i4_pic_scaling_matrix_present_flag)
  ------------------
  |  Branch (185:8): [True: 2.17k, False: 7.62k]
  ------------------
  186|  2.17k|    {
  187|  19.6k|        for(i4_i = 0; i4_i < 8; i4_i++)
  ------------------
  |  Branch (187:23): [True: 17.4k, False: 2.17k]
  ------------------
  188|  17.4k|        {
  189|  17.4k|            if(i4_i < 6)
  ------------------
  |  Branch (189:16): [True: 13.0k, False: 4.35k]
  ------------------
  190|  13.0k|            {
  191|       |                /* fall back rule B */
  192|  13.0k|                if(!ps_pic->u1_pic_scaling_list_present_flag[i4_i])
  ------------------
  |  Branch (192:20): [True: 10.8k, False: 2.18k]
  ------------------
  193|  10.8k|                {
  194|  10.8k|                    if((i4_i == 0) || (i4_i == 3))
  ------------------
  |  Branch (194:24): [True: 1.83k, False: 9.05k]
  |  Branch (194:39): [True: 1.74k, False: 7.30k]
  ------------------
  195|  3.58k|                    {
  196|  3.58k|                        if(!ps_seq->i4_seq_scaling_matrix_present_flag)
  ------------------
  |  Branch (196:28): [True: 1.65k, False: 1.93k]
  ------------------
  197|  1.65k|                        {
  198|  1.65k|                            ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  199|  1.65k|                                            (i4_i == 0) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
  ------------------
  |  Branch (199:45): [True: 863, False: 787]
  ------------------
  200|  1.65k|                        }
  201|  3.58k|                    }
  202|  7.30k|                    else
  203|  7.30k|                    {
  204|  7.30k|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  205|  7.30k|                                        ps_dec->s_high_profile.pi2_scale_mat[i4_i
  206|  7.30k|                                                        - 1];
  207|  7.30k|                    }
  208|  10.8k|                }
  209|  2.18k|                else
  210|  2.18k|                {
  211|  2.18k|                    if(ps_pic->u1_pic_use_default_scaling_matrix_flag[i4_i])
  ------------------
  |  Branch (211:24): [True: 310, False: 1.87k]
  ------------------
  212|    310|                    {
  213|    310|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  214|    310|                                        (i4_i < 3) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
  ------------------
  |  Branch (214:41): [True: 129, False: 181]
  ------------------
  215|    310|                    }
  216|  1.87k|                    else
  217|  1.87k|                    {
  218|  1.87k|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  219|  1.87k|                                        ps_pic->i2_pic_scalinglist4x4[i4_i];
  220|  1.87k|                    }
  221|  2.18k|                }
  222|  13.0k|            }
  223|  4.35k|            else
  224|  4.35k|            {
  225|  4.35k|                if(!ps_pic->u1_pic_scaling_list_present_flag[i4_i])
  ------------------
  |  Branch (225:20): [True: 3.65k, False: 703]
  ------------------
  226|  3.65k|                {
  227|  3.65k|                    if(!ps_seq->i4_seq_scaling_matrix_present_flag)
  ------------------
  |  Branch (227:24): [True: 1.85k, False: 1.80k]
  ------------------
  228|  1.85k|                    {
  229|  1.85k|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  230|  1.85k|                                        (i4_i == 6) ? ((WORD16*)gai2_ih264d_default_intra8x8) : ((WORD16*)gai2_ih264d_default_inter8x8);
  ------------------
  |  Branch (230:41): [True: 909, False: 942]
  ------------------
  231|  1.85k|                    }
  232|  3.65k|                }
  233|    703|                else
  234|    703|                {
  235|    703|                    if(ps_pic->u1_pic_use_default_scaling_matrix_flag[i4_i])
  ------------------
  |  Branch (235:24): [True: 179, False: 524]
  ------------------
  236|    179|                    {
  237|    179|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  238|    179|                                        (i4_i == 6) ? (WORD16 *)(gai2_ih264d_default_intra8x8) : (WORD16 *)(gai2_ih264d_default_inter8x8);
  ------------------
  |  Branch (238:41): [True: 25, False: 154]
  ------------------
  239|    179|                    }
  240|    524|                    else
  241|    524|                    {
  242|    524|                        ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
  243|    524|                                        ps_pic->i2_pic_scalinglist8x8[i4_i - 6];
  244|    524|                    }
  245|    703|                }
  246|  4.35k|            }
  247|  17.4k|        }
  248|  2.17k|    }
  249|       |
  250|       |    /*************************************************************************/
  251|       |    /* perform the inverse scanning for the frame and field scaling matrices */
  252|       |    /*************************************************************************/
  253|  9.80k|    {
  254|  9.80k|        UWORD8 *pu1_inv_scan_4x4;
  255|  9.80k|        WORD32 i4_i, i4_j;
  256|       |
  257|  9.80k|        pu1_inv_scan_4x4 = (UWORD8 *)gau1_ih264d_inv_scan;
  258|       |
  259|       |        /* for all 4x4 matrices */
  260|  68.6k|        for(i4_i = 0; i4_i < 6; i4_i++)
  ------------------
  |  Branch (260:23): [True: 58.8k, False: 9.80k]
  ------------------
  261|  58.8k|        {
  262|  58.8k|            if(ps_dec->s_high_profile.pi2_scale_mat[i4_i] == NULL)
  ------------------
  |  Branch (262:16): [True: 0, False: 58.8k]
  ------------------
  263|      0|                return ERROR_CORRUPTED_SLICE;
  264|       |
  265|  1.00M|            for(i4_j = 0; i4_j < 16; i4_j++)
  ------------------
  |  Branch (265:27): [True: 941k, False: 58.8k]
  ------------------
  266|   941k|            {
  267|   941k|                ps_dec->s_high_profile.i2_scalinglist4x4[i4_i][pu1_inv_scan_4x4[i4_j]] =
  268|   941k|                                ps_dec->s_high_profile.pi2_scale_mat[i4_i][i4_j];
  269|       |
  270|   941k|            }
  271|  58.8k|        }
  272|       |
  273|       |        /* for all 8x8 matrices */
  274|  29.4k|        for(i4_i = 0; i4_i < 2; i4_i++)
  ------------------
  |  Branch (274:23): [True: 19.6k, False: 9.80k]
  ------------------
  275|  19.6k|        {
  276|  19.6k|            if(ps_dec->s_high_profile.pi2_scale_mat[i4_i + 6] == NULL)
  ------------------
  |  Branch (276:16): [True: 0, False: 19.6k]
  ------------------
  277|      0|                return ERROR_CORRUPTED_SLICE;
  278|       |
  279|  1.27M|            for(i4_j = 0; i4_j < 64; i4_j++)
  ------------------
  |  Branch (279:27): [True: 1.25M, False: 19.6k]
  ------------------
  280|  1.25M|            {
  281|  1.25M|                ps_dec->s_high_profile.i2_scalinglist8x8[i4_i][gau1_ih264d_inv_scan_prog8x8_cabac[i4_j]] =
  282|  1.25M|                                ps_dec->s_high_profile.pi2_scale_mat[i4_i + 6][i4_j];
  283|       |
  284|  1.25M|            }
  285|  19.6k|        }
  286|  9.80k|    }
  287|  9.80k|    return OK;
  ------------------
  |  |  114|  9.80k|#define OK        0
  ------------------
  288|  9.80k|}

ih264d_parse_buffering_period:
   74|  1.19k|{
   75|  1.19k|    UWORD8 u1_seq_parameter_set_id;
   76|  1.19k|    dec_seq_params_t *ps_seq;
   77|  1.19k|    UWORD8 u1_nal_hrd_present, u1_vcl_hrd_present;
   78|  1.19k|    UWORD32 i;
   79|  1.19k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
   80|  1.19k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
   81|  1.19k|    UNUSED(ps_buf_prd);
  ------------------
  |  |   45|  1.19k|#define UNUSED(x) ((void)(x))
  ------------------
   82|  1.19k|    u1_seq_parameter_set_id = ih264d_uev(pu4_bitstrm_ofst,
   83|  1.19k|                                         pu4_bitstrm_buf);
   84|  1.19k|    if(u1_seq_parameter_set_id >= MAX_NUM_SEQ_PARAMS)
  ------------------
  |  |  521|  1.19k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  |  Branch (84:8): [True: 72, False: 1.12k]
  ------------------
   85|     72|        return ERROR_INVALID_SEQ_PARAM;
   86|  1.12k|    ps_seq = &ps_dec->ps_sps[u1_seq_parameter_set_id];
   87|  1.12k|    if(TRUE != ps_seq->u1_is_valid)
  ------------------
  |  |  591|  1.12k|#define TRUE    1
  ------------------
  |  Branch (87:8): [True: 179, False: 947]
  ------------------
   88|    179|        return ERROR_INVALID_SEQ_PARAM;
   89|       |
   90|    947|    ps_dec->ps_sei->u1_seq_param_set_id = u1_seq_parameter_set_id;
   91|    947|    ps_dec->ps_cur_sps = ps_seq;
   92|    947|    if(FALSE == ps_seq->u1_is_valid)
  ------------------
  |  |  592|    947|#define FALSE   0
  ------------------
  |  Branch (92:8): [True: 0, False: 947]
  ------------------
   93|      0|        return ERROR_INVALID_SEQ_PARAM;
   94|    947|    if(1 == ps_seq->u1_vui_parameters_present_flag)
  ------------------
  |  Branch (94:8): [True: 707, False: 240]
  ------------------
   95|    707|    {
   96|    707|        u1_nal_hrd_present = ps_seq->s_vui.u1_nal_hrd_params_present;
   97|    707|        if(u1_nal_hrd_present)
  ------------------
  |  Branch (97:12): [True: 256, False: 451]
  ------------------
   98|    256|        {
   99|    974|            for(i = 0; i < ps_seq->s_vui.s_nal_hrd.u4_cpb_cnt; i++)
  ------------------
  |  Branch (99:24): [True: 718, False: 256]
  ------------------
  100|    718|            {
  101|    718|                ih264d_get_bits_h264(
  102|    718|                                ps_bitstrm,
  103|    718|                                ps_seq->s_vui.s_nal_hrd.u1_initial_cpb_removal_delay);
  104|    718|                ih264d_get_bits_h264(
  105|    718|                                ps_bitstrm,
  106|    718|                                ps_seq->s_vui.s_nal_hrd.u1_initial_cpb_removal_delay);
  107|    718|            }
  108|    256|        }
  109|       |
  110|    707|        u1_vcl_hrd_present = ps_seq->s_vui.u1_vcl_hrd_params_present;
  111|    707|        if(u1_vcl_hrd_present)
  ------------------
  |  Branch (111:12): [True: 433, False: 274]
  ------------------
  112|    433|        {
  113|  4.05k|            for(i = 0; i < ps_seq->s_vui.s_vcl_hrd.u4_cpb_cnt; i++)
  ------------------
  |  Branch (113:24): [True: 3.62k, False: 433]
  ------------------
  114|  3.62k|            {
  115|  3.62k|                ih264d_get_bits_h264(
  116|  3.62k|                                ps_bitstrm,
  117|  3.62k|                                ps_seq->s_vui.s_vcl_hrd.u1_initial_cpb_removal_delay);
  118|  3.62k|                ih264d_get_bits_h264(
  119|  3.62k|                                ps_bitstrm,
  120|  3.62k|                                ps_seq->s_vui.s_vcl_hrd.u1_initial_cpb_removal_delay);
  121|  3.62k|            }
  122|    433|        }
  123|    707|    }
  124|    947|    return (OK);
  ------------------
  |  |  114|    947|#define OK        0
  ------------------
  125|    947|}
ih264d_parse_pic_timing:
  151|  1.06k|{
  152|  1.06k|    sei *ps_sei;
  153|  1.06k|    vui_t *ps_vu4;
  154|  1.06k|    UWORD8 u1_cpb_dpb_present;
  155|  1.06k|    UWORD8 u1_pic_struct_present_flag;
  156|  1.06k|    UWORD32 u4_start_offset, u4_bits_consumed;
  157|  1.06k|    UWORD8 u1_cpb_removal_delay_length, u1_dpb_output_delay_length;
  158|       |
  159|  1.06k|    ps_sei = (sei *)ps_dec->ps_sei;
  160|  1.06k|    ps_vu4 = &ps_dec->ps_cur_sps->s_vui;
  161|       |
  162|  1.06k|    u1_cpb_dpb_present = ps_vu4->u1_vcl_hrd_params_present
  163|  1.06k|                    + ps_vu4->u1_nal_hrd_params_present;
  164|       |
  165|  1.06k|    if(ps_vu4->u1_vcl_hrd_params_present)
  ------------------
  |  Branch (165:8): [True: 282, False: 785]
  ------------------
  166|    282|    {
  167|    282|        u1_cpb_removal_delay_length =
  168|    282|                        ps_vu4->s_vcl_hrd.u1_cpb_removal_delay_length;
  169|    282|        u1_dpb_output_delay_length =
  170|    282|                        ps_vu4->s_vcl_hrd.u1_dpb_output_delay_length;
  171|    282|    }
  172|    785|    else if(ps_vu4->u1_nal_hrd_params_present)
  ------------------
  |  Branch (172:13): [True: 308, False: 477]
  ------------------
  173|    308|    {
  174|    308|        u1_cpb_removal_delay_length =
  175|    308|                        ps_vu4->s_nal_hrd.u1_cpb_removal_delay_length;
  176|    308|        u1_dpb_output_delay_length =
  177|    308|                        ps_vu4->s_nal_hrd.u1_dpb_output_delay_length;
  178|    308|    }
  179|    477|    else
  180|    477|    {
  181|    477|        u1_cpb_removal_delay_length = 24;
  182|    477|        u1_dpb_output_delay_length = 24;
  183|       |
  184|    477|    }
  185|       |
  186|  1.06k|    u4_start_offset = ps_bitstrm->u4_ofst;
  187|  1.06k|    if(u1_cpb_dpb_present)
  ------------------
  |  Branch (187:8): [True: 590, False: 477]
  ------------------
  188|    590|    {
  189|    590|        ih264d_get_bits_h264(ps_bitstrm, u1_cpb_removal_delay_length);
  190|    590|        ih264d_get_bits_h264(ps_bitstrm, u1_dpb_output_delay_length);
  191|    590|    }
  192|       |
  193|  1.06k|    u1_pic_struct_present_flag = ps_vu4->u1_pic_struct_present_flag;
  194|  1.06k|    if(u1_pic_struct_present_flag)
  ------------------
  |  Branch (194:8): [True: 326, False: 741]
  ------------------
  195|    326|    {
  196|    326|        ps_sei->u1_pic_struct = ih264d_get_bits_h264(ps_bitstrm, 4);
  197|    326|        ps_dec->u1_pic_struct_copy = ps_sei->u1_pic_struct;
  198|    326|        ps_sei->u1_is_valid = 1;
  199|    326|    }
  200|  1.06k|    u4_bits_consumed = ps_bitstrm->u4_ofst - u4_start_offset;
  201|       |
  202|  1.06k|    if((ui4_payload_size << 3) < u4_bits_consumed)
  ------------------
  |  Branch (202:8): [True: 67, False: 1.00k]
  ------------------
  203|     67|        return ERROR_CORRUPTED_SLICE;
  204|       |
  205|  1.00k|    ih264d_flush_bits_h264(ps_bitstrm,
  206|  1.00k|                           (ui4_payload_size << 3) - u4_bits_consumed);
  207|       |
  208|  1.00k|    return (OK);
  ------------------
  |  |  114|  1.00k|#define OK        0
  ------------------
  209|  1.06k|}
ih264d_parse_recovery_point:
  235|  1.86k|{
  236|  1.86k|    sei *ps_sei = ps_dec->ps_sei;
  237|  1.86k|    dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
  238|  1.86k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  239|  1.86k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  240|  1.86k|    UNUSED(ui4_payload_size);
  ------------------
  |  |   45|  1.86k|#define UNUSED(x) ((void)(x))
  ------------------
  241|  1.86k|    ps_sei->u2_recovery_frame_cnt = ih264d_uev(pu4_bitstrm_ofst,
  242|  1.86k|                                               pu4_bitstrm_buf);
  243|  1.86k|    ps_err->u4_frm_sei_sync = ps_err->u4_cur_frm
  244|  1.86k|                    + ps_sei->u2_recovery_frame_cnt;
  245|  1.86k|    ps_sei->u1_exact_match_flag = ih264d_get_bit_h264(ps_bitstrm);
  246|  1.86k|    ps_sei->u1_broken_link_flag = ih264d_get_bit_h264(ps_bitstrm);
  247|  1.86k|    ps_sei->u1_changing_slice_grp_idc = ih264d_get_bits_h264(ps_bitstrm, 2);
  248|       |
  249|  1.86k|    return (OK);
  ------------------
  |  |  114|  1.86k|#define OK        0
  ------------------
  250|  1.86k|}
ih264d_parse_mdcv:
  276|  1.63k|{
  277|  1.63k|    sei *ps_sei = ps_dec->ps_sei_parse;
  278|  1.63k|    dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
  279|  1.63k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  280|  1.63k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  281|  1.63k|    UWORD32 u4_count;
  282|  1.63k|    UNUSED(ui4_payload_size);
  ------------------
  |  |   45|  1.63k|#define UNUSED(x) ((void)(x))
  ------------------
  283|       |
  284|  1.63k|    if((ps_dec == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (284:8): [True: 0, False: 1.63k]
  |  Branch (284:28): [True: 0, False: 1.63k]
  ------------------
  285|      0|    {
  286|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  287|      0|    }
  288|       |
  289|  1.63k|    ps_sei->u1_sei_mdcv_params_present_flag = 1;
  290|       |
  291|       |    /* display primaries x */
  292|  5.43k|    for(u4_count = 0; u4_count < NUM_SEI_MDCV_PRIMARIES; u4_count++)
  ------------------
  |  |   51|  5.43k|#define NUM_SEI_MDCV_PRIMARIES        3
  ------------------
  |  Branch (292:23): [True: 4.41k, False: 1.01k]
  ------------------
  293|  4.41k|    {
  294|  4.41k|        ps_sei->s_sei_mdcv_params.au2_display_primaries_x[u4_count] =
  295|  4.41k|                                    (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  296|       |
  297|  4.41k|        if((ps_sei->s_sei_mdcv_params.au2_display_primaries_x[u4_count] >
  ------------------
  |  Branch (297:12): [True: 81, False: 4.33k]
  ------------------
  298|  4.41k|                                                DISPLAY_PRIMARIES_X_UPPER_LIMIT) ||
  ------------------
  |  |  620|  4.41k|#define DISPLAY_PRIMARIES_X_UPPER_LIMIT                37000
  ------------------
  299|  4.33k|           (ps_sei->s_sei_mdcv_params.au2_display_primaries_x[u4_count] <
  ------------------
  |  Branch (299:12): [True: 77, False: 4.25k]
  ------------------
  300|  4.33k|                                                DISPLAY_PRIMARIES_X_LOWER_LIMIT) ||
  ------------------
  |  |  621|  4.33k|#define DISPLAY_PRIMARIES_X_LOWER_LIMIT                5
  ------------------
  301|  4.25k|           ((ps_sei->s_sei_mdcv_params.au2_display_primaries_x[u4_count] %
  ------------------
  |  Branch (301:12): [True: 142, False: 4.11k]
  ------------------
  302|  4.25k|                                               DISPLAY_PRIMARIES_X_DIVISION_FACTOR) != 0))
  ------------------
  |  |  622|  4.25k|#define DISPLAY_PRIMARIES_X_DIVISION_FACTOR            5
  ------------------
  303|    300|        {
  304|    300|            ps_sei->u1_sei_mdcv_params_present_flag = 0;
  305|    300|            return ERROR_INV_SEI_MDCV_PARAMS;
  306|    300|        }
  307|       |
  308|  4.11k|        ps_sei->s_sei_mdcv_params.au2_display_primaries_y[u4_count] =
  309|  4.11k|                                    (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  310|       |
  311|  4.11k|        if((ps_sei->s_sei_mdcv_params.au2_display_primaries_y[u4_count] >
  ------------------
  |  Branch (311:12): [True: 110, False: 4.00k]
  ------------------
  312|  4.11k|                                                DISPLAY_PRIMARIES_Y_UPPER_LIMIT) ||
  ------------------
  |  |  624|  4.11k|#define DISPLAY_PRIMARIES_Y_UPPER_LIMIT                42000
  ------------------
  313|  4.00k|           (ps_sei->s_sei_mdcv_params.au2_display_primaries_y[u4_count] <
  ------------------
  |  Branch (313:12): [True: 123, False: 3.88k]
  ------------------
  314|  4.00k|                                               DISPLAY_PRIMARIES_Y_LOWER_LIMIT) ||
  ------------------
  |  |  625|  4.00k|#define DISPLAY_PRIMARIES_Y_LOWER_LIMIT                5
  ------------------
  315|  3.88k|           ((ps_sei->s_sei_mdcv_params.au2_display_primaries_y[u4_count] %
  ------------------
  |  Branch (315:12): [True: 87, False: 3.79k]
  ------------------
  316|  3.88k|                                              DISPLAY_PRIMARIES_Y_DIVISION_FACTOR) != 0))
  ------------------
  |  |  626|  3.88k|#define DISPLAY_PRIMARIES_Y_DIVISION_FACTOR            5
  ------------------
  317|    320|        {
  318|    320|            ps_sei->u1_sei_mdcv_params_present_flag = 0;
  319|    320|            return ERROR_INV_SEI_MDCV_PARAMS;
  320|    320|        }
  321|  4.11k|    }
  322|       |
  323|       |    /* white point x */
  324|  1.01k|    ps_sei->s_sei_mdcv_params.u2_white_point_x = (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  325|       |
  326|  1.01k|    if((ps_sei->s_sei_mdcv_params.u2_white_point_x > WHITE_POINT_X_UPPER_LIMIT) ||
  ------------------
  |  |  628|  1.01k|#define WHITE_POINT_X_UPPER_LIMIT                      37000
  ------------------
  |  Branch (326:8): [True: 68, False: 945]
  ------------------
  327|    945|       (ps_sei->s_sei_mdcv_params.u2_white_point_x < WHITE_POINT_X_LOWER_LIMIT) ||
  ------------------
  |  |  629|    945|#define WHITE_POINT_X_LOWER_LIMIT                      5
  ------------------
  |  Branch (327:8): [True: 72, False: 873]
  ------------------
  328|    873|       ((ps_sei->s_sei_mdcv_params.u2_white_point_x % WHITE_POINT_X_DIVISION_FACTOR) != 0))
  ------------------
  |  |  630|    873|#define WHITE_POINT_X_DIVISION_FACTOR                  5
  ------------------
  |  Branch (328:8): [True: 75, False: 798]
  ------------------
  329|    215|    {
  330|    215|        ps_sei->u1_sei_mdcv_params_present_flag = 0;
  331|    215|        return ERROR_INV_SEI_MDCV_PARAMS;
  332|    215|    }
  333|       |    /* white point y */
  334|    798|    ps_sei->s_sei_mdcv_params.u2_white_point_y = (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  335|       |
  336|    798|    if((ps_sei->s_sei_mdcv_params.u2_white_point_y > WHITE_POINT_Y_UPPER_LIMIT) ||
  ------------------
  |  |  632|    798|#define WHITE_POINT_Y_UPPER_LIMIT                      42000
  ------------------
  |  Branch (336:8): [True: 76, False: 722]
  ------------------
  337|    722|       (ps_sei->s_sei_mdcv_params.u2_white_point_y < WHITE_POINT_Y_LOWER_LIMIT) ||
  ------------------
  |  |  633|    722|#define WHITE_POINT_Y_LOWER_LIMIT                      5
  ------------------
  |  Branch (337:8): [True: 72, False: 650]
  ------------------
  338|    650|       ((ps_sei->s_sei_mdcv_params.u2_white_point_y % WHITE_POINT_Y_DIVISION_FACTOR) != 0))
  ------------------
  |  |  634|    650|#define WHITE_POINT_Y_DIVISION_FACTOR                  5
  ------------------
  |  Branch (338:8): [True: 80, False: 570]
  ------------------
  339|    228|    {
  340|    228|        ps_sei->u1_sei_mdcv_params_present_flag = 0;
  341|    228|        return ERROR_INV_SEI_MDCV_PARAMS;
  342|    228|    }
  343|       |    /* max display mastering luminance */
  344|    570|    ps_sei->s_sei_mdcv_params.u4_max_display_mastering_luminance =
  345|    570|                                    (UWORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  346|       |
  347|    570|    if((ps_sei->s_sei_mdcv_params.u4_max_display_mastering_luminance >
  ------------------
  |  Branch (347:8): [True: 84, False: 486]
  ------------------
  348|    570|                                            MAX_DISPLAY_MASTERING_LUMINANCE_UPPER_LIMIT) ||
  ------------------
  |  |  636|    570|#define MAX_DISPLAY_MASTERING_LUMINANCE_UPPER_LIMIT        100000000
  ------------------
  349|    486|       (ps_sei->s_sei_mdcv_params.u4_max_display_mastering_luminance <
  ------------------
  |  Branch (349:8): [True: 73, False: 413]
  ------------------
  350|    486|                                            MAX_DISPLAY_MASTERING_LUMINANCE_LOWER_LIMIT) ||
  ------------------
  |  |  637|    486|#define MAX_DISPLAY_MASTERING_LUMINANCE_LOWER_LIMIT        50000
  ------------------
  351|    413|       ((ps_sei->s_sei_mdcv_params.u4_max_display_mastering_luminance %
  ------------------
  |  Branch (351:8): [True: 92, False: 321]
  ------------------
  352|    413|                                        MAX_DISPLAY_MASTERING_LUMINANCE_DIVISION_FACTOR) != 0))
  ------------------
  |  |  638|    413|#define MAX_DISPLAY_MASTERING_LUMINANCE_DIVISION_FACTOR    10000
  ------------------
  353|    249|    {
  354|    249|        ps_sei->u1_sei_mdcv_params_present_flag = 0;
  355|    249|        return ERROR_INV_SEI_MDCV_PARAMS;
  356|    249|    }
  357|       |    /* min display mastering luminance */
  358|    321|    ps_sei->s_sei_mdcv_params.u4_min_display_mastering_luminance =
  359|    321|                                    (UWORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  360|       |
  361|    321|    if((ps_sei->s_sei_mdcv_params.u4_min_display_mastering_luminance >
  ------------------
  |  Branch (361:8): [True: 91, False: 230]
  ------------------
  362|    321|                                            MIN_DISPLAY_MASTERING_LUMINANCE_UPPER_LIMIT) ||
  ------------------
  |  |  640|    321|#define MIN_DISPLAY_MASTERING_LUMINANCE_UPPER_LIMIT        50000
  ------------------
  363|    230|        (ps_sei->s_sei_mdcv_params.u4_min_display_mastering_luminance <
  ------------------
  |  Branch (363:9): [True: 83, False: 147]
  ------------------
  364|    230|                                            MIN_DISPLAY_MASTERING_LUMINANCE_LOWER_LIMIT))
  ------------------
  |  |  641|    230|#define MIN_DISPLAY_MASTERING_LUMINANCE_LOWER_LIMIT        1
  ------------------
  365|    174|    {
  366|    174|        ps_sei->u1_sei_mdcv_params_present_flag = 0;
  367|    174|        return ERROR_INV_SEI_MDCV_PARAMS;
  368|    174|    }
  369|    147|    if(ps_sei->s_sei_mdcv_params.u4_max_display_mastering_luminance <=
  ------------------
  |  Branch (369:8): [True: 66, False: 81]
  ------------------
  370|    147|            ps_sei->s_sei_mdcv_params.u4_min_display_mastering_luminance)
  371|     66|    {
  372|     66|        ps_sei->u1_sei_mdcv_params_present_flag = 0;
  373|     66|        return ERROR_INV_SEI_MDCV_PARAMS;
  374|     66|    }
  375|     81|    return (OK);
  ------------------
  |  |  114|     81|#define OK        0
  ------------------
  376|    147|}
ih264d_parse_cll:
  402|    777|{
  403|    777|    sei *ps_sei = ps_dec->ps_sei_parse;
  404|    777|    dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
  405|    777|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  406|    777|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  407|    777|    UNUSED(ui4_payload_size);
  ------------------
  |  |   45|    777|#define UNUSED(x) ((void)(x))
  ------------------
  408|       |
  409|    777|    if((ps_dec == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (409:8): [True: 0, False: 777]
  |  Branch (409:28): [True: 0, False: 777]
  ------------------
  410|      0|    {
  411|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  412|      0|    }
  413|       |
  414|    777|    ps_sei->u1_sei_cll_params_present_flag = 1;
  415|       |
  416|    777|    ps_sei->s_sei_cll_params.u2_max_content_light_level =
  417|    777|                        (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  418|    777|    ps_sei->s_sei_cll_params.u2_max_pic_average_light_level =
  419|    777|                        (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  420|       |    /*No any sanity checks done for CLL params*/
  421|       |
  422|    777|    return (OK);
  ------------------
  |  |  114|    777|#define OK        0
  ------------------
  423|    777|}
ih264d_parse_ave:
  449|    533|{
  450|    533|    sei *ps_sei = ps_dec->ps_sei_parse;
  451|    533|    dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
  452|    533|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  453|    533|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  454|    533|    UNUSED(ui4_payload_size);
  ------------------
  |  |   45|    533|#define UNUSED(x) ((void)(x))
  ------------------
  455|       |
  456|    533|    if((ps_dec == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (456:8): [True: 0, False: 533]
  |  Branch (456:28): [True: 0, False: 533]
  ------------------
  457|      0|    {
  458|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  459|      0|    }
  460|       |
  461|    533|    ps_sei->u1_sei_ave_params_present_flag = 1;
  462|       |
  463|    533|    ps_sei->s_sei_ave_params.u4_ambient_illuminance = (UWORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  464|    533|    if(0 == ps_sei->s_sei_ave_params.u4_ambient_illuminance)
  ------------------
  |  Branch (464:8): [True: 66, False: 467]
  ------------------
  465|     66|    {
  466|     66|        ps_sei->u1_sei_ave_params_present_flag = 0;
  467|     66|        return ERROR_INV_SEI_AVE_PARAMS;
  468|     66|    }
  469|       |
  470|    467|    ps_sei->s_sei_ave_params.u2_ambient_light_x = (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  471|    467|    if(ps_sei->s_sei_ave_params.u2_ambient_light_x > AMBIENT_LIGHT_X_UPPER_LIMIT)
  ------------------
  |  |  643|    467|#define AMBIENT_LIGHT_X_UPPER_LIMIT        50000
  ------------------
  |  Branch (471:8): [True: 86, False: 381]
  ------------------
  472|     86|    {
  473|     86|        ps_sei->u1_sei_ave_params_present_flag = 0;
  474|     86|        return ERROR_INV_SEI_AVE_PARAMS;
  475|     86|    }
  476|       |
  477|    381|    ps_sei->s_sei_ave_params.u2_ambient_light_y = (UWORD16)ih264d_get_bits_h264(ps_bitstrm, 16);
  478|    381|    if(ps_sei->s_sei_ave_params.u2_ambient_light_y > AMBIENT_LIGHT_Y_UPPER_LIMIT)
  ------------------
  |  |  644|    381|#define AMBIENT_LIGHT_Y_UPPER_LIMIT        50000
  ------------------
  |  Branch (478:8): [True: 90, False: 291]
  ------------------
  479|     90|    {
  480|     90|        ps_sei->u1_sei_ave_params_present_flag = 0;
  481|     90|        return ERROR_INV_SEI_AVE_PARAMS;
  482|     90|    }
  483|    291|    return (OK);
  ------------------
  |  |  114|    291|#define OK        0
  ------------------
  484|    381|}
ih264d_parse_ccv:
  510|  32.3k|{
  511|  32.3k|    sei *ps_sei = ps_dec->ps_sei_parse;
  512|  32.3k|    dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
  513|  32.3k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  514|  32.3k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  515|  32.3k|    UWORD32 u4_count;
  516|  32.3k|    UNUSED(ui4_payload_size);
  ------------------
  |  |   45|  32.3k|#define UNUSED(x) ((void)(x))
  ------------------
  517|       |
  518|  32.3k|    if((ps_dec == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (518:8): [True: 0, False: 32.3k]
  |  Branch (518:28): [True: 0, False: 32.3k]
  ------------------
  519|      0|    {
  520|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  521|      0|    }
  522|       |
  523|  32.3k|    ps_sei->u1_sei_ccv_params_present_flag = 0;
  524|       |
  525|  32.3k|    ps_sei->s_sei_ccv_params.u1_ccv_cancel_flag = (UWORD8)ih264d_get_bit_h264(ps_bitstrm);
  526|       |
  527|  32.3k|    if(ps_sei->s_sei_ccv_params.u1_ccv_cancel_flag > 1)
  ------------------
  |  Branch (527:8): [True: 0, False: 32.3k]
  ------------------
  528|      0|    {
  529|      0|        return ERROR_INV_SEI_CCV_PARAMS;
  530|      0|    }
  531|  32.3k|    if(0 == ps_sei->s_sei_ccv_params.u1_ccv_cancel_flag)
  ------------------
  |  Branch (531:8): [True: 3.46k, False: 28.8k]
  ------------------
  532|  3.46k|    {
  533|  3.46k|        ps_sei->s_sei_ccv_params.u1_ccv_persistence_flag =
  534|  3.46k|                                                (UWORD8)ih264d_get_bit_h264(ps_bitstrm);
  535|  3.46k|        if(ps_sei->s_sei_ccv_params.u1_ccv_persistence_flag > 1)
  ------------------
  |  Branch (535:12): [True: 0, False: 3.46k]
  ------------------
  536|      0|        {
  537|      0|            return ERROR_INV_SEI_CCV_PARAMS;
  538|      0|        }
  539|  3.46k|        ps_sei->s_sei_ccv_params.u1_ccv_primaries_present_flag =
  540|  3.46k|                                                (UWORD8)ih264d_get_bit_h264(ps_bitstrm);
  541|  3.46k|        if(ps_sei->s_sei_ccv_params.u1_ccv_primaries_present_flag > 1)
  ------------------
  |  Branch (541:12): [True: 0, False: 3.46k]
  ------------------
  542|      0|        {
  543|      0|            return ERROR_INV_SEI_CCV_PARAMS;
  544|      0|        }
  545|  3.46k|        ps_sei->s_sei_ccv_params.u1_ccv_min_luminance_value_present_flag =
  546|  3.46k|                                                (UWORD8)ih264d_get_bit_h264(ps_bitstrm);
  547|  3.46k|        if(ps_sei->s_sei_ccv_params.u1_ccv_min_luminance_value_present_flag > 1)
  ------------------
  |  Branch (547:12): [True: 0, False: 3.46k]
  ------------------
  548|      0|        {
  549|      0|            return ERROR_INV_SEI_CCV_PARAMS;
  550|      0|        }
  551|  3.46k|        ps_sei->s_sei_ccv_params.u1_ccv_max_luminance_value_present_flag =
  552|  3.46k|                                                (UWORD8)ih264d_get_bit_h264(ps_bitstrm);
  553|  3.46k|        if(ps_sei->s_sei_ccv_params.u1_ccv_max_luminance_value_present_flag > 1)
  ------------------
  |  Branch (553:12): [True: 0, False: 3.46k]
  ------------------
  554|      0|        {
  555|      0|            return ERROR_INV_SEI_CCV_PARAMS;
  556|      0|        }
  557|  3.46k|        ps_sei->s_sei_ccv_params.u1_ccv_avg_luminance_value_present_flag =
  558|  3.46k|                                                (UWORD8)ih264d_get_bit_h264(ps_bitstrm);
  559|  3.46k|        if(ps_sei->s_sei_ccv_params.u1_ccv_avg_luminance_value_present_flag > 1)
  ------------------
  |  Branch (559:12): [True: 0, False: 3.46k]
  ------------------
  560|      0|        {
  561|      0|            return ERROR_INV_SEI_CCV_PARAMS;
  562|      0|        }
  563|       |
  564|  3.46k|        if((ps_sei->s_sei_ccv_params.u1_ccv_primaries_present_flag == 0) &&
  ------------------
  |  Branch (564:12): [True: 2.90k, False: 565]
  ------------------
  565|  2.90k|           (ps_sei->s_sei_ccv_params.u1_ccv_min_luminance_value_present_flag == 0) &&
  ------------------
  |  Branch (565:12): [True: 866, False: 2.03k]
  ------------------
  566|    866|           (ps_sei->s_sei_ccv_params.u1_ccv_max_luminance_value_present_flag == 0) &&
  ------------------
  |  Branch (566:12): [True: 274, False: 592]
  ------------------
  567|    274|           (ps_sei->s_sei_ccv_params.u1_ccv_avg_luminance_value_present_flag == 0))
  ------------------
  |  Branch (567:12): [True: 75, False: 199]
  ------------------
  568|     75|        {
  569|     75|            return ERROR_INV_SEI_CCV_PARAMS;
  570|     75|	 }
  571|       |
  572|  3.39k|        ps_sei->s_sei_ccv_params.u1_ccv_reserved_zero_2bits =
  573|  3.39k|                                                (UWORD8)ih264d_get_bits_h264(ps_bitstrm, 2);
  574|  3.39k|        if((ps_sei->s_sei_ccv_params.u1_ccv_reserved_zero_2bits != 0))
  ------------------
  |  Branch (574:12): [True: 76, False: 3.31k]
  ------------------
  575|     76|        {
  576|     76|            return ERROR_INV_SEI_CCV_PARAMS;
  577|     76|        }
  578|       |
  579|       |        /* ccv primaries */
  580|  3.31k|        if(1 == ps_sei->s_sei_ccv_params.u1_ccv_primaries_present_flag)
  ------------------
  |  Branch (580:12): [True: 495, False: 2.82k]
  ------------------
  581|    495|        {
  582|  1.17k|            for(u4_count = 0; u4_count < NUM_SEI_CCV_PRIMARIES; u4_count++)
  ------------------
  |  |   56|  1.17k|#define NUM_SEI_CCV_PRIMARIES         3
  ------------------
  |  Branch (582:31): [True: 1.06k, False: 110]
  ------------------
  583|  1.06k|            {
  584|  1.06k|                ps_sei->s_sei_ccv_params.ai4_ccv_primaries_x[u4_count] =
  585|  1.06k|                                                (WORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  586|  1.06k|                if((ps_sei->s_sei_ccv_params.ai4_ccv_primaries_x[u4_count] >
  ------------------
  |  Branch (586:20): [True: 70, False: 997]
  ------------------
  587|  1.06k|                                                        CCV_PRIMARIES_X_UPPER_LIMIT) ||
  ------------------
  |  |  646|  1.06k|#define CCV_PRIMARIES_X_UPPER_LIMIT        5000000
  ------------------
  588|    997|                   (ps_sei->s_sei_ccv_params.ai4_ccv_primaries_x[u4_count] <
  ------------------
  |  Branch (588:20): [True: 104, False: 893]
  ------------------
  589|    997|                                                        CCV_PRIMARIES_X_LOWER_LIMIT))
  ------------------
  |  |  647|    997|#define CCV_PRIMARIES_X_LOWER_LIMIT        -5000000
  ------------------
  590|    174|                {
  591|    174|                    return ERROR_INV_SEI_CCV_PARAMS;
  592|    174|                }
  593|       |
  594|    893|                ps_sei->s_sei_ccv_params.ai4_ccv_primaries_y[u4_count] =
  595|    893|                                                (WORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  596|    893|                if((ps_sei->s_sei_ccv_params.ai4_ccv_primaries_y[u4_count] >
  ------------------
  |  Branch (596:20): [True: 118, False: 775]
  ------------------
  597|    893|                                                        CCV_PRIMARIES_Y_UPPER_LIMIT) ||
  ------------------
  |  |  648|    893|#define CCV_PRIMARIES_Y_UPPER_LIMIT        5000000
  ------------------
  598|    775|                   (ps_sei->s_sei_ccv_params.ai4_ccv_primaries_y[u4_count] <
  ------------------
  |  Branch (598:20): [True: 93, False: 682]
  ------------------
  599|    775|                                                        CCV_PRIMARIES_Y_LOWER_LIMIT))
  ------------------
  |  |  649|    775|#define CCV_PRIMARIES_Y_LOWER_LIMIT        -5000000
  ------------------
  600|    211|                {
  601|    211|                    return ERROR_INV_SEI_CCV_PARAMS;
  602|    211|                }
  603|    893|            }
  604|    495|        }
  605|       |
  606|  2.93k|        if(1 == ps_sei->s_sei_ccv_params.u1_ccv_min_luminance_value_present_flag)
  ------------------
  |  Branch (606:12): [True: 2.04k, False: 887]
  ------------------
  607|  2.04k|        {
  608|  2.04k|            ps_sei->s_sei_ccv_params.u4_ccv_min_luminance_value =
  609|  2.04k|                                                (UWORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  610|  2.04k|        }
  611|       |
  612|  2.93k|        if(1 == ps_sei->s_sei_ccv_params.u1_ccv_max_luminance_value_present_flag)
  ------------------
  |  Branch (612:12): [True: 2.64k, False: 286]
  ------------------
  613|  2.64k|        {
  614|  2.64k|            ps_sei->s_sei_ccv_params.u4_ccv_max_luminance_value =
  615|  2.64k|                                                (UWORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  616|  2.64k|            if((1 == ps_sei->s_sei_ccv_params.u1_ccv_min_luminance_value_present_flag) &&
  ------------------
  |  Branch (616:16): [True: 1.95k, False: 687]
  ------------------
  617|  1.95k|                (ps_sei->s_sei_ccv_params.u4_ccv_max_luminance_value <
  ------------------
  |  Branch (617:17): [True: 221, False: 1.73k]
  ------------------
  618|  1.95k|                                                ps_sei->s_sei_ccv_params.u4_ccv_min_luminance_value))
  619|    221|            {
  620|    221|                return ERROR_INV_SEI_CCV_PARAMS;
  621|    221|            }
  622|  2.64k|        }
  623|  2.71k|        if(1 == ps_sei->s_sei_ccv_params.u1_ccv_avg_luminance_value_present_flag)
  ------------------
  |  Branch (623:12): [True: 1.18k, False: 1.52k]
  ------------------
  624|  1.18k|        {
  625|  1.18k|            ps_sei->s_sei_ccv_params.u4_ccv_avg_luminance_value =
  626|  1.18k|                                                (UWORD32)ih264d_get_bits_h264(ps_bitstrm, 32);
  627|  1.18k|            if((1 == ps_sei->s_sei_ccv_params.u1_ccv_min_luminance_value_present_flag) &&
  ------------------
  |  Branch (627:16): [True: 397, False: 786]
  ------------------
  628|    397|                (ps_sei->s_sei_ccv_params.u4_ccv_avg_luminance_value <
  ------------------
  |  Branch (628:17): [True: 83, False: 314]
  ------------------
  629|    397|                                                ps_sei->s_sei_ccv_params.u4_ccv_min_luminance_value))
  630|     83|            {
  631|     83|                return ERROR_INV_SEI_CCV_PARAMS;
  632|     83|            }
  633|  1.10k|            if((1 == ps_sei->s_sei_ccv_params.u1_ccv_max_luminance_value_present_flag) &&
  ------------------
  |  Branch (633:16): [True: 886, False: 214]
  ------------------
  634|    886|                (ps_sei->s_sei_ccv_params.u4_ccv_max_luminance_value <
  ------------------
  |  Branch (634:17): [True: 101, False: 785]
  ------------------
  635|    886|                                                ps_sei->s_sei_ccv_params.u4_ccv_avg_luminance_value))
  636|    101|            {
  637|    101|                return ERROR_INV_SEI_CCV_PARAMS;
  638|    101|            }
  639|  1.10k|        }
  640|  2.71k|    }
  641|  31.4k|    ps_sei->u1_sei_ccv_params_present_flag = 1;
  642|  31.4k|    return (OK);
  ------------------
  |  |  114|  31.4k|#define OK        0
  ------------------
  643|  32.3k|}
ih264d_parse_sii:
  668|  1.98k|{
  669|  1.98k|    sei *ps_sei;
  670|  1.98k|    dec_err_status_t *ps_err;
  671|  1.98k|    int i;
  672|  1.98k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  673|  1.98k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  674|  1.98k|    UNUSED(ui4_payload_size);
  ------------------
  |  |   45|  1.98k|#define UNUSED(x) ((void)(x))
  ------------------
  675|       |
  676|  1.98k|    if(ps_dec == NULL)
  ------------------
  |  Branch (676:8): [True: 0, False: 1.98k]
  ------------------
  677|      0|    {
  678|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  679|      0|    }
  680|  1.98k|    ps_sei = ps_dec->ps_sei_parse;
  681|       |
  682|  1.98k|    if(ps_sei == NULL)
  ------------------
  |  Branch (682:8): [True: 0, False: 1.98k]
  ------------------
  683|      0|    {
  684|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  685|      0|    }
  686|  1.98k|    ps_err = ps_dec->ps_dec_err_status;
  687|       |
  688|  1.98k|    ps_sei->u1_sei_sii_params_present_flag = 0;
  689|  1.98k|    memset(&ps_sei->s_sei_sii_params, 0, sizeof(ps_sei->s_sei_sii_params));
  690|       |
  691|  1.98k|    ps_sei->s_sei_sii_params.u4_sii_sub_layer_idx = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  692|       |
  693|  1.98k|    if(0 == ps_sei->s_sei_sii_params.u4_sii_sub_layer_idx)
  ------------------
  |  Branch (693:8): [True: 1.90k, False: 81]
  ------------------
  694|  1.90k|    {
  695|  1.90k|        ps_sei->s_sei_sii_params.u1_shutter_interval_info_present_flag =
  696|  1.90k|            (UWORD8) ih264d_get_bit_h264(ps_bitstrm);
  697|       |
  698|  1.90k|        if(1 == ps_sei->s_sei_sii_params.u1_shutter_interval_info_present_flag)
  ------------------
  |  Branch (698:12): [True: 837, False: 1.06k]
  ------------------
  699|    837|        {
  700|    837|            ps_sei->s_sei_sii_params.u4_sii_time_scale =
  701|    837|                (UWORD32) ih264d_get_bits_h264(ps_bitstrm, 32);
  702|       |
  703|    837|            ps_sei->s_sei_sii_params.u1_fixed_shutter_interval_within_cvs_flag =
  704|    837|                (UWORD8) ih264d_get_bit_h264(ps_bitstrm);
  705|       |
  706|    837|            if(1 == ps_sei->s_sei_sii_params.u1_fixed_shutter_interval_within_cvs_flag)
  ------------------
  |  Branch (706:16): [True: 356, False: 481]
  ------------------
  707|    356|            {
  708|    356|                ps_sei->s_sei_sii_params.u4_sii_num_units_in_shutter_interval =
  709|    356|                    (UWORD32) ih264d_get_bits_h264(ps_bitstrm, 32);
  710|    356|            }
  711|    481|            else
  712|    481|            {
  713|    481|                ps_sei->s_sei_sii_params.u1_sii_max_sub_layers_minus1 =
  714|    481|                    (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 3);
  715|  1.64k|                for(i = 0; i <= ps_sei->s_sei_sii_params.u1_sii_max_sub_layers_minus1; i++)
  ------------------
  |  Branch (715:28): [True: 1.15k, False: 481]
  ------------------
  716|  1.15k|                {
  717|  1.15k|                    ps_sei->s_sei_sii_params.au4_sub_layer_num_units_in_shutter_interval[i] =
  718|  1.15k|                        (UWORD32) ih264d_get_bits_h264(ps_bitstrm, 32);
  719|  1.15k|                }
  720|    481|            }
  721|    837|        }
  722|  1.90k|    }
  723|       |
  724|  1.98k|    if((ps_sei->s_sei_sii_params.u4_sii_sub_layer_idx >
  ------------------
  |  Branch (724:8): [True: 81, False: 1.90k]
  ------------------
  725|  1.98k|        ps_sei->s_sei_sii_params.u1_sii_max_sub_layers_minus1) &&
  726|     81|       (ps_sei->s_sei_sii_params.u1_fixed_shutter_interval_within_cvs_flag == 0))
  ------------------
  |  Branch (726:8): [True: 81, False: 0]
  ------------------
  727|     81|    {
  728|     81|        return ERROR_INV_SEI_SII_PARAMS;
  729|     81|    }
  730|       |
  731|  1.90k|    ps_sei->u1_sei_sii_params_present_flag = 1;
  732|  1.90k|    return (OK);
  ------------------
  |  |  114|  1.90k|#define OK        0
  ------------------
  733|  1.98k|}
ih264d_parse_fgc:
  758|  2.20k|{
  759|  2.20k|    sei *ps_sei = ps_dec->ps_sei_parse;
  760|  2.20k|    dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
  761|  2.20k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  762|  2.20k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  763|  2.20k|    UWORD32 u4_count;
  764|  2.20k|    WORD32 i4_luma_bitdepth, i4_chroma_bitdepth;
  765|  2.20k|    UWORD32 c, i, j;
  766|  2.20k|    UNUSED(ui4_payload_size);
  ------------------
  |  |   45|  2.20k|#define UNUSED(x) ((void)(x))
  ------------------
  767|       |
  768|  2.20k|    if((ps_dec == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (768:8): [True: 0, False: 2.20k]
  |  Branch (768:28): [True: 0, False: 2.20k]
  ------------------
  769|      0|    {
  770|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  771|      0|    }
  772|       |
  773|  2.20k|    ps_sei->u1_sei_fgc_params_present_flag = 0;
  774|       |
  775|  2.20k|    ps_sei->s_sei_fgc_params.u1_film_grain_characteristics_cancel_flag =
  776|  2.20k|        (UWORD8) ih264d_get_bit_h264(ps_bitstrm);
  777|       |
  778|  2.20k|    if(0 == ps_sei->s_sei_fgc_params.u1_film_grain_characteristics_cancel_flag)
  ------------------
  |  Branch (778:8): [True: 2.10k, False: 98]
  ------------------
  779|  2.10k|    {
  780|  2.10k|        ps_sei->s_sei_fgc_params.u1_film_grain_model_id =
  781|  2.10k|            (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 2);
  782|  2.10k|        if(ps_sei->s_sei_fgc_params.u1_film_grain_model_id > 1)
  ------------------
  |  Branch (782:12): [True: 69, False: 2.03k]
  ------------------
  783|     69|        {
  784|     69|            return ERROR_INV_SEI_FGC_PARAMS;
  785|     69|        }
  786|  2.03k|        ps_sei->s_sei_fgc_params.u1_separate_colour_description_present_flag =
  787|  2.03k|            (UWORD8) ih264d_get_bit_h264(ps_bitstrm);
  788|       |
  789|  2.03k|        if(ps_sei->s_sei_fgc_params.u1_separate_colour_description_present_flag)
  ------------------
  |  Branch (789:12): [True: 1.89k, False: 139]
  ------------------
  790|  1.89k|        {
  791|  1.89k|            ps_sei->s_sei_fgc_params.u1_film_grain_bit_depth_luma_minus8 =
  792|  1.89k|                (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 3);
  793|       |
  794|  1.89k|            i4_luma_bitdepth = ps_sei->s_sei_fgc_params.u1_film_grain_bit_depth_luma_minus8 + 8;
  795|       |
  796|  1.89k|            ps_sei->s_sei_fgc_params.u1_film_grain_bit_depth_chroma_minus8 =
  797|  1.89k|                (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 3);
  798|       |
  799|  1.89k|            i4_chroma_bitdepth = ps_sei->s_sei_fgc_params.u1_film_grain_bit_depth_chroma_minus8 + 8;
  800|       |
  801|  1.89k|            ps_sei->s_sei_fgc_params.u1_film_grain_full_range_flag =
  802|  1.89k|                (UWORD8) ih264d_get_bit_h264(ps_bitstrm);
  803|       |
  804|  1.89k|            ps_sei->s_sei_fgc_params.u1_film_grain_colour_primaries =
  805|  1.89k|                (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 8);
  806|       |
  807|  1.89k|            ps_sei->s_sei_fgc_params.u1_film_grain_transfer_characteristics =
  808|  1.89k|                (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 8);
  809|       |
  810|  1.89k|            ps_sei->s_sei_fgc_params.u1_film_grain_matrix_coefficients =
  811|  1.89k|                (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 8);
  812|  1.89k|        }
  813|    139|        else
  814|    139|        {
  815|    139|            if(ps_dec->ps_cur_sps == NULL)
  ------------------
  |  Branch (815:16): [True: 71, False: 68]
  ------------------
  816|     71|            {
  817|     71|                return NOT_OK;
  ------------------
  |  |  116|     71|#define NOT_OK    -1
  ------------------
  818|     71|            }
  819|     68|            i4_luma_bitdepth = ps_dec->ps_cur_sps->i4_bit_depth_luma_minus8 + 8;
  820|     68|            i4_chroma_bitdepth = ps_dec->ps_cur_sps->i4_bit_depth_chroma_minus8 + 8;
  821|     68|        }
  822|  1.96k|        ps_sei->s_sei_fgc_params.u1_blending_mode_id = (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 2);
  823|       |
  824|  1.96k|        if(ps_sei->s_sei_fgc_params.u1_blending_mode_id > 1)
  ------------------
  |  Branch (824:12): [True: 103, False: 1.85k]
  ------------------
  825|    103|        {
  826|    103|            return ERROR_INV_SEI_FGC_PARAMS;
  827|    103|        }
  828|       |
  829|  1.85k|        ps_sei->s_sei_fgc_params.u1_log2_scale_factor =
  830|  1.85k|            (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 4);
  831|       |
  832|  7.43k|        for(c = 0; c < SEI_FGC_NUM_COLOUR_COMPONENTS; c++)
  ------------------
  |  |   63|  7.43k|#define SEI_FGC_NUM_COLOUR_COMPONENTS 3
  ------------------
  |  Branch (832:20): [True: 5.57k, False: 1.85k]
  ------------------
  833|  5.57k|        {
  834|  5.57k|            ps_sei->s_sei_fgc_params.au1_comp_model_present_flag[c] =
  835|  5.57k|                (UWORD8) ih264d_get_bit_h264(ps_bitstrm);
  836|  5.57k|        }
  837|       |
  838|  4.87k|        for(c = 0; c < SEI_FGC_NUM_COLOUR_COMPONENTS; c++)
  ------------------
  |  |   63|  4.87k|#define SEI_FGC_NUM_COLOUR_COMPONENTS 3
  ------------------
  |  Branch (838:20): [True: 4.22k, False: 648]
  ------------------
  839|  4.22k|        {
  840|  4.22k|            if(ps_sei->s_sei_fgc_params.au1_comp_model_present_flag[c])
  ------------------
  |  Branch (840:16): [True: 1.30k, False: 2.92k]
  ------------------
  841|  1.30k|            {
  842|  1.30k|                ps_sei->s_sei_fgc_params.au1_num_intensity_intervals_minus1[c] =
  843|  1.30k|                    (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 8);
  844|       |
  845|  1.30k|                ps_sei->s_sei_fgc_params.au1_num_model_values_minus1[c] =
  846|  1.30k|                    (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 3);
  847|       |
  848|  1.30k|                if(ps_sei->s_sei_fgc_params.au1_num_model_values_minus1[c] >
  ------------------
  |  Branch (848:20): [True: 79, False: 1.22k]
  ------------------
  849|  1.30k|                   (SEI_FGC_MAX_NUM_MODEL_VALUES - 1))
  ------------------
  |  |   64|  1.30k|#define SEI_FGC_MAX_NUM_MODEL_VALUES 6
  ------------------
  850|     79|                {
  851|     79|                    return ERROR_INV_SEI_FGC_PARAMS;
  852|     79|                }
  853|       |
  854|  5.10k|                for(i = 0; i <= ps_sei->s_sei_fgc_params.au1_num_intensity_intervals_minus1[c]; i++)
  ------------------
  |  Branch (854:28): [True: 5.01k, False: 90]
  ------------------
  855|  5.01k|                {
  856|       |                    /* Although the fag end of both the NALU and the bitstream buffer */
  857|       |                    /* is being parsed, not all FGC SEI symbols would have been */
  858|       |                    /* decoded semantically. The code below detects this condition */
  859|  5.01k|                    if((ps_bitstrm->u4_ofst + 8 + 8) >= ps_bitstrm->u4_max_ofst)
  ------------------
  |  Branch (859:24): [True: 146, False: 4.87k]
  ------------------
  860|    146|                    {
  861|    146|                        return ERROR_INV_SEI_FGC_PARAMS;
  862|    146|                    }
  863|       |
  864|  4.87k|                    ps_sei->s_sei_fgc_params.au1_intensity_interval_lower_bound[c][i] =
  865|  4.87k|                        (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 8);
  866|       |
  867|  4.87k|                    ps_sei->s_sei_fgc_params.au1_intensity_interval_upper_bound[c][i] =
  868|  4.87k|                        (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 8);
  869|       |
  870|  22.8k|                    for(j = 0; j <= ps_sei->s_sei_fgc_params.au1_num_model_values_minus1[c]; j++)
  ------------------
  |  Branch (870:32): [True: 18.9k, False: 3.88k]
  ------------------
  871|  18.9k|                    {
  872|  18.9k|                        ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] =
  873|  18.9k|                            (WORD32) ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  874|  18.9k|                        if(0 == ps_sei->s_sei_fgc_params.u1_film_grain_model_id)
  ------------------
  |  Branch (874:28): [True: 6.61k, False: 12.3k]
  ------------------
  875|  6.61k|                        {
  876|  6.61k|                            if((1 == j) || (2 == j))
  ------------------
  |  Branch (876:32): [True: 1.40k, False: 5.20k]
  |  Branch (876:44): [True: 1.15k, False: 4.05k]
  ------------------
  877|  2.56k|                            {
  878|  2.56k|                                if((ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] < 0) ||
  ------------------
  |  Branch (878:36): [True: 115, False: 2.44k]
  ------------------
  879|  2.44k|                                   (ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] > 16))
  ------------------
  |  Branch (879:36): [True: 120, False: 2.32k]
  ------------------
  880|    235|                                    return ERROR_INV_SEI_FGC_PARAMS;
  881|  2.56k|                            }
  882|  4.05k|                            else if((3 == j) || (4 == j))
  ------------------
  |  Branch (882:37): [True: 954, False: 3.10k]
  |  Branch (882:49): [True: 596, False: 2.50k]
  ------------------
  883|  1.55k|                            {
  884|  1.55k|                                if((ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] < 0) ||
  ------------------
  |  Branch (884:36): [True: 146, False: 1.40k]
  ------------------
  885|  1.40k|                                   (ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] >
  ------------------
  |  Branch (885:36): [True: 116, False: 1.28k]
  ------------------
  886|  1.40k|                                    ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j - 2]))
  887|    262|                                    return ERROR_INV_SEI_FGC_PARAMS;
  888|  1.55k|                            }
  889|  2.50k|                            else
  890|  2.50k|                            {
  891|  2.50k|                                WORD32 max_lim = (c == 0) ? (1 << i4_luma_bitdepth) - 1
  ------------------
  |  Branch (891:50): [True: 695, False: 1.81k]
  ------------------
  892|  2.50k|                                                          : (1 << i4_chroma_bitdepth) - 1;
  893|       |
  894|  2.50k|                                if((ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] < 0) ||
  ------------------
  |  Branch (894:36): [True: 190, False: 2.31k]
  ------------------
  895|  2.31k|                                   (ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] >
  ------------------
  |  Branch (895:36): [True: 105, False: 2.21k]
  ------------------
  896|  2.31k|                                    max_lim))
  897|    295|                                {
  898|    295|                                    return ERROR_INV_SEI_FGC_PARAMS;
  899|    295|                                }
  900|  2.50k|                            }
  901|  6.61k|                        }
  902|  12.3k|                        else
  903|  12.3k|                        {
  904|  12.3k|                            WORD32 max_lim = (c == 0) ? (1 << (i4_luma_bitdepth - 1))
  ------------------
  |  Branch (904:46): [True: 2.82k, False: 9.50k]
  ------------------
  905|  12.3k|                                                      : (1 << (i4_chroma_bitdepth - 1));
  906|       |
  907|  12.3k|                            if((ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] <
  ------------------
  |  Branch (907:32): [True: 72, False: 12.2k]
  ------------------
  908|  12.3k|                                -max_lim) ||
  909|  12.2k|                               (ps_sei->s_sei_fgc_params.ai4_comp_model_value[c][i][j] >= max_lim))
  ------------------
  |  Branch (909:32): [True: 122, False: 12.1k]
  ------------------
  910|    194|                            {
  911|    194|                                return ERROR_INV_SEI_FGC_PARAMS;
  912|    194|                            }
  913|  12.3k|                        }
  914|  18.9k|                    }
  915|  4.87k|                }
  916|  1.22k|            }
  917|  4.22k|        }
  918|       |
  919|    648|        ps_sei->s_sei_fgc_params.u4_film_grain_characteristics_repetition_period =
  920|    648|            (UWORD32) ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  921|       |
  922|    648|        if(ps_sei->s_sei_fgc_params.u4_film_grain_characteristics_repetition_period < 0 ||
  ------------------
  |  Branch (922:12): [True: 0, False: 648]
  ------------------
  923|    648|           ps_sei->s_sei_fgc_params.u4_film_grain_characteristics_repetition_period > 16384)
  ------------------
  |  Branch (923:12): [True: 112, False: 536]
  ------------------
  924|    112|        {
  925|    112|            return ERROR_INV_SEI_FGC_PARAMS;
  926|    112|        }
  927|       |
  928|    536|        ps_sei->u1_sei_fgc_params_present_flag = 1;
  929|    536|    }
  930|       |
  931|    634|    return (OK);
  ------------------
  |  |  114|    634|#define OK        0
  ------------------
  932|  2.20k|}
ih264d_parse_sei_payload:
  961|  74.6k|{
  962|  74.6k|    sei *ps_sei;
  963|  74.6k|    WORD32 i4_status = 0;
  964|  74.6k|    ps_sei = (sei *)ps_dec->ps_sei_parse;
  965|       |
  966|  74.6k|    if(ui4_payload_size == 0)
  ------------------
  |  Branch (966:8): [True: 359, False: 74.2k]
  ------------------
  967|    359|        return -1;
  968|  74.2k|    if(NULL == ps_bitstrm)
  ------------------
  |  Branch (968:8): [True: 0, False: 74.2k]
  ------------------
  969|      0|    {
  970|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  971|      0|    }
  972|       |
  973|  74.2k|    switch(ui4_payload_type)
  974|  74.2k|    {
  975|  1.19k|        case SEI_BUF_PERIOD:
  ------------------
  |  |   48|  1.19k|#define SEI_BUF_PERIOD      0
  ------------------
  |  Branch (975:9): [True: 1.19k, False: 73.0k]
  ------------------
  976|       |
  977|  1.19k|            i4_status = ih264d_parse_buffering_period(&ps_sei->s_buf_period,
  978|  1.19k|                                                      ps_bitstrm, ps_dec);
  979|  1.19k|            break;
  980|  1.29k|        case SEI_PIC_TIMING:
  ------------------
  |  |   49|  1.29k|#define SEI_PIC_TIMING      1
  ------------------
  |  Branch (980:9): [True: 1.29k, False: 72.9k]
  ------------------
  981|  1.29k|            if(NULL == ps_dec->ps_cur_sps)
  ------------------
  |  Branch (981:16): [True: 230, False: 1.06k]
  ------------------
  982|    230|                i4_status = ih264d_flush_bits_h264(ps_bitstrm, (ui4_payload_size << 3));
  983|  1.06k|            else
  984|  1.06k|                i4_status = ih264d_parse_pic_timing(ps_bitstrm, ps_dec,
  985|  1.06k|                                        ui4_payload_size);
  986|  1.29k|            break;
  987|  1.86k|        case SEI_RECOVERY_PT:
  ------------------
  |  |   54|  1.86k|#define SEI_RECOVERY_PT     6
  ------------------
  |  Branch (987:9): [True: 1.86k, False: 72.3k]
  ------------------
  988|  1.86k|            i4_status = ih264d_parse_recovery_point(ps_bitstrm, ps_dec,
  989|  1.86k|                                        ui4_payload_size);
  990|  1.86k|            break;
  991|  1.63k|        case SEI_MASTERING_DISP_COL_VOL:
  ------------------
  |  |   68|  1.63k|#define SEI_MASTERING_DISP_COL_VOL       137
  ------------------
  |  Branch (991:9): [True: 1.63k, False: 72.6k]
  ------------------
  992|       |
  993|  1.63k|            i4_status = ih264d_parse_mdcv(ps_bitstrm, ps_dec,
  994|  1.63k|                                          ui4_payload_size);
  995|  1.63k|            break;
  996|    777|        case SEI_CONTENT_LIGHT_LEVEL_DATA:
  ------------------
  |  |   69|    777|#define SEI_CONTENT_LIGHT_LEVEL_DATA     144
  ------------------
  |  Branch (996:9): [True: 777, False: 73.4k]
  ------------------
  997|       |
  998|    777|            i4_status = ih264d_parse_cll(ps_bitstrm, ps_dec,
  999|    777|                                         ui4_payload_size);
 1000|    777|            break;
 1001|    533|        case SEI_AMBIENT_VIEWING_ENVIRONMENT:
  ------------------
  |  |   70|    533|#define SEI_AMBIENT_VIEWING_ENVIRONMENT  148
  ------------------
  |  Branch (1001:9): [True: 533, False: 73.7k]
  ------------------
 1002|       |
 1003|    533|            i4_status = ih264d_parse_ave(ps_bitstrm, ps_dec,
 1004|    533|                                         ui4_payload_size);
 1005|    533|            break;
 1006|  32.3k|        case SEI_CONTENT_COLOR_VOLUME:
  ------------------
  |  |   71|  32.3k|#define SEI_CONTENT_COLOR_VOLUME         149
  ------------------
  |  Branch (1006:9): [True: 32.3k, False: 41.8k]
  ------------------
 1007|       |
 1008|  32.3k|            i4_status = ih264d_parse_ccv(ps_bitstrm, ps_dec,
 1009|  32.3k|                                         ui4_payload_size);
 1010|  32.3k|            break;
 1011|  1.98k|        case SEI_SHUTTER_INTERVAL_INFO:
  ------------------
  |  |   72|  1.98k|#define SEI_SHUTTER_INTERVAL_INFO        205
  ------------------
  |  Branch (1011:9): [True: 1.98k, False: 72.2k]
  ------------------
 1012|       |
 1013|  1.98k|            i4_status = ih264d_parse_sii(ps_bitstrm, ps_dec, ui4_payload_size);
 1014|  1.98k|            break;
 1015|       |
 1016|  2.20k|        case SEI_FILM_GRAIN_CHARACTERISTICS:
  ------------------
  |  |   67|  2.20k|#define SEI_FILM_GRAIN_CHARACTERISTICS 19
  ------------------
  |  Branch (1016:9): [True: 2.20k, False: 72.0k]
  ------------------
 1017|  2.20k|            i4_status = ih264d_parse_fgc(ps_bitstrm, ps_dec, ui4_payload_size);
 1018|       |
 1019|  2.20k|            break;
 1020|  30.4k|        default:
  ------------------
  |  Branch (1020:9): [True: 30.4k, False: 43.8k]
  ------------------
 1021|  30.4k|            i4_status = ih264d_flush_bits_h264(ps_bitstrm, (ui4_payload_size << 3));
 1022|  30.4k|            break;
 1023|  74.2k|    }
 1024|  74.2k|    return (i4_status);
 1025|  74.2k|}
ih264d_parse_sei_message:
 1051|  8.73k|{
 1052|  8.73k|    UWORD32 ui4_payload_type, ui4_payload_size;
 1053|  8.73k|    UWORD32 u4_bits;
 1054|  8.73k|    WORD32 i4_status = 0;
 1055|       |
 1056|  8.73k|    do
 1057|  76.0k|    {
 1058|  76.0k|        ui4_payload_type = 0;
 1059|       |
 1060|  76.0k|        if(!CHECK_BITS_SUFFICIENT(ps_bitstrm, 8))
  ------------------
  |  |   95|  76.0k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  ------------------
  |  Branch (1060:12): [True: 257, False: 75.7k]
  ------------------
 1061|    257|        {
 1062|    257|            return ERROR_EOB_GETBITS_T;
 1063|    257|        }
 1064|  75.7k|        u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8);
 1065|   781k|        while(0xff == u4_bits && CHECK_BITS_SUFFICIENT(ps_bitstrm, 8))
  ------------------
  |  |   95|   705k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (95:3): [True: 705k, False: 38]
  |  |  ------------------
  ------------------
  |  Branch (1065:15): [True: 705k, False: 75.7k]
  ------------------
 1066|   705k|        {
 1067|   705k|            u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8);
 1068|   705k|            ui4_payload_type += 255;
 1069|   705k|        }
 1070|  75.7k|        ui4_payload_type += u4_bits;
 1071|       |
 1072|  75.7k|        ui4_payload_size = 0;
 1073|  75.7k|        if(!CHECK_BITS_SUFFICIENT(ps_bitstrm, 8))
  ------------------
  |  |   95|  75.7k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  ------------------
  |  Branch (1073:12): [True: 243, False: 75.5k]
  ------------------
 1074|    243|        {
 1075|    243|            return ERROR_EOB_GETBITS_T;
 1076|    243|        }
 1077|  75.5k|        u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8);
 1078|   857k|        while(0xff == u4_bits && CHECK_BITS_SUFFICIENT(ps_bitstrm, 8))
  ------------------
  |  |   95|   781k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (95:3): [True: 781k, False: 19]
  |  |  ------------------
  ------------------
  |  Branch (1078:15): [True: 781k, False: 75.5k]
  ------------------
 1079|   781k|        {
 1080|   781k|            u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8);
 1081|   781k|            ui4_payload_size += 255;
 1082|   781k|        }
 1083|  75.5k|        ui4_payload_size += u4_bits;
 1084|       |
 1085|  75.5k|        if(!CHECK_BITS_SUFFICIENT(ps_bitstrm, (ui4_payload_size << 3)))
  ------------------
  |  |   95|  75.5k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  ------------------
  |  Branch (1085:12): [True: 928, False: 74.6k]
  ------------------
 1086|    928|        {
 1087|    928|            return ERROR_EOB_GETBITS_T;
 1088|    928|        }
 1089|  74.6k|        i4_status = ih264d_parse_sei_payload(ps_bitstrm, ui4_payload_type,
 1090|  74.6k|                                             ui4_payload_size, ps_dec);
 1091|  74.6k|        if(i4_status != OK)
  ------------------
  |  |  114|  74.6k|#define OK        0
  ------------------
  |  Branch (1091:12): [True: 5.05k, False: 69.5k]
  ------------------
 1092|  5.05k|            return i4_status;
 1093|       |
 1094|  69.5k|        if(ih264d_check_byte_aligned(ps_bitstrm) == 0)
  ------------------
  |  Branch (1094:12): [True: 34.2k, False: 35.3k]
  ------------------
 1095|  34.2k|        {
 1096|  34.2k|            u4_bits = ih264d_get_bit_h264(ps_bitstrm);
 1097|  34.2k|            if(0 == u4_bits)
  ------------------
  |  Branch (1097:16): [True: 31.9k, False: 2.29k]
  ------------------
 1098|  31.9k|            {
 1099|  31.9k|                H264_DEC_DEBUG_PRINT("\nError in parsing SEI message");
  ------------------
  |  |   39|  31.9k|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 1100|  31.9k|            }
 1101|   218k|            while(0 == ih264d_check_byte_aligned(ps_bitstrm)
  ------------------
  |  Branch (1101:19): [True: 185k, False: 33.4k]
  ------------------
 1102|   185k|                            && CHECK_BITS_SUFFICIENT(ps_bitstrm, 1))
  ------------------
  |  |   95|   185k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (95:3): [True: 184k, False: 756]
  |  |  ------------------
  ------------------
 1103|   184k|            {
 1104|   184k|                u4_bits = ih264d_get_bit_h264(ps_bitstrm);
 1105|   184k|                if(u4_bits)
  ------------------
  |  Branch (1105:20): [True: 93.4k, False: 91.1k]
  ------------------
 1106|  93.4k|                {
 1107|  93.4k|                    H264_DEC_DEBUG_PRINT("\nError in parsing SEI message");
  ------------------
  |  |   39|  93.4k|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 1108|  93.4k|                }
 1109|   184k|            }
 1110|  34.2k|        }
 1111|  69.5k|    }
 1112|  8.73k|    while(MORE_RBSP_DATA(ps_bitstrm));
  ------------------
  |  |   97|  69.5k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|  69.5k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (95:3): [True: 67.3k, False: 2.24k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1113|  2.24k|    return (i4_status);
 1114|  8.73k|}
ih264d_export_sei_mdcv_params:
 1137|   195k|{
 1138|   195k|    if((ps_sei_export == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (1138:8): [True: 0, False: 195k]
  |  Branch (1138:35): [True: 108k, False: 86.7k]
  ------------------
 1139|   108k|    {
 1140|   108k|        return NOT_OK;
  ------------------
  |  |  116|   108k|#define NOT_OK    -1
  ------------------
 1141|   108k|    }
 1142|       |
 1143|  86.7k|    ps_sei_export->u1_sei_mdcv_params_present_flag = ps_sei->u1_sei_mdcv_params_present_flag;
 1144|  86.7k|    ps_sei_decode_op->u1_sei_mdcv_params_present_flag = ps_sei->u1_sei_mdcv_params_present_flag;
 1145|       |
 1146|  86.7k|    if(0 == ps_sei_export->u1_sei_mdcv_params_present_flag)
  ------------------
  |  Branch (1146:8): [True: 86.7k, False: 40]
  ------------------
 1147|  86.7k|    {
 1148|  86.7k|        memset(&ps_sei_export->s_sei_mdcv_params, 0, sizeof(sei_mdcv_params_t));
 1149|  86.7k|    }
 1150|     40|    else
 1151|     40|    {
 1152|     40|        memcpy(&ps_sei_export->s_sei_mdcv_params, &ps_sei->s_sei_mdcv_params,
 1153|     40|                                                    sizeof(sei_mdcv_params_t));
 1154|     40|    }
 1155|       |
 1156|  86.7k|    return (OK);
  ------------------
  |  |  114|  86.7k|#define OK        0
  ------------------
 1157|   195k|}
ih264d_export_sei_cll_params:
 1180|   195k|{
 1181|   195k|    if((ps_sei_export == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (1181:8): [True: 0, False: 195k]
  |  Branch (1181:35): [True: 108k, False: 86.7k]
  ------------------
 1182|   108k|    {
 1183|   108k|        return NOT_OK;
  ------------------
  |  |  116|   108k|#define NOT_OK    -1
  ------------------
 1184|   108k|    }
 1185|       |
 1186|  86.7k|    ps_sei_export->u1_sei_cll_params_present_flag = ps_sei->u1_sei_cll_params_present_flag;
 1187|  86.7k|    ps_sei_decode_op->u1_sei_cll_params_present_flag = ps_sei->u1_sei_cll_params_present_flag;
 1188|       |
 1189|  86.7k|    if(0 == ps_sei_export->u1_sei_cll_params_present_flag)
  ------------------
  |  Branch (1189:8): [True: 86.7k, False: 73]
  ------------------
 1190|  86.7k|    {
 1191|  86.7k|        memset(&ps_sei_export->s_sei_cll_params, 0, sizeof(sei_cll_params_t));
 1192|  86.7k|    }
 1193|     73|    else
 1194|     73|    {
 1195|     73|        memcpy(&ps_sei_export->s_sei_cll_params, &ps_sei->s_sei_cll_params,
 1196|     73|                                                    sizeof(sei_cll_params_t));
 1197|     73|    }
 1198|  86.7k|    return (OK);
  ------------------
  |  |  114|  86.7k|#define OK        0
  ------------------
 1199|   195k|}
ih264d_export_sei_ave_params:
 1222|   195k|{
 1223|   195k|    if((ps_sei_export == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (1223:8): [True: 0, False: 195k]
  |  Branch (1223:35): [True: 108k, False: 86.7k]
  ------------------
 1224|   108k|    {
 1225|   108k|        return NOT_OK;
  ------------------
  |  |  116|   108k|#define NOT_OK    -1
  ------------------
 1226|   108k|    }
 1227|       |
 1228|  86.7k|    ps_sei_export->u1_sei_ave_params_present_flag = ps_sei->u1_sei_ave_params_present_flag;
 1229|  86.7k|    ps_sei_decode_op->u1_sei_ave_params_present_flag = ps_sei->u1_sei_ave_params_present_flag;
 1230|       |
 1231|  86.7k|    if(0 == ps_sei_export->u1_sei_ave_params_present_flag)
  ------------------
  |  Branch (1231:8): [True: 86.7k, False: 68]
  ------------------
 1232|  86.7k|    {
 1233|  86.7k|        memset(&ps_sei_export->s_sei_ave_params, 0, sizeof(sei_ave_params_t));
 1234|  86.7k|    }
 1235|     68|    else
 1236|     68|    {
 1237|     68|        memcpy(&ps_sei_export->s_sei_ave_params, &ps_sei->s_sei_ave_params,
 1238|     68|                                                    sizeof(sei_ave_params_t));
 1239|     68|    }
 1240|       |
 1241|  86.7k|    return (OK);
  ------------------
  |  |  114|  86.7k|#define OK        0
  ------------------
 1242|   195k|}
ih264d_export_sei_ccv_params:
 1265|   195k|{
 1266|   195k|    if((ps_sei_export == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (1266:8): [True: 0, False: 195k]
  |  Branch (1266:35): [True: 108k, False: 86.7k]
  ------------------
 1267|   108k|    {
 1268|   108k|        return NOT_OK;
  ------------------
  |  |  116|   108k|#define NOT_OK    -1
  ------------------
 1269|   108k|    }
 1270|       |
 1271|  86.7k|    ps_sei_export->u1_sei_ccv_params_present_flag = ps_sei->u1_sei_ccv_params_present_flag;
 1272|  86.7k|    ps_sei_decode_op->u1_sei_ccv_params_present_flag = ps_sei->u1_sei_ccv_params_present_flag;
 1273|       |
 1274|  86.7k|    if(0 == ps_sei_export->u1_sei_ccv_params_present_flag)
  ------------------
  |  Branch (1274:8): [True: 86.6k, False: 167]
  ------------------
 1275|  86.6k|    {
 1276|  86.6k|        memset(&ps_sei_export->s_sei_ccv_params, 0, sizeof(sei_ccv_params_t));
 1277|  86.6k|    }
 1278|    167|    else
 1279|    167|    {
 1280|    167|        memcpy(&ps_sei_export->s_sei_ccv_params, &ps_sei->s_sei_ccv_params,
 1281|    167|                                                    sizeof(sei_ccv_params_t));
 1282|    167|    }
 1283|  86.7k|    return (OK);
  ------------------
  |  |  114|  86.7k|#define OK        0
  ------------------
 1284|   195k|}
ih264d_export_sei_sii_params:
 1307|   195k|{
 1308|   195k|    if((ps_sei_export == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (1308:8): [True: 0, False: 195k]
  |  Branch (1308:35): [True: 108k, False: 86.7k]
  ------------------
 1309|   108k|    {
 1310|   108k|        return NOT_OK;
  ------------------
  |  |  116|   108k|#define NOT_OK    -1
  ------------------
 1311|   108k|    }
 1312|       |
 1313|  86.7k|    ps_sei_export->u1_sei_sii_params_present_flag = ps_sei->u1_sei_sii_params_present_flag;
 1314|  86.7k|    ps_sei_decode_op->u1_sei_sii_params_present_flag = ps_sei->u1_sei_sii_params_present_flag;
 1315|       |
 1316|  86.7k|    if(0 == ps_sei_export->u1_sei_sii_params_present_flag)
  ------------------
  |  Branch (1316:8): [True: 86.7k, False: 70]
  ------------------
 1317|  86.7k|    {
 1318|  86.7k|        memset(&ps_sei_export->s_sei_sii_params, 0, sizeof(sei_sii_params_t));
 1319|  86.7k|    }
 1320|     70|    else
 1321|     70|    {
 1322|     70|        memcpy(&ps_sei_export->s_sei_sii_params, &ps_sei->s_sei_sii_params,
 1323|     70|               sizeof(sei_sii_params_t));
 1324|     70|    }
 1325|  86.7k|    return (OK);
  ------------------
  |  |  114|  86.7k|#define OK        0
  ------------------
 1326|   195k|}
ih264d_export_sei_fgc_params:
 1349|   195k|{
 1350|   195k|    if((ps_sei_export == NULL) || (ps_sei == NULL))
  ------------------
  |  Branch (1350:8): [True: 0, False: 195k]
  |  Branch (1350:35): [True: 108k, False: 86.7k]
  ------------------
 1351|   108k|    {
 1352|   108k|        return NOT_OK;
  ------------------
  |  |  116|   108k|#define NOT_OK    -1
  ------------------
 1353|   108k|    }
 1354|       |
 1355|  86.7k|    ps_sei_export->u1_sei_fgc_params_present_flag = ps_sei->u1_sei_fgc_params_present_flag;
 1356|  86.7k|    ps_sei_decode_op->u1_sei_fgc_params_present_flag = ps_sei->u1_sei_fgc_params_present_flag;
 1357|       |
 1358|  86.7k|    if(0 == ps_sei_export->u1_sei_fgc_params_present_flag)
  ------------------
  |  Branch (1358:8): [True: 85.7k, False: 1.07k]
  ------------------
 1359|  85.7k|    {
 1360|  85.7k|        memset(&ps_sei_export->s_sei_fgc_params, 0, sizeof(sei_fgc_params_t));
 1361|  85.7k|    }
 1362|  1.07k|    else
 1363|  1.07k|    {
 1364|  1.07k|        memcpy(&ps_sei_export->s_sei_fgc_params, &ps_sei->s_sei_fgc_params,
 1365|  1.07k|               sizeof(sei_fgc_params_t));
 1366|  1.07k|    }
 1367|       |
 1368|  86.7k|    return (OK);
  ------------------
  |  |  114|  86.7k|#define OK        0
  ------------------
 1369|   195k|}

ih264d_check_mb_map_deblk:
  322|   200M|{
  323|   200M|    UWORD32 i = 0;
  324|   200M|    UWORD32 u4_mb_num;
  325|   200M|    UWORD32 u4_cond;
  326|   200M|    volatile UWORD8 *mb_map = ps_dec->pu1_recon_mb_map;
  327|   200M|    const WORD32 i4_cb_qp_idx_ofst =
  328|   200M|                    ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
  329|   200M|    const WORD32 i4_cr_qp_idx_ofst =
  330|   200M|                    ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
  331|       |
  332|   200M|    UWORD32 u4_wd_y, u4_wd_uv;
  333|   200M|    UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
  334|       |
  335|       |
  336|   200M|    u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
  337|   200M|    u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
  338|       |
  339|       |
  340|   202M|    for(i = 0; i < deblk_mb_grp; i++)
  ------------------
  |  Branch (340:16): [True: 1.07M, False: 200M]
  ------------------
  341|  1.07M|    {
  342|  1.07M|        WORD32 nop_cnt = 8*128;
  343|  1.07M|        while(u4_check_mb_map == 1)
  ------------------
  |  Branch (343:15): [True: 0, False: 1.07M]
  ------------------
  344|      0|        {
  345|      0|            u4_mb_num = ps_dec->u4_cur_deblk_mb_num;
  346|       |            /*we wait for the right mb because of intra pred data dependency*/
  347|      0|            u4_mb_num = MIN(u4_mb_num + 1, (ps_dec->u4_deblk_mb_y + 1) * ps_dec->u2_frm_wd_in_mbs - 1);
  ------------------
  |  |   61|      0|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  348|      0|            CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond);
  ------------------
  |  |   80|      0|#define CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond)                                               \
  |  |   81|      0|{                                                                                                   \
  |  |   82|      0|        volatile UWORD8 *pu1_mb_flag;                                                               \
  |  |   83|      0|                                                                                                    \
  |  |   84|      0|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_num );                                           \
  |  |   85|      0|                                                                                                    \
  |  |   86|      0|        u4_cond = (*pu1_mb_flag);                                                                   \
  |  |   87|      0|}
  ------------------
  349|       |
  350|      0|            if(u4_cond)
  ------------------
  |  Branch (350:16): [True: 0, False: 0]
  ------------------
  351|      0|            {
  352|      0|                break;
  353|      0|            }
  354|      0|            else
  355|      0|            {
  356|      0|                if(nop_cnt > 0)
  ------------------
  |  Branch (356:20): [True: 0, False: 0]
  ------------------
  357|      0|                {
  358|      0|                    nop_cnt -= 128;
  359|      0|                    NOP(128);
  ------------------
  |  |   87|      0|#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");}
  |  |  ------------------
  |  |  |  Branch (87:54): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  360|      0|                }
  361|      0|                else
  362|      0|                {
  363|      0|                    nop_cnt = 8*128;
  364|      0|                    ithread_yield();
  365|      0|                }
  366|      0|            }
  367|      0|        }
  368|       |
  369|  1.07M|        ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt,
  370|  1.07M|                                   i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
  371|  1.07M|                                    u4_wd_y, u4_wd_uv);
  372|       |
  373|       |
  374|  1.07M|    }
  375|       |
  376|       |
  377|   200M|}

ih264d_parse_tfr_nmb:
   68|  1.11M|{
   69|  1.11M|    WORD32 i, u4_mb_num;
   70|       |
   71|  1.11M|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
   72|  1.11M|    UWORD32 u4_n_mb_start;
   73|       |
   74|  1.11M|    UNUSED(u4_mb_idx);
  ------------------
  |  |   45|  1.11M|#define UNUSED(x) ((void)(x))
  ------------------
   75|  1.11M|    UNUSED(u4_num_mbs_next);
  ------------------
  |  |   45|  1.11M|#define UNUSED(x) ((void)(x))
  ------------------
   76|  1.11M|    if(u4_tfr_n_mb)
  ------------------
  |  Branch (76:8): [True: 1.11M, False: 0]
  ------------------
   77|  1.11M|    {
   78|       |
   79|       |
   80|  1.11M|        u4_n_mb_start = (ps_dec->u4_cur_mb_addr + 1) - u4_num_mbs;
   81|       |
   82|       |        // copy into s_frmMbInfo
   83|       |
   84|  1.11M|        u4_mb_num = u4_n_mb_start;
   85|  1.11M|        u4_mb_num = (ps_dec->u4_cur_mb_addr + 1) - u4_num_mbs;
   86|       |
   87|  8.66M|        for(i = 0; i < u4_num_mbs; i++)
  ------------------
  |  Branch (87:20): [True: 7.54M, False: 1.11M]
  ------------------
   88|  7.54M|        {
   89|  7.54M|            UPDATE_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u4_mb_num,
  ------------------
  |  |  148|  7.54M|#define UPDATE_SLICE_NUM_MAP(slice_map, u4_mb_number,u2_slice_num)                                                  \
  |  |  149|  7.54M|{                                                                                                   \
  |  |  150|  7.54M|        volatile UWORD16 *pu2_slice_map;                                                               \
  |  |  151|  7.54M|                                                                                                    \
  |  |  152|  7.54M|        pu2_slice_map    = (UWORD16 *)slice_map + (u4_mb_number);                                         \
  |  |  153|  7.54M|        (*pu2_slice_map) = u2_slice_num;                                                              \
  |  |  154|  7.54M|}
  ------------------
   90|  7.54M|                                 ps_dec->u2_cur_slice_num);
   91|  7.54M|            DATA_SYNC();
  ------------------
  |  |  116|  7.54M|#define DATA_SYNC()  __sync_synchronize()
  ------------------
   92|  7.54M|            UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_dec_mb_map, u4_mb_num);
  ------------------
  |  |  136|  7.54M|#define UPDATE_MB_MAP_MBNUM_BYTE(mb_map, u4_mb_number)                                                  \
  |  |  137|  7.54M|{                                                                                                   \
  |  |  138|  7.54M|        volatile UWORD8 *pu1_mb_flag;                                                                       \
  |  |  139|  7.54M|                                                                                                    \
  |  |  140|  7.54M|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_number);                                                     \
  |  |  141|  7.54M|        /*                                                                                          \
  |  |  142|  7.54M|         * In case of MbAff, update the mb_map only if the entire MB is done. We can check that     \
  |  |  143|  7.54M|         * by checking if Y is odd, implying that this is the second row in the MbAff MB            \
  |  |  144|  7.54M|         */                                                                                         \
  |  |  145|  7.54M|        (*pu1_mb_flag) = 1;                                                             \
  |  |  146|  7.54M|}
  ------------------
   93|       |
   94|  7.54M|            u4_mb_num++;
   95|  7.54M|        }
   96|       |
   97|       |        /****************************************************************/
   98|       |        /* Check for End Of Row in Next iteration                       */
   99|       |        /****************************************************************/
  100|       |
  101|       |        /****************************************************************/
  102|       |        /* Transfer the Following things                                */
  103|       |        /* N-Mb DeblkParams Data    ( To Ext DeblkParams Buffer )       */
  104|       |        /* N-Mb Recon Data          ( To Ext Frame Buffer )             */
  105|       |        /* N-Mb Intrapredline Data  ( Updated Internally)               */
  106|       |        /* N-Mb MV Data             ( To Ext MV Buffer )                */
  107|       |        /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers)    */
  108|       |        /****************************************************************/
  109|       |
  110|       |        /* Swap top and current pointers */
  111|       |
  112|  1.11M|        ps_dec->s_tran_addrecon_parse.pu1_dest_y +=
  113|  1.11M|                        ps_dec->s_tran_addrecon_parse.u4_inc_y[u4_end_of_row];
  114|  1.11M|        ps_dec->s_tran_addrecon_parse.pu1_dest_u +=
  115|  1.11M|                        ps_dec->s_tran_addrecon_parse.u4_inc_uv[u4_end_of_row];
  116|  1.11M|        ps_dec->s_tran_addrecon_parse.pu1_dest_v +=
  117|  1.11M|                        ps_dec->s_tran_addrecon_parse.u4_inc_uv[u4_end_of_row];
  118|       |
  119|  1.11M|        if(u4_end_of_row)
  ------------------
  |  Branch (119:12): [True: 1.09M, False: 12.9k]
  ------------------
  120|  1.09M|        {
  121|  1.09M|            UWORD16 u2_mb_y;
  122|  1.09M|            UWORD32 u4_frame_stride, y_offset;
  123|       |
  124|  1.09M|            ps_dec->ps_top_mb_row = ps_dec->ps_cur_mb_row;
  125|  1.09M|            ps_dec->ps_cur_mb_row += ((ps_dec->u2_frm_wd_in_mbs) << u1_mbaff);
  126|       |
  127|  1.09M|            u2_mb_y = ps_dec->u2_mby + (1 + u1_mbaff);
  128|  1.09M|            u4_frame_stride = ps_dec->u2_frm_wd_y
  129|  1.09M|                            << ps_dec->ps_cur_slice->u1_field_pic_flag;
  130|  1.09M|            y_offset = (u2_mb_y * u4_frame_stride) << 4;
  131|  1.09M|            ps_dec->s_tran_addrecon_parse.pu1_dest_y =
  132|  1.09M|                            ps_dec->s_cur_pic.pu1_buf1 + y_offset;
  133|       |
  134|  1.09M|            u4_frame_stride = ps_dec->u2_frm_wd_uv
  135|  1.09M|                            << ps_dec->ps_cur_slice->u1_field_pic_flag;
  136|  1.09M|            y_offset = (u2_mb_y * u4_frame_stride) << 3;
  137|  1.09M|            ps_dec->s_tran_addrecon_parse.pu1_dest_u =
  138|  1.09M|                            ps_dec->s_cur_pic.pu1_buf2 + y_offset;
  139|  1.09M|            ps_dec->s_tran_addrecon_parse.pu1_dest_v =
  140|  1.09M|                            ps_dec->s_cur_pic.pu1_buf3 + y_offset;
  141|       |
  142|  1.09M|        }
  143|       |
  144|  1.11M|        ps_dec->ps_deblk_mbn += u4_num_mbs;
  145|       |
  146|       |        /*
  147|       |         * The Slice boundary is also a valid condition to transfer. So recalculate
  148|       |         * the Left increment, in case the number of MBs is lesser than the
  149|       |         * N MB value. c_numMbs will be equal to N of N MB if the entire N Mb is
  150|       |         * decoded.
  151|       |         */
  152|  1.11M|        ps_dec->s_tran_addrecon.u2_mv_left_inc = ((u4_num_mbs >> u1_mbaff) - 1)
  153|  1.11M|                        << (4 + u1_mbaff);
  154|  1.11M|        ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (u4_num_mbs << 2) - 1
  155|  1.11M|                        - (u1_mbaff << 2);
  156|       |
  157|       |        /* reassign left MV and cur MV pointers */
  158|  1.11M|        ps_dec->ps_mv_left = ps_dec->ps_mv_cur
  159|  1.11M|                        + ps_dec->s_tran_addrecon.u2_mv_left_inc;
  160|       |
  161|  1.11M|        ps_dec->ps_mv_cur += (u4_num_mbs << 4);
  162|  1.11M|        ps_dec->u4_num_mbs_prev_nmb = u4_num_mbs;
  163|       |
  164|  1.11M|    }
  165|  1.11M|}
ih264d_decode_tfr_nmb:
  171|  1.10M|{
  172|       |
  173|  1.10M|    UWORD32 u1_end_of_row_next;
  174|       |
  175|  1.10M|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  176|       |
  177|       |    /****************************************************************/
  178|       |    /* Check for End Of Row in Next iteration                       */
  179|       |    /****************************************************************/
  180|  1.10M|    u1_end_of_row_next = u4_num_mbs_next &&
  ------------------
  |  Branch (180:26): [True: 16.1k, False: 1.08M]
  ------------------
  181|  16.1k|                        ((u4_num_mbs_next) <= (ps_dec->u4_recon_mb_grp >> u1_mbaff));
  ------------------
  |  Branch (181:25): [True: 16.1k, False: 0]
  ------------------
  182|       |
  183|       |    /****************************************************************/
  184|       |    /* Transfer the Following things                                */
  185|       |    /* N-Mb DeblkParams Data    ( To Ext DeblkParams Buffer )       */
  186|       |    /* N-Mb Recon Data          ( To Ext Frame Buffer )             */
  187|       |    /* N-Mb Intrapredline Data  ( Updated Internally)               */
  188|       |    /* N-Mb MV Data             ( To Ext MV Buffer )                */
  189|       |    /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers)    */
  190|       |    /****************************************************************/
  191|  1.10M|    if(u4_end_of_row)
  ------------------
  |  Branch (191:8): [True: 1.08M, False: 16.1k]
  ------------------
  192|  1.08M|    {
  193|  1.08M|        ps_dec->i2_dec_thread_mb_y += (1 << u1_mbaff);
  194|  1.08M|    }
  195|  1.10M|    ih264d_transfer_mb_group_data(ps_dec, u4_num_mbs, u4_end_of_row,
  196|  1.10M|                                  u1_end_of_row_next);
  197|       |
  198|  1.10M|}
ih264d_decode_recon_tfr_nmb_thread:
  204|   913k|{
  205|   913k|    WORD32 i,j;
  206|   913k|    dec_mb_info_t * ps_cur_mb_info;
  207|   913k|    UWORD32 u4_update_mbaff = 0;
  208|   913k|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  209|   913k|    UWORD32 u1_slice_type, u1_B;
  210|   913k|    WORD32 u1_skip_th;
  211|   913k|    UWORD32 u1_ipcm_th;
  212|   913k|    UWORD32 u4_cond;
  213|   913k|    UWORD16 u2_slice_num,u2_cur_dec_mb_num;
  214|   913k|    WORD32 ret;
  215|   913k|    UWORD32 u4_mb_num;
  216|   913k|    WORD32 nop_cnt = 8*128;
  217|   913k|    u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type;
  218|       |
  219|   913k|    u1_B = (u1_slice_type == B_SLICE);
  ------------------
  |  |  369|   913k|#define B_SLICE  1
  ------------------
  220|       |
  221|   913k|    u1_skip_th = ((u1_slice_type != I_SLICE) ?
  ------------------
  |  |  370|   913k|#define I_SLICE  2
  ------------------
  |  Branch (221:19): [True: 906k, False: 6.90k]
  ------------------
  222|   906k|                                    (u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  480|  52.5k|#define B_8x8    22
  ------------------
                                                  (u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  454|   853k|#define PRED_8x8R0  4
  ------------------
  |  Branch (222:38): [True: 52.5k, False: 853k]
  ------------------
  223|       |
  224|   913k|    u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (u1_B ? 23 : 5) : 0);
  ------------------
  |  |  370|   913k|#define I_SLICE  2
  ------------------
  |  Branch (224:19): [True: 906k, False: 6.90k]
  |  Branch (224:49): [True: 52.5k, False: 853k]
  ------------------
  225|       |
  226|   913k|    u2_cur_dec_mb_num = ps_dec->cur_dec_mb_num;
  227|       |
  228|  5.00M|    while(1)
  ------------------
  |  Branch (228:11): [True: 5.00M, Folded]
  ------------------
  229|  5.00M|    {
  230|       |
  231|  5.00M|        UWORD32 u4_max_mb = (UWORD32)(ps_dec->i2_dec_thread_mb_y + (1 << u1_mbaff)) * ps_dec->u2_frm_wd_in_mbs - 1;
  232|  5.00M|        u4_mb_num = u2_cur_dec_mb_num;
  233|       |        /*introducing 1 MB delay*/
  234|  5.00M|        u4_mb_num = MIN(u4_mb_num + u4_num_mbs + 1, u4_max_mb);
  ------------------
  |  |   61|  5.00M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 5.00M]
  |  |  ------------------
  ------------------
  235|       |
  236|  5.00M|        CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond);
  ------------------
  |  |   80|  5.00M|#define CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond)                                               \
  |  |   81|  5.00M|{                                                                                                   \
  |  |   82|  5.00M|        volatile UWORD8 *pu1_mb_flag;                                                               \
  |  |   83|  5.00M|                                                                                                    \
  |  |   84|  5.00M|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_num );                                           \
  |  |   85|  5.00M|                                                                                                    \
  |  |   86|  5.00M|        u4_cond = (*pu1_mb_flag);                                                                   \
  |  |   87|  5.00M|}
  ------------------
  237|  5.00M|        if(u4_cond)
  ------------------
  |  Branch (237:12): [True: 913k, False: 4.08M]
  ------------------
  238|   913k|        {
  239|   913k|            break;
  240|   913k|        }
  241|  4.08M|        else
  242|  4.08M|        {
  243|  4.08M|            if(nop_cnt > 0)
  ------------------
  |  Branch (243:16): [True: 3.62M, False: 461k]
  ------------------
  244|  3.62M|            {
  245|  3.62M|                nop_cnt -= 128;
  246|  3.62M|                NOP(128);
  ------------------
  |  |   87|   467M|#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");}
  |  |  ------------------
  |  |  |  Branch (87:54): [True: 464M, False: 3.62M]
  |  |  ------------------
  ------------------
  247|  3.62M|            }
  248|   461k|            else
  249|   461k|            {
  250|   461k|                if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
  ------------------
  |  Branch (250:20): [True: 183k, False: 278k]
  |  Branch (250:49): [True: 183k, False: 0]
  ------------------
  251|   183k|                   (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
  ------------------
  |  Branch (251:20): [True: 70.6k, False: 112k]
  ------------------
  252|  70.6k|                {
  253|  70.6k|                    ps_dec->u4_fmt_conv_num_rows =
  254|  70.6k|                                MIN(FMT_CONV_NUM_ROWS,
  ------------------
  |  |   61|  70.6k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 68.4k, False: 2.18k]
  |  |  ------------------
  ------------------
  255|  70.6k|                                    (ps_dec->s_disp_frame_info.u4_y_ht
  256|  70.6k|                                                    - ps_dec->u4_fmt_conv_cur_row));
  257|  70.6k|                    ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
  258|  70.6k|                                          ps_dec->u4_fmt_conv_cur_row,
  259|  70.6k|                                          ps_dec->u4_fmt_conv_num_rows);
  260|  70.6k|                    ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
  261|  70.6k|                }
  262|   390k|                else
  263|   390k|                {
  264|   390k|                    nop_cnt = 8*128;
  265|   390k|                    ithread_yield();
  266|   390k|                }
  267|   461k|            }
  268|  4.08M|        }
  269|  5.00M|    }
  270|       |    /* N Mb MC Loop */
  271|  7.79M|    for(i = 0; i < u4_num_mbs; i++)
  ------------------
  |  Branch (271:16): [True: 6.88M, False: 904k]
  ------------------
  272|  6.88M|    {
  273|  6.88M|        u4_mb_num = u2_cur_dec_mb_num;
  274|       |
  275|  6.88M|        GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u2_cur_dec_mb_num,
  ------------------
  |  |  156|  6.88M|#define GET_SLICE_NUM_MAP(slice_map, mb_number,u2_slice_num)                                                  \
  |  |  157|  6.88M|{                                                                                                   \
  |  |  158|  6.88M|        volatile UWORD16 *pu2_slice_map;                                                               \
  |  |  159|  6.88M|                                                                                                    \
  |  |  160|  6.88M|        pu2_slice_map    = (UWORD16 *)slice_map + (mb_number);                                         \
  |  |  161|  6.88M|        u2_slice_num = (*pu2_slice_map) ;                                                               \
  |  |  162|  6.88M|}
  ------------------
  276|  6.88M|                          u2_slice_num);
  277|       |
  278|  6.88M|        if(u2_slice_num != ps_dec->u2_cur_slice_num_dec_thread)
  ------------------
  |  Branch (278:12): [True: 8.63k, False: 6.88M]
  ------------------
  279|  8.63k|        {
  280|  8.63k|            ps_dec->u4_cur_slice_decode_done = 1;
  281|  8.63k|            break;
  282|  8.63k|        }
  283|       |
  284|  6.88M|        ps_cur_mb_info = &ps_dec->ps_frm_mb_info[u2_cur_dec_mb_num];
  285|       |
  286|  6.88M|        ps_dec->u4_dma_buf_idx = 0;
  287|  6.88M|        ps_dec->u4_pred_info_idx = 0;
  288|       |
  289|  6.88M|        if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (289:12): [True: 132k, False: 6.74M]
  ------------------
  290|   132k|        {
  291|   132k|            WORD32 pred_cnt = 0;
  292|   132k|            pred_info_pkd_t *ps_pred_pkd;
  293|   132k|            UWORD32 u4_pred_info_pkd_idx;
  294|   132k|            WORD8 i1_pred;
  295|       |
  296|   132k|            u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
  297|       |
  298|   490k|            while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (298:19): [True: 357k, False: 132k]
  ------------------
  299|   357k|            {
  300|   357k|                ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
  301|       |
  302|   357k|                ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec,
  303|   357k|                                                   ps_cur_mb_info->u2_mbx,
  304|   357k|                                                   ps_cur_mb_info->u2_mby,
  305|   357k|                                                   (i >> u1_mbaff),
  306|   357k|                                                   ps_cur_mb_info);
  307|       |
  308|   357k|                u4_pred_info_pkd_idx++;
  309|   357k|                pred_cnt++;
  310|   357k|            }
  311|   132k|            ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info);
  312|   132k|        }
  313|  6.74M|        else if(ps_cur_mb_info->u1_mb_type == MB_SKIP)
  ------------------
  |  |  456|  6.74M|#define MB_SKIP     255
  ------------------
  |  Branch (313:17): [True: 6.70M, False: 46.2k]
  ------------------
  314|  6.70M|        {
  315|  6.70M|            WORD32 pred_cnt = 0;
  316|  6.70M|            pred_info_pkd_t *ps_pred_pkd;
  317|  6.70M|            UWORD32 u4_pred_info_pkd_idx;
  318|  6.70M|            WORD8 i1_pred;
  319|       |
  320|  6.70M|            u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
  321|       |
  322|  13.8M|            while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (322:19): [True: 7.17M, False: 6.70M]
  ------------------
  323|  7.17M|            {
  324|  7.17M|                ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
  325|       |
  326|  7.17M|                ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec,
  327|  7.17M|                                                   ps_cur_mb_info->u2_mbx,
  328|  7.17M|                                                   ps_cur_mb_info->u2_mby,
  329|  7.17M|                                                   (i >> u1_mbaff),
  330|  7.17M|                                                   ps_cur_mb_info);
  331|       |
  332|  7.17M|                u4_pred_info_pkd_idx++;
  333|  7.17M|                pred_cnt++;
  334|  7.17M|            }
  335|       |            /* Decode MB skip */
  336|  6.70M|            ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info);
  337|  6.70M|        }
  338|       |
  339|  6.88M|        u2_cur_dec_mb_num++;
  340|  6.88M|    }
  341|       |
  342|       |    /* N Mb IQ IT RECON  Loop */
  343|  7.79M|    for(j = 0; j < i; j++)
  ------------------
  |  Branch (343:16): [True: 6.88M, False: 913k]
  ------------------
  344|  6.88M|    {
  345|  6.88M|        ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->cur_dec_mb_num];
  346|       |
  347|  6.88M|        if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag)
  ------------------
  |  Branch (347:12): [True: 6.88M, False: 0]
  |  Branch (347:43): [True: 0, False: 0]
  ------------------
  348|  6.88M|        {
  349|  6.88M|            if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (349:16): [True: 132k, False: 6.74M]
  ------------------
  350|   132k|            {
  351|   132k|                ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j);
  352|   132k|            }
  353|  6.74M|            else if(ps_cur_mb_info->u1_mb_type != MB_SKIP)
  ------------------
  |  |  456|  6.74M|#define MB_SKIP     255
  ------------------
  |  Branch (353:21): [True: 46.2k, False: 6.70M]
  ------------------
  354|  46.2k|            {
  355|  46.2k|                if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type)
  ------------------
  |  Branch (355:20): [True: 45.9k, False: 331]
  ------------------
  356|  45.9k|                {
  357|  45.9k|                    ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1);
  358|  45.9k|                    ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j);
  359|  45.9k|                }
  360|  46.2k|            }
  361|       |
  362|       |
  363|  6.88M|         if(ps_dec->u4_use_intrapred_line_copy == 1)
  ------------------
  |  Branch (363:13): [True: 6.88M, False: 0]
  ------------------
  364|  6.88M|                ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j);
  365|  6.88M|        }
  366|       |
  367|  6.88M|        DATA_SYNC();
  ------------------
  |  |  116|  6.88M|#define DATA_SYNC()  __sync_synchronize()
  ------------------
  368|       |
  369|  6.88M|        if(u1_mbaff)
  ------------------
  |  Branch (369:12): [True: 0, False: 6.88M]
  ------------------
  370|      0|        {
  371|      0|            if(u4_update_mbaff)
  ------------------
  |  Branch (371:16): [True: 0, False: 0]
  ------------------
  372|      0|            {
  373|      0|                UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx
  374|      0|                                + ps_dec->u2_frm_wd_in_mbs
  375|      0|                                                * (ps_cur_mb_info->u2_mby >> 1);
  376|      0|                UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num);
  ------------------
  |  |  136|      0|#define UPDATE_MB_MAP_MBNUM_BYTE(mb_map, u4_mb_number)                                                  \
  |  |  137|      0|{                                                                                                   \
  |  |  138|      0|        volatile UWORD8 *pu1_mb_flag;                                                                       \
  |  |  139|      0|                                                                                                    \
  |  |  140|      0|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_number);                                                     \
  |  |  141|      0|        /*                                                                                          \
  |  |  142|      0|         * In case of MbAff, update the mb_map only if the entire MB is done. We can check that     \
  |  |  143|      0|         * by checking if Y is odd, implying that this is the second row in the MbAff MB            \
  |  |  144|      0|         */                                                                                         \
  |  |  145|      0|        (*pu1_mb_flag) = 1;                                                             \
  |  |  146|      0|}
  ------------------
  377|      0|                u4_update_mbaff = 0;
  378|      0|            }
  379|      0|            else
  380|      0|            {
  381|      0|                u4_update_mbaff = 1;
  382|      0|            }
  383|      0|        }
  384|  6.88M|        else
  385|  6.88M|        {
  386|  6.88M|            UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx
  387|  6.88M|                            + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby;
  388|  6.88M|            UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num);
  ------------------
  |  |  136|  6.88M|#define UPDATE_MB_MAP_MBNUM_BYTE(mb_map, u4_mb_number)                                                  \
  |  |  137|  6.88M|{                                                                                                   \
  |  |  138|  6.88M|        volatile UWORD8 *pu1_mb_flag;                                                                       \
  |  |  139|  6.88M|                                                                                                    \
  |  |  140|  6.88M|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_number);                                                     \
  |  |  141|  6.88M|        /*                                                                                          \
  |  |  142|  6.88M|         * In case of MbAff, update the mb_map only if the entire MB is done. We can check that     \
  |  |  143|  6.88M|         * by checking if Y is odd, implying that this is the second row in the MbAff MB            \
  |  |  144|  6.88M|         */                                                                                         \
  |  |  145|  6.88M|        (*pu1_mb_flag) = 1;                                                             \
  |  |  146|  6.88M|}
  ------------------
  389|  6.88M|        }
  390|  6.88M|        ps_dec->cur_dec_mb_num++;
  391|  6.88M|     }
  392|       |
  393|       |    /*N MB deblocking*/
  394|   913k|    if(ps_dec->u4_nmb_deblk == 1)
  ------------------
  |  Branch (394:8): [True: 0, False: 913k]
  ------------------
  395|      0|    {
  396|      0|        UWORD32 u4_wd_y, u4_wd_uv;
  397|      0|        tfr_ctxt_t *ps_tfr_cxt = &(ps_dec->s_tran_addrecon);
  398|      0|        UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
  399|      0|        const WORD32 i4_cb_qp_idx_ofst =
  400|      0|                       ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
  401|      0|        const WORD32 i4_cr_qp_idx_ofst =
  402|      0|                       ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
  403|       |
  404|      0|        u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
  405|      0|        u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
  406|       |
  407|      0|        ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->u4_cur_deblk_mb_num];
  408|       |
  409|      0|        ps_dec->u4_deblk_mb_x = ps_cur_mb_info->u2_mbx;
  410|      0|        ps_dec->u4_deblk_mb_y = ps_cur_mb_info->u2_mby;
  411|       |
  412|       |
  413|      0|        for(j = 0; j < i; j++)
  ------------------
  |  Branch (413:20): [True: 0, False: 0]
  ------------------
  414|      0|        {
  415|      0|            ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt,
  416|      0|                                       i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
  417|      0|                                        u4_wd_y, u4_wd_uv);
  418|       |
  419|      0|        }
  420|      0|    }
  421|       |
  422|       |    /*handle the last mb in picture case*/
  423|   913k|    if(ps_dec->cur_dec_mb_num > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (423:8): [True: 29.5k, False: 883k]
  ------------------
  424|  29.5k|        ps_dec->u4_cur_slice_decode_done = 1;
  425|       |
  426|   913k|    if(i != u4_num_mbs)
  ------------------
  |  Branch (426:8): [True: 8.63k, False: 904k]
  ------------------
  427|  8.63k|    {
  428|  8.63k|        u4_end_of_row = 0;
  429|       |        /*Number of MB's left in row*/
  430|  8.63k|        u4_num_mbs_next = u4_num_mbs_next + ((u4_num_mbs - i) >> u1_mbaff);
  431|  8.63k|    }
  432|       |
  433|   913k|    ih264d_decode_tfr_nmb(ps_dec, (i), u4_num_mbs_next, u4_end_of_row);
  434|       |
  435|   913k|    return OK;
  ------------------
  |  |  114|   913k|#define OK        0
  ------------------
  436|   913k|}
ih264d_decode_slice_thread:
  439|  38.1k|{
  440|  38.1k|    UWORD32 u4_num_mbs_next, u4_num_mbsleft, u4_end_of_row = 0;
  441|  38.1k|    const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
  442|  38.1k|    UWORD32 u4_mbaff, u4_num_mbs;
  443|       |
  444|  38.1k|    UWORD16 u2_first_mb_in_slice;
  445|  38.1k|    UWORD16 i16_mb_x, i16_mb_y;
  446|  38.1k|    UWORD8 u1_field_pic;
  447|  38.1k|    UWORD32 u4_frame_stride, x_offset, y_offset;
  448|  38.1k|    WORD32 ret;
  449|       |
  450|  38.1k|    tfr_ctxt_t *ps_trns_addr;
  451|       |
  452|       |    /*check for mb map of first mb in slice to ensure slice header is parsed*/
  453|   701k|    while(1)
  ------------------
  |  Branch (453:11): [True: 701k, Folded]
  ------------------
  454|   701k|    {
  455|   701k|        UWORD32 u4_mb_num = ps_dec->cur_dec_mb_num;
  456|   701k|        UWORD32 u4_cond = 0;
  457|   701k|        WORD32 nop_cnt = 8 * 128;
  458|   701k|        CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond);
  ------------------
  |  |   80|   701k|#define CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond)                                               \
  |  |   81|   701k|{                                                                                                   \
  |  |   82|   701k|        volatile UWORD8 *pu1_mb_flag;                                                               \
  |  |   83|   701k|                                                                                                    \
  |  |   84|   701k|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_num );                                           \
  |  |   85|   701k|                                                                                                    \
  |  |   86|   701k|        u4_cond = (*pu1_mb_flag);                                                                   \
  |  |   87|   701k|}
  ------------------
  459|   701k|        if(u4_cond)
  ------------------
  |  Branch (459:12): [True: 38.1k, False: 662k]
  ------------------
  460|  38.1k|        {
  461|  38.1k|            break;
  462|  38.1k|        }
  463|   662k|        else
  464|   662k|        {
  465|   662k|            if(nop_cnt > 0)
  ------------------
  |  Branch (465:16): [True: 662k, False: 0]
  ------------------
  466|   662k|            {
  467|   662k|                nop_cnt -= 128;
  468|   662k|                NOP(128);
  ------------------
  |  |   87|  85.5M|#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");}
  |  |  ------------------
  |  |  |  Branch (87:54): [True: 84.8M, False: 662k]
  |  |  ------------------
  ------------------
  469|   662k|            }
  470|      0|            else if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
  ------------------
  |  Branch (470:21): [True: 0, False: 0]
  |  Branch (470:50): [True: 0, False: 0]
  ------------------
  471|      0|               (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
  ------------------
  |  Branch (471:16): [True: 0, False: 0]
  ------------------
  472|      0|            {
  473|      0|                ps_dec->u4_fmt_conv_num_rows =
  474|      0|                                MIN(FMT_CONV_NUM_ROWS,
  ------------------
  |  |   61|      0|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  475|      0|                                    (ps_dec->s_disp_frame_info.u4_y_ht
  476|      0|                                                    - ps_dec->u4_fmt_conv_cur_row));
  477|      0|                ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
  478|      0|                                      ps_dec->u4_fmt_conv_cur_row,
  479|      0|                                      ps_dec->u4_fmt_conv_num_rows);
  480|      0|                ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
  481|      0|            }
  482|      0|            else
  483|      0|            {
  484|      0|                nop_cnt = 8*128;
  485|      0|                ithread_yield();
  486|      0|            }
  487|   662k|            DEBUG_THREADS_PRINTF("waiting for mb mapcur_dec_mb_num = %d,ps_dec->u4_cur_mb_addr  = %d\n",u2_cur_dec_mb_num,
  488|   662k|                            ps_dec->u4_cur_mb_addr);
  489|       |
  490|   662k|        }
  491|   701k|    }
  492|       |
  493|       |
  494|       |
  495|  38.1k|    u4_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  496|       |
  497|  38.1k|    u2_first_mb_in_slice = ps_dec->ps_decode_cur_slice->u4_first_mb_in_slice;
  498|       |
  499|  38.1k|    i16_mb_x = MOD(u2_first_mb_in_slice, i2_pic_wdin_mbs);
  ------------------
  |  |   64|  38.1k|#define MOD(x,y) ((x)%(y))
  ------------------
  500|  38.1k|    i16_mb_y = DIV(u2_first_mb_in_slice, i2_pic_wdin_mbs);
  ------------------
  |  |   65|  38.1k|#define DIV(x,y) ((x)/(y))
  ------------------
  501|  38.1k|    i16_mb_y <<= u4_mbaff;
  502|  38.1k|    ps_dec->i2_dec_thread_mb_y = i16_mb_y;
  503|       |
  504|       |
  505|  38.1k|    ps_dec->cur_dec_mb_num = u2_first_mb_in_slice << u4_mbaff;
  506|       |
  507|  38.1k|    if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag)
  ------------------
  |  Branch (507:8): [True: 38.1k, False: 0]
  |  Branch (507:39): [True: 0, False: 0]
  ------------------
  508|  38.1k|    {
  509|  38.1k|        ps_dec->pv_proc_tu_coeff_data =
  510|  38.1k|                (void *) ps_dec->ps_decode_cur_slice->pv_tu_coeff_data_start;
  511|  38.1k|    }
  512|       |
  513|       |    // recalculate recon pointers
  514|  38.1k|    u1_field_pic = ps_dec->ps_cur_slice->u1_field_pic_flag;
  515|  38.1k|    u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic;
  516|  38.1k|    x_offset = i16_mb_x << 4;
  517|  38.1k|    y_offset = (i16_mb_y * u4_frame_stride) << 4;
  518|       |
  519|  38.1k|    ps_trns_addr = &(ps_dec->s_tran_addrecon);
  520|       |
  521|  38.1k|    ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset;
  522|       |
  523|  38.1k|    u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic;
  524|  38.1k|    x_offset >>= 1;
  525|  38.1k|    y_offset = (i16_mb_y * u4_frame_stride) << 3;
  526|       |
  527|  38.1k|    x_offset *= YUV420SP_FACTOR;
  ------------------
  |  |  119|  38.1k|#define YUV420SP_FACTOR 2
  ------------------
  528|       |
  529|  38.1k|    ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset;
  530|  38.1k|    ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset;
  531|       |
  532|  38.1k|    ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
  533|  38.1k|    ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
  534|  38.1k|    ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
  535|       |
  536|       |
  537|       |    /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */
  538|  38.1k|    {
  539|  38.1k|        ps_dec->p_mc_dec_thread = ih264d_motion_compensate_bp;
  540|  38.1k|        ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_bp;
  541|  38.1k|    }
  542|  38.1k|    {
  543|  38.1k|        UWORD8 uc_nofield_nombaff;
  544|  38.1k|        uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0)
  ------------------
  |  Branch (544:31): [True: 38.1k, False: 0]
  ------------------
  545|  38.1k|                        && (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0)
  ------------------
  |  Branch (545:28): [True: 38.1k, False: 0]
  ------------------
  546|  38.1k|                        && (ps_dec->ps_decode_cur_slice->slice_type != B_SLICE)
  ------------------
  |  |  369|  38.1k|#define B_SLICE  1
  ------------------
  |  Branch (546:28): [True: 31.4k, False: 6.72k]
  ------------------
  547|  31.4k|                        && (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0));
  ------------------
  |  Branch (547:28): [True: 11.9k, False: 19.5k]
  ------------------
  548|       |
  549|  38.1k|        if(uc_nofield_nombaff == 0)
  ------------------
  |  Branch (549:12): [True: 26.2k, False: 11.9k]
  ------------------
  550|  26.2k|        {
  551|  26.2k|            ps_dec->p_mc_dec_thread = ih264d_motion_compensate_mp;
  552|  26.2k|            ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_mp;
  553|  26.2k|        }
  554|       |
  555|  38.1k|    }
  556|       |
  557|  38.1k|    ps_dec->u4_cur_slice_decode_done = 0;
  558|       |
  559|       |
  560|   951k|    while(ps_dec->u4_cur_slice_decode_done != 1)
  ------------------
  |  Branch (560:11): [True: 913k, False: 38.1k]
  ------------------
  561|   913k|    {
  562|       |
  563|   913k|        u4_num_mbsleft = ((i2_pic_wdin_mbs - i16_mb_x) << u4_mbaff);
  564|       |
  565|   913k|        if(u4_num_mbsleft <= ps_dec->u4_recon_mb_grp)
  ------------------
  |  Branch (565:12): [True: 913k, False: 0]
  ------------------
  566|   913k|        {
  567|   913k|            u4_num_mbs = u4_num_mbsleft;
  568|       |
  569|       |            /*Indicate number of mb's left in a row*/
  570|   913k|            u4_num_mbs_next = 0;
  571|   913k|            u4_end_of_row = 1;
  572|   913k|            i16_mb_x = 0;
  573|   913k|        }
  574|      0|        else
  575|      0|        {
  576|      0|            u4_num_mbs = ps_dec->u4_recon_mb_grp;
  577|       |
  578|       |            /*Indicate number of mb's left in a row*/
  579|      0|            u4_num_mbs_next = i2_pic_wdin_mbs - i16_mb_x
  580|      0|                            - (ps_dec->u4_recon_mb_grp >> u4_mbaff);
  581|      0|            i16_mb_x += (u4_num_mbs >> u4_mbaff);
  582|      0|            u4_end_of_row = 0;
  583|       |
  584|      0|        }
  585|   913k|        ret = ih264d_decode_recon_tfr_nmb_thread(ps_dec, u4_num_mbs, u4_num_mbs_next,
  586|   913k|                                           u4_end_of_row);
  587|   913k|        if(ret != OK)
  ------------------
  |  |  114|   913k|#define OK        0
  ------------------
  |  Branch (587:12): [True: 0, False: 913k]
  ------------------
  588|      0|            return ret;
  589|   913k|    }
  590|  38.1k|    return OK;
  ------------------
  |  |  114|  38.1k|#define OK        0
  ------------------
  591|  38.1k|}
ih264d_decode_picture_thread:
  594|  29.5k|{
  595|  29.5k|    ithread_set_name("ih264d_decode_picture_thread");
  596|       |
  597|  29.5k|    while(1)
  ------------------
  |  Branch (597:11): [True: 29.5k, Folded]
  ------------------
  598|  29.5k|    {
  599|  29.5k|        WORD32 ret;
  600|  29.5k|        if(ps_dec->i4_threads_active)
  ------------------
  |  Branch (600:12): [True: 0, False: 29.5k]
  ------------------
  601|      0|        {
  602|      0|            ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
  603|      0|            if(OK != ret)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (603:16): [True: 0, False: 0]
  ------------------
  604|      0|                break;
  605|       |
  606|      0|            while(ps_dec->ai4_process_start[0] != PROC_START)
  ------------------
  |  Branch (606:19): [True: 0, False: 0]
  ------------------
  607|      0|            {
  608|      0|                ithread_cond_wait(ps_dec->apv_proc_start_condition[0],
  609|      0|                                  ps_dec->apv_proc_start_mutex[0]);
  610|      0|            }
  611|      0|            ps_dec->ai4_process_start[0] = PROC_IN_PROGRESS;
  612|       |
  613|      0|            ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
  614|      0|            if(OK != ret || ps_dec->i4_break_threads == 1)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (614:16): [True: 0, False: 0]
  |  Branch (614:29): [True: 0, False: 0]
  ------------------
  615|      0|                break;
  616|      0|        }
  617|  38.1k|        while(1)
  ------------------
  |  Branch (617:15): [True: 38.1k, Folded]
  ------------------
  618|  38.1k|        {
  619|       |            /*Complete all writes before processing next slice*/
  620|       |
  621|  38.1k|            DEBUG_THREADS_PRINTF(" Entering decode slice\n");
  622|       |
  623|  38.1k|            ih264d_decode_slice_thread(ps_dec);
  624|  38.1k|            DEBUG_THREADS_PRINTF(" Exit  ih264d_decode_slice_thread \n");
  625|       |
  626|       |
  627|  38.1k|            if(ps_dec->cur_dec_mb_num
  ------------------
  |  Branch (627:16): [True: 29.5k, False: 8.63k]
  ------------------
  628|  38.1k|                            > ps_dec->ps_cur_sps->u4_max_mb_addr)
  629|  29.5k|            {
  630|       |                /*Last slice in frame*/
  631|  29.5k|                break;
  632|  29.5k|            }
  633|  8.63k|            else
  634|  8.63k|            {
  635|  8.63k|                ps_dec->ps_decode_cur_slice++;
  636|  8.63k|                ps_dec->u2_cur_slice_num_dec_thread++;
  637|  8.63k|            }
  638|       |
  639|  38.1k|        }
  640|  29.5k|        if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
  ------------------
  |  Branch (640:12): [True: 6.00k, False: 23.5k]
  |  Branch (640:41): [True: 6.00k, False: 0]
  ------------------
  641|  6.00k|            (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
  ------------------
  |  Branch (641:13): [True: 3.81k, False: 2.18k]
  ------------------
  642|  3.81k|        {
  643|  3.81k|            ps_dec->u4_fmt_conv_num_rows =
  644|  3.81k|                            (ps_dec->s_disp_frame_info.u4_y_ht
  645|  3.81k|                                            - ps_dec->u4_fmt_conv_cur_row);
  646|  3.81k|            ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
  647|  3.81k|                                ps_dec->u4_fmt_conv_cur_row,
  648|  3.81k|                                ps_dec->u4_fmt_conv_num_rows);
  649|  3.81k|            ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
  650|  3.81k|        }
  651|       |
  652|  29.5k|        if(ps_dec->i4_threads_active)
  ------------------
  |  Branch (652:12): [True: 0, False: 29.5k]
  ------------------
  653|      0|        {
  654|      0|            ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]);
  655|      0|            if(OK != ret)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (655:16): [True: 0, False: 0]
  ------------------
  656|      0|                break;
  657|       |
  658|      0|            ps_dec->ai4_process_done[0] = PROC_DONE;
  659|      0|            ithread_cond_signal(ps_dec->apv_proc_done_condition[0]);
  660|       |
  661|      0|            ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]);
  662|      0|            if(OK != ret)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (662:16): [True: 0, False: 0]
  ------------------
  663|      0|                break;
  664|      0|        }
  665|  29.5k|        else
  666|  29.5k|        {
  667|  29.5k|            break;
  668|  29.5k|        }
  669|  29.5k|    }
  670|  29.5k|}
ih264d_signal_decode_thread:
  673|  82.6k|{
  674|  82.6k|    if(ps_dec->u4_dec_thread_created == 1)
  ------------------
  |  Branch (674:8): [True: 50.8k, False: 31.7k]
  ------------------
  675|  50.8k|    {
  676|  50.8k|        if(ps_dec->i4_threads_active)
  ------------------
  |  Branch (676:12): [True: 0, False: 50.8k]
  ------------------
  677|      0|        {
  678|      0|            proc_state_t i4_process_state;
  679|      0|            ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
  680|      0|            i4_process_state = ps_dec->ai4_process_start[0];
  681|      0|            ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
  682|       |
  683|       |            // only wait if the thread has started decoding
  684|      0|            if(i4_process_state != PROC_INIT)
  ------------------
  |  Branch (684:16): [True: 0, False: 0]
  ------------------
  685|      0|            {
  686|      0|                ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]);
  687|       |
  688|      0|                while(ps_dec->ai4_process_done[0] != PROC_DONE)
  ------------------
  |  Branch (688:23): [True: 0, False: 0]
  ------------------
  689|      0|                {
  690|      0|                    ithread_cond_wait(ps_dec->apv_proc_done_condition[0],
  691|      0|                                        ps_dec->apv_proc_done_mutex[0]);
  692|      0|                }
  693|      0|                ps_dec->ai4_process_done[0] = PROC_INIT;
  694|      0|                ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]);
  695|      0|            }
  696|      0|        }
  697|  50.8k|        else
  698|  50.8k|        {
  699|       |            ithread_join(ps_dec->pv_dec_thread_handle, NULL);
  700|  50.8k|            ps_dec->u4_dec_thread_created = 0;
  701|  50.8k|        }
  702|  50.8k|    }
  703|  82.6k|}

ih264d_is_end_of_pic:
   88|  4.09k|{
   89|  4.09k|    WORD8 i1_is_end_of_pic;
   90|  4.09k|    WORD8 a, b, c, d, e, f, g, h;
   91|       |
   92|  4.09k|    a = b = c = d = e = f = g = h = 0;
   93|  4.09k|    a = (ps_prev_slice->u2_frame_num != u2_frame_num);
   94|  4.09k|    b = (ps_prev_slice->u1_field_pic_flag != u1_field_pic_flag);
   95|  4.09k|    if(u1_field_pic_flag && ps_prev_slice->u1_field_pic_flag)
  ------------------
  |  Branch (95:8): [True: 0, False: 4.09k]
  |  Branch (95:29): [True: 0, False: 0]
  ------------------
   96|      0|        c = (u1_bottom_field_flag != ps_prev_slice->u1_bottom_field_flag);
   97|  4.09k|    d =
   98|  4.09k|                    (u1_nal_ref_idc == 0 && ps_prev_slice->u1_nal_ref_idc != 0)
  ------------------
  |  Branch (98:22): [True: 1.61k, False: 2.48k]
  |  Branch (98:45): [True: 0, False: 1.61k]
  ------------------
   99|  4.09k|                                    || (u1_nal_ref_idc != 0
  ------------------
  |  Branch (99:41): [True: 2.48k, False: 1.61k]
  ------------------
  100|  2.48k|                                                    && ps_prev_slice->u1_nal_ref_idc
  ------------------
  |  Branch (100:56): [True: 0, False: 2.48k]
  ------------------
  101|  2.48k|                                                                    == 0);
  102|  4.09k|    if(!a)
  ------------------
  |  Branch (102:8): [True: 3.98k, False: 110]
  ------------------
  103|  3.98k|    {
  104|  3.98k|        if((u1_pic_order_cnt_type == 0)
  ------------------
  |  Branch (104:12): [True: 3.35k, False: 630]
  ------------------
  105|  3.35k|                        && (ps_prev_slice->u1_pic_order_cnt_type == 0))
  ------------------
  |  Branch (105:28): [True: 3.35k, False: 0]
  ------------------
  106|  3.35k|        {
  107|  3.35k|            e =
  108|  3.35k|                            ((ps_cur_poc->i4_pic_order_cnt_lsb
  ------------------
  |  Branch (108:30): [True: 67, False: 3.28k]
  ------------------
  109|  3.35k|                                            != ps_prev_poc->i4_pic_order_cnt_lsb)
  110|  3.28k|                                            || (ps_cur_poc->i4_delta_pic_order_cnt_bottom
  ------------------
  |  Branch (110:48): [True: 0, False: 3.28k]
  ------------------
  111|  3.28k|                                                            != ps_prev_poc->i4_delta_pic_order_cnt_bottom));
  112|  3.35k|        }
  113|       |
  114|  3.98k|        if((u1_pic_order_cnt_type == 1)
  ------------------
  |  Branch (114:12): [True: 371, False: 3.61k]
  ------------------
  115|    371|                        && (ps_prev_slice->u1_pic_order_cnt_type == 1))
  ------------------
  |  Branch (115:28): [True: 371, False: 0]
  ------------------
  116|    371|        {
  117|    371|            f =
  118|    371|                            ((ps_cur_poc->i4_delta_pic_order_cnt[0]
  ------------------
  |  Branch (118:30): [True: 125, False: 246]
  ------------------
  119|    371|                                            != ps_prev_poc->i4_delta_pic_order_cnt[0])
  120|    246|                                            || (ps_cur_poc->i4_delta_pic_order_cnt[1]
  ------------------
  |  Branch (120:48): [True: 70, False: 176]
  ------------------
  121|    246|                                                            != ps_prev_poc->i4_delta_pic_order_cnt[1]));
  122|    371|        }
  123|  3.98k|    }
  124|       |
  125|  4.09k|    if((u1_nal_unit_type == IDR_SLICE_NAL)
  ------------------
  |  |  328|  4.09k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (125:8): [True: 1.97k, False: 2.12k]
  ------------------
  126|  1.97k|                    && (ps_prev_slice->u1_nal_unit_type == IDR_SLICE_NAL))
  ------------------
  |  |  328|  1.97k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (126:24): [True: 1.97k, False: 0]
  ------------------
  127|  1.97k|    {
  128|  1.97k|        g = (u4_idr_pic_id != ps_prev_slice->u4_idr_pic_id);
  129|  1.97k|    }
  130|       |
  131|  4.09k|    if((u1_nal_unit_type == IDR_SLICE_NAL)
  ------------------
  |  |  328|  4.09k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (131:8): [True: 1.97k, False: 2.12k]
  ------------------
  132|  1.97k|                    && (ps_prev_slice->u1_nal_unit_type != IDR_SLICE_NAL))
  ------------------
  |  |  328|  1.97k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (132:24): [True: 0, False: 1.97k]
  ------------------
  133|      0|    {
  134|      0|        h = 1;
  135|      0|    }
  136|  4.09k|    i1_is_end_of_pic = a + b + c + d + e + f + g + h;
  137|  4.09k|    return (i1_is_end_of_pic);
  138|  4.09k|}
ih264d_end_of_pic_processing:
  531|   126k|{
  532|   126k|    UWORD8 u1_pic_type, u1_nal_ref_idc;
  533|   126k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
  534|       |
  535|       |    /* If nal_ref_idc is equal to 0 for one slice or slice data partition NAL
  536|       |     unit of a particular picture, it shall be equal to 0 for all slice and
  537|       |     slice data partition NAL units of the picture. nal_ref_idc greater
  538|       |     than 0 indicates that the content of the NAL unit belongs to a decoded
  539|       |     picture that is stored and marked for use as a reference picture in the
  540|       |     decoded picture buffer. */
  541|       |
  542|       |    /* 1. Do MMCO
  543|       |     2. Add Cur Pic to list of reference pics.
  544|       |     */
  545|       |
  546|       |    /* Call MMCO */
  547|   126k|    u1_pic_type = 0;
  548|   126k|    u1_nal_ref_idc = ps_cur_slice->u1_nal_ref_idc;
  549|       |
  550|   126k|    if(u1_nal_ref_idc)
  ------------------
  |  Branch (550:8): [True: 109k, False: 16.6k]
  ------------------
  551|   109k|    {
  552|   109k|        if(ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL)
  ------------------
  |  |  328|   109k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (552:12): [True: 97.7k, False: 11.9k]
  ------------------
  553|  97.7k|        {
  554|  97.7k|            ps_dec->ps_dpb_mgr->u1_mmco_error_in_seq = 0;
  555|  97.7k|            if(ps_dec->ps_dpb_cmds->u1_long_term_reference_flag == 0)
  ------------------
  |  Branch (555:16): [True: 84.0k, False: 13.6k]
  ------------------
  556|  84.0k|            {
  557|  84.0k|                ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
  558|       |                /* ignore DPB errors */
  559|  84.0k|                ih264d_insert_st_node(ps_dec->ps_dpb_mgr,
  560|  84.0k|                                      ps_dec->ps_cur_pic,
  561|  84.0k|                                      ps_dec->u1_pic_buf_id,
  562|  84.0k|                                      ps_cur_slice->u2_frame_num);
  563|  84.0k|                ps_dec->ps_dpb_mgr->u1_max_lt_frame_idx = NO_LONG_TERM_INDICIES;
  ------------------
  |  |   53|  84.0k|#define NO_LONG_TERM_INDICIES      255
  ------------------
  564|  84.0k|            }
  565|  13.6k|            else
  566|  13.6k|            {
  567|       |                /* Equivalent of inserting a pic directly as longterm Pic */
  568|       |
  569|  13.6k|                {
  570|       |                    /* ignore DPB errors */
  571|  13.6k|                    ih264d_insert_st_node(ps_dec->ps_dpb_mgr,
  572|  13.6k|                                          ps_dec->ps_cur_pic,
  573|  13.6k|                                          ps_dec->u1_pic_buf_id,
  574|  13.6k|                                          ps_cur_slice->u2_frame_num);
  575|       |
  576|       |                    /* Set longTermIdx = 0, MaxLongTermFrameIdx = 0 */
  577|  13.6k|                    ih264d_delete_st_node_or_make_lt(
  578|  13.6k|                                    ps_dec->ps_dpb_mgr,
  579|  13.6k|                                    ps_cur_slice->u2_frame_num, 0,
  580|  13.6k|                                    ps_cur_slice->u1_field_pic_flag);
  581|       |
  582|  13.6k|                    ps_dec->ps_dpb_mgr->u1_max_lt_frame_idx = 0;
  583|  13.6k|                }
  584|  13.6k|            }
  585|  97.7k|        }
  586|  11.9k|        else
  587|  11.9k|        {
  588|       |
  589|  11.9k|            {
  590|  11.9k|                UWORD16 u2_pic_num = ps_cur_slice->u2_frame_num;
  591|       |
  592|  11.9k|                if(!ps_dec->ps_dpb_mgr->u1_mmco_error_in_seq)
  ------------------
  |  Branch (592:20): [True: 11.3k, False: 607]
  ------------------
  593|  11.3k|                {
  594|  11.3k|                    WORD32 ret = ih264d_do_mmco_buffer(ps_dec->ps_dpb_cmds, ps_dec->ps_dpb_mgr,
  595|  11.3k|                                               ps_dec->ps_cur_sps->u1_num_ref_frames, u2_pic_num,
  596|  11.3k|                                               (ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1),
  597|  11.3k|                                               ps_dec->u1_nal_unit_type, ps_dec->ps_cur_pic,
  598|  11.3k|                                               ps_dec->u1_pic_buf_id,
  599|  11.3k|                                               ps_cur_slice->u1_field_pic_flag,
  600|  11.3k|                                               ps_dec->e_dec_status);
  601|  11.3k|                    ps_dec->ps_dpb_mgr->u1_mmco_error_in_seq = ret != OK;
  ------------------
  |  |  114|  11.3k|#define OK        0
  ------------------
  602|  11.3k|                }
  603|  11.9k|            }
  604|  11.9k|        }
  605|   109k|        ih264d_update_default_index_list(ps_dec->ps_dpb_mgr);
  606|   109k|    }
  607|       |
  608|   126k|    if(ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (608:8): [True: 0, False: 126k]
  ------------------
  609|      0|    {
  610|      0|        if(ps_cur_slice->u1_bottom_field_flag)
  ------------------
  |  Branch (610:12): [True: 0, False: 0]
  ------------------
  611|      0|        {
  612|      0|            if(u1_nal_ref_idc)
  ------------------
  |  Branch (612:16): [True: 0, False: 0]
  ------------------
  613|      0|                u1_pic_type = u1_pic_type | BOT_REF;
  ------------------
  |  |  358|      0|#define BOT_REF         0x10
  ------------------
  614|      0|            u1_pic_type = u1_pic_type | BOT_FLD;
  ------------------
  |  |  354|      0|#define BOT_FLD         0x02
  ------------------
  615|      0|        }
  616|      0|        else
  617|      0|        {
  618|      0|            if(u1_nal_ref_idc)
  ------------------
  |  Branch (618:16): [True: 0, False: 0]
  ------------------
  619|      0|                u1_pic_type = u1_pic_type | TOP_REF;
  ------------------
  |  |  357|      0|#define TOP_REF         0x08
  ------------------
  620|      0|            u1_pic_type = u1_pic_type | TOP_FLD;
  ------------------
  |  |  353|      0|#define TOP_FLD         0x01
  ------------------
  621|      0|        }
  622|      0|    }
  623|   126k|    else
  624|   126k|        u1_pic_type = TOP_REF | BOT_REF;
  ------------------
  |  |  357|   126k|#define TOP_REF         0x08
  ------------------
                      u1_pic_type = TOP_REF | BOT_REF;
  ------------------
  |  |  358|   126k|#define BOT_REF         0x10
  ------------------
  625|   126k|    ps_dec->ps_cur_pic->u1_pic_type |= u1_pic_type;
  626|       |
  627|       |
  628|   126k|    if(ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (628:8): [True: 0, False: 126k]
  ------------------
  629|      0|    {
  630|      0|        H264_DEC_DEBUG_PRINT("Toggling secondField\n");
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  631|      0|        ps_dec->u1_second_field = 1 - ps_dec->u1_second_field;
  632|      0|    }
  633|       |
  634|   126k|    return OK;
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  635|   126k|}
ih264d_get_dpb_size:
  659|   123k|{
  660|   123k|    WORD32 i4_size;
  661|   123k|    UWORD8 u1_level_idc;
  662|       |
  663|   123k|    u1_level_idc = ps_seq->u1_level_idc;
  664|       |
  665|   123k|    switch(u1_level_idc)
  666|   123k|    {
  667|    912|        case 10:
  ------------------
  |  Branch (667:9): [True: 912, False: 122k]
  ------------------
  668|    912|            i4_size = 152064;
  669|    912|            break;
  670|  20.6k|        case 11:
  ------------------
  |  Branch (670:9): [True: 20.6k, False: 102k]
  ------------------
  671|  20.6k|            i4_size = 345600;
  672|  20.6k|            break;
  673|  2.77k|        case 12:
  ------------------
  |  Branch (673:9): [True: 2.77k, False: 120k]
  ------------------
  674|  2.77k|            i4_size = 912384;
  675|  2.77k|            break;
  676|    881|        case 13:
  ------------------
  |  Branch (676:9): [True: 881, False: 122k]
  ------------------
  677|    881|            i4_size = 912384;
  678|    881|            break;
  679|  28.0k|        case 20:
  ------------------
  |  Branch (679:9): [True: 28.0k, False: 95.4k]
  ------------------
  680|  28.0k|            i4_size = 912384;
  681|  28.0k|            break;
  682|  4.59k|        case 21:
  ------------------
  |  Branch (682:9): [True: 4.59k, False: 118k]
  ------------------
  683|  4.59k|            i4_size = 1824768;
  684|  4.59k|            break;
  685|  1.13k|        case 22:
  ------------------
  |  Branch (685:9): [True: 1.13k, False: 122k]
  ------------------
  686|  1.13k|            i4_size = 3110400;
  687|  1.13k|            break;
  688|  2.20k|        case 30:
  ------------------
  |  Branch (688:9): [True: 2.20k, False: 121k]
  ------------------
  689|  2.20k|            i4_size = 3110400;
  690|  2.20k|            break;
  691|  6.37k|        case 31:
  ------------------
  |  Branch (691:9): [True: 6.37k, False: 117k]
  ------------------
  692|  6.37k|            i4_size = 6912000;
  693|  6.37k|            break;
  694|  26.2k|        case 32:
  ------------------
  |  Branch (694:9): [True: 26.2k, False: 97.1k]
  ------------------
  695|  26.2k|            i4_size = 7864320;
  696|  26.2k|            break;
  697|  6.08k|        case 40:
  ------------------
  |  Branch (697:9): [True: 6.08k, False: 117k]
  ------------------
  698|  6.08k|            i4_size = 12582912;
  699|  6.08k|            break;
  700|  1.44k|        case 41:
  ------------------
  |  Branch (700:9): [True: 1.44k, False: 121k]
  ------------------
  701|  1.44k|            i4_size = 12582912;
  702|  1.44k|            break;
  703|  5.41k|        case 42:
  ------------------
  |  Branch (703:9): [True: 5.41k, False: 118k]
  ------------------
  704|  5.41k|            i4_size = 12582912;
  705|  5.41k|            break;
  706|     96|        case 50:
  ------------------
  |  Branch (706:9): [True: 96, False: 123k]
  ------------------
  707|     96|            i4_size = 42393600;
  708|     96|            break;
  709|     14|        case 51:
  ------------------
  |  Branch (709:9): [True: 14, False: 123k]
  ------------------
  710|     14|            i4_size = 70778880;
  711|     14|            break;
  712|      8|        case 52:
  ------------------
  |  Branch (712:9): [True: 8, False: 123k]
  ------------------
  713|      8|            i4_size = 70778880;
  714|      8|            break;
  715|  16.6k|        default:
  ------------------
  |  Branch (715:9): [True: 16.6k, False: 106k]
  ------------------
  716|  16.6k|            i4_size = 70778880;
  717|  16.6k|            break;
  718|   123k|    }
  719|       |
  720|   123k|    i4_size /= (ps_seq->u2_frm_wd_in_mbs * (ps_seq->u2_frm_ht_in_mbs << (1 - ps_seq->u1_frame_mbs_only_flag)));
  721|   123k|    i4_size /= 384;
  722|   123k|    i4_size = MIN(i4_size, 16);
  ------------------
  |  |   61|   123k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 11.3k, False: 112k]
  |  |  ------------------
  ------------------
  723|   123k|    i4_size = MAX(i4_size, 1);
  ------------------
  |  |   60|   123k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 123k, False: 118]
  |  |  ------------------
  ------------------
  724|   123k|    return (i4_size);
  725|   123k|}
ih264d_init_dec_mb_grp:
  737|  24.4k|{
  738|  24.4k|    dec_seq_params_t *ps_seq = ps_dec->ps_cur_sps;
  739|  24.4k|    UWORD8 u1_frm = ps_seq->u1_frame_mbs_only_flag;
  740|       |
  741|  24.4k|    ps_dec->u4_recon_mb_grp = ps_dec->u2_frm_wd_in_mbs << ps_seq->u1_mb_aff_flag;
  742|       |
  743|  24.4k|    ps_dec->u4_recon_mb_grp_pair = ps_dec->u4_recon_mb_grp >> 1;
  744|       |
  745|  24.4k|    if(!ps_dec->u4_recon_mb_grp)
  ------------------
  |  Branch (745:8): [True: 0, False: 24.4k]
  ------------------
  746|      0|    {
  747|      0|        return ERROR_MB_GROUP_ASSGN_T;
  748|      0|    }
  749|       |
  750|  24.4k|    ps_dec->u4_num_mbs_prev_nmb = ps_dec->u4_recon_mb_grp;
  751|       |
  752|  24.4k|    return OK;
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  753|  24.4k|}
ih264d_get_next_display_field:
  979|   182k|{
  980|   182k|    pic_buffer_t *pic_buf;
  981|       |
  982|   182k|    UWORD8 i1_cur_fld;
  983|   182k|    WORD32 u4_api_ret = -1;
  984|   182k|    WORD32 i4_disp_buf_id;
  985|   182k|    iv_yuv_buf_t *ps_op_frm;
  986|       |
  987|       |
  988|       |
  989|   182k|    ps_op_frm = &(ps_dec->s_disp_frame_info);
  990|   182k|    H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
  991|   182k|    pic_buf = (pic_buffer_t *)ih264_disp_mgr_get(
  992|   182k|                    (disp_mgr_t *)ps_dec->pv_disp_buf_mgr, &i4_disp_buf_id);
  993|   182k|    ps_dec->u4_num_fld_in_frm = 0;
  994|   182k|    u4_api_ret = -1;
  995|   182k|    pv_disp_op->u4_ts = 0;
  996|   182k|    pv_disp_op->e_output_format = ps_dec->u1_chroma_format;
  997|       |
  998|   182k|    pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_out_buffer->pu1_bufs[0];
  999|   182k|    pv_disp_op->s_disp_frm_buf.pv_u_buf = ps_out_buffer->pu1_bufs[1];
 1000|   182k|    pv_disp_op->s_disp_frm_buf.pv_v_buf = ps_out_buffer->pu1_bufs[2];
 1001|   182k|    ps_dec->i4_display_index  = DEFAULT_POC;
  ------------------
  |  |   45|   182k|#define DEFAULT_POC 0x7FFFFFFF
  ------------------
 1002|   182k|    if(pic_buf != NULL)
  ------------------
  |  Branch (1002:8): [True: 84.2k, False: 98.5k]
  ------------------
 1003|  84.2k|    {
 1004|  84.2k|        ps_dec->pv_disp_sei_params = &pic_buf->s_sei_pic;
 1005|  84.2k|        pv_disp_op->e4_fld_type = 0;
 1006|  84.2k|        pv_disp_op->u4_disp_buf_id = i4_disp_buf_id;
 1007|       |
 1008|  84.2k|        ps_op_frm->u4_y_ht = pic_buf->u2_disp_height << 1;
 1009|  84.2k|        ps_op_frm->u4_u_ht = ps_op_frm->u4_v_ht = ps_op_frm->u4_y_ht >> 1;
 1010|  84.2k|        ps_op_frm->u4_y_wd = pic_buf->u2_disp_width;
 1011|       |
 1012|  84.2k|        ps_op_frm->u4_u_wd = ps_op_frm->u4_v_wd = ps_op_frm->u4_y_wd >> 1;
 1013|       |
 1014|  84.2k|        ps_op_frm->u4_y_strd = pic_buf->u2_frm_wd_y;
 1015|  84.2k|        ps_op_frm->u4_u_strd = ps_op_frm->u4_v_strd = pic_buf->u2_frm_wd_uv;
 1016|       |
 1017|       |        /* ! */
 1018|  84.2k|        pv_disp_op->u4_ts = pic_buf->u4_ts;
 1019|  84.2k|        ps_dec->i4_display_index = pic_buf->i4_poc;
 1020|       |
 1021|       |        /* set the start of the Y, U and V buffer pointer for display    */
 1022|  84.2k|        ps_op_frm->pv_y_buf = pic_buf->pu1_buf1 + pic_buf->u2_crop_offset_y;
 1023|  84.2k|        ps_op_frm->pv_u_buf = pic_buf->pu1_buf2 + pic_buf->u2_crop_offset_uv;
 1024|  84.2k|        ps_op_frm->pv_v_buf = pic_buf->pu1_buf3 + pic_buf->u2_crop_offset_uv;
 1025|  84.2k|        ps_dec->u4_num_fld_in_frm++;
 1026|  84.2k|        ps_dec->u4_num_fld_in_frm++;
 1027|  84.2k|        u4_api_ret = 0;
 1028|       |
 1029|  84.2k|        if(pic_buf->u1_picturetype == 0)
  ------------------
  |  Branch (1029:12): [True: 84.2k, False: 0]
  ------------------
 1030|  84.2k|            pv_disp_op->u4_progressive_frame_flag = 1;
 1031|      0|        else
 1032|      0|            pv_disp_op->u4_progressive_frame_flag = 0;
 1033|       |
 1034|  84.2k|    } H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
 1035|   182k|    pv_disp_op->u4_error_code = u4_api_ret;
 1036|   182k|    pv_disp_op->e_pic_type = 0xFFFFFFFF; //Junk;
 1037|       |
 1038|   182k|    if(u4_api_ret)
  ------------------
  |  Branch (1038:8): [True: 98.5k, False: 84.2k]
  ------------------
 1039|  98.5k|    {
 1040|  98.5k|        pv_disp_op->u4_error_code = 1; //put a proper error code here
 1041|  98.5k|    }
 1042|  84.2k|    else
 1043|  84.2k|    {
 1044|       |
 1045|       |        //Release the buffer if being sent for display
 1046|  84.2k|        UWORD32 temp;
 1047|  84.2k|        UWORD32 dest_inc_Y = 0, dest_inc_UV = 0;
 1048|       |
 1049|  84.2k|        pv_disp_op->s_disp_frm_buf.u4_y_wd = temp = MIN(ps_op_frm->u4_y_wd,
  ------------------
  |  |   61|  84.2k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 84.2k, False: 0]
  |  |  ------------------
  ------------------
 1050|  84.2k|                                                        ps_op_frm->u4_y_strd);
 1051|  84.2k|        pv_disp_op->s_disp_frm_buf.u4_u_wd = pv_disp_op->s_disp_frm_buf.u4_y_wd
 1052|  84.2k|                        >> 1;
 1053|  84.2k|        pv_disp_op->s_disp_frm_buf.u4_v_wd = pv_disp_op->s_disp_frm_buf.u4_y_wd
 1054|  84.2k|                        >> 1;
 1055|       |
 1056|  84.2k|        pv_disp_op->s_disp_frm_buf.u4_y_ht = ps_op_frm->u4_y_ht;
 1057|  84.2k|        pv_disp_op->s_disp_frm_buf.u4_u_ht = pv_disp_op->s_disp_frm_buf.u4_y_ht
 1058|  84.2k|                        >> 1;
 1059|  84.2k|        pv_disp_op->s_disp_frm_buf.u4_v_ht = pv_disp_op->s_disp_frm_buf.u4_y_ht
 1060|  84.2k|                        >> 1;
 1061|  84.2k|        if(0 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1061:12): [True: 84.2k, False: 0]
  ------------------
 1062|  84.2k|        {
 1063|  84.2k|            pv_disp_op->s_disp_frm_buf.u4_y_strd =
 1064|  84.2k|                            pv_disp_op->s_disp_frm_buf.u4_y_wd;
 1065|  84.2k|            pv_disp_op->s_disp_frm_buf.u4_u_strd =
 1066|  84.2k|                            pv_disp_op->s_disp_frm_buf.u4_y_wd >> 1;
 1067|  84.2k|            pv_disp_op->s_disp_frm_buf.u4_v_strd =
 1068|  84.2k|                            pv_disp_op->s_disp_frm_buf.u4_y_wd >> 1;
 1069|       |
 1070|  84.2k|        }
 1071|      0|        else
 1072|      0|        {
 1073|      0|            pv_disp_op->s_disp_frm_buf.u4_y_strd = ps_op_frm->u4_y_strd;
 1074|      0|        }
 1075|       |
 1076|  84.2k|        if(ps_dec->u4_app_disp_width)
  ------------------
  |  Branch (1076:12): [True: 0, False: 84.2k]
  ------------------
 1077|      0|        {
 1078|      0|            pv_disp_op->s_disp_frm_buf.u4_y_strd = MAX(
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1079|      0|                            ps_dec->u4_app_disp_width,
 1080|      0|                            pv_disp_op->s_disp_frm_buf.u4_y_strd);
 1081|      0|        }
 1082|       |
 1083|  84.2k|        pv_disp_op->u4_error_code = 0;
 1084|  84.2k|        if(pv_disp_op->e_output_format == IV_YUV_420P)
  ------------------
  |  Branch (1084:12): [True: 46.7k, False: 37.5k]
  ------------------
 1085|  46.7k|        {
 1086|  46.7k|            UWORD32 i;
 1087|  46.7k|            pv_disp_op->s_disp_frm_buf.u4_u_strd =
 1088|  46.7k|                            pv_disp_op->s_disp_frm_buf.u4_y_strd >> 1;
 1089|  46.7k|            pv_disp_op->s_disp_frm_buf.u4_v_strd =
 1090|  46.7k|                            pv_disp_op->s_disp_frm_buf.u4_y_strd >> 1;
 1091|       |
 1092|  46.7k|            pv_disp_op->s_disp_frm_buf.u4_u_wd = ps_op_frm->u4_y_wd >> 1;
 1093|  46.7k|            pv_disp_op->s_disp_frm_buf.u4_v_wd = ps_op_frm->u4_y_wd >> 1;
 1094|       |
 1095|  46.7k|            if(1 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1095:16): [True: 0, False: 46.7k]
  ------------------
 1096|      0|            {
 1097|      0|                pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_op_frm->pv_y_buf;
 1098|       |
 1099|      0|                for(i = 0; i < MAX_DISP_BUFS_NEW; i++)
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (1099:28): [True: 0, False: 0]
  ------------------
 1100|      0|                {
 1101|      0|                    UWORD8 *buf = ps_dec->disp_bufs[i].buf[0];
 1102|      0|                    buf += ps_dec->disp_bufs[i].u4_ofst[0];
 1103|      0|                    if(((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf
  ------------------
  |  Branch (1103:24): [True: 0, False: 0]
  ------------------
 1104|      0|                                    - pic_buf->u2_crop_offset_y) == buf)
 1105|      0|                    {
 1106|      0|                        buf = ps_dec->disp_bufs[i].buf[1];
 1107|      0|                        buf += ps_dec->disp_bufs[i].u4_ofst[1];
 1108|      0|                        pv_disp_op->s_disp_frm_buf.pv_u_buf = buf
 1109|      0|                                        + (pic_buf->u2_crop_offset_uv
 1110|      0|                                           / YUV420SP_FACTOR);
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1111|       |
 1112|      0|                        buf = ps_dec->disp_bufs[i].buf[2];
 1113|      0|                        buf += ps_dec->disp_bufs[i].u4_ofst[2];
 1114|      0|                        pv_disp_op->s_disp_frm_buf.pv_v_buf = buf
 1115|      0|                                        + (pic_buf->u2_crop_offset_uv
 1116|      0|                                           / YUV420SP_FACTOR);
  ------------------
  |  |  119|      0|#define YUV420SP_FACTOR 2
  ------------------
 1117|       |
 1118|      0|                    }
 1119|      0|                }
 1120|      0|            }
 1121|       |
 1122|  46.7k|        }
 1123|  37.5k|        else if((pv_disp_op->e_output_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (1123:17): [True: 23.4k, False: 14.1k]
  ------------------
 1124|  14.1k|                        || (pv_disp_op->e_output_format == IV_YUV_420SP_VU))
  ------------------
  |  Branch (1124:28): [True: 14.1k, False: 0]
  ------------------
 1125|  37.5k|        {
 1126|  37.5k|            pv_disp_op->s_disp_frm_buf.u4_u_strd =
 1127|  37.5k|                            pv_disp_op->s_disp_frm_buf.u4_y_strd;
 1128|  37.5k|            pv_disp_op->s_disp_frm_buf.u4_v_strd = 0;
 1129|       |
 1130|  37.5k|            if(1 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1130:16): [True: 0, False: 37.5k]
  ------------------
 1131|      0|            {
 1132|      0|                UWORD32 i;
 1133|       |
 1134|      0|                pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_op_frm->pv_y_buf;
 1135|       |
 1136|      0|                for(i = 0; i < MAX_DISP_BUFS_NEW; i++)
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (1136:28): [True: 0, False: 0]
  ------------------
 1137|      0|                {
 1138|      0|                    UWORD8 *buf = ps_dec->disp_bufs[i].buf[0];
 1139|      0|                    buf += ps_dec->disp_bufs[i].u4_ofst[0];
 1140|      0|                    if((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf
  ------------------
  |  Branch (1140:24): [True: 0, False: 0]
  ------------------
 1141|      0|                                    - pic_buf->u2_crop_offset_y == buf)
 1142|      0|                    {
 1143|      0|                        buf = ps_dec->disp_bufs[i].buf[1];
 1144|      0|                        buf += ps_dec->disp_bufs[i].u4_ofst[1];
 1145|      0|                        pv_disp_op->s_disp_frm_buf.pv_u_buf = buf
 1146|      0|                                        + pic_buf->u2_crop_offset_uv;
 1147|      0|                        ;
 1148|       |
 1149|      0|                        buf = ps_dec->disp_bufs[i].buf[2];
 1150|      0|                        buf += ps_dec->disp_bufs[i].u4_ofst[2];
 1151|      0|                        pv_disp_op->s_disp_frm_buf.pv_v_buf = buf
 1152|      0|                                        + pic_buf->u2_crop_offset_uv;
 1153|      0|                        ;
 1154|      0|                    }
 1155|      0|                }
 1156|      0|            }
 1157|  37.5k|            pv_disp_op->s_disp_frm_buf.u4_u_wd =
 1158|  37.5k|                            pv_disp_op->s_disp_frm_buf.u4_y_wd;
 1159|  37.5k|            pv_disp_op->s_disp_frm_buf.u4_v_wd = 0;
 1160|       |
 1161|  37.5k|        }
 1162|      0|        else if((pv_disp_op->e_output_format == IV_RGB_565)
  ------------------
  |  Branch (1162:17): [True: 0, False: 0]
  ------------------
 1163|      0|                        || (pv_disp_op->e_output_format == IV_YUV_422ILE))
  ------------------
  |  Branch (1163:28): [True: 0, False: 0]
  ------------------
 1164|      0|        {
 1165|       |
 1166|      0|            pv_disp_op->s_disp_frm_buf.u4_u_strd = 0;
 1167|      0|            pv_disp_op->s_disp_frm_buf.u4_v_strd = 0;
 1168|      0|            pv_disp_op->s_disp_frm_buf.u4_u_wd = 0;
 1169|      0|            pv_disp_op->s_disp_frm_buf.u4_v_wd = 0;
 1170|      0|            pv_disp_op->s_disp_frm_buf.u4_u_ht = 0;
 1171|      0|            pv_disp_op->s_disp_frm_buf.u4_v_ht = 0;
 1172|       |
 1173|      0|        }
 1174|       |
 1175|       |
 1176|  84.2k|    }
 1177|       |
 1178|   182k|    return u4_api_ret;
 1179|   182k|}
ih264d_release_display_field:
 1203|   126k|{
 1204|   126k|    if(1 == pv_disp_op->u4_error_code)
  ------------------
  |  Branch (1204:8): [True: 76.3k, False: 50.0k]
  ------------------
 1205|  76.3k|    {
 1206|  76.3k|        if(1 == ps_dec->u1_flushfrm)
  ------------------
  |  Branch (1206:12): [True: 0, False: 76.3k]
  ------------------
 1207|      0|        {
 1208|      0|            UWORD32 i;
 1209|       |
 1210|      0|            if(1 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1210:16): [True: 0, False: 0]
  ------------------
 1211|      0|            {
 1212|      0|                H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
 1213|      0|                for(i = 0; i < (MAX_DISP_BUFS_NEW); i++)
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (1213:28): [True: 0, False: 0]
  ------------------
 1214|      0|                {
 1215|      0|                    if(1 == ps_dec->u4_disp_buf_mapping[i])
  ------------------
  |  Branch (1215:24): [True: 0, False: 0]
  ------------------
 1216|      0|                    {
 1217|      0|                        ih264_buf_mgr_release(
 1218|      0|                                        (buf_mgr_t *)ps_dec->pv_pic_buf_mgr, i,
 1219|      0|                                        BUF_MGR_IO);
  ------------------
  |  |   53|      0|#define BUF_MGR_IO           (1 << 3)
  ------------------
 1220|      0|                        ps_dec->u4_disp_buf_mapping[i] = 0;
 1221|      0|                    }
 1222|      0|                } H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
 1223|       |
 1224|      0|                memset(ps_dec->u4_disp_buf_to_be_freed, 0,
 1225|      0|                       (MAX_DISP_BUFS_NEW) * sizeof(UWORD32));
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
 1226|      0|                for(i = 0; i < ps_dec->u1_pic_bufs; i++)
  ------------------
  |  Branch (1226:28): [True: 0, False: 0]
  ------------------
 1227|      0|                    ps_dec->u4_disp_buf_mapping[i] = 1;
 1228|      0|            }
 1229|      0|            ps_dec->u1_flushfrm = 0;
 1230|       |
 1231|      0|        }
 1232|  76.3k|    }
 1233|  50.0k|    else
 1234|  50.0k|    {
 1235|  50.0k|        H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
 1236|       |
 1237|  50.0k|        if(0 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1237:12): [True: 50.0k, False: 0]
  ------------------
 1238|  50.0k|        {
 1239|  50.0k|            ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
 1240|  50.0k|                                  pv_disp_op->u4_disp_buf_id,
 1241|  50.0k|                                  BUF_MGR_IO);
  ------------------
  |  |   53|  50.0k|#define BUF_MGR_IO           (1 << 3)
  ------------------
 1242|       |
 1243|  50.0k|        }
 1244|      0|        else
 1245|      0|        {
 1246|      0|            ps_dec->u4_disp_buf_mapping[pv_disp_op->u4_disp_buf_id] = 1;
 1247|      0|        } H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
 1248|       |
 1249|  50.0k|    }
 1250|   126k|}
ih264d_assign_display_seq:
 1274|   126k|{
 1275|   126k|    WORD32 i;
 1276|   126k|    WORD32 i4_min_poc;
 1277|   126k|    WORD32 i4_min_poc_buf_id;
 1278|   126k|    WORD32 i4_min_index;
 1279|   126k|    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
 1280|   126k|    WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
 1281|       |
 1282|   126k|    i4_min_poc = 0x7fffffff;
 1283|   126k|    i4_min_poc_buf_id = -1;
 1284|   126k|    i4_min_index = -1;
 1285|       |
 1286|   126k|    if(ps_dpb_mgr->i1_poc_buf_id_entries >= ps_dec->i4_display_delay)
  ------------------
  |  Branch (1286:8): [True: 3.11k, False: 123k]
  ------------------
 1287|  3.11k|    {
 1288|  52.9k|        for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|  52.9k|#define MAX_FRAMES              16
  ------------------
  |  Branch (1288:20): [True: 49.8k, False: 3.11k]
  ------------------
 1289|  49.8k|        {
 1290|  49.8k|            if((i4_poc_buf_id_map[i][0] != -1)
  ------------------
  |  Branch (1290:16): [True: 42.5k, False: 7.30k]
  ------------------
 1291|  42.5k|                            && (DO_NOT_DISP
  ------------------
  |  |  602|  42.5k|#define DO_NOT_DISP             254
  ------------------
  |  Branch (1291:32): [True: 42.5k, False: 0]
  ------------------
 1292|  42.5k|                                            != ps_dpb_mgr->ai4_poc_buf_id_map[i][0]))
 1293|  42.5k|            {
 1294|       |                /* Checking for <= is necessary to handle cases where there is one
 1295|       |                   valid buffer with poc set to 0x7FFFFFFF. */
 1296|  42.5k|                if(i4_poc_buf_id_map[i][1] <= i4_min_poc)
  ------------------
  |  Branch (1296:20): [True: 19.8k, False: 22.6k]
  ------------------
 1297|  19.8k|                {
 1298|  19.8k|                    i4_min_poc = i4_poc_buf_id_map[i][1];
 1299|  19.8k|                    i4_min_poc_buf_id = i4_poc_buf_id_map[i][0];
 1300|  19.8k|                    i4_min_index = i;
 1301|  19.8k|                }
 1302|  42.5k|            }
 1303|  49.8k|        }
 1304|       |
 1305|  3.11k|        if((i4_min_index != -1) && (DO_NOT_DISP != i4_min_poc_buf_id))
  ------------------
  |  |  602|  3.11k|#define DO_NOT_DISP             254
  ------------------
  |  Branch (1305:12): [True: 3.11k, False: 0]
  |  Branch (1305:36): [True: 3.11k, False: 0]
  ------------------
 1306|  3.11k|        {
 1307|  3.11k|            ps_dec->i4_cur_display_seq++;
 1308|  3.11k|            ih264_disp_mgr_add(
 1309|  3.11k|                            (disp_mgr_t *)ps_dec->pv_disp_buf_mgr,
 1310|  3.11k|                            i4_min_poc_buf_id, ps_dec->i4_cur_display_seq,
 1311|  3.11k|                            ps_dec->apv_buf_id_pic_buf_map[i4_min_poc_buf_id]);
 1312|  3.11k|            i4_poc_buf_id_map[i4_min_index][0] = -1;
 1313|  3.11k|            i4_poc_buf_id_map[i4_min_index][1] = 0x7fffffff;
 1314|  3.11k|            ps_dpb_mgr->i1_poc_buf_id_entries--;
 1315|  3.11k|        }
 1316|      0|        else if(DO_NOT_DISP == i4_min_poc_buf_id)
  ------------------
  |  |  602|      0|#define DO_NOT_DISP             254
  ------------------
  |  Branch (1316:17): [True: 0, False: 0]
  ------------------
 1317|      0|        {
 1318|      0|            WORD32 i4_error_code;
 1319|      0|            i4_error_code = ERROR_GAPS_IN_FRM_NUM;
 1320|       |//          i4_error_code |= 1<<IVD_CORRUPTEDDATA;
 1321|      0|            return i4_error_code;
 1322|      0|        }
 1323|  3.11k|    }
 1324|   126k|    return OK;
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
 1325|   126k|}
ih264d_release_display_bufs:
 1350|   107k|{
 1351|   107k|    WORD32 i, j;
 1352|   107k|    WORD32 i4_min_poc;
 1353|   107k|    WORD32 i4_min_poc_buf_id;
 1354|   107k|    WORD32 i4_min_index;
 1355|   107k|    WORD64 i8_temp;
 1356|   107k|    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
 1357|   107k|    WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
 1358|       |
 1359|   107k|    i4_min_poc = 0x7fffffff;
 1360|   107k|    i4_min_poc_buf_id = 0;
 1361|   107k|    i4_min_index = 0;
 1362|       |
 1363|   107k|    ih264d_delete_nonref_nondisplay_pics(ps_dpb_mgr);
 1364|       |
 1365|   197k|    for(j = 0; j < ps_dpb_mgr->i1_poc_buf_id_entries; j++)
  ------------------
  |  Branch (1365:16): [True: 90.5k, False: 107k]
  ------------------
 1366|  90.5k|    {
 1367|  90.5k|        i4_min_poc = 0x7fffffff;
 1368|  1.53M|        for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|  1.53M|#define MAX_FRAMES              16
  ------------------
  |  Branch (1368:20): [True: 1.44M, False: 90.5k]
  ------------------
 1369|  1.44M|        {
 1370|  1.44M|            if(i4_poc_buf_id_map[i][0] != -1)
  ------------------
  |  Branch (1370:16): [True: 114k, False: 1.33M]
  ------------------
 1371|   114k|            {
 1372|       |                /* Checking for <= is necessary to handle cases where there is one
 1373|       |                   valid buffer with poc set to 0x7FFFFFFF. */
 1374|   114k|                if(i4_poc_buf_id_map[i][1] <= i4_min_poc)
  ------------------
  |  Branch (1374:20): [True: 99.5k, False: 15.2k]
  ------------------
 1375|  99.5k|                {
 1376|  99.5k|                    i4_min_poc = i4_poc_buf_id_map[i][1];
 1377|  99.5k|                    i4_min_poc_buf_id = i4_poc_buf_id_map[i][0];
 1378|  99.5k|                    i4_min_index = i;
 1379|  99.5k|                }
 1380|   114k|            }
 1381|  1.44M|        }
 1382|       |
 1383|  90.5k|        if(DO_NOT_DISP != i4_min_poc_buf_id)
  ------------------
  |  |  602|  90.5k|#define DO_NOT_DISP             254
  ------------------
  |  Branch (1383:12): [True: 90.5k, False: 0]
  ------------------
 1384|  90.5k|        {
 1385|  90.5k|            ps_dec->i4_cur_display_seq++;
 1386|  90.5k|            ih264_disp_mgr_add(
 1387|  90.5k|                            (disp_mgr_t *)ps_dec->pv_disp_buf_mgr,
 1388|  90.5k|                            i4_min_poc_buf_id, ps_dec->i4_cur_display_seq,
 1389|  90.5k|                            ps_dec->apv_buf_id_pic_buf_map[i4_min_poc_buf_id]);
 1390|  90.5k|            i4_poc_buf_id_map[i4_min_index][0] = -1;
 1391|  90.5k|            i4_poc_buf_id_map[i4_min_index][1] = 0x7fffffff;
 1392|  90.5k|            ps_dpb_mgr->ai4_poc_buf_id_map[i4_min_index][2] = 0;
 1393|  90.5k|        }
 1394|      0|        else
 1395|      0|        {
 1396|      0|            i4_poc_buf_id_map[i4_min_index][0] = -1;
 1397|      0|            i4_poc_buf_id_map[i4_min_index][1] = 0x7fffffff;
 1398|      0|            ps_dpb_mgr->ai4_poc_buf_id_map[i4_min_index][2] = 0;
 1399|      0|        }
 1400|  90.5k|    }
 1401|   107k|    ps_dpb_mgr->i1_poc_buf_id_entries = 0;
 1402|   107k|    i8_temp = (WORD64)ps_dec->i4_prev_max_display_seq + ps_dec->i4_max_poc
 1403|   107k|              + ps_dec->u1_max_dec_frame_buffering + 1;
 1404|       |    /*If i4_prev_max_display_seq overflows integer range, reset it */
 1405|   107k|    ps_dec->i4_prev_max_display_seq = IS_OUT_OF_RANGE_S32(i8_temp)?
  ------------------
  |  |   58|   107k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 107k]
  |  |  |  Branch (58:54): [True: 209, False: 107k]
  |  |  ------------------
  ------------------
 1406|   107k|                                      0 : i8_temp;
 1407|   107k|    ps_dec->i4_max_poc = 0;
 1408|   107k|}
ih264d_assign_pic_num:
 1440|   133k|{
 1441|   133k|    dpb_manager_t *ps_dpb_mgr;
 1442|   133k|    struct dpb_info_t *ps_next_dpb;
 1443|   133k|    WORD8 i;
 1444|   133k|    WORD32 i4_cur_frame_num, i4_max_frame_num;
 1445|   133k|    WORD32 i4_ref_frame_num;
 1446|   133k|    UWORD8 u1_fld_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
 1447|       |
 1448|   133k|    i4_max_frame_num = ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1;
 1449|   133k|    i4_cur_frame_num = ps_dec->ps_cur_pic->i4_frame_num;
 1450|   133k|    ps_dpb_mgr = ps_dec->ps_dpb_mgr;
 1451|       |
 1452|       |    /* Start from ST head */
 1453|   133k|    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
 1454|   230k|    for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
  ------------------
  |  Branch (1454:16): [True: 97.3k, False: 133k]
  ------------------
 1455|  97.3k|    {
 1456|  97.3k|        WORD32 i4_pic_num;
 1457|       |
 1458|  97.3k|        i4_ref_frame_num = ps_next_dpb->ps_pic_buf->i4_frame_num;
 1459|  97.3k|        if(i4_ref_frame_num > i4_cur_frame_num)
  ------------------
  |  Branch (1459:12): [True: 25.9k, False: 71.4k]
  ------------------
 1460|  25.9k|        {
 1461|       |            /* RefPic Buf frame_num is before Current frame_num in decode order */
 1462|  25.9k|            i4_pic_num = i4_ref_frame_num - i4_max_frame_num;
 1463|  25.9k|        }
 1464|  71.4k|        else
 1465|  71.4k|        {
 1466|       |            /* RefPic Buf frame_num is after Current frame_num in decode order */
 1467|  71.4k|            i4_pic_num = i4_ref_frame_num;
 1468|  71.4k|        }
 1469|       |
 1470|  97.3k|        ps_next_dpb->ps_pic_buf->i4_pic_num = i4_pic_num;
 1471|  97.3k|        ps_next_dpb->i4_frame_num = i4_pic_num;
 1472|  97.3k|        ps_next_dpb->ps_pic_buf->u1_long_term_frm_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  97.3k|#define MAX_REF_BUFS    32
  ------------------
 1473|  97.3k|        if(u1_fld_pic_flag)
  ------------------
  |  Branch (1473:12): [True: 0, False: 97.3k]
  ------------------
 1474|      0|        {
 1475|       |            /* Assign the pic num to top fields and bot fields */
 1476|       |
 1477|      0|            ps_next_dpb->s_top_field.i4_pic_num = i4_pic_num * 2
 1478|      0|                            + !(ps_dec->ps_cur_slice->u1_bottom_field_flag);
 1479|      0|            ps_next_dpb->s_bot_field.i4_pic_num = i4_pic_num * 2
 1480|      0|                            + ps_dec->ps_cur_slice->u1_bottom_field_flag;
 1481|      0|        }
 1482|       |        /* Chase the next link */
 1483|  97.3k|        ps_next_dpb = ps_next_dpb->ps_prev_short;
 1484|  97.3k|    }
 1485|       |
 1486|   133k|    if(ps_dec->ps_cur_sps->u1_gaps_in_frame_num_value_allowed_flag
  ------------------
  |  Branch (1486:8): [True: 0, False: 133k]
  ------------------
 1487|      0|                    && ps_dpb_mgr->u1_num_gaps)
  ------------------
  |  Branch (1487:24): [True: 0, False: 0]
  ------------------
 1488|      0|    {
 1489|      0|        WORD32 i4_start_frm, i4_end_frm;
 1490|       |        /* Assign pic numbers for gaps */
 1491|      0|        for(i = 0; i < MAX_FRAMES; i++)
  ------------------
  |  |  600|      0|#define MAX_FRAMES              16
  ------------------
  |  Branch (1491:20): [True: 0, False: 0]
  ------------------
 1492|      0|        {
 1493|      0|            i4_start_frm = ps_dpb_mgr->ai4_gaps_start_frm_num[i];
 1494|      0|            if(i4_start_frm != INVALID_FRAME_NUM)
  ------------------
  |  |  601|      0|#define INVALID_FRAME_NUM       0x0fffffff
  ------------------
  |  Branch (1494:16): [True: 0, False: 0]
  ------------------
 1495|      0|            {
 1496|      0|                if(i4_start_frm > i4_cur_frame_num)
  ------------------
  |  Branch (1496:20): [True: 0, False: 0]
  ------------------
 1497|      0|                {
 1498|       |                    /* gap's frame_num is before Current frame_num in
 1499|       |                     decode order */
 1500|      0|                    i4_start_frm -= i4_max_frame_num;
 1501|      0|                }
 1502|      0|                ps_dpb_mgr->ai4_gaps_start_frm_num[i] = i4_start_frm;
 1503|      0|                i4_end_frm = ps_dpb_mgr->ai4_gaps_end_frm_num[i];
 1504|       |
 1505|      0|                if(i4_end_frm > i4_cur_frame_num)
  ------------------
  |  Branch (1505:20): [True: 0, False: 0]
  ------------------
 1506|      0|                {
 1507|       |                    /* gap's frame_num is before Current frame_num in
 1508|       |                     decode order */
 1509|      0|                    i4_end_frm -= i4_max_frame_num;
 1510|      0|                }
 1511|      0|                ps_dpb_mgr->ai4_gaps_end_frm_num[i] = i4_end_frm;
 1512|      0|            }
 1513|      0|        }
 1514|      0|    }
 1515|   133k|}
ih264d_update_qp:
 1530|   273k|{
 1531|   273k|    WORD32 i_temp;
 1532|   273k|    i_temp = (ps_dec->u1_qp + i1_qp + 52) % 52;
 1533|       |
 1534|   273k|    if((i_temp < 0) || (i_temp > 51) || (i1_qp < -26) || (i1_qp > 25))
  ------------------
  |  Branch (1534:8): [True: 0, False: 273k]
  |  Branch (1534:24): [True: 0, False: 273k]
  |  Branch (1534:41): [True: 0, False: 273k]
  |  Branch (1534:58): [True: 0, False: 273k]
  ------------------
 1535|      0|        return ERROR_INV_RANGE_QP_T;
 1536|       |
 1537|   273k|    ps_dec->u1_qp = i_temp;
 1538|   273k|    ps_dec->u1_qp_y_rem6 = ps_dec->u1_qp % 6;
 1539|   273k|    ps_dec->u1_qp_y_div6 = ps_dec->u1_qp / 6;
 1540|   273k|    i_temp = CLIP3(0, 51, ps_dec->u1_qp + ps_dec->ps_cur_pps->i1_chroma_qp_index_offset);
  ------------------
  |  |   77|   273k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 3.64k, False: 270k]
  |  |  |  Branch (77:54): [True: 812, False: 269k]
  |  |  ------------------
  ------------------
 1541|   273k|    ps_dec->u1_qp_u_rem6 = MOD(gau1_ih264d_qp_scale_cr[12 + i_temp], 6);
  ------------------
  |  |   64|   273k|#define MOD(x,y) ((x)%(y))
  ------------------
 1542|   273k|    ps_dec->u1_qp_u_div6 = DIV(gau1_ih264d_qp_scale_cr[12 + i_temp], 6);
  ------------------
  |  |   65|   273k|#define DIV(x,y) ((x)/(y))
  ------------------
 1543|       |
 1544|   273k|    i_temp = CLIP3(0, 51, ps_dec->u1_qp + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset);
  ------------------
  |  |   77|   273k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 1.65k, False: 272k]
  |  |  |  Branch (77:54): [True: 6.62k, False: 265k]
  |  |  ------------------
  ------------------
 1545|   273k|    ps_dec->u1_qp_v_rem6 = MOD(gau1_ih264d_qp_scale_cr[12 + i_temp], 6);
  ------------------
  |  |   64|   273k|#define MOD(x,y) ((x)%(y))
  ------------------
 1546|   273k|    ps_dec->u1_qp_v_div6 = DIV(gau1_ih264d_qp_scale_cr[12 + i_temp], 6);
  ------------------
  |  |   65|   273k|#define DIV(x,y) ((x)/(y))
  ------------------
 1547|       |
 1548|   273k|    ps_dec->pu2_quant_scale_y =
 1549|   273k|                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_y_rem6];
 1550|   273k|    ps_dec->pu2_quant_scale_u =
 1551|   273k|                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_u_rem6];
 1552|   273k|    ps_dec->pu2_quant_scale_v =
 1553|   273k|                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_v_rem6];
 1554|   273k|    return OK;
  ------------------
  |  |  114|   273k|#define OK        0
  ------------------
 1555|   273k|}
ih264d_create_pic_buffers:
 1750|  24.4k|{
 1751|  24.4k|    struct pic_buffer_t *ps_pic_buf;
 1752|  24.4k|    UWORD8 i;
 1753|  24.4k|    UWORD32 u4_luma_size, u4_chroma_size;
 1754|  24.4k|    UWORD8 u1_frm = ps_dec->ps_cur_sps->u1_frame_mbs_only_flag;
 1755|  24.4k|    WORD32 j;
 1756|  24.4k|    UWORD8 *pu1_buf;
 1757|       |
 1758|  24.4k|    ps_pic_buf = ps_dec->ps_pic_buf_base;
 1759|  24.4k|    ih264_disp_mgr_init((disp_mgr_t *)ps_dec->pv_disp_buf_mgr);
 1760|  24.4k|    ih264_buf_mgr_init((buf_mgr_t *)ps_dec->pv_pic_buf_mgr);
 1761|  24.4k|    u4_luma_size = ps_dec->u2_frm_wd_y * ps_dec->u2_frm_ht_y;
 1762|  24.4k|    u4_chroma_size = ps_dec->u2_frm_wd_uv * ps_dec->u2_frm_ht_uv;
 1763|       |
 1764|  24.4k|    {
 1765|  24.4k|        if(ps_dec->u4_share_disp_buf == 1)
  ------------------
  |  Branch (1765:12): [True: 0, False: 24.4k]
  ------------------
 1766|      0|        {
 1767|       |            /* In case of buffers getting shared between application and library
 1768|       |             there is no need of reference memtabs. Instead of setting the i4_size
 1769|       |             to zero, it is reduced to a small i4_size to ensure that changes
 1770|       |             in the code are minimal */
 1771|      0|            if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (1771:16): [True: 0, False: 0]
  ------------------
 1772|      0|                            || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)
  ------------------
  |  Branch (1772:32): [True: 0, False: 0]
  ------------------
 1773|      0|                            || (ps_dec->u1_chroma_format == IV_YUV_420P))
  ------------------
  |  Branch (1773:32): [True: 0, False: 0]
  ------------------
 1774|      0|            {
 1775|      0|                u4_luma_size = 64;
 1776|      0|            }
 1777|       |
 1778|      0|            if(ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (1778:16): [True: 0, False: 0]
  ------------------
 1779|      0|            {
 1780|      0|                u4_chroma_size = 64;
 1781|      0|            }
 1782|       |
 1783|      0|        }
 1784|  24.4k|    }
 1785|       |
 1786|  24.4k|    pu1_buf = ps_dec->pu1_pic_buf_base;
 1787|       |
 1788|       |    /* Allocate memory for refernce buffers */
 1789|   442k|    for(i = 0; i < u1_num_of_buf; i++)
  ------------------
  |  Branch (1789:16): [True: 418k, False: 24.4k]
  ------------------
 1790|   418k|    {
 1791|   418k|        UWORD32 u4_offset;
 1792|   418k|        WORD32 buf_ret;
 1793|   418k|        UWORD8 *pu1_luma, *pu1_chroma;
 1794|   418k|        void *pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 1795|       |
 1796|   418k|        pu1_luma = pu1_buf;
 1797|   418k|        pu1_buf += ALIGN64(u4_luma_size);
  ------------------
  |  |   48|   418k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
 1798|   418k|        pu1_chroma = pu1_buf;
 1799|   418k|        pu1_buf += ALIGN64(u4_chroma_size);
  ------------------
  |  |   48|   418k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
 1800|       |
 1801|       |        /* Offset to the start of the pic from the top left corner of the frame
 1802|       |         buffer */
 1803|       |
 1804|   418k|        if((0 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1804:12): [True: 418k, False: 0]
  ------------------
 1805|      0|                        || (NULL == ps_dec->disp_bufs[i].buf[0]))
  ------------------
  |  Branch (1805:28): [True: 0, False: 0]
  ------------------
 1806|   418k|        {
 1807|   418k|            UWORD32 pad_len_h, pad_len_v;
 1808|       |
 1809|   418k|            u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
  ------------------
  |  |  572|   418k|#define PAD_LEN_Y_V                   20
  ------------------
                          u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
  ------------------
  |  |  571|   418k|#define PAD_LEN_Y_H                   32
  ------------------
 1810|   418k|            ps_pic_buf->pu1_buf1 = (UWORD8 *)(pu1_luma) + u4_offset;
 1811|       |
 1812|   418k|            pad_len_h = MAX(PAD_LEN_UV_H, (PAD_LEN_Y_H >> 1));
  ------------------
  |  |   60|   418k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [Folded, False: 418k]
  |  |  ------------------
  ------------------
 1813|   418k|            pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
  ------------------
  |  |   60|   418k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [Folded, False: 418k]
  |  |  ------------------
  ------------------
 1814|       |
 1815|   418k|            u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
 1816|       |
 1817|   418k|            ps_pic_buf->pu1_buf2 = (UWORD8 *)(pu1_chroma) + u4_offset;
 1818|   418k|            ps_pic_buf->pu1_buf3 = (UWORD8 *)(NULL) + u4_offset;
 1819|       |
 1820|   418k|        }
 1821|      0|        else
 1822|      0|        {
 1823|      0|            UWORD32 pad_len_h, pad_len_v;
 1824|      0|            u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
  ------------------
  |  |  572|      0|#define PAD_LEN_Y_V                   20
  ------------------
                          u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
  ------------------
  |  |  571|      0|#define PAD_LEN_Y_H                   32
  ------------------
 1825|      0|            ps_pic_buf->pu1_buf1 = (UWORD8 *)ps_dec->disp_bufs[i].buf[0]
 1826|      0|                            + u4_offset;
 1827|       |
 1828|      0|            ps_dec->disp_bufs[i].u4_ofst[0] = u4_offset;
 1829|       |
 1830|      0|            if(ps_dec->u1_chroma_format == IV_YUV_420P)
  ------------------
  |  Branch (1830:16): [True: 0, False: 0]
  ------------------
 1831|      0|            {
 1832|      0|                pad_len_h = MAX(PAD_LEN_UV_H * YUV420SP_FACTOR,
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, Folded]
  |  |  ------------------
  ------------------
 1833|      0|                                (PAD_LEN_Y_H >> 1));
 1834|      0|                pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1835|       |
 1836|      0|                u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
 1837|      0|                ps_pic_buf->pu1_buf2 = (UWORD8 *)(pu1_chroma) + u4_offset;
 1838|      0|                ps_pic_buf->pu1_buf3 = (UWORD8 *)(NULL) + u4_offset;
 1839|       |
 1840|      0|                ps_dec->disp_bufs[i].u4_ofst[1] = u4_offset;
 1841|      0|                ps_dec->disp_bufs[i].u4_ofst[2] = u4_offset;
 1842|       |
 1843|      0|            }
 1844|      0|            else
 1845|      0|            {
 1846|      0|                pad_len_h = MAX(PAD_LEN_UV_H * YUV420SP_FACTOR,
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, Folded]
  |  |  ------------------
  ------------------
 1847|      0|                                (PAD_LEN_Y_H >> 1));
 1848|      0|                pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1849|       |
 1850|      0|                u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
 1851|      0|                ps_pic_buf->pu1_buf2 = (UWORD8 *)(ps_dec->disp_bufs[i].buf[1])
 1852|      0|                                + u4_offset;
 1853|      0|                ps_pic_buf->pu1_buf3 = (UWORD8 *)(ps_dec->disp_bufs[i].buf[1])
 1854|      0|                                + u4_offset;
 1855|       |
 1856|      0|                ps_dec->disp_bufs[i].u4_ofst[1] = u4_offset;
 1857|      0|                ps_dec->disp_bufs[i].u4_ofst[2] = u4_offset;
 1858|      0|            }
 1859|      0|        }
 1860|       |
 1861|   418k|        ps_pic_buf->u2_frm_ht_y = ps_dec->u2_frm_ht_y;
 1862|   418k|        ps_pic_buf->u2_frm_ht_uv = ps_dec->u2_frm_ht_uv;
 1863|   418k|        ps_pic_buf->u2_frm_wd_y = ps_dec->u2_frm_wd_y;
 1864|   418k|        ps_pic_buf->u2_frm_wd_uv = ps_dec->u2_frm_wd_uv;
 1865|       |
 1866|   418k|        ps_pic_buf->u1_pic_buf_id = i;
 1867|       |
 1868|   418k|        buf_ret = ih264_buf_mgr_add((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
 1869|   418k|                                    ps_pic_buf, i);
 1870|   418k|        if(0 != buf_ret)
  ------------------
  |  Branch (1870:12): [True: 0, False: 418k]
  ------------------
 1871|      0|        {
 1872|      0|            ps_dec->i4_error_code = ERROR_BUF_MGR;
 1873|      0|            return ERROR_BUF_MGR;
 1874|      0|        }
 1875|       |
 1876|   418k|        ps_dec->apv_buf_id_pic_buf_map[i] = (void *)ps_pic_buf;
 1877|   418k|        ps_pic_buf++;
 1878|   418k|    }
 1879|       |
 1880|  24.4k|    if(1 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1880:8): [True: 0, False: 24.4k]
  ------------------
 1881|      0|    {
 1882|      0|        for(i = 0; i < u1_num_of_buf; i++)
  ------------------
  |  Branch (1882:20): [True: 0, False: 0]
  ------------------
 1883|      0|            ps_dec->u4_disp_buf_mapping[i] = 1;
 1884|      0|    }
 1885|  24.4k|    return OK;
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
 1886|  24.4k|}
ih264d_allocate_dynamic_bufs:
 1903|  24.4k|{
 1904|  24.4k|    struct MemReq s_MemReq;
 1905|  24.4k|    struct MemBlock *p_MemBlock;
 1906|       |
 1907|  24.4k|    pred_info_t *ps_pred_frame;
 1908|  24.4k|    dec_mb_info_t *ps_frm_mb_info;
 1909|  24.4k|    dec_slice_struct_t *ps_dec_slice_buf;
 1910|  24.4k|    UWORD8 *pu1_dec_mb_map, *pu1_recon_mb_map;
 1911|  24.4k|    UWORD16 *pu2_slice_num_map;
 1912|       |
 1913|  24.4k|    WORD16 *pi16_res_coeff;
 1914|  24.4k|    WORD16 i16_status = 0;
 1915|  24.4k|    UWORD8 uc_frmOrFld = (1 - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag);
 1916|  24.4k|    UWORD16 u4_luma_wd = ps_dec->u2_frm_wd_y;
 1917|  24.4k|    UWORD16 u4_chroma_wd = ps_dec->u2_frm_wd_uv;
 1918|  24.4k|    WORD8 c_i = 0;
 1919|  24.4k|    dec_seq_params_t *ps_sps = ps_dec->ps_cur_sps;
 1920|  24.4k|    UWORD32 u4_total_mbs = ps_sps->u4_total_num_of_mbs << uc_frmOrFld;
 1921|  24.4k|    UWORD32 u4_wd_mbs = ps_dec->u2_frm_wd_in_mbs;
 1922|  24.4k|    UWORD32 u4_ht_mbs = ps_dec->u2_frm_ht_in_mbs;
 1923|  24.4k|    UWORD32 u4_blk_wd;
 1924|  24.4k|    UWORD32 ui_size = 0;
 1925|  24.4k|    UWORD32 u4_int_scratch_size = 0, u4_ref_pred_size = 0;
 1926|  24.4k|    UWORD8 *pu1_buf;
 1927|  24.4k|    WORD32 num_entries;
 1928|  24.4k|    WORD32 size;
 1929|  24.4k|    void *pv_buf;
 1930|  24.4k|    UWORD32 u4_num_bufs;
 1931|  24.4k|    UWORD32 u4_luma_size, u4_chroma_size;
 1932|  24.4k|    void *pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 1933|       |
 1934|  24.4k|    size = u4_total_mbs;
 1935|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1936|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1937|  24.4k|    memset(pv_buf, 0, size);
 1938|  24.4k|    ps_dec->pu1_dec_mb_map = pv_buf;
 1939|       |
 1940|  24.4k|    size = u4_total_mbs;
 1941|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1942|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1943|  24.4k|    memset(pv_buf, 0, size);
 1944|  24.4k|    ps_dec->pu1_recon_mb_map = pv_buf;
 1945|       |
 1946|  24.4k|    size = u4_total_mbs * sizeof(UWORD16);
 1947|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1948|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1949|  24.4k|    memset(pv_buf, 0, size);
 1950|  24.4k|    ps_dec->pu2_slice_num_map = pv_buf;
 1951|       |
 1952|       |    /************************************************************/
 1953|       |    /* Post allocation Initialisations                          */
 1954|       |    /************************************************************/
 1955|  24.4k|    ps_dec->ps_parse_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
 1956|  24.4k|    ps_dec->ps_decode_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
 1957|  24.4k|    ps_dec->ps_computebs_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
 1958|       |
 1959|  24.4k|    ps_dec->ps_pred_start = ps_dec->ps_pred;
 1960|       |
 1961|  24.4k|    size = sizeof(parse_pmbarams_t) * (ps_dec->u4_recon_mb_grp);
 1962|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1963|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1964|  24.4k|    memset(pv_buf, 0, size);
 1965|  24.4k|    ps_dec->ps_parse_mb_data = pv_buf;
 1966|       |
 1967|  24.4k|    size = sizeof(parse_part_params_t)
 1968|  24.4k|                        * ((ps_dec->u4_recon_mb_grp) << 4);
 1969|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1970|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1971|  24.4k|    memset(pv_buf, 0, size);
 1972|  24.4k|    ps_dec->ps_parse_part_params = pv_buf;
 1973|       |
 1974|  24.4k|    size = ((u4_wd_mbs * sizeof(deblkmb_neighbour_t)) << uc_frmOrFld);
 1975|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1976|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1977|  24.4k|    memset(pv_buf, 0, size);
 1978|  24.4k|    ps_dec->ps_deblk_top_mb = pv_buf;
 1979|       |
 1980|  24.4k|    size = ((sizeof(ctxt_inc_mb_info_t))
 1981|  24.4k|                        * (((u4_wd_mbs + 1) << uc_frmOrFld) + 1));
 1982|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1983|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1984|  24.4k|    memset(pv_buf, 0, size);
 1985|  24.4k|    ps_dec->p_ctxt_inc_mb_map = pv_buf;
 1986|       |
 1987|       |    /* 0th entry of CtxtIncMbMap will be always be containing default values
 1988|       |     for CABAC context representing MB not available */
 1989|  24.4k|    ps_dec->p_ctxt_inc_mb_map += 1;
 1990|       |
 1991|  24.4k|    size = (sizeof(mv_pred_t) * ps_dec->u4_recon_mb_grp
 1992|  24.4k|                        * 16);
 1993|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1994|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 1995|  24.4k|    memset(pv_buf, 0, size);
 1996|  24.4k|    ps_dec->ps_mv_p[0] = pv_buf;
 1997|       |
 1998|  24.4k|    size = (sizeof(mv_pred_t) * ps_dec->u4_recon_mb_grp
 1999|  24.4k|                        * 16);
 2000|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2001|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2002|  24.4k|    memset(pv_buf, 0, size);
 2003|  24.4k|    ps_dec->ps_mv_p[1] = pv_buf;
 2004|       |
 2005|  24.4k|    {
 2006|  24.4k|        UWORD8 i;
 2007|   122k|        for(i = 0; i < MV_SCRATCH_BUFS; i++)
  ------------------
  |  |   63|   122k|#define MV_SCRATCH_BUFS             4
  ------------------
  |  Branch (2007:20): [True: 97.7k, False: 24.4k]
  ------------------
 2008|  97.7k|        {
 2009|  97.7k|            size = (sizeof(mv_pred_t)
 2010|  97.7k|                            * ps_dec->u4_recon_mb_grp * 4);
 2011|  97.7k|            pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2012|  97.7k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  97.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 97.7k]
  |  |  ------------------
  ------------------
 2013|  97.7k|            memset(pv_buf, 0, size);
 2014|  97.7k|            ps_dec->ps_mv_top_p[i] = pv_buf;
 2015|  97.7k|        }
 2016|  24.4k|    }
 2017|       |
 2018|  24.4k|    size = sizeof(UWORD8) * ((u4_wd_mbs + 2) * MB_SIZE) * 2;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2019|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2020|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2021|  24.4k|    ps_dec->pu1_y_intra_pred_line = pv_buf;
 2022|  24.4k|    memset(ps_dec->pu1_y_intra_pred_line, 0, size);
 2023|  24.4k|    ps_dec->pu1_y_intra_pred_line += MB_SIZE;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2024|       |
 2025|  24.4k|    size = sizeof(UWORD8) * ((u4_wd_mbs + 2) * MB_SIZE) * 2;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2026|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2027|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2028|  24.4k|    ps_dec->pu1_u_intra_pred_line = pv_buf;
 2029|  24.4k|    memset(ps_dec->pu1_u_intra_pred_line, 0, size);
 2030|  24.4k|    ps_dec->pu1_u_intra_pred_line += MB_SIZE;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2031|       |
 2032|  24.4k|    size = sizeof(UWORD8) * ((u4_wd_mbs + 2) * MB_SIZE) * 2;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2033|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2034|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2035|  24.4k|    ps_dec->pu1_v_intra_pred_line = pv_buf;
 2036|  24.4k|    memset(ps_dec->pu1_v_intra_pred_line, 0, size);
 2037|  24.4k|    ps_dec->pu1_v_intra_pred_line += MB_SIZE;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2038|       |
 2039|  24.4k|    if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (2039:8): [True: 7.98k, False: 16.4k]
  ------------------
 2040|  7.98k|    {
 2041|       |        /* Needs one extra row of info, to hold top row data */
 2042|  7.98k|        size = sizeof(mb_neigbour_params_t)
 2043|  7.98k|                        * 2 * ((u4_wd_mbs + 2) * (u4_ht_mbs + 1));
 2044|  7.98k|    }
 2045|  16.4k|    else
 2046|  16.4k|    {
 2047|  16.4k|        size = sizeof(mb_neigbour_params_t)
 2048|  16.4k|                        * 2 * ((u4_wd_mbs + 2) << uc_frmOrFld);
 2049|  16.4k|    }
 2050|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2051|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2052|       |
 2053|  24.4k|    ps_dec->ps_nbr_mb_row = pv_buf;
 2054|  24.4k|    memset(ps_dec->ps_nbr_mb_row, 0, size);
 2055|       |
 2056|       |    /* Allocate deblock MB info */
 2057|  24.4k|    size = (u4_total_mbs + u4_wd_mbs) * sizeof(deblk_mb_t);
 2058|       |
 2059|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2060|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2061|  24.4k|    ps_dec->ps_deblk_pic = pv_buf;
 2062|       |
 2063|  24.4k|    memset(ps_dec->ps_deblk_pic, 0, size);
 2064|       |
 2065|       |    /* Allocate frame level mb info */
 2066|  24.4k|    size = sizeof(dec_mb_info_t) * u4_total_mbs;
 2067|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2068|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2069|  24.4k|    ps_dec->ps_frm_mb_info = pv_buf;
 2070|  24.4k|    memset(ps_dec->ps_frm_mb_info, 0, size);
 2071|       |
 2072|       |    /* Allocate memory for slice headers dec_slice_struct_t */
 2073|  24.4k|    num_entries = MAX_FRAMES;
  ------------------
  |  |  600|  24.4k|#define MAX_FRAMES              16
  ------------------
 2074|  24.4k|    if((1 >= ps_dec->ps_cur_sps->u1_num_ref_frames) &&
  ------------------
  |  Branch (2074:8): [True: 20.1k, False: 4.27k]
  ------------------
 2075|  20.1k|        (0 == ps_dec->i4_display_delay))
  ------------------
  |  Branch (2075:9): [True: 0, False: 20.1k]
  ------------------
 2076|      0|    {
 2077|      0|        num_entries = 1;
 2078|      0|    }
 2079|  24.4k|    num_entries = ((2 * num_entries) + 1);
 2080|  24.4k|    num_entries *= 2;
 2081|       |
 2082|  24.4k|    size = num_entries * sizeof(void *);
 2083|  24.4k|    size += PAD_MAP_IDX_POC * sizeof(void *);
  ------------------
  |  |  100|  24.4k|#define PAD_MAP_IDX_POC             (1)
  ------------------
 2084|  24.4k|    size *= u4_total_mbs;
 2085|  24.4k|    size += sizeof(dec_slice_struct_t) * u4_total_mbs;
 2086|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2087|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2088|       |
 2089|  24.4k|    ps_dec->ps_dec_slice_buf = pv_buf;
 2090|  24.4k|    memset(ps_dec->ps_dec_slice_buf, 0, size);
 2091|  24.4k|    pu1_buf = (UWORD8 *)ps_dec->ps_dec_slice_buf;
 2092|  24.4k|    pu1_buf += sizeof(dec_slice_struct_t) * u4_total_mbs;
 2093|  24.4k|    ps_dec->pv_map_ref_idx_to_poc_buf = (void *)pu1_buf;
 2094|       |
 2095|       |    /* Allocate memory for packed pred info */
 2096|  24.4k|    num_entries = u4_total_mbs;
 2097|  24.4k|    num_entries *= 16 * 2;
 2098|       |
 2099|  24.4k|    size = sizeof(pred_info_pkd_t) * num_entries;
 2100|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2101|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2102|  24.4k|    memset(pv_buf, 0, size);
 2103|  24.4k|    ps_dec->ps_pred_pkd = pv_buf;
 2104|       |
 2105|       |    /* Allocate memory for coeff data */
 2106|  24.4k|    size = MB_LUM_SIZE * sizeof(WORD16);
  ------------------
  |  |  563|  24.4k|#define MB_LUM_SIZE                   256
  ------------------
 2107|       |    /*For I16x16 MBs, 16 4x4 AC coeffs and 1 4x4 DC coeff TU blocks will be sent
 2108|       |    For all MBs along with 8 4x4 AC coeffs 2 2x2 DC coeff TU blocks will be sent
 2109|       |    So use 17 4x4 TU blocks for luma and 9 4x4 TU blocks for chroma */
 2110|  24.4k|    size += u4_total_mbs * (MAX(17 * sizeof(tu_sblk4x4_coeff_data_t),4 * sizeof(tu_blk8x8_coeff_data_t))
  ------------------
  |  |   60|  24.4k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 24.4k, Folded]
  |  |  ------------------
  ------------------
 2111|  24.4k|                                            + 9 * sizeof(tu_sblk4x4_coeff_data_t));
 2112|       |    //32 bytes for each mb to store u1_prev_intra4x4_pred_mode and u1_rem_intra4x4_pred_mode data
 2113|  24.4k|    size += u4_total_mbs * 32;
 2114|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2115|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2116|  24.4k|    memset(pv_buf, 0, size);
 2117|       |
 2118|  24.4k|    ps_dec->pi2_coeff_data = pv_buf;
 2119|       |
 2120|  24.4k|    ps_dec->pv_pic_tu_coeff_data = (void *)(ps_dec->pi2_coeff_data + MB_LUM_SIZE);
  ------------------
  |  |  563|  24.4k|#define MB_LUM_SIZE                   256
  ------------------
 2121|       |
 2122|       |    /* Allocate MV bank buffer */
 2123|  24.4k|    {
 2124|  24.4k|        UWORD32 col_flag_buffer_size, mvpred_buffer_size;
 2125|       |
 2126|  24.4k|        col_flag_buffer_size = ((ps_dec->u2_pic_wd * ps_dec->u2_pic_ht) >> 4);
 2127|  24.4k|        mvpred_buffer_size = sizeof(mv_pred_t)
 2128|  24.4k|                        * ((ps_dec->u2_pic_wd * (ps_dec->u2_pic_ht + PAD_MV_BANK_ROW)) >> 4);
  ------------------
  |  |  576|  24.4k|#define PAD_MV_BANK_ROW             64
  ------------------
 2129|       |
 2130|  24.4k|        u4_num_bufs = ps_dec->ps_cur_sps->u1_num_ref_frames + 1;
 2131|       |
 2132|  24.4k|        u4_num_bufs = MIN(u4_num_bufs, ps_dec->u1_pic_bufs);
  ------------------
  |  |   61|  24.4k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 24.4k, False: 0]
  |  |  ------------------
  ------------------
 2133|  24.4k|        u4_num_bufs = MAX(u4_num_bufs, 2);
  ------------------
  |  |   60|  24.4k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 4.27k, False: 20.1k]
  |  |  ------------------
  ------------------
 2134|  24.4k|        size = ALIGN64(mvpred_buffer_size) + ALIGN64(col_flag_buffer_size);
  ------------------
  |  |   48|  24.4k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
                      size = ALIGN64(mvpred_buffer_size) + ALIGN64(col_flag_buffer_size);
  ------------------
  |  |   48|  24.4k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
 2135|  24.4k|        size *= u4_num_bufs;
 2136|  24.4k|        pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2137|  24.4k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2138|  24.4k|        memset(pv_buf, 0, size);
 2139|  24.4k|        ps_dec->pu1_mv_bank_buf_base = pv_buf;
 2140|  24.4k|    }
 2141|       |
 2142|       |    /* Allocate Pic buffer */
 2143|      0|    u4_luma_size = ps_dec->u2_frm_wd_y * ps_dec->u2_frm_ht_y;
 2144|  24.4k|    u4_chroma_size = ps_dec->u2_frm_wd_uv * ps_dec->u2_frm_ht_uv;
 2145|       |
 2146|  24.4k|    {
 2147|  24.4k|        if(ps_dec->u4_share_disp_buf == 1)
  ------------------
  |  Branch (2147:12): [True: 0, False: 24.4k]
  ------------------
 2148|      0|        {
 2149|       |            /* In case of buffers getting shared between application and library
 2150|       |             there is no need of reference memtabs. Instead of setting the i4_size
 2151|       |             to zero, it is reduced to a small i4_size to ensure that changes
 2152|       |             in the code are minimal */
 2153|      0|            if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (2153:16): [True: 0, False: 0]
  ------------------
 2154|      0|                            || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)
  ------------------
  |  Branch (2154:32): [True: 0, False: 0]
  ------------------
 2155|      0|                            || (ps_dec->u1_chroma_format == IV_YUV_420P))
  ------------------
  |  Branch (2155:32): [True: 0, False: 0]
  ------------------
 2156|      0|            {
 2157|      0|                u4_luma_size = 64;
 2158|      0|            }
 2159|       |
 2160|      0|            if(ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
  ------------------
  |  Branch (2160:16): [True: 0, False: 0]
  ------------------
 2161|      0|            {
 2162|      0|                u4_chroma_size = 64;
 2163|      0|            }
 2164|       |
 2165|      0|        }
 2166|  24.4k|    }
 2167|       |
 2168|  24.4k|    size = ALIGN64(u4_luma_size) + ALIGN64(u4_chroma_size);
  ------------------
  |  |   48|  24.4k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
                  size = ALIGN64(u4_luma_size) + ALIGN64(u4_chroma_size);
  ------------------
  |  |   48|  24.4k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
 2169|  24.4k|    size *= ps_dec->u1_pic_bufs;
 2170|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2171|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
 2172|  24.4k|    memset(pv_buf, 0, size);
 2173|  24.4k|    ps_dec->pu1_pic_buf_base = pv_buf;
 2174|       |
 2175|       |    /* Allocate memory for mb_info maps */
 2176|  24.4k|    if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (2176:8): [True: 0, False: 24.4k]
  ------------------
 2177|      0|    {
 2178|      0|        size = (u4_total_mbs << 2) * MAX_DISP_BUFS_NEW;
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
 2179|       |
 2180|      0|        pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2181|      0|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|      0|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2182|      0|        memset(pv_buf, 0, size);
 2183|      0|        ps_dec->pu1_qp_map_base = pv_buf;
 2184|       |
 2185|      0|        pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2186|      0|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|      0|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2187|      0|        memset(pv_buf, 0, size);
 2188|      0|        ps_dec->pu1_mb_type_map_base = pv_buf;
 2189|      0|    }
 2190|       |
 2191|       |    /* Post allocation Increment Actions */
 2192|       |
 2193|       |    /***************************************************************************/
 2194|       |    /*Initialize cabac context pointers for every SE that has fixed contextIdx */
 2195|       |    /***************************************************************************/
 2196|  24.4k|    {
 2197|  24.4k|        bin_ctxt_model_t * const p_cabac_ctxt_table_t =
 2198|  24.4k|                        ps_dec->p_cabac_ctxt_table_t;
 2199|  24.4k|        bin_ctxt_model_t * * p_coeff_abs_level_minus1_t =
 2200|  24.4k|                        ps_dec->p_coeff_abs_level_minus1_t;
 2201|  24.4k|        bin_ctxt_model_t * * p_cbf_t = ps_dec->p_cbf_t;
 2202|       |
 2203|  24.4k|        ps_dec->p_mb_field_dec_flag_t = p_cabac_ctxt_table_t
 2204|  24.4k|                        + MB_FIELD_DECODING_FLAG;
 2205|  24.4k|        ps_dec->p_prev_intra4x4_pred_mode_flag_t = p_cabac_ctxt_table_t
 2206|  24.4k|                        + PREV_INTRA4X4_PRED_MODE_FLAG;
 2207|  24.4k|        ps_dec->p_rem_intra4x4_pred_mode_t = p_cabac_ctxt_table_t
 2208|  24.4k|                        + REM_INTRA4X4_PRED_MODE;
 2209|  24.4k|        ps_dec->p_intra_chroma_pred_mode_t = p_cabac_ctxt_table_t
 2210|  24.4k|                        + INTRA_CHROMA_PRED_MODE;
 2211|  24.4k|        ps_dec->p_mb_qp_delta_t = p_cabac_ctxt_table_t + MB_QP_DELTA;
 2212|  24.4k|        ps_dec->p_ref_idx_t = p_cabac_ctxt_table_t + REF_IDX;
 2213|  24.4k|        ps_dec->p_mvd_x_t = p_cabac_ctxt_table_t + MVD_X;
 2214|  24.4k|        ps_dec->p_mvd_y_t = p_cabac_ctxt_table_t + MVD_Y;
 2215|  24.4k|        p_cbf_t[0] = p_cabac_ctxt_table_t + CBF + 0;
 2216|  24.4k|        p_cbf_t[1] = p_cabac_ctxt_table_t + CBF + 4;
 2217|  24.4k|        p_cbf_t[2] = p_cabac_ctxt_table_t + CBF + 8;
 2218|  24.4k|        p_cbf_t[3] = p_cabac_ctxt_table_t + CBF + 12;
 2219|  24.4k|        p_cbf_t[4] = p_cabac_ctxt_table_t + CBF + 16;
 2220|  24.4k|        ps_dec->p_cbp_luma_t = p_cabac_ctxt_table_t + CBP_LUMA;
 2221|  24.4k|        ps_dec->p_cbp_chroma_t = p_cabac_ctxt_table_t + CBP_CHROMA;
 2222|       |
 2223|  24.4k|        p_coeff_abs_level_minus1_t[LUMA_DC_CTXCAT] = p_cabac_ctxt_table_t
  ------------------
  |  |   71|  24.4k|#define LUMA_DC_CTXCAT    0
  ------------------
 2224|  24.4k|                        + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_0_OFFSET;
 2225|       |
 2226|  24.4k|        p_coeff_abs_level_minus1_t[LUMA_AC_CTXCAT] = p_cabac_ctxt_table_t
  ------------------
  |  |   72|  24.4k|#define LUMA_AC_CTXCAT    1
  ------------------
 2227|  24.4k|                        + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_1_OFFSET;
 2228|       |
 2229|  24.4k|        p_coeff_abs_level_minus1_t[LUMA_4X4_CTXCAT] = p_cabac_ctxt_table_t
  ------------------
  |  |   73|  24.4k|#define LUMA_4X4_CTXCAT   2
  ------------------
 2230|  24.4k|                        + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_2_OFFSET;
 2231|       |
 2232|  24.4k|        p_coeff_abs_level_minus1_t[CHROMA_DC_CTXCAT] = p_cabac_ctxt_table_t
  ------------------
  |  |   74|  24.4k|#define CHROMA_DC_CTXCAT  3
  ------------------
 2233|  24.4k|                        + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_3_OFFSET;
 2234|       |
 2235|  24.4k|        p_coeff_abs_level_minus1_t[CHROMA_AC_CTXCAT] = p_cabac_ctxt_table_t
  ------------------
  |  |   75|  24.4k|#define CHROMA_AC_CTXCAT  4
  ------------------
 2236|  24.4k|                        + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_4_OFFSET;
 2237|       |
 2238|  24.4k|        p_coeff_abs_level_minus1_t[LUMA_8X8_CTXCAT] = p_cabac_ctxt_table_t
  ------------------
  |  |   76|  24.4k|#define LUMA_8X8_CTXCAT   5
  ------------------
 2239|  24.4k|                        + COEFF_ABS_LEVEL_MINUS1_8X8
 2240|  24.4k|                        + COEFF_ABS_LEVEL_CAT_5_OFFSET;
 2241|       |
 2242|       |        /********************************************************/
 2243|       |        /* context for the high profile related syntax elements */
 2244|       |        /* This is maintained seperately in s_high_profile     */
 2245|       |        /********************************************************/
 2246|  24.4k|        {
 2247|       |
 2248|  24.4k|            ps_dec->s_high_profile.ps_transform8x8_flag = p_cabac_ctxt_table_t
 2249|  24.4k|                            + TRANSFORM_SIZE_8X8_FLAG;
 2250|       |
 2251|  24.4k|            ps_dec->s_high_profile.ps_sigcoeff_8x8_frame = p_cabac_ctxt_table_t
 2252|  24.4k|                            + SIGNIFICANT_COEFF_FLAG_8X8_FRAME;
 2253|       |
 2254|  24.4k|            ps_dec->s_high_profile.ps_last_sigcoeff_8x8_frame =
 2255|  24.4k|                            p_cabac_ctxt_table_t
 2256|  24.4k|                                            + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME;
 2257|       |
 2258|  24.4k|            ps_dec->s_high_profile.ps_coeff_abs_levelminus1 =
 2259|  24.4k|                            p_cabac_ctxt_table_t + COEFF_ABS_LEVEL_MINUS1_8X8;
 2260|       |
 2261|  24.4k|            ps_dec->s_high_profile.ps_sigcoeff_8x8_field = p_cabac_ctxt_table_t
 2262|  24.4k|                            + SIGNIFICANT_COEFF_FLAG_8X8_FIELD;
 2263|       |
 2264|  24.4k|            ps_dec->s_high_profile.ps_last_sigcoeff_8x8_field =
 2265|  24.4k|                            p_cabac_ctxt_table_t
 2266|  24.4k|                                            + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD;
 2267|  24.4k|        }
 2268|  24.4k|    }
 2269|  24.4k|    return (i16_status);
 2270|  24.4k|}
ih264d_free_dynamic_bufs:
 2287|   151k|{
 2288|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_bits_buf_dynamic);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 0, False: 151k]
  |  |  ------------------
  ------------------
 2289|       |
 2290|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_deblk_pic);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2291|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_dec_mb_map);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2292|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_recon_mb_map);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2293|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu2_slice_num_map);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2294|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_dec_slice_buf);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2295|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_frm_mb_info);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2296|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pi2_coeff_data);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2297|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_parse_mb_data);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2298|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_parse_part_params);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2299|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_deblk_top_mb);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2300|       |
 2301|   151k|    if(ps_dec->p_ctxt_inc_mb_map)
  ------------------
  |  Branch (2301:8): [True: 24.4k, False: 126k]
  ------------------
 2302|  24.4k|    {
 2303|  24.4k|        ps_dec->p_ctxt_inc_mb_map -= 1;
 2304|  24.4k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->p_ctxt_inc_mb_map);
  ------------------
  |  |   43|  24.4k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  24.4k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 0]
  |  |  ------------------
  ------------------
 2305|  24.4k|    }
 2306|       |
 2307|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_mv_p[0]);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2308|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_mv_p[1]);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2309|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_pred_pkd);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2310|   151k|    {
 2311|   151k|        UWORD8 i;
 2312|   756k|        for(i = 0; i < MV_SCRATCH_BUFS; i++)
  ------------------
  |  |   63|   756k|#define MV_SCRATCH_BUFS             4
  ------------------
  |  Branch (2312:20): [True: 605k, False: 151k]
  ------------------
 2313|   605k|        {
 2314|   605k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_mv_top_p[i]);
  ------------------
  |  |   43|   605k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   605k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 97.7k, False: 507k]
  |  |  ------------------
  ------------------
 2315|   605k|        }
 2316|   151k|    }
 2317|       |
 2318|   151k|    if(ps_dec->pu1_y_intra_pred_line)
  ------------------
  |  Branch (2318:8): [True: 24.4k, False: 126k]
  ------------------
 2319|  24.4k|    {
 2320|  24.4k|        ps_dec->pu1_y_intra_pred_line -= MB_SIZE;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2321|  24.4k|    }
 2322|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_y_intra_pred_line);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2323|       |
 2324|   151k|    if(ps_dec->pu1_u_intra_pred_line)
  ------------------
  |  Branch (2324:8): [True: 24.4k, False: 126k]
  ------------------
 2325|  24.4k|    {
 2326|  24.4k|        ps_dec->pu1_u_intra_pred_line -= MB_SIZE;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2327|  24.4k|    }
 2328|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_u_intra_pred_line);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2329|       |
 2330|   151k|    if(ps_dec->pu1_v_intra_pred_line)
  ------------------
  |  Branch (2330:8): [True: 24.4k, False: 126k]
  ------------------
 2331|  24.4k|    {
 2332|  24.4k|        ps_dec->pu1_v_intra_pred_line -= MB_SIZE;
  ------------------
  |  |  554|  24.4k|#define MB_SIZE             16
  ------------------
 2333|  24.4k|    }
 2334|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_v_intra_pred_line);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2335|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_nbr_mb_row);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2336|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_mv_bank_buf_base);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2337|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_pic_buf_base);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
 2338|       |
 2339|       |    /* Free memory for mb_info maps */
 2340|   151k|    if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (2340:8): [True: 0, False: 151k]
  ------------------
 2341|      0|    {
 2342|      0|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_qp_map_base);
  ------------------
  |  |   43|      0|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|      0|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2343|       |        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_mb_type_map_base);
  ------------------
  |  |   43|      0|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|      0|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2344|      0|    }
 2345|   151k|    return 0;
 2346|   151k|}
ih264d_create_mv_bank:
 2373|  24.4k|{
 2374|  24.4k|    UWORD8  i;
 2375|  24.4k|    UWORD32 col_flag_buffer_size, mvpred_buffer_size;
 2376|  24.4k|    UWORD8 *pu1_mv_buf_mgr_base, *pu1_mv_bank_base;
 2377|  24.4k|    col_mv_buf_t *ps_col_mv;
 2378|  24.4k|    mv_pred_t *ps_mv;
 2379|  24.4k|    UWORD8 *pu1_col_zero_flag_buf;
 2380|  24.4k|    dec_struct_t *ps_dec = (dec_struct_t *)pv_dec;
 2381|  24.4k|    WORD32 buf_ret;
 2382|  24.4k|    UWORD32 u4_num_bufs;
 2383|  24.4k|    UWORD8 *pu1_buf;
 2384|  24.4k|    WORD32 size;
 2385|  24.4k|    void *pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 2386|       |
 2387|  24.4k|    col_flag_buffer_size = ((ui_width * ui_height) >> 4);
 2388|  24.4k|    mvpred_buffer_size = sizeof(mv_pred_t)
 2389|  24.4k|                    * ((ui_width * (ui_height + PAD_MV_BANK_ROW)) >> 4);
  ------------------
  |  |  576|  24.4k|#define PAD_MV_BANK_ROW             64
  ------------------
 2390|       |
 2391|  24.4k|    ih264_buf_mgr_init((buf_mgr_t *)ps_dec->pv_mv_buf_mgr);
 2392|       |
 2393|  24.4k|    ps_col_mv = ps_dec->ps_col_mv_base;
 2394|       |
 2395|  24.4k|    u4_num_bufs = ps_dec->ps_cur_sps->u1_num_ref_frames + 1;
 2396|       |
 2397|  24.4k|    u4_num_bufs = MIN(u4_num_bufs, ps_dec->u1_pic_bufs);
  ------------------
  |  |   61|  24.4k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 24.4k, False: 0]
  |  |  ------------------
  ------------------
 2398|  24.4k|    u4_num_bufs = MAX(u4_num_bufs, 2);
  ------------------
  |  |   60|  24.4k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 4.27k, False: 20.1k]
  |  |  ------------------
  ------------------
 2399|  24.4k|    pu1_buf = ps_dec->pu1_mv_bank_buf_base;
 2400|  93.0k|    for(i = 0 ; i < u4_num_bufs ; i++)
  ------------------
  |  Branch (2400:17): [True: 68.6k, False: 24.4k]
  ------------------
 2401|  68.6k|    {
 2402|  68.6k|        pu1_col_zero_flag_buf = pu1_buf;
 2403|  68.6k|        pu1_buf += ALIGN64(col_flag_buffer_size);
  ------------------
  |  |   48|  68.6k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
 2404|       |
 2405|  68.6k|        ps_mv = (mv_pred_t *)pu1_buf;
 2406|  68.6k|        pu1_buf += ALIGN64(mvpred_buffer_size);
  ------------------
  |  |   48|  68.6k|#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
  ------------------
 2407|       |
 2408|  68.6k|        memset(ps_mv, 0, ((ui_width * OFFSET_MV_BANK_ROW) >> 4) * sizeof(mv_pred_t));
  ------------------
  |  |   97|  68.6k|#define OFFSET_MV_BANK_ROW          ((PAD_MV_BANK_ROW)>>1)
  |  |  ------------------
  |  |  |  |  576|  68.6k|#define PAD_MV_BANK_ROW             64
  |  |  ------------------
  ------------------
 2409|  68.6k|        ps_mv += (ui_width*OFFSET_MV_BANK_ROW) >> 4;
  ------------------
  |  |   97|  68.6k|#define OFFSET_MV_BANK_ROW          ((PAD_MV_BANK_ROW)>>1)
  |  |  ------------------
  |  |  |  |  576|  68.6k|#define PAD_MV_BANK_ROW             64
  |  |  ------------------
  ------------------
 2410|       |
 2411|  68.6k|        ps_col_mv->pv_col_zero_flag = (void *)pu1_col_zero_flag_buf;
 2412|  68.6k|        ps_col_mv->pv_mv = (void *)ps_mv;
 2413|  68.6k|        buf_ret = ih264_buf_mgr_add((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, ps_col_mv, i);
 2414|  68.6k|        if(0 != buf_ret)
  ------------------
  |  Branch (2414:12): [True: 0, False: 68.6k]
  ------------------
 2415|      0|        {
 2416|      0|            ps_dec->i4_error_code = ERROR_BUF_MGR;
 2417|      0|            return ERROR_BUF_MGR;
 2418|      0|        }
 2419|  68.6k|        ps_col_mv++;
 2420|  68.6k|    }
 2421|  24.4k|    return OK;
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
 2422|  24.4k|}
ih264d_unpack_coeff4x4_dc_4x4blk:
 2427|   173k|{
 2428|   173k|    UWORD16 u2_sig_coeff_map = ps_tu_4x4->u2_sig_coeff_map;
 2429|   173k|    WORD32 idx;
 2430|   173k|    WORD16 *pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
 2431|       |
 2432|   437k|    while(u2_sig_coeff_map)
  ------------------
  |  Branch (2432:11): [True: 264k, False: 173k]
  ------------------
 2433|   264k|    {
 2434|   264k|        idx = CLZ(u2_sig_coeff_map);
 2435|       |
 2436|   264k|        idx = 31 - idx;
 2437|   264k|        RESET_BIT(u2_sig_coeff_map,idx);
  ------------------
  |  |  105|   264k|#define RESET_BIT(x, pos) (x) = (x) & ~(1 << pos);
  ------------------
 2438|       |
 2439|   264k|        idx = pu1_inv_scan[idx];
 2440|   264k|        pi2_out_coeff_data[idx] = *pi2_coeff_data++;
 2441|       |
 2442|   264k|    }
 2443|   173k|}

ih264d_parse_hrd_parametres:
   70|  8.67k|{
   71|  8.67k|    UWORD8 u1_index;
   72|  8.67k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
   73|  8.67k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
   74|       |
   75|  8.67k|    ps_hrd->u4_cpb_cnt = 1
   76|  8.67k|                    + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
   77|  8.67k|    if(ps_hrd->u4_cpb_cnt > 31)
  ------------------
  |  Branch (77:8): [True: 357, False: 8.31k]
  ------------------
   78|    357|        return ERROR_INV_SPS_PPS_T;
   79|  8.31k|    ps_hrd->u1_bit_rate_scale = ih264d_get_bits_h264(ps_bitstrm, 4);
   80|  8.31k|    ps_hrd->u1_cpb_size_scale = ih264d_get_bits_h264(ps_bitstrm, 4);
   81|       |
   82|  27.0k|    for(u1_index = 0; u1_index < (UWORD8)ps_hrd->u4_cpb_cnt; u1_index++)
  ------------------
  |  Branch (82:23): [True: 18.7k, False: 8.31k]
  ------------------
   83|  18.7k|    {
   84|  18.7k|        ps_hrd->u4_bit_rate[u1_index] = 1
   85|  18.7k|                        + ih264d_uev(pu4_bitstrm_ofst,
   86|  18.7k|                                     pu4_bitstrm_buf);
   87|  18.7k|        ps_hrd->u4_cpb_size[u1_index] = 1
   88|  18.7k|                        + ih264d_uev(pu4_bitstrm_ofst,
   89|  18.7k|                                     pu4_bitstrm_buf);
   90|  18.7k|        ps_hrd->u1_cbr_flag[u1_index] = ih264d_get_bits_h264(ps_bitstrm, 1);
   91|  18.7k|    }
   92|       |
   93|  8.31k|    ps_hrd->u1_initial_cpb_removal_delay = 1
   94|  8.31k|                    + ih264d_get_bits_h264(ps_bitstrm, 5);
   95|  8.31k|    ps_hrd->u1_cpb_removal_delay_length = 1
   96|  8.31k|                    + ih264d_get_bits_h264(ps_bitstrm, 5);
   97|  8.31k|    ps_hrd->u1_dpb_output_delay_length = 1
   98|  8.31k|                    + ih264d_get_bits_h264(ps_bitstrm, 5);
   99|  8.31k|    ps_hrd->u1_time_offset_length = ih264d_get_bits_h264(ps_bitstrm, 5);
  100|       |
  101|  8.31k|    return OK;
  ------------------
  |  |  114|  8.31k|#define OK        0
  ------------------
  102|  8.67k|}
ih264d_parse_vui_parametres:
  127|  10.7k|{
  128|  10.7k|    UWORD8 u4_bits;
  129|  10.7k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  130|  10.7k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  131|  10.7k|    WORD32 ret;
  132|       |
  133|  10.7k|    u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  134|  10.7k|    if(u4_bits)
  ------------------
  |  Branch (134:8): [True: 8.35k, False: 2.42k]
  ------------------
  135|  8.35k|    {
  136|  8.35k|        u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8);
  137|  8.35k|        ps_vu4->u1_aspect_ratio_idc = (UWORD8)u4_bits;
  138|  8.35k|        if(VUI_EXTENDED_SAR == u4_bits)
  ------------------
  |  |   46|  8.35k|#define VUI_EXTENDED_SAR    255
  ------------------
  |  Branch (138:12): [True: 3.94k, False: 4.40k]
  ------------------
  139|  3.94k|        {
  140|  3.94k|            ps_vu4->u2_sar_width = ih264d_get_bits_h264(ps_bitstrm, 16);
  141|  3.94k|            ps_vu4->u2_sar_height = ih264d_get_bits_h264(ps_bitstrm, 16);
  142|  3.94k|        }
  143|  8.35k|    }
  144|       |
  145|  10.7k|    u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  146|  10.7k|    if(u4_bits)
  ------------------
  |  Branch (146:8): [True: 2.84k, False: 7.93k]
  ------------------
  147|  2.84k|    {
  148|  2.84k|        ps_vu4->u1_overscan_appropriate_flag = ih264d_get_bits_h264(
  149|  2.84k|                        ps_bitstrm, 1);
  150|  2.84k|    }
  151|  10.7k|    u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  152|       |    /* Initialize to unspecified (5 for video_format and
  153|       |       2 for colour_primaries, tfr_chars, matrix_coeffs  */
  154|  10.7k|    ps_vu4->u1_video_format = 5;
  155|  10.7k|    ps_vu4->u1_video_full_range_flag = 0;
  156|  10.7k|    ps_vu4->u1_colour_primaries = 2;
  157|  10.7k|    ps_vu4->u1_tfr_chars = 2;
  158|  10.7k|    ps_vu4->u1_matrix_coeffs = 2;
  159|       |
  160|  10.7k|    if(u4_bits)
  ------------------
  |  Branch (160:8): [True: 3.09k, False: 7.68k]
  ------------------
  161|  3.09k|    {
  162|  3.09k|        ps_vu4->u1_video_format = ih264d_get_bits_h264(ps_bitstrm, 3);
  163|  3.09k|        ps_vu4->u1_video_full_range_flag = ih264d_get_bits_h264(ps_bitstrm,
  164|  3.09k|                                                                1);
  165|  3.09k|        u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  166|  3.09k|        if(u4_bits)
  ------------------
  |  Branch (166:12): [True: 1.65k, False: 1.44k]
  ------------------
  167|  1.65k|        {
  168|  1.65k|            ps_vu4->u1_colour_primaries = ih264d_get_bits_h264(ps_bitstrm,
  169|  1.65k|                                                               8);
  170|  1.65k|            ps_vu4->u1_tfr_chars = ih264d_get_bits_h264(ps_bitstrm, 8);
  171|  1.65k|            ps_vu4->u1_matrix_coeffs = ih264d_get_bits_h264(ps_bitstrm, 8);
  172|  1.65k|        }
  173|  3.09k|    }
  174|       |
  175|  10.7k|    u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  176|  10.7k|    if(u4_bits)
  ------------------
  |  Branch (176:8): [True: 2.21k, False: 8.56k]
  ------------------
  177|  2.21k|    {
  178|  2.21k|        ps_vu4->u1_cr_top_field = ih264d_uev(pu4_bitstrm_ofst,
  179|  2.21k|                                             pu4_bitstrm_buf);
  180|  2.21k|        ps_vu4->u1_cr_bottom_field = ih264d_uev(pu4_bitstrm_ofst,
  181|  2.21k|                                                pu4_bitstrm_buf);
  182|  2.21k|    }
  183|       |
  184|  10.7k|    u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  185|  10.7k|    if(u4_bits)
  ------------------
  |  Branch (185:8): [True: 1.64k, False: 9.13k]
  ------------------
  186|  1.64k|    {
  187|  1.64k|        ps_vu4->u4_num_units_in_tick = ih264d_get_bits_h264(ps_bitstrm, 32);
  188|  1.64k|        ps_vu4->u4_time_scale = ih264d_get_bits_h264(ps_bitstrm, 32);
  189|  1.64k|        ps_vu4->u1_fixed_frame_rate_flag = ih264d_get_bits_h264(ps_bitstrm,
  190|  1.64k|                                                                1);
  191|  1.64k|    }
  192|       |
  193|  10.7k|    u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  194|  10.7k|    ps_vu4->u1_nal_hrd_params_present = u4_bits;
  195|  10.7k|    if(u4_bits)
  ------------------
  |  Branch (195:8): [True: 1.80k, False: 8.97k]
  ------------------
  196|  1.80k|    {
  197|  1.80k|        ret = ih264d_parse_hrd_parametres(&ps_vu4->s_nal_hrd, ps_bitstrm);
  198|  1.80k|        if(ret != OK)
  ------------------
  |  |  114|  1.80k|#define OK        0
  ------------------
  |  Branch (198:12): [True: 105, False: 1.69k]
  ------------------
  199|    105|            return ret;
  200|  1.80k|    }
  201|  10.6k|    u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1);
  202|  10.6k|    ps_vu4->u1_vcl_hrd_params_present = u4_bits;
  203|  10.6k|    if(u4_bits)
  ------------------
  |  Branch (203:8): [True: 1.77k, False: 8.89k]
  ------------------
  204|  1.77k|    {
  205|  1.77k|        ret = ih264d_parse_hrd_parametres(&ps_vu4->s_vcl_hrd, ps_bitstrm);
  206|  1.77k|        if(ret != OK)
  ------------------
  |  |  114|  1.77k|#define OK        0
  ------------------
  |  Branch (206:12): [True: 116, False: 1.66k]
  ------------------
  207|    116|            return ret;
  208|  1.77k|    }
  209|       |
  210|  10.5k|    if(ps_vu4->u1_nal_hrd_params_present || u4_bits)
  ------------------
  |  Branch (210:8): [True: 1.68k, False: 8.87k]
  |  Branch (210:45): [True: 602, False: 8.27k]
  ------------------
  211|  2.28k|    {
  212|  2.28k|        ps_vu4->u1_low_delay_hrd_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
  213|  2.28k|    }
  214|  10.5k|    ps_vu4->u1_pic_struct_present_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
  215|       |
  216|  10.5k|    ps_vu4->u1_bitstream_restriction_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
  217|       |
  218|  10.5k|    if(ps_vu4->u1_bitstream_restriction_flag)
  ------------------
  |  Branch (218:8): [True: 1.96k, False: 8.59k]
  ------------------
  219|  1.96k|    {
  220|  1.96k|        ps_vu4->u1_mv_over_pic_boundaries_flag = ih264d_get_bits_h264(
  221|  1.96k|                        ps_bitstrm, 1);
  222|  1.96k|        ps_vu4->u4_max_bytes_per_pic_denom = ih264d_uev(pu4_bitstrm_ofst,
  223|  1.96k|                                                        pu4_bitstrm_buf);
  224|  1.96k|        ps_vu4->u4_max_bits_per_mb_denom = ih264d_uev(pu4_bitstrm_ofst,
  225|  1.96k|                                                      pu4_bitstrm_buf);
  226|  1.96k|        ps_vu4->u4_log2_max_mv_length_horz = ih264d_uev(pu4_bitstrm_ofst,
  227|  1.96k|                                                        pu4_bitstrm_buf);
  228|  1.96k|        ps_vu4->u4_log2_max_mv_length_vert = ih264d_uev(pu4_bitstrm_ofst,
  229|  1.96k|                                                        pu4_bitstrm_buf);
  230|  1.96k|        ps_vu4->u4_num_reorder_frames = ih264d_uev(pu4_bitstrm_ofst,
  231|  1.96k|                                                   pu4_bitstrm_buf);
  232|  1.96k|        ps_vu4->u4_max_dec_frame_buffering = ih264d_uev(pu4_bitstrm_ofst,
  233|  1.96k|                                                        pu4_bitstrm_buf);
  234|  1.96k|        if((ps_vu4->u4_max_dec_frame_buffering > (H264_MAX_REF_PICS * 2)) ||
  ------------------
  |  |  534|  1.96k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (234:12): [True: 130, False: 1.83k]
  ------------------
  235|  1.83k|           (ps_vu4->u4_num_reorder_frames > ps_vu4->u4_max_dec_frame_buffering))
  ------------------
  |  Branch (235:12): [True: 134, False: 1.69k]
  ------------------
  236|    264|        {
  237|    264|            return ERROR_INV_SPS_PPS_T;
  238|    264|        }
  239|  1.96k|    }
  240|  8.59k|    else
  241|  8.59k|    {
  242|       |        /* Setting this to a large value if not present */
  243|  8.59k|        ps_vu4->u4_num_reorder_frames = 64;
  244|  8.59k|        ps_vu4->u4_max_dec_frame_buffering = 64;
  245|  8.59k|    }
  246|       |
  247|  10.2k|    return OK;
  ------------------
  |  |  114|  10.2k|#define OK        0
  ------------------
  248|  10.5k|}

isvcd_set_processor:
  979|  22.5k|{
  980|  22.5k|    isvcd_ctl_set_processor_ip_t *ps_ip;
  981|  22.5k|    isvcd_ctl_set_processor_op_t *ps_op;
  982|  22.5k|    UWORD8 u1_layer_id;
  983|  22.5k|    svc_dec_lyr_struct_t *ps_codec;
  984|  22.5k|    svc_dec_ctxt_t *ps_svcd_ctxt;
  985|  22.5k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
  986|       |
  987|  22.5k|    ps_ip = (isvcd_ctl_set_processor_ip_t *) pv_api_ip;
  988|  22.5k|    ps_op = (isvcd_ctl_set_processor_op_t *) pv_api_op;
  989|       |
  990|  22.5k|    ps_svcd_ctxt->e_processor_arch = (IVD_ARCH_T) ps_ip->u4_arch;
  991|  22.5k|    ps_svcd_ctxt->e_processor_soc = (IVD_SOC_T) ps_ip->u4_soc;
  992|       |
  993|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (993:26): [True: 67.7k, False: 22.5k]
  ------------------
  994|  67.7k|    {
  995|  67.7k|        ps_codec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
  996|  67.7k|        ps_codec->s_dec.e_processor_arch = (IVD_ARCH_T) ps_ip->u4_arch;
  997|  67.7k|        ps_codec->s_dec.e_processor_soc = (IVD_SOC_T) ps_ip->u4_soc;
  998|       |
  999|  67.7k|        isvcd_init_function_ptr(ps_codec);
 1000|  67.7k|    }
 1001|       |
 1002|  22.5k|    ps_op->u4_error_code = 0;
 1003|  22.5k|    return IV_SUCCESS;
 1004|  22.5k|}
isvcd_init_decoder:
 1029|  83.4k|{
 1030|  83.4k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) ps_dec_svc_lyr_params;
 1031|  83.4k|    dec_struct_t *ps_dec;
 1032|  83.4k|    dec_slice_params_t *ps_cur_slice;
 1033|  83.4k|    pocstruct_t *ps_prev_poc, *ps_cur_poc;
 1034|  83.4k|    size_t size;
 1035|  83.4k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 1036|       |
 1037|  83.4k|    size = sizeof(pred_info_t) * 2 * 32;
 1038|  83.4k|    memset(ps_dec->ps_pred, 0, size);
 1039|       |
 1040|  83.4k|    size = sizeof(disp_mgr_t);
 1041|  83.4k|    memset(ps_dec->pv_disp_buf_mgr, 0, size);
 1042|       |
 1043|  83.4k|    size = ih264_buf_mgr_size();
 1044|  83.4k|    memset(ps_dec->pv_pic_buf_mgr, 0, size);
 1045|       |
 1046|  83.4k|    size = sizeof(dec_err_status_t);
 1047|  83.4k|    memset(ps_dec->ps_dec_err_status, 0, size);
 1048|       |
 1049|  83.4k|    size = sizeof(sei);
 1050|  83.4k|    memset(ps_dec->ps_sei, 0, size);
 1051|       |
 1052|  83.4k|    size = sizeof(sei);
 1053|  83.4k|    memset(ps_dec->ps_sei_parse, 0, size);
 1054|       |
 1055|  83.4k|    size = sizeof(dpb_commands_t);
 1056|  83.4k|    memset(ps_dec->ps_dpb_cmds, 0, size);
 1057|       |
 1058|  83.4k|    size = sizeof(dec_bit_stream_t);
 1059|  83.4k|    memset(ps_dec->ps_bitstrm, 0, size);
 1060|       |
 1061|  83.4k|    size = sizeof(dec_slice_params_t);
 1062|  83.4k|    memset(ps_dec->ps_cur_slice, 0, size);
 1063|       |
 1064|  83.4k|    size = MAX(sizeof(dec_seq_params_t), sizeof(dec_pic_params_t));
  ------------------
  |  |   60|  83.4k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 83.4k, Folded]
  |  |  ------------------
  ------------------
 1065|  83.4k|    memset(ps_dec->pv_scratch_sps_pps, 0, size);
 1066|       |
 1067|  83.4k|    size = sizeof(dec_svc_seq_params_t);
 1068|  83.4k|    memset(ps_svc_lyr_dec->pv_scratch_subset_sps, 0, size);
 1069|       |
 1070|  83.4k|    size = sizeof(ctxt_inc_mb_info_t);
 1071|  83.4k|    memset(ps_dec->ps_left_mb_ctxt_info, 0, size);
 1072|       |
 1073|  83.4k|    size = (sizeof(neighbouradd_t) << 2);
 1074|  83.4k|    memset(ps_dec->ps_left_mvpred_addr, 0, size);
 1075|       |
 1076|  83.4k|    size = ih264_buf_mgr_size();
 1077|  83.4k|    memset(ps_dec->pv_mv_buf_mgr, 0, size);
 1078|       |
 1079|       |    /* Free any dynamic buffers that are allocated */
 1080|  83.4k|    isvcd_free_dynamic_bufs(ps_svc_lyr_dec);
 1081|       |
 1082|  83.4k|    isvcd_init_dpb_ref_bufs(ps_dec);
 1083|       |
 1084|  83.4k|    ps_cur_slice = ps_dec->ps_cur_slice;
 1085|  83.4k|    ps_dec->init_done = 0;
 1086|       |
 1087|  83.4k|    ps_dec->u4_num_cores = 1;
 1088|  83.4k|    ps_dec->u2_pic_ht = ps_dec->u2_pic_wd = 0;
 1089|       |
 1090|  83.4k|    ps_dec->u1_separate_parse = DEFAULT_SEPARATE_PARSE;
  ------------------
  |  |  540|  83.4k|#define DEFAULT_SEPARATE_PARSE (H264_DEFAULT_NUM_CORES == 2)? 1 :0
  |  |  ------------------
  |  |  |  |  539|  83.4k|#define H264_DEFAULT_NUM_CORES 1
  |  |  ------------------
  |  |  |  Branch (540:32): [Folded, False: 83.4k]
  |  |  ------------------
  ------------------
 1091|  83.4k|    ps_dec->u4_app_disable_deblk_frm = 0;
 1092|  83.4k|    ps_dec->i4_degrade_type = 0;
 1093|  83.4k|    ps_dec->i4_degrade_pics = 0;
 1094|       |
 1095|       |    /* Initialization of function pointers ih264d_deblock_picture function*/
 1096|  83.4k|    ps_dec->p_DeblockPicture[0] = ih264d_deblock_picture_non_mbaff;
 1097|  83.4k|    ps_dec->p_DeblockPicture[1] = ih264d_deblock_picture_mbaff;
 1098|       |
 1099|  83.4k|    ps_dec->s_cab_dec_env.pv_codec_handle = ps_dec;
 1100|  83.4k|    ps_dec->u4_num_fld_in_frm = 0;
 1101|  83.4k|    ps_dec->ps_dpb_mgr->pv_codec_handle = ps_dec;
 1102|       |
 1103|       |    /* Initialize the sei validity u4_flag with zero indiacting sei is not valid*/
 1104|  83.4k|    ps_dec->ps_sei->u1_is_valid = 0;
 1105|       |
 1106|       |    /* decParams Initializations */
 1107|  83.4k|    ps_dec->ps_cur_pps = NULL;
 1108|  83.4k|    ps_dec->ps_cur_sps = NULL;
 1109|  83.4k|    ps_dec->u1_init_dec_flag = 0;
 1110|  83.4k|    ps_dec->u1_first_slice_in_stream = 1;
 1111|  83.4k|    ps_dec->u1_last_pic_not_decoded = 0;
 1112|  83.4k|    ps_dec->u4_app_disp_width = 0;
 1113|  83.4k|    ps_dec->i4_header_decoded = 0;
 1114|  83.4k|    ps_dec->u4_total_frames_decoded = 0;
 1115|       |
 1116|  83.4k|    ps_dec->i4_error_code = 0;
 1117|  83.4k|    ps_dec->i4_content_type = IV_CONTENTTYPE_NA;
 1118|  83.4k|    ps_dec->ps_cur_slice->u1_mbaff_frame_flag = 0;
 1119|       |
 1120|  83.4k|    ps_dec->ps_dec_err_status->u1_err_flag = ACCEPT_ALL_PICS;
  ------------------
  |  |  601|  83.4k|#define ACCEPT_ALL_PICS   (0x00)
  ------------------
 1121|  83.4k|    ps_dec->ps_dec_err_status->u1_cur_pic_type = PIC_TYPE_UNKNOWN;
  ------------------
  |  |  608|  83.4k|#define PIC_TYPE_UNKNOWN  (0xFF)
  ------------------
 1122|  83.4k|    ps_dec->ps_dec_err_status->u4_frm_sei_sync = SYNC_FRM_DEFAULT;
  ------------------
  |  |  610|  83.4k|#define SYNC_FRM_DEFAULT  (0xFFFFFFFF)
  ------------------
 1123|  83.4k|    ps_dec->ps_dec_err_status->u4_cur_frm = INIT_FRAME;
  ------------------
  |  |  611|  83.4k|#define INIT_FRAME        (0xFFFFFF)
  ------------------
 1124|  83.4k|    ps_dec->ps_dec_err_status->u1_pic_aud_i = PIC_TYPE_UNKNOWN;
  ------------------
  |  |  608|  83.4k|#define PIC_TYPE_UNKNOWN  (0xFF)
  ------------------
 1125|       |
 1126|  83.4k|    ps_dec->u1_pr_sl_type = 0xFF;
 1127|  83.4k|    ps_dec->u2_mbx = 0xffff;
 1128|  83.4k|    ps_dec->u2_mby = 0;
 1129|  83.4k|    ps_dec->u4_total_mbs_coded = 0;
 1130|       |
 1131|       |    /* POC initializations */
 1132|  83.4k|    ps_prev_poc = &ps_dec->s_prev_pic_poc;
 1133|  83.4k|    ps_cur_poc = &ps_dec->s_cur_pic_poc;
 1134|  83.4k|    ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb = 0;
 1135|  83.4k|    ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb = 0;
 1136|  83.4k|    ps_prev_poc->i4_delta_pic_order_cnt_bottom = ps_cur_poc->i4_delta_pic_order_cnt_bottom = 0;
 1137|  83.4k|    ps_prev_poc->i4_delta_pic_order_cnt[0] = ps_cur_poc->i4_delta_pic_order_cnt[0] = 0;
 1138|  83.4k|    ps_prev_poc->i4_delta_pic_order_cnt[1] = ps_cur_poc->i4_delta_pic_order_cnt[1] = 0;
 1139|  83.4k|    ps_prev_poc->u1_mmco_equalto5 = ps_cur_poc->u1_mmco_equalto5 = 0;
 1140|  83.4k|    ps_prev_poc->i4_top_field_order_count = ps_cur_poc->i4_top_field_order_count = 0;
 1141|  83.4k|    ps_prev_poc->i4_bottom_field_order_count = ps_cur_poc->i4_bottom_field_order_count = 0;
 1142|  83.4k|    ps_prev_poc->u1_bot_field = ps_cur_poc->u1_bot_field = 0;
 1143|  83.4k|    ps_prev_poc->u1_mmco_equalto5 = ps_cur_poc->u1_mmco_equalto5 = 0;
 1144|  83.4k|    ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst = 0;
 1145|  83.4k|    ps_cur_slice->u1_mmco_equalto5 = 0;
 1146|  83.4k|    ps_cur_slice->u2_frame_num = 0;
 1147|       |
 1148|  83.4k|    ps_dec->i4_max_poc = 0;
 1149|  83.4k|    ps_dec->i4_prev_max_display_seq = 0;
 1150|  83.4k|    ps_dec->u4_recon_mb_grp = 4;
 1151|  83.4k|    ps_dec->i4_reorder_depth = -1;
 1152|       |
 1153|       |    /* Field PIC initializations */
 1154|  83.4k|    ps_dec->u1_second_field = 0;
 1155|  83.4k|    ps_dec->s_prev_seq_params.u1_eoseq_pending = 0;
 1156|       |
 1157|       |    /* Set the cropping parameters as zero */
 1158|  83.4k|    ps_dec->u2_crop_offset_y = 0;
 1159|  83.4k|    ps_dec->u2_crop_offset_uv = 0;
 1160|       |
 1161|       |    /* The Initial Frame Rate Info is not Present */
 1162|  83.4k|    ps_dec->i4_vui_frame_rate = -1;
 1163|  83.4k|    ps_dec->i4_pic_type = NA_SLICE;
  ------------------
  |  |  367|  83.4k|#define NA_SLICE -1
  ------------------
 1164|  83.4k|    ps_dec->i4_frametype = IV_NA_FRAME;
 1165|  83.4k|    ps_dec->i4_content_type = IV_CONTENTTYPE_NA;
 1166|       |
 1167|  83.4k|    ps_dec->u1_res_changed = 0;
 1168|       |
 1169|  83.4k|    ps_dec->u1_frame_decoded_flag = 0;
 1170|       |
 1171|       |    /* Set the default frame seek mask mode */
 1172|  83.4k|    ps_dec->u4_skip_frm_mask = SKIP_NONE;
  ------------------
  |  |  375|  83.4k|#define SKIP_NONE  (0x0)
  ------------------
 1173|       |
 1174|       |    /********************************************************/
 1175|       |    /* Initialize CAVLC residual decoding function pointers */
 1176|       |    /********************************************************/
 1177|  83.4k|    ps_dec->pf_cavlc_4x4res_block[0] = ih264d_cavlc_4x4res_block_totalcoeff_1;
 1178|  83.4k|    ps_dec->pf_cavlc_4x4res_block[1] = ih264d_cavlc_4x4res_block_totalcoeff_2to10;
 1179|  83.4k|    ps_dec->pf_cavlc_4x4res_block[2] = ih264d_cavlc_4x4res_block_totalcoeff_11to16;
 1180|       |
 1181|  83.4k|    ps_dec->pf_cavlc_parse4x4coeff[0] = ih264d_cavlc_parse4x4coeff_n0to7;
 1182|  83.4k|    ps_dec->pf_cavlc_parse4x4coeff[1] = ih264d_cavlc_parse4x4coeff_n8;
 1183|       |
 1184|  83.4k|    ps_dec->pf_cavlc_parse_8x8block[0] = ih264d_cavlc_parse_8x8block_none_available;
 1185|  83.4k|    ps_dec->pf_cavlc_parse_8x8block[1] = ih264d_cavlc_parse_8x8block_left_available;
 1186|  83.4k|    ps_dec->pf_cavlc_parse_8x8block[2] = ih264d_cavlc_parse_8x8block_top_available;
 1187|  83.4k|    ps_dec->pf_cavlc_parse_8x8block[3] = ih264d_cavlc_parse_8x8block_both_available;
 1188|       |
 1189|       |    /***************************************************************************/
 1190|       |    /* Initialize Bs calculation function pointers for P and B, 16x16/non16x16 */
 1191|       |    /***************************************************************************/
 1192|  83.4k|    ps_dec->pf_fill_bs1[0][0] = ih264d_fill_bs1_16x16mb_pslice;
 1193|  83.4k|    ps_dec->pf_fill_bs1[0][1] = ih264d_fill_bs1_non16x16mb_pslice;
 1194|       |
 1195|  83.4k|    ps_dec->pf_fill_bs1[1][0] = ih264d_fill_bs1_16x16mb_bslice;
 1196|  83.4k|    ps_dec->pf_fill_bs1[1][1] = ih264d_fill_bs1_non16x16mb_bslice;
 1197|       |
 1198|  83.4k|    ps_dec->pf_fill_bs_xtra_left_edge[0] = ih264d_fill_bs_xtra_left_edge_cur_frm;
 1199|  83.4k|    ps_dec->pf_fill_bs_xtra_left_edge[1] = ih264d_fill_bs_xtra_left_edge_cur_fld;
 1200|       |
 1201|       |    /* Initialize Reference Pic Buffers */
 1202|  83.4k|    ih264d_init_ref_bufs(ps_dec->ps_dpb_mgr);
 1203|       |
 1204|  83.4k|    ps_dec->u2_prv_frame_num = 0;
 1205|  83.4k|    ps_dec->u1_top_bottom_decoded = 0;
 1206|  83.4k|    ps_dec->u1_dangling_field = 0;
 1207|       |
 1208|  83.4k|    ps_dec->s_cab_dec_env.cabac_table = gau4_ih264d_cabac_table;
 1209|       |
 1210|  83.4k|    ps_dec->pu1_left_mv_ctxt_inc = ps_dec->u1_left_mv_ctxt_inc_arr[0];
 1211|  83.4k|    ps_dec->pi1_left_ref_idx_ctxt_inc = &ps_dec->i1_left_ref_idx_ctx_inc_arr[0][0];
 1212|  83.4k|    ps_dec->pu1_left_yuv_dc_csbp = &ps_dec->u1_yuv_dc_csbp_topmb;
 1213|       |
 1214|       |    /* ! */
 1215|       |    /* Initializing flush frame u4_flag */
 1216|  83.4k|    ps_dec->u1_flushfrm = 0;
 1217|       |
 1218|  83.4k|    ps_dec->s_cab_dec_env.pv_codec_handle = (void *) ps_dec;
 1219|  83.4k|    ps_dec->ps_bitstrm->pv_codec_handle = (void *) ps_dec;
 1220|  83.4k|    ps_dec->ps_cur_slice->pv_codec_handle = (void *) ps_dec;
 1221|  83.4k|    ps_dec->ps_dpb_mgr->pv_codec_handle = (void *) ps_dec;
 1222|       |
 1223|  83.4k|    memset(ps_dec->disp_bufs, 0, (MAX_DISP_BUFS_NEW) * sizeof(disp_buf_t));
  ------------------
  |  |   76|  83.4k|#define MAX_DISP_BUFS_NEW 64
  ------------------
 1224|  83.4k|    memset(ps_dec->u4_disp_buf_mapping, 0, (MAX_DISP_BUFS_NEW) * sizeof(UWORD32));
  ------------------
  |  |   76|  83.4k|#define MAX_DISP_BUFS_NEW 64
  ------------------
 1225|  83.4k|    memset(ps_dec->u4_disp_buf_to_be_freed, 0, (MAX_DISP_BUFS_NEW) * sizeof(UWORD32));
  ------------------
  |  |   76|  83.4k|#define MAX_DISP_BUFS_NEW 64
  ------------------
 1226|  83.4k|    memset(ps_dec->ps_cur_slice, 0, sizeof(dec_slice_params_t));
 1227|       |
 1228|  83.4k|    ih264d_init_arch(ps_dec);
 1229|  83.4k|    isvcd_init_function_ptr(ps_svc_lyr_dec);
 1230|  83.4k|    ps_dec->e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
 1231|  83.4k|    ps_dec->init_done = 1;
 1232|  83.4k|    ps_svc_lyr_dec->u1_layer_identifier = BASE_LAYER;
  ------------------
  |  |  108|  83.4k|#define BASE_LAYER 0
  ------------------
 1233|  83.4k|}
isvcd_nal_parse_ctxt_free:
 1257|  22.5k|{
 1258|  22.5k|    dec_struct_t *ps_dec;
 1259|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 1260|  22.5k|    void (*pf_aligned_free)(void *pv_mem_ctxt, void *pv_buf);
 1261|  22.5k|    void *pv_mem_ctxt;
 1262|  22.5k|    nal_parse_ctxt_t *ps_ctxt;
 1263|  22.5k|    ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[0];
 1264|  22.5k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 1265|  22.5k|    pf_aligned_free = ps_dec->pf_aligned_free;
 1266|       |
 1267|  22.5k|    pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 1268|  22.5k|    ps_ctxt = (nal_parse_ctxt_t *) ps_svcd_ctxt->pv_nal_parse_ctxt;
 1269|       |
 1270|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->s_dqid_ctxt.ps_dqid_node);
 1271|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pv_nal_header_buf);
 1272|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pv_nal_unit);
 1273|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_vcl_nal_buff);
 1274|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_non_vcl_nal_buff);
 1275|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_nal_parse_ctxt);
 1276|  22.5k|}
isvcd_residual_resample_ctxt_free:
 1299|  22.5k|{
 1300|  22.5k|    dec_struct_t *ps_dec;
 1301|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 1302|  22.5k|    void (*pf_aligned_free)(void *pv_mem_ctxt, void *pv_buf);
 1303|  22.5k|    void *pv_mem_ctxt;
 1304|  22.5k|    residual_sampling_ctxt_t *ps_ctxt;
 1305|  22.5k|    ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[0];
 1306|  22.5k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 1307|  22.5k|    pf_aligned_free = ps_dec->pf_aligned_free;
 1308|       |
 1309|  22.5k|    pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 1310|  22.5k|    ps_ctxt = (residual_sampling_ctxt_t *) ps_svcd_ctxt->pv_residual_sample_ctxt;
 1311|       |
 1312|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pi2_refarray_buffer);
 1313|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pu1_ref_x_ptr_incr);
 1314|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_x_offset_length);
 1315|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_y_offset_length);
 1316|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_x_pos_phase);
 1317|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_y_pos_phase);
 1318|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_residual_sample_ctxt);
 1319|  22.5k|}
isvcd_intra_resample_ctxt_free:
 1342|  22.5k|{
 1343|  22.5k|    dec_struct_t *ps_dec;
 1344|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 1345|  22.5k|    void (*pf_aligned_free)(void *pv_mem_ctxt, void *pv_buf);
 1346|  22.5k|    void *pv_mem_ctxt;
 1347|  22.5k|    intra_sampling_ctxt_t *ps_ctxt;
 1348|  22.5k|    ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[0];
 1349|  22.5k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 1350|  22.5k|    pf_aligned_free = ps_dec->pf_aligned_free;
 1351|       |
 1352|  22.5k|    pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 1353|  22.5k|    ps_ctxt = (intra_sampling_ctxt_t *) ps_svcd_ctxt->pv_intra_sample_ctxt;
 1354|       |
 1355|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pu1_refarray_buffer);
 1356|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pu1_refarray_cb);
 1357|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pu1_refarray_cr);
 1358|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->pi4_temp_interpolation_buffer);
 1359|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_x_offset_length);
 1360|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_y_offset_length);
 1361|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_x_min_max);
 1362|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_y_min_max);
 1363|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_x_pos_phase);
 1364|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_y_pos_phase);
 1365|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.pi2_xd_index);
 1366|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.pi2_yd_index);
 1367|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.pi2_ya_index);
 1368|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_seg_lookup_horz);
 1369|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.ps_seg_lookup_vert);
 1370|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_ctxt->as_res_lyrs[0].s_luma_map_ctxt.pu1_refarray_x_idx);
 1371|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_intra_sample_ctxt);
 1372|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_ii_pred_ctxt);
 1373|  22.5k|}
isvcd_mode_mv_resample_ctxt_free:
 1395|  22.5k|{
 1396|  22.5k|    dec_struct_t *ps_dec;
 1397|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 1398|  22.5k|    void (*pf_aligned_free)(void *pv_mem_ctxt, void *pv_buf);
 1399|  22.5k|    void *pv_mem_ctxt;
 1400|  22.5k|    mode_motion_ctxt_t *ps_mode_motion;
 1401|       |
 1402|  22.5k|    ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[0];
 1403|  22.5k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 1404|  22.5k|    pf_aligned_free = ps_dec->pf_aligned_free;
 1405|       |
 1406|  22.5k|    pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 1407|  22.5k|    ps_mode_motion = (mode_motion_ctxt_t *) ps_svcd_ctxt->pv_mode_mv_sample_ctxt;
 1408|       |
 1409|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_mode_motion->ps_motion_pred_struct);
 1410|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_mode_motion->as_res_lyr_mem[0].pi2_ref_loc_x);
 1411|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_mode_motion->as_res_lyr_mem[0].pi2_ref_loc_y);
 1412|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_ref_lyr_offset);
 1413|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->pv_mode_mv_sample_ctxt);
 1414|  22.5k|}
isvcd_free_static_bufs:
 1437|  22.5k|{
 1438|  22.5k|    dec_struct_t *ps_dec;
 1439|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 1440|       |
 1441|  22.5k|    UWORD8 u1_layer_id;
 1442|  22.5k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 1443|       |
 1444|  22.5k|    void (*pf_aligned_free)(void *pv_mem_ctxt, void *pv_buf);
 1445|  22.5k|    void *pv_mem_ctxt;
 1446|       |
 1447|  22.5k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
 1448|       |
 1449|  22.5k|    isvcd_intra_resample_ctxt_free(ps_svcd_ctxt);
 1450|  22.5k|    isvcd_residual_resample_ctxt_free(ps_svcd_ctxt);
 1451|  22.5k|    isvcd_mode_mv_resample_ctxt_free(ps_svcd_ctxt);
 1452|  22.5k|    isvcd_nal_parse_ctxt_free(ps_svcd_ctxt);
 1453|       |
 1454|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1454:26): [True: 67.7k, False: 22.5k]
  ------------------
 1455|  67.7k|    {
 1456|  67.7k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 1457|  67.7k|        ps_dec = &ps_svc_lyr_dec->s_dec;
 1458|  67.7k|        pf_aligned_free = ps_dec->pf_aligned_free;
 1459|  67.7k|        pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 1460|       |
 1461|       |#ifdef KEEP_THREADS_ACTIVE
 1462|       |        /* Wait for threads */
 1463|       |        ps_dec->i4_break_threads = 1;
 1464|       |        if(ps_dec->u4_dec_thread_created)
 1465|       |        {
 1466|       |            ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
 1467|       |
 1468|       |            ps_dec->ai4_process_start[0] = PROC_START;
 1469|       |
 1470|       |            ithread_cond_signal(ps_dec->apv_proc_start_condition[0]);
 1471|       |
 1472|       |            ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
 1473|       |
 1474|       |            ithread_join(ps_dec->pv_dec_thread_handle, NULL);
 1475|       |
 1476|       |            ps_dec->u4_dec_thread_created = 0;
 1477|       |        }
 1478|       |
 1479|       |        if(ps_dec->u4_bs_deblk_thread_created)
 1480|       |        {
 1481|       |            ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
 1482|       |
 1483|       |            ps_dec->ai4_process_start[1] = PROC_START;
 1484|       |
 1485|       |            ithread_cond_signal(ps_dec->apv_proc_start_condition[1]);
 1486|       |
 1487|       |            ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
 1488|       |
 1489|       |            ithread_join(ps_dec->pv_bs_deblk_thread_handle, NULL);
 1490|       |
 1491|       |            ps_dec->u4_bs_deblk_thread_created = 0;
 1492|       |        }
 1493|       |
 1494|       |        // destroy mutex and condition variable for both the threads
 1495|       |        // 1. ih264d_decode_picture_thread
 1496|       |        // 2. ih264d_recon_deblk_thread
 1497|       |        {
 1498|       |            UWORD32 i;
 1499|       |            for(i = 0; i < 2; i++)
 1500|       |            {
 1501|       |                ithread_cond_destroy(ps_dec->apv_proc_start_condition[i]);
 1502|       |                ithread_cond_destroy(ps_dec->apv_proc_done_condition[i]);
 1503|       |
 1504|       |                ithread_mutex_destroy(ps_dec->apv_proc_start_mutex[i]);
 1505|       |                ithread_mutex_destroy(ps_dec->apv_proc_done_mutex[i]);
 1506|       |            }
 1507|       |        }
 1508|       |        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->apv_proc_start_mutex[0]);
 1509|       |        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->apv_proc_start_condition[0]);
 1510|       |#endif
 1511|  67.7k|        if(0 == u1_layer_id)
  ------------------
  |  Branch (1511:12): [True: 22.5k, False: 45.1k]
  ------------------
 1512|  22.5k|        {
 1513|  22.5k|            UWORD8 u1_sps_ctr;
 1514|  22.5k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_sps);
  ------------------
  |  |   43|  22.5k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  22.5k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 22.5k, False: 0]
  |  |  ------------------
  ------------------
 1515|  1.46M|            for(u1_sps_ctr = 0; u1_sps_ctr < (2 * MAX_NUM_SEQ_PARAMS); u1_sps_ctr++)
  ------------------
  |  |  521|  1.46M|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  |  Branch (1515:33): [True: 1.44M, False: 22.5k]
  ------------------
 1516|  1.44M|            {
 1517|  1.44M|                if(NULL != ps_svcd_ctxt->ps_subset_sps[u1_sps_ctr].s_sps_svc_ext.ps_svc_vui_ext)
  ------------------
  |  Branch (1517:20): [True: 391, False: 1.44M]
  ------------------
 1518|    391|                {
 1519|    391|                    PS_DEC_ALIGNED_FREE(
  ------------------
  |  |   43|    391|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|    391|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 391, False: 0]
  |  |  ------------------
  ------------------
 1520|    391|                        ps_dec,
 1521|    391|                        ps_svcd_ctxt->ps_subset_sps[u1_sps_ctr].s_sps_svc_ext.ps_svc_vui_ext);
 1522|    391|                }
 1523|  1.44M|            }
 1524|  22.5k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->ps_subset_sps);
  ------------------
  |  |   43|  22.5k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  22.5k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 22.5k, False: 0]
  |  |  ------------------
  ------------------
 1525|  22.5k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_pps);
  ------------------
  |  |   43|  22.5k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  22.5k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 22.5k, False: 0]
  |  |  ------------------
  ------------------
 1526|  22.5k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_sei);
  ------------------
  |  |   43|  22.5k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  22.5k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 22.5k, False: 0]
  |  |  ------------------
  ------------------
 1527|  22.5k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_sei_parse);
  ------------------
  |  |   43|  22.5k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  22.5k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 22.5k, False: 0]
  |  |  ------------------
  ------------------
 1528|  22.5k|        }
 1529|       |
 1530|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pv_dec_thread_handle);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1531|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pv_bs_deblk_thread_handle);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1532|       |
 1533|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_dpb_mgr);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1534|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_pred);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1535|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pv_disp_buf_mgr);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1536|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_pic_buf_base);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1537|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_dec_err_status);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1538|       |
 1539|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_dpb_cmds);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1540|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_bitstrm);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1541|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->ps_nal_svc_ext);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1542|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_cur_slice);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1543|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pv_scratch_sps_pps);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1544|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->pv_scratch_subset_sps);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1545|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_bits_buf_static);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1546|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ppv_map_ref_idx_to_poc_base);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1547|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->p_cabac_ctxt_table_t);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1548|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_left_mb_ctxt_info);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1549|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_ref_buff_base);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1550|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pi2_pred1);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1551|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_temp_mc_buffer);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1552|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu1_init_dpb_base);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1553|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu4_mbaff_wt_mat);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1554|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pu4_wts_ofsts_mat);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1555|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_left_mvpred_addr);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1556|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_col_mv_base);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1557|  67.7k|        PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1558|       |
 1559|  67.7k|        if(NULL != ps_dec->pv_pic_buf_mgr)
  ------------------
  |  Branch (1559:12): [True: 67.7k, False: 0]
  ------------------
 1560|  67.7k|        {
 1561|  67.7k|            if(u1_layer_id < ps_svcd_ctxt->u1_prev_num_res_layers)
  ------------------
  |  Branch (1561:16): [True: 37.3k, False: 30.3k]
  ------------------
 1562|  37.3k|            {
 1563|  37.3k|                if(((buf_mgr_t *) ps_dec->pv_pic_buf_mgr)->pv_mutex != NULL)
  ------------------
  |  Branch (1563:20): [True: 23.1k, False: 14.1k]
  ------------------
 1564|  23.1k|                    ih264_buf_mgr_free(ps_dec->pv_pic_buf_mgr);
 1565|  37.3k|            }
 1566|  67.7k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pv_pic_buf_mgr);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1567|  67.7k|        }
 1568|  67.7k|        if(NULL != ps_dec->pv_mv_buf_mgr)
  ------------------
  |  Branch (1568:12): [True: 67.7k, False: 0]
  ------------------
 1569|  67.7k|        {
 1570|  67.7k|            if(u1_layer_id < ps_svcd_ctxt->u1_prev_num_res_layers)
  ------------------
  |  Branch (1570:16): [True: 37.3k, False: 30.3k]
  ------------------
 1571|  37.3k|            {
 1572|  37.3k|                if(((buf_mgr_t *) ps_dec->pv_mv_buf_mgr)->pv_mutex != NULL)
  ------------------
  |  Branch (1572:20): [True: 23.1k, False: 14.1k]
  ------------------
 1573|  23.1k|                    ih264_buf_mgr_free(ps_dec->pv_mv_buf_mgr);
 1574|  37.3k|            }
 1575|  67.7k|            PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pv_mv_buf_mgr);
  ------------------
  |  |   43|  67.7k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|  67.7k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 67.7k, False: 0]
  |  |  ------------------
  ------------------
 1576|  67.7k|        }
 1577|  67.7k|    }
 1578|       |
 1579|  22.5k|    pf_aligned_free(pv_mem_ctxt, ps_svcd_ctxt->ps_svc_dec_lyr);
 1580|  22.5k|    pf_aligned_free(pv_mem_ctxt, dec_hdl->pv_codec_handle);
 1581|       |
 1582|  22.5k|    if(dec_hdl)
  ------------------
  |  Branch (1582:8): [True: 22.5k, False: 0]
  ------------------
 1583|  22.5k|    {
 1584|  22.5k|        pf_aligned_free(pv_mem_ctxt, dec_hdl);
 1585|  22.5k|    }
 1586|       |
 1587|  22.5k|    return IV_SUCCESS;
 1588|  22.5k|}
isvcd_nal_parse_ctxt_create:
 1614|  22.5k|{
 1615|  22.5k|    isvcd_create_ip_t *ps_create_ip;
 1616|  22.5k|    void *pv_buf;
 1617|  22.5k|    void *(*pf_aligned_alloc)(void *pv_mem_ctxt, WORD32 alignment, WORD32 size);
 1618|  22.5k|    void *pv_mem_ctxt;
 1619|  22.5k|    WORD32 size;
 1620|  22.5k|    nal_parse_ctxt_t *ps_nal_parse_ctxt;
 1621|  22.5k|    UWORD8 *pu1_ptr;
 1622|  22.5k|    UNUSED(pv_api_op);
  ------------------
  |  |   45|  22.5k|#define UNUSED(x) ((void)(x))
  ------------------
 1623|       |
 1624|  22.5k|    ps_create_ip = (isvcd_create_ip_t *) pv_api_ip;
 1625|       |
 1626|  22.5k|    pf_aligned_alloc = ps_create_ip->s_ivd_create_ip_t.pf_aligned_alloc;
 1627|  22.5k|    pv_mem_ctxt = ps_create_ip->s_ivd_create_ip_t.pv_mem_ctxt;
 1628|       |
 1629|       |    /*-----------------------------------------------------------------------*/
 1630|       |    /* Handle                                                                */
 1631|       |    /*-----------------------------------------------------------------------*/
 1632|  22.5k|    size = sizeof(nal_parse_ctxt_t);
 1633|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1634|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1635|  22.5k|    memset(pv_buf, 0, size);
 1636|  22.5k|    ps_nal_parse_ctxt = pv_buf;
 1637|       |
 1638|       |    /* set the lowest dqid to -1 */
 1639|  22.5k|    ps_nal_parse_ctxt->i4_prev_dq_id = -1;
 1640|       |
 1641|       |    /*-----------------------------------------------------------------------*/
 1642|       |    /* DQID list buffer and initialization of vcl node buffer context        */
 1643|       |    /*-----------------------------------------------------------------------*/
 1644|  22.5k|    {
 1645|  22.5k|        WORD32 i4_lyr_idx;
 1646|  22.5k|        WORD32 i4_max_num_lyrs;
 1647|  22.5k|        vcl_node_t *ps_vcl_node;
 1648|  22.5k|        dqid_node_t *ps_dqid_node;
 1649|  22.5k|        dqid_ctxt_t *ps_dqid_ctxt;
 1650|       |
 1651|  22.5k|        size = sizeof(vcl_node_t);
 1652|  22.5k|        size += sizeof(dqid_node_t);
 1653|  22.5k|        size *= MAX_NUM_RES_LYRS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1654|       |
 1655|  22.5k|        ps_dqid_ctxt = &ps_nal_parse_ctxt->s_dqid_ctxt;
 1656|       |
 1657|  22.5k|        ps_dqid_ctxt->i4_max_num_lyrs = MAX_NUM_RES_LYRS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1658|       |
 1659|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1660|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1661|  22.5k|        memset(pv_buf, 0, size);
 1662|       |
 1663|  22.5k|        ps_dqid_ctxt->ps_dqid_node = pv_buf;
 1664|  22.5k|        ps_dqid_node = ps_dqid_ctxt->ps_dqid_node;
 1665|       |
 1666|  22.5k|        i4_max_num_lyrs = ps_dqid_ctxt->i4_max_num_lyrs;
 1667|       |
 1668|  22.5k|        pu1_ptr = pv_buf;
 1669|  22.5k|        pu1_ptr += sizeof(dqid_node_t) * i4_max_num_lyrs;
 1670|  22.5k|        ps_vcl_node = (vcl_node_t *) pu1_ptr;
 1671|       |
 1672|  90.3k|        for(i4_lyr_idx = 0; i4_lyr_idx < i4_max_num_lyrs; i4_lyr_idx++)
  ------------------
  |  Branch (1672:29): [True: 67.7k, False: 22.5k]
  ------------------
 1673|  67.7k|        {
 1674|  67.7k|            ps_dqid_node->ps_vcl_node = ps_vcl_node;
 1675|       |
 1676|       |            /* Loop updates */
 1677|  67.7k|            ps_vcl_node += 1;
 1678|  67.7k|            ps_dqid_node += 1;
 1679|  67.7k|        } /* Loop over all the layers */
 1680|  22.5k|    }
 1681|       |
 1682|       |    /*-----------------------------------------------------------------------*/
 1683|       |    /* Common memory                                                         */
 1684|       |    /*-----------------------------------------------------------------------*/
 1685|  22.5k|    size = UP_ALIGN_8(HEADER_BUFFER_LEN_BEFORE_EP);
  ------------------
  |  |   51|  22.5k|#define UP_ALIGN_8(x) (((((UWORD64) x) + 7) >> 3) << 3)
  ------------------
 1686|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1687|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1688|  22.5k|    memset(pv_buf, 0, size);
 1689|  22.5k|    ps_nal_parse_ctxt->pv_nal_header_buf = (void *) pv_buf;
 1690|       |
 1691|       |    /*-----------------------------------------------------------------------*/
 1692|       |    /* Layer params memory                                                   */
 1693|       |    /*-----------------------------------------------------------------------*/
 1694|  22.5k|    size = sizeof(nal_unit_t);
 1695|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1696|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1697|  22.5k|    memset(pv_buf, 0, size);
 1698|  22.5k|    ps_nal_parse_ctxt->pv_nal_unit = pv_buf;
 1699|       |
 1700|  22.5k|    size = MAX_VCL_NAL_BUFF_SIZE * sizeof(UWORD8);
  ------------------
  |  |   69|  22.5k|#define MAX_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
 1701|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1702|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1703|  22.5k|    memset(pv_buf, 0, size);
 1704|  22.5k|    ps_svcd_ctxt->pv_vcl_nal_buff = pv_buf;
 1705|       |
 1706|  22.5k|    size = MAX_NON_VCL_NAL_BUFF_SIZE * sizeof(UWORD8);
  ------------------
  |  |   70|  22.5k|#define MAX_NON_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
 1707|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1708|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1709|  22.5k|    memset(pv_buf, 0, size);
 1710|  22.5k|    ps_svcd_ctxt->pv_non_vcl_nal_buff = pv_buf;
 1711|       |
 1712|       |    /*-----------------------------------------------------------------------*/
 1713|       |    /* Registering the seq and pic prms buffer pointers                      */
 1714|       |    /*-----------------------------------------------------------------------*/
 1715|  22.5k|    if(NULL == ps_svcd_ctxt->ps_sps || NULL == ps_svcd_ctxt->ps_pps)
  ------------------
  |  Branch (1715:8): [True: 0, False: 22.5k]
  |  Branch (1715:40): [True: 0, False: 22.5k]
  ------------------
 1716|      0|    {
 1717|      0|        return IV_FAIL;
 1718|      0|    }
 1719|       |
 1720|  22.5k|    ps_svcd_ctxt->pv_nal_parse_ctxt = ps_nal_parse_ctxt;
 1721|  22.5k|    ps_nal_parse_ctxt->pv_seq_prms = ps_svcd_ctxt->ps_sps;
 1722|  22.5k|    ps_nal_parse_ctxt->pv_pic_prms = ps_svcd_ctxt->ps_pps;
 1723|       |
 1724|       |    /* register VCL and NON VCL buffer pointers */
 1725|  22.5k|    if(NULL == ps_svcd_ctxt->pv_vcl_nal_buff || NULL == ps_svcd_ctxt->pv_non_vcl_nal_buff)
  ------------------
  |  Branch (1725:8): [True: 0, False: 22.5k]
  |  Branch (1725:49): [True: 0, False: 22.5k]
  ------------------
 1726|      0|    {
 1727|      0|        return IV_FAIL;
 1728|      0|    }
 1729|       |
 1730|  22.5k|    ps_nal_parse_ctxt->pv_vcl_nal_buf = (UWORD8 *) ps_svcd_ctxt->pv_vcl_nal_buff;
 1731|  22.5k|    ps_nal_parse_ctxt->pv_non_vcl_nal_buf = (UWORD8 *) ps_svcd_ctxt->pv_non_vcl_nal_buff;
 1732|  22.5k|    isvcd_nal_parse_reset_ctxt(ANNEX_B, PARTIAL_INPUT_MODE, ps_nal_parse_ctxt);
  ------------------
  |  |   64|  22.5k|#define ANNEX_B 0     /*!< Annex B stream*/
  ------------------
 1733|       |
 1734|  22.5k|    return IV_SUCCESS;
 1735|  22.5k|}
isvcd_intra_resample_ctxt_create:
 1759|  22.5k|{
 1760|  22.5k|    isvcd_create_ip_t *ps_create_ip;
 1761|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 1762|  22.5k|    void *pv_buf;
 1763|  22.5k|    UWORD8 u1_layer_id;
 1764|  22.5k|    void *(*pf_aligned_alloc)(void *pv_mem_ctxt, WORD32 alignment, WORD32 size);
 1765|  22.5k|    void *pv_mem_ctxt;
 1766|  22.5k|    WORD32 size;
 1767|  22.5k|    intra_inter_pred_ctxt_t *ps_ii_pred_ctxt;
 1768|       |
 1769|  22.5k|    intra_sampling_ctxt_t *ps_ctxt;
 1770|  22.5k|    UNUSED(pv_api_op);
  ------------------
  |  |   45|  22.5k|#define UNUSED(x) ((void)(x))
  ------------------
 1771|  22.5k|    ps_create_ip = (isvcd_create_ip_t *) pv_api_ip;
 1772|       |
 1773|  22.5k|    pf_aligned_alloc = ps_create_ip->s_ivd_create_ip_t.pf_aligned_alloc;
 1774|  22.5k|    pv_mem_ctxt = ps_create_ip->s_ivd_create_ip_t.pv_mem_ctxt;
 1775|       |
 1776|  22.5k|    {
 1777|  22.5k|        intra_samp_lyr_ctxt *ps_lyr_ctxt;
 1778|       |
 1779|       |        /* allocate context structure */
 1780|  22.5k|        size = ((sizeof(intra_sampling_ctxt_t) + 127) >> 7) << 7;
 1781|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1782|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1783|  22.5k|        memset(pv_buf, 0, size);
 1784|  22.5k|        ps_ctxt = pv_buf;
 1785|       |
 1786|       |        /* luma reference array buffer  */
 1787|  22.5k|        size = REF_ARRAY_WIDTH * REF_ARRAY_HEIGHT * sizeof(UWORD8);
  ------------------
  |  |   73|  22.5k|#define REF_ARRAY_WIDTH 48
  ------------------
                      size = REF_ARRAY_WIDTH * REF_ARRAY_HEIGHT * sizeof(UWORD8);
  ------------------
  |  |   74|  22.5k|#define REF_ARRAY_HEIGHT 48
  ------------------
 1788|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1789|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1790|  22.5k|        memset(pv_buf, 0, size);
 1791|  22.5k|        ps_ctxt->pu1_refarray_buffer = pv_buf;
 1792|       |
 1793|       |        /* cb reference array buffer */
 1794|  22.5k|        size = REF_ARRAY_WIDTH * REF_ARRAY_HEIGHT * sizeof(UWORD8);
  ------------------
  |  |   73|  22.5k|#define REF_ARRAY_WIDTH 48
  ------------------
                      size = REF_ARRAY_WIDTH * REF_ARRAY_HEIGHT * sizeof(UWORD8);
  ------------------
  |  |   74|  22.5k|#define REF_ARRAY_HEIGHT 48
  ------------------
 1795|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1796|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1797|  22.5k|        memset(pv_buf, 0, size);
 1798|  22.5k|        ps_ctxt->pu1_refarray_cb = pv_buf;
 1799|       |
 1800|       |        /* cr reference array buffer */
 1801|  22.5k|        size = ((DYADIC_REF_W_C + 2) * (DYADIC_REF_H_C + 2) * sizeof(UWORD8));
  ------------------
  |  |   58|  22.5k|#define DYADIC_REF_W_C 10
  ------------------
                      size = ((DYADIC_REF_W_C + 2) * (DYADIC_REF_H_C + 2) * sizeof(UWORD8));
  ------------------
  |  |   59|  22.5k|#define DYADIC_REF_H_C 10
  ------------------
 1802|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1803|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1804|  22.5k|        memset(pv_buf, 0, size);
 1805|  22.5k|        ps_ctxt->pu1_refarray_cr = pv_buf;
 1806|       |
 1807|       |        /* Temp Intermediate Buffer */
 1808|  22.5k|        size = INTERMEDIATE_BUFF_WIDTH * INTERMEDIATE_BUFF_HEIGHT * sizeof(WORD32);
  ------------------
  |  |   50|  22.5k|#define INTERMEDIATE_BUFF_WIDTH 48
  ------------------
                      size = INTERMEDIATE_BUFF_WIDTH * INTERMEDIATE_BUFF_HEIGHT * sizeof(WORD32);
  ------------------
  |  |   51|  22.5k|#define INTERMEDIATE_BUFF_HEIGHT (MB_HEIGHT + 4)
  |  |  ------------------
  |  |  |  |   68|  22.5k|#define MB_HEIGHT 16
  |  |  ------------------
  ------------------
 1809|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1810|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1811|  22.5k|        memset(pv_buf, 0, size);
 1812|  22.5k|        ps_ctxt->pi4_temp_interpolation_buffer = pv_buf;
 1813|       |
 1814|       |        /****************** projected locations buffers ******************/
 1815|  22.5k|        {
 1816|  22.5k|            intra_samp_map_ctxt_t *ps_luma_map;
 1817|  22.5k|            intra_samp_map_ctxt_t *ps_chroma_map;
 1818|  22.5k|            WORD32 i4_lyr_id;
 1819|  22.5k|            ref_mb_map_t *ps_off_len_map;
 1820|  22.5k|            ref_pixel_map_t *ps_pos_phase_map;
 1821|  22.5k|            ref_min_max_map_t *ps_min_max;
 1822|  22.5k|            WORD16 *pi2_mem;
 1823|  22.5k|            UWORD8 *pu1_mem;
 1824|  22.5k|            seg_lookup_desc_t *ps_seg_lookup;
 1825|       |
 1826|       |            /****************** Horz offset length ******************/
 1827|       |
 1828|  22.5k|            size = (H264_MAX_FRAME_WIDTH >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   39|  22.5k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                          size = (H264_MAX_FRAME_WIDTH >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1829|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1830|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1831|  22.5k|            memset(pv_buf, 0, size);
 1832|  22.5k|            ps_off_len_map = pv_buf;
 1833|       |
 1834|       |            /* loop over num layers -1 */
 1835|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1835:32): [True: 67.7k, False: 22.5k]
  ------------------
 1836|  67.7k|            {
 1837|       |                /* derive the layer map ctxt */
 1838|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 1839|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1840|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1841|       |
 1842|       |                /* initialise the pointers */
 1843|  67.7k|                ps_luma_map->ps_x_offset_length = ps_off_len_map;
 1844|  67.7k|                ps_off_len_map += (H264_MAX_FRAME_WIDTH >> 4);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 1845|  67.7k|                ps_chroma_map->ps_x_offset_length = ps_off_len_map;
 1846|  67.7k|                ps_off_len_map += (H264_MAX_FRAME_WIDTH >> 4);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 1847|       |
 1848|  67.7k|            } /* end of loop over resolution layers */
 1849|       |
 1850|       |            /****************** Vert offset length ******************/
 1851|  22.5k|            size = (H264_MAX_FRAME_HEIGHT >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   40|  22.5k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                          size = (H264_MAX_FRAME_HEIGHT >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1852|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1853|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1854|  22.5k|            memset(pv_buf, 0, size);
 1855|  22.5k|            ps_off_len_map = pv_buf;
 1856|       |
 1857|       |            /* loop over num layers -1 */
 1858|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1858:32): [True: 67.7k, False: 22.5k]
  ------------------
 1859|  67.7k|            {
 1860|       |                /* derive the layer map ctxt */
 1861|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 1862|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1863|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1864|       |
 1865|       |                /* initialise the pointers */
 1866|  67.7k|                ps_luma_map->ps_y_offset_length = ps_off_len_map;
 1867|  67.7k|                ps_off_len_map += (H264_MAX_FRAME_HEIGHT >> 4);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 1868|  67.7k|                ps_chroma_map->ps_y_offset_length = ps_off_len_map;
 1869|  67.7k|                ps_off_len_map += (H264_MAX_FRAME_HEIGHT >> 4);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 1870|       |
 1871|  67.7k|            } /* end of loop over resolution layers */
 1872|       |
 1873|       |            /****************** Horz Min Max Pos ******************/
 1874|       |
 1875|  22.5k|            size = (H264_MAX_FRAME_WIDTH >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   39|  22.5k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                          size = (H264_MAX_FRAME_WIDTH >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1876|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1877|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1878|  22.5k|            memset(pv_buf, 0, size);
 1879|  22.5k|            ps_min_max = pv_buf;
 1880|       |
 1881|       |            /* loop over num layers -1 */
 1882|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1882:32): [True: 67.7k, False: 22.5k]
  ------------------
 1883|  67.7k|            {
 1884|       |                /* derive the layer map ctxt */
 1885|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 1886|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1887|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1888|       |
 1889|       |                /* initialise the pointers */
 1890|  67.7k|                ps_luma_map->ps_x_min_max = ps_min_max;
 1891|  67.7k|                ps_min_max += (H264_MAX_FRAME_WIDTH >> 4);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 1892|  67.7k|                ps_chroma_map->ps_x_min_max = ps_min_max;
 1893|  67.7k|                ps_min_max += (H264_MAX_FRAME_WIDTH >> 4);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 1894|  67.7k|            } /* end of loop over resolution layers */
 1895|       |
 1896|       |            /****************** Vert Min Max Pos ******************/
 1897|  22.5k|            size = (H264_MAX_FRAME_HEIGHT >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   40|  22.5k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                          size = (H264_MAX_FRAME_HEIGHT >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1898|       |
 1899|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1900|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1901|  22.5k|            memset(pv_buf, 0, size);
 1902|  22.5k|            ps_min_max = pv_buf;
 1903|       |
 1904|       |            /* loop over num layers -1 */
 1905|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1905:32): [True: 67.7k, False: 22.5k]
  ------------------
 1906|  67.7k|            {
 1907|       |                /* derive the layer map ctxt */
 1908|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 1909|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1910|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1911|       |
 1912|       |                /* initialise the pointers */
 1913|  67.7k|                ps_luma_map->ps_y_min_max = ps_min_max;
 1914|  67.7k|                ps_min_max += (H264_MAX_FRAME_HEIGHT >> 4);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 1915|  67.7k|                ps_chroma_map->ps_y_min_max = ps_min_max;
 1916|  67.7k|                ps_min_max += (H264_MAX_FRAME_HEIGHT >> 4);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 1917|       |
 1918|  67.7k|            } /* end of loop over resolution layers */
 1919|       |
 1920|       |            /****************** Horz position phase ******************/
 1921|  22.5k|            size = (H264_MAX_FRAME_WIDTH) *MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   39|  22.5k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                          size = (H264_MAX_FRAME_WIDTH) *MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1922|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1923|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1924|  22.5k|            memset(pv_buf, 0, size);
 1925|  22.5k|            ps_pos_phase_map = pv_buf;
 1926|       |
 1927|       |            /* loop over num layers -1 */
 1928|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1928:32): [True: 67.7k, False: 22.5k]
  ------------------
 1929|  67.7k|            {
 1930|       |                /* derive the layer map ctxt */
 1931|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 1932|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1933|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1934|       |
 1935|       |                /* initialise the pointers */
 1936|  67.7k|                ps_luma_map->ps_x_pos_phase = ps_pos_phase_map;
 1937|  67.7k|                ps_pos_phase_map += (H264_MAX_FRAME_WIDTH);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 1938|  67.7k|                ps_chroma_map->ps_x_pos_phase = ps_pos_phase_map;
 1939|  67.7k|                ps_pos_phase_map += (H264_MAX_FRAME_WIDTH);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 1940|       |
 1941|  67.7k|            } /* end of loop over resolution layers */
 1942|       |
 1943|       |            /****************** Vert position phase ******************/
 1944|       |
 1945|  22.5k|            size = (H264_MAX_FRAME_HEIGHT) *MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   40|  22.5k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                          size = (H264_MAX_FRAME_HEIGHT) *MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1946|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1947|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1948|  22.5k|            memset(pv_buf, 0, size);
 1949|  22.5k|            ps_pos_phase_map = pv_buf;
 1950|       |
 1951|       |            /* loop over num layers -1 */
 1952|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1952:32): [True: 67.7k, False: 22.5k]
  ------------------
 1953|  67.7k|            {
 1954|       |                /* derive the layer map ctxt */
 1955|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 1956|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1957|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1958|       |
 1959|       |                /* initialise the pointers */
 1960|  67.7k|                ps_luma_map->ps_y_pos_phase = ps_pos_phase_map;
 1961|  67.7k|                ps_pos_phase_map += (H264_MAX_FRAME_HEIGHT);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 1962|  67.7k|                ps_chroma_map->ps_y_pos_phase = ps_pos_phase_map;
 1963|  67.7k|                ps_pos_phase_map += (H264_MAX_FRAME_HEIGHT);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 1964|       |
 1965|  67.7k|            } /* end of loop over resolution layers */
 1966|       |
 1967|       |            /**************** XD Index ******************************/
 1968|  22.5k|            size = (MB_WIDTH) *MAX_NUM_RES_LYRS * 2 * sizeof(WORD16);
  ------------------
  |  |   67|  22.5k|#define MB_WIDTH 16
  ------------------
                          size = (MB_WIDTH) *MAX_NUM_RES_LYRS * 2 * sizeof(WORD16);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1969|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1970|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1971|  22.5k|            memset(pv_buf, 0, size);
 1972|  22.5k|            pi2_mem = pv_buf;
 1973|       |
 1974|       |            /* loop over num layers -1 */
 1975|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1975:32): [True: 67.7k, False: 22.5k]
  ------------------
 1976|  67.7k|            {
 1977|       |                /* derive the layer map ctxt */
 1978|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 1979|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1980|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1981|       |
 1982|       |                /* initialise the pointers */
 1983|  67.7k|                ps_luma_map->pi2_xd_index = pi2_mem;
 1984|  67.7k|                pi2_mem += MB_WIDTH;
  ------------------
  |  |   67|  67.7k|#define MB_WIDTH 16
  ------------------
 1985|  67.7k|                ps_chroma_map->pi2_xd_index = pi2_mem;
 1986|  67.7k|                pi2_mem += MB_WIDTH;
  ------------------
  |  |   67|  67.7k|#define MB_WIDTH 16
  ------------------
 1987|       |
 1988|  67.7k|            } /* end of loop over resolution layers */
 1989|       |
 1990|       |            /**************** YD Index ******************************/
 1991|  22.5k|            size = (MB_HEIGHT) *MAX_NUM_RES_LYRS * 2 * sizeof(WORD16);
  ------------------
  |  |   68|  22.5k|#define MB_HEIGHT 16
  ------------------
                          size = (MB_HEIGHT) *MAX_NUM_RES_LYRS * 2 * sizeof(WORD16);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 1992|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 1993|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 1994|  22.5k|            memset(pv_buf, 0, size);
 1995|  22.5k|            pi2_mem = pv_buf;
 1996|       |
 1997|       |            /* loop over num layers -1 */
 1998|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (1998:32): [True: 67.7k, False: 22.5k]
  ------------------
 1999|  67.7k|            {
 2000|       |                /* derive the layer map ctxt */
 2001|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2002|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2003|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2004|       |
 2005|       |                /* initialise the pointers */
 2006|  67.7k|                ps_luma_map->pi2_yd_index = pi2_mem;
 2007|  67.7k|                pi2_mem += MB_HEIGHT;
  ------------------
  |  |   68|  67.7k|#define MB_HEIGHT 16
  ------------------
 2008|  67.7k|                ps_chroma_map->pi2_yd_index = pi2_mem;
 2009|  67.7k|                pi2_mem += MB_HEIGHT;
  ------------------
  |  |   68|  67.7k|#define MB_HEIGHT 16
  ------------------
 2010|       |
 2011|  67.7k|            } /* end of loop over resolution layers */
 2012|       |
 2013|       |            /**************** YA Index ******************************/
 2014|  22.5k|            size = MB_HEIGHT * MAX_NUM_RES_LYRS * 2 * sizeof(WORD16);
  ------------------
  |  |   68|  22.5k|#define MB_HEIGHT 16
  ------------------
                          size = MB_HEIGHT * MAX_NUM_RES_LYRS * 2 * sizeof(WORD16);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2015|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2016|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2017|  22.5k|            memset(pv_buf, 0, size);
 2018|  22.5k|            pi2_mem = pv_buf;
 2019|       |
 2020|       |            /* loop over num layers -1 */
 2021|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2021:32): [True: 67.7k, False: 22.5k]
  ------------------
 2022|  67.7k|            {
 2023|       |                /* derive the layer map ctxt */
 2024|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2025|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2026|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2027|       |
 2028|       |                /* initialise the pointers */
 2029|  67.7k|                ps_luma_map->pi2_ya_index = pi2_mem;
 2030|  67.7k|                pi2_mem += MB_HEIGHT;
  ------------------
  |  |   68|  67.7k|#define MB_HEIGHT 16
  ------------------
 2031|  67.7k|                ps_chroma_map->pi2_ya_index = pi2_mem;
 2032|  67.7k|                pi2_mem += MB_HEIGHT;
  ------------------
  |  |   68|  67.7k|#define MB_HEIGHT 16
  ------------------
 2033|       |
 2034|  67.7k|            } /* end of loop over resolution layers */
 2035|       |
 2036|       |            /**************** Horizontal segment lookup **************************/
 2037|       |            /* (MB_WIDTH x seg_lookup_desc_t) x (num layers - 1)   (for luma   )*/
 2038|       |            /* (BLOCK_WIDTH x seg_lookup_desc_t) x (num layers - 1) (for chroma )*/
 2039|  22.5k|            size = (MB_WIDTH * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   67|  22.5k|#define MB_WIDTH 16
  ------------------
                          size = (MB_WIDTH * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2040|       |
 2041|  22.5k|            size += (BLOCK_WIDTH * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   64|  22.5k|#define BLOCK_WIDTH 8
  ------------------
                          size += (BLOCK_WIDTH * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2042|       |
 2043|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2044|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2045|  22.5k|            memset(pv_buf, 0, size);
 2046|  22.5k|            ps_seg_lookup = pv_buf;
 2047|       |
 2048|       |            /* loop over num layers -1 */
 2049|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2049:32): [True: 67.7k, False: 22.5k]
  ------------------
 2050|  67.7k|            {
 2051|       |                /* derive the layer map ctxt */
 2052|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2053|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2054|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2055|       |
 2056|       |                /* initialise the pointers */
 2057|  67.7k|                ps_luma_map->ps_seg_lookup_horz = ps_seg_lookup;
 2058|  67.7k|                ps_seg_lookup += MB_WIDTH;
  ------------------
  |  |   67|  67.7k|#define MB_WIDTH 16
  ------------------
 2059|  67.7k|                ps_chroma_map->ps_seg_lookup_horz = ps_seg_lookup;
 2060|  67.7k|                ps_seg_lookup += BLOCK_WIDTH;
  ------------------
  |  |   64|  67.7k|#define BLOCK_WIDTH 8
  ------------------
 2061|       |
 2062|  67.7k|            } /* end of loop over resolution layers */
 2063|       |
 2064|       |            /**************** Vertical segment lookup ****************************/
 2065|       |            /* (MB_HEIGHT x seg_lookup_desc_t) x (num layers - 1)    (for luma  )*/
 2066|       |            /* (BLOCK_HEIGHT x seg_lookup_desc_t) x (num layers - 1) (for chroma)*/
 2067|  22.5k|            size = (MB_HEIGHT * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   68|  22.5k|#define MB_HEIGHT 16
  ------------------
                          size = (MB_HEIGHT * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2068|       |
 2069|  22.5k|            size += (BLOCK_HEIGHT * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   65|  22.5k|#define BLOCK_HEIGHT 8
  ------------------
                          size += (BLOCK_HEIGHT * sizeof(seg_lookup_desc_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2070|       |
 2071|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2072|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2073|  22.5k|            memset(pv_buf, 0, size);
 2074|  22.5k|            ps_seg_lookup = pv_buf;
 2075|       |
 2076|       |            /* loop over num layers -1 */
 2077|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2077:32): [True: 67.7k, False: 22.5k]
  ------------------
 2078|  67.7k|            {
 2079|       |                /* derive the layer map ctxt */
 2080|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2081|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2082|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2083|       |
 2084|       |                /* initialise the pointers */
 2085|  67.7k|                ps_luma_map->ps_seg_lookup_vert = ps_seg_lookup;
 2086|  67.7k|                ps_seg_lookup += MB_HEIGHT;
  ------------------
  |  |   68|  67.7k|#define MB_HEIGHT 16
  ------------------
 2087|  67.7k|                ps_chroma_map->ps_seg_lookup_vert = ps_seg_lookup;
 2088|  67.7k|                ps_seg_lookup += BLOCK_HEIGHT;
  ------------------
  |  |   65|  67.7k|#define BLOCK_HEIGHT 8
  ------------------
 2089|       |
 2090|  67.7k|            } /* end of loop over resolution layers */
 2091|       |
 2092|       |            /**************** X and Y Reference Array Index lookup ***************/
 2093|       |            /* (MAX_REF_IDX_ARRAY) x (num layers - 1)     (for luma  x-index)     */
 2094|       |            /* (MAX_REF_IDX_ARRAY) x (num layers - 1)     (for luma  y-index)     */
 2095|       |            /* (MAX_REF_IDX_ARRAY) x (num layers - 1)     (for chroma x-index)    */
 2096|       |            /* (MAX_REF_IDX_ARRAY) x (num layers - 1)     (for chroma y-index)    */
 2097|       |            /*********************************************************************/
 2098|  22.5k|            size = (MAX_REF_IDX_ARRAY * MAX_NUM_RES_LYRS * 4);
  ------------------
  |  |   55|  22.5k|#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   53|  22.5k|#define MAX_REF_ARR_WD_HT 48
  |  |  ------------------
  |  |               #define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   67|  22.5k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
                          size = (MAX_REF_IDX_ARRAY * MAX_NUM_RES_LYRS * 4);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2099|  22.5k|            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2100|  22.5k|            RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2101|  22.5k|            memset(pv_buf, 0, size);
 2102|  22.5k|            pu1_mem = pv_buf;
 2103|       |
 2104|       |            /* loop over num layers -1 */
 2105|  90.3k|            for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2105:32): [True: 67.7k, False: 22.5k]
  ------------------
 2106|  67.7k|            {
 2107|       |                /* derive the layer map ctxt */
 2108|  67.7k|                ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2109|  67.7k|                ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2110|  67.7k|                ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2111|       |
 2112|       |                /* initialise the pointers */
 2113|  67.7k|                ps_luma_map->pu1_refarray_x_idx = pu1_mem;
 2114|  67.7k|                pu1_mem += MAX_REF_IDX_ARRAY;
  ------------------
  |  |   55|  67.7k|#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   53|  67.7k|#define MAX_REF_ARR_WD_HT 48
  |  |  ------------------
  |  |               #define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   67|  67.7k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
 2115|       |
 2116|  67.7k|                ps_luma_map->pu1_refarray_y_idx = pu1_mem;
 2117|  67.7k|                pu1_mem += MAX_REF_IDX_ARRAY;
  ------------------
  |  |   55|  67.7k|#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   53|  67.7k|#define MAX_REF_ARR_WD_HT 48
  |  |  ------------------
  |  |               #define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   67|  67.7k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
 2118|       |
 2119|  67.7k|                ps_chroma_map->pu1_refarray_x_idx = pu1_mem;
 2120|  67.7k|                pu1_mem += MAX_REF_IDX_ARRAY;
  ------------------
  |  |   55|  67.7k|#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   53|  67.7k|#define MAX_REF_ARR_WD_HT 48
  |  |  ------------------
  |  |               #define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   67|  67.7k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
 2121|       |
 2122|  67.7k|                ps_chroma_map->pu1_refarray_y_idx = pu1_mem;
 2123|  67.7k|                pu1_mem += MAX_REF_IDX_ARRAY;
  ------------------
  |  |   55|  67.7k|#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   53|  67.7k|#define MAX_REF_ARR_WD_HT 48
  |  |  ------------------
  |  |               #define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_WIDTH)
  |  |  ------------------
  |  |  |  |   67|  67.7k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
 2124|       |
 2125|  67.7k|            } /* end of loop over resolution layers */
 2126|  22.5k|        }
 2127|       |
 2128|      0|        size = ((sizeof(intra_inter_pred_ctxt_t) + 127) >> 7) << 7;
 2129|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2130|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2131|  22.5k|        memset(pv_buf, 0, size);
 2132|  22.5k|        ps_ii_pred_ctxt = pv_buf;
 2133|  22.5k|    }
 2134|       |
 2135|      0|    ps_svcd_ctxt->pv_intra_sample_ctxt = ps_ctxt;
 2136|  22.5k|    ps_svcd_ctxt->pv_ii_pred_ctxt = ps_ii_pred_ctxt;
 2137|       |
 2138|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2138:26): [True: 67.7k, False: 22.5k]
  ------------------
 2139|  67.7k|    {
 2140|  67.7k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 2141|  67.7k|        ps_svc_lyr_dec->pv_intra_sample_ctxt = ps_svcd_ctxt->pv_intra_sample_ctxt;
 2142|  67.7k|        ps_svc_lyr_dec->pv_ii_pred_ctxt = ps_svcd_ctxt->pv_ii_pred_ctxt;
 2143|  67.7k|    }
 2144|       |
 2145|  22.5k|    return IV_SUCCESS;
 2146|  22.5k|}
isvcd_residual_resample_ctxt_create:
 2170|  22.5k|{
 2171|  22.5k|    isvcd_create_ip_t *ps_create_ip;
 2172|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 2173|  22.5k|    void *pv_buf;
 2174|  22.5k|    UWORD8 u1_layer_id;
 2175|  22.5k|    void *(*pf_aligned_alloc)(void *pv_mem_ctxt, WORD32 alignment, WORD32 size);
 2176|  22.5k|    void *pv_mem_ctxt;
 2177|  22.5k|    WORD32 size;
 2178|       |
 2179|  22.5k|    residual_sampling_ctxt_t *ps_ctxt;
 2180|  22.5k|    res_lyr_ctxt *ps_lyr_ctxt;
 2181|  22.5k|    UNUSED(pv_api_op);
  ------------------
  |  |   45|  22.5k|#define UNUSED(x) ((void)(x))
  ------------------
 2182|  22.5k|    ps_create_ip = (isvcd_create_ip_t *) pv_api_ip;
 2183|       |
 2184|  22.5k|    pf_aligned_alloc = ps_create_ip->s_ivd_create_ip_t.pf_aligned_alloc;
 2185|  22.5k|    pv_mem_ctxt = ps_create_ip->s_ivd_create_ip_t.pv_mem_ctxt;
 2186|       |
 2187|       |    /* allocate context structure */
 2188|  22.5k|    size = ((sizeof(residual_sampling_ctxt_t) + 127) >> 7) << 7;
 2189|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2190|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2191|  22.5k|    memset(pv_buf, 0, size);
 2192|  22.5k|    ps_ctxt = pv_buf;
 2193|       |
 2194|       |    /* reference array buffer  */
 2195|  22.5k|    size = REF_ARRAY_WIDTH_RES_SAMP * REF_ARRAY_HEIGHT_RES_SAMP * sizeof(WORD16);
  ------------------
  |  |   45|  22.5k|#define REF_ARRAY_WIDTH_RES_SAMP (MB_WIDTH + 6)
  |  |  ------------------
  |  |  |  |   67|  22.5k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
                  size = REF_ARRAY_WIDTH_RES_SAMP * REF_ARRAY_HEIGHT_RES_SAMP * sizeof(WORD16);
  ------------------
  |  |   46|  22.5k|#define REF_ARRAY_HEIGHT_RES_SAMP (MB_HEIGHT + 6)
  |  |  ------------------
  |  |  |  |   68|  22.5k|#define MB_HEIGHT 16
  |  |  ------------------
  ------------------
 2196|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2197|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2198|  22.5k|    memset(pv_buf, 0, size);
 2199|  22.5k|    ps_ctxt->pi2_refarray_buffer = pv_buf;
 2200|       |
 2201|       |    /* reference array pointer increment buffer */
 2202|  22.5k|    {
 2203|  22.5k|        WORD32 i4_size;
 2204|       |
 2205|  22.5k|        i4_size = REF_ARRAY_WIDTH_RES_SAMP * REF_ARRAY_HEIGHT_RES_SAMP * sizeof(UWORD8);
  ------------------
  |  |   45|  22.5k|#define REF_ARRAY_WIDTH_RES_SAMP (MB_WIDTH + 6)
  |  |  ------------------
  |  |  |  |   67|  22.5k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
                      i4_size = REF_ARRAY_WIDTH_RES_SAMP * REF_ARRAY_HEIGHT_RES_SAMP * sizeof(UWORD8);
  ------------------
  |  |   46|  22.5k|#define REF_ARRAY_HEIGHT_RES_SAMP (MB_HEIGHT + 6)
  |  |  ------------------
  |  |  |  |   68|  22.5k|#define MB_HEIGHT 16
  |  |  ------------------
  ------------------
 2206|  22.5k|        size = REF_ARRAY_WIDTH_RES_SAMP * REF_ARRAY_HEIGHT_RES_SAMP * 2 * sizeof(UWORD8);
  ------------------
  |  |   45|  22.5k|#define REF_ARRAY_WIDTH_RES_SAMP (MB_WIDTH + 6)
  |  |  ------------------
  |  |  |  |   67|  22.5k|#define MB_WIDTH 16
  |  |  ------------------
  ------------------
                      size = REF_ARRAY_WIDTH_RES_SAMP * REF_ARRAY_HEIGHT_RES_SAMP * 2 * sizeof(UWORD8);
  ------------------
  |  |   46|  22.5k|#define REF_ARRAY_HEIGHT_RES_SAMP (MB_HEIGHT + 6)
  |  |  ------------------
  |  |  |  |   68|  22.5k|#define MB_HEIGHT 16
  |  |  ------------------
  ------------------
 2207|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2208|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2209|  22.5k|        memset(pv_buf, 0, size);
 2210|  22.5k|        ps_ctxt->pu1_ref_x_ptr_incr = pv_buf;
 2211|  22.5k|        ps_ctxt->pu1_ref_y_ptr_incr = ps_ctxt->pu1_ref_x_ptr_incr + i4_size;
 2212|  22.5k|    }
 2213|       |
 2214|       |    /****************** projected locations buffers ******************/
 2215|      0|    {
 2216|  22.5k|        residual_samp_map_ctxt_t *ps_luma_map;
 2217|  22.5k|        residual_samp_map_ctxt_t *ps_chroma_map;
 2218|  22.5k|        WORD32 i4_lyr_id;
 2219|  22.5k|        ref_mb_map_t *ps_off_len_map;
 2220|  22.5k|        ref_pixel_map_t *ps_pos_phase_map;
 2221|       |
 2222|       |        /****************** Horz offset length ******************/
 2223|  22.5k|        size = (H264_MAX_FRAME_WIDTH >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   39|  22.5k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                      size = (H264_MAX_FRAME_WIDTH >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2224|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2225|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2226|  22.5k|        memset(pv_buf, 0, size);
 2227|  22.5k|        ps_off_len_map = pv_buf;
 2228|       |
 2229|       |        /* loop over num layers -1 */
 2230|  90.3k|        for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2230:28): [True: 67.7k, False: 22.5k]
  ------------------
 2231|  67.7k|        {
 2232|       |            /* derive the layer map ctxt */
 2233|  67.7k|            ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2234|  67.7k|            ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2235|  67.7k|            ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2236|       |
 2237|       |            /* initialise the pointers */
 2238|  67.7k|            ps_luma_map->ps_x_offset_length = ps_off_len_map;
 2239|  67.7k|            ps_off_len_map += (H264_MAX_FRAME_WIDTH >> 4);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 2240|  67.7k|            ps_chroma_map->ps_x_offset_length = ps_off_len_map;
 2241|  67.7k|            ps_off_len_map += (H264_MAX_FRAME_WIDTH >> 4);
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 2242|       |
 2243|  67.7k|        } /* end of loop over resolution layers */
 2244|       |
 2245|       |        /****************** Vert offset length ******************/
 2246|  22.5k|        size = (H264_MAX_FRAME_HEIGHT >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   40|  22.5k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                      size = (H264_MAX_FRAME_HEIGHT >> 4) * MAX_NUM_RES_LYRS * 2 * sizeof(ref_mb_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2247|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2248|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2249|  22.5k|        memset(pv_buf, 0, size);
 2250|  22.5k|        ps_off_len_map = pv_buf;
 2251|       |
 2252|       |        /* loop over num layers -1 */
 2253|  90.3k|        for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2253:28): [True: 67.7k, False: 22.5k]
  ------------------
 2254|  67.7k|        {
 2255|       |            /* derive the layer map ctxt */
 2256|  67.7k|            ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2257|  67.7k|            ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2258|  67.7k|            ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2259|       |
 2260|       |            /* initialise the pointers */
 2261|  67.7k|            ps_luma_map->ps_y_offset_length = ps_off_len_map;
 2262|  67.7k|            ps_off_len_map += (H264_MAX_FRAME_HEIGHT >> 4);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 2263|  67.7k|            ps_chroma_map->ps_y_offset_length = ps_off_len_map;
 2264|  67.7k|            ps_off_len_map += (H264_MAX_FRAME_HEIGHT >> 4);
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 2265|       |
 2266|  67.7k|        } /* end of loop over resolution layers */
 2267|       |
 2268|       |        /****************** Horz position phase ******************/
 2269|  22.5k|        size = H264_MAX_FRAME_WIDTH * MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   39|  22.5k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                      size = H264_MAX_FRAME_WIDTH * MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2270|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2271|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2272|  22.5k|        memset(pv_buf, 0, size);
 2273|  22.5k|        ps_pos_phase_map = pv_buf;
 2274|       |
 2275|       |        /* loop over num layers -1 */
 2276|  90.3k|        for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2276:28): [True: 67.7k, False: 22.5k]
  ------------------
 2277|  67.7k|        {
 2278|       |            /* derive the layer map ctxt */
 2279|  67.7k|            ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2280|  67.7k|            ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2281|  67.7k|            ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2282|       |
 2283|       |            /* initialise the pointers */
 2284|  67.7k|            ps_luma_map->ps_x_pos_phase = ps_pos_phase_map;
 2285|  67.7k|            ps_pos_phase_map += H264_MAX_FRAME_WIDTH;
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 2286|  67.7k|            ps_chroma_map->ps_x_pos_phase = ps_pos_phase_map;
 2287|  67.7k|            ps_pos_phase_map += H264_MAX_FRAME_WIDTH;
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 2288|       |
 2289|  67.7k|        } /* end of loop over resolution layers */
 2290|       |
 2291|       |        /****************** Vert position phase ******************/
 2292|       |
 2293|  22.5k|        size = H264_MAX_FRAME_HEIGHT * MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   40|  22.5k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                      size = H264_MAX_FRAME_HEIGHT * MAX_NUM_RES_LYRS * 2 * sizeof(ref_pixel_map_t);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2294|  22.5k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2295|  22.5k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2296|  22.5k|        memset(pv_buf, 0, size);
 2297|  22.5k|        ps_pos_phase_map = pv_buf;
 2298|       |
 2299|       |        /* loop over num layers -1 */
 2300|  90.3k|        for(i4_lyr_id = 0; i4_lyr_id < MAX_NUM_RES_LYRS; i4_lyr_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2300:28): [True: 67.7k, False: 22.5k]
  ------------------
 2301|  67.7k|        {
 2302|       |            /* derive the layer map ctxt */
 2303|  67.7k|            ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[i4_lyr_id];
 2304|  67.7k|            ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 2305|  67.7k|            ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2306|       |
 2307|       |            /* initialise the pointers */
 2308|  67.7k|            ps_luma_map->ps_y_pos_phase = ps_pos_phase_map;
 2309|  67.7k|            ps_pos_phase_map += H264_MAX_FRAME_HEIGHT;
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 2310|  67.7k|            ps_chroma_map->ps_y_pos_phase = ps_pos_phase_map;
 2311|  67.7k|            ps_pos_phase_map += H264_MAX_FRAME_HEIGHT;
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 2312|       |
 2313|  67.7k|        } /* end of loop over resolution layers */
 2314|  22.5k|    }
 2315|       |
 2316|      0|    ps_svcd_ctxt->pv_residual_sample_ctxt = ps_ctxt;
 2317|       |
 2318|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2318:26): [True: 67.7k, False: 22.5k]
  ------------------
 2319|  67.7k|    {
 2320|  67.7k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 2321|  67.7k|        ps_svc_lyr_dec->pv_residual_sample_ctxt = ps_svcd_ctxt->pv_residual_sample_ctxt;
 2322|  67.7k|    }
 2323|  22.5k|    return IV_SUCCESS;
 2324|  22.5k|}
isvcd_mode_mv_resample_ctxt_create:
 2347|  22.5k|{
 2348|  22.5k|    isvcd_create_ip_t *ps_create_ip;
 2349|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 2350|  22.5k|    void *pv_buf;
 2351|  22.5k|    WORD16 *pi2_mem;
 2352|  22.5k|    UWORD8 u1_layer_id;
 2353|  22.5k|    void *(*pf_aligned_alloc)(void *pv_mem_ctxt, WORD32 alignment, WORD32 size);
 2354|  22.5k|    void *pv_mem_ctxt;
 2355|  22.5k|    WORD32 size, i4_res_id;
 2356|  22.5k|    ref_lyr_scaled_offset_t *ps_ref_pic_offsets;
 2357|  22.5k|    mode_motion_ctxt_t *ps_mode_motion;
 2358|  22.5k|    mode_motion_lyr_ctxt *ps_lyr_mem;
 2359|  22.5k|    UNUSED(pv_api_op);
  ------------------
  |  |   45|  22.5k|#define UNUSED(x) ((void)(x))
  ------------------
 2360|  22.5k|    ps_create_ip = (isvcd_create_ip_t *) pv_api_ip;
 2361|       |
 2362|  22.5k|    pf_aligned_alloc = ps_create_ip->s_ivd_create_ip_t.pf_aligned_alloc;
 2363|  22.5k|    pv_mem_ctxt = ps_create_ip->s_ivd_create_ip_t.pv_mem_ctxt;
 2364|       |
 2365|  22.5k|    size = ((sizeof(mode_motion_ctxt_t) + 127) >> 7) << 7;
 2366|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2367|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2368|  22.5k|    memset(pv_buf, 0, size);
 2369|  22.5k|    ps_mode_motion = pv_buf;
 2370|       |
 2371|       |    /* motion pred structure */
 2372|  22.5k|    size = 2 * NUM_MB_PARTS * NUM_SUB_MB_PARTS * sizeof(mv_pred_t);
  ------------------
  |  |   59|  22.5k|#define NUM_MB_PARTS 4
  ------------------
                  size = 2 * NUM_MB_PARTS * NUM_SUB_MB_PARTS * sizeof(mv_pred_t);
  ------------------
  |  |   60|  22.5k|#define NUM_SUB_MB_PARTS 4
  ------------------
 2373|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2374|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2375|  22.5k|    memset(pv_buf, 0, size);
 2376|  22.5k|    ps_mode_motion->ps_motion_pred_struct = (mv_pred_t *) pv_buf;
 2377|       |
 2378|       |    /* projected locations X */
 2379|  22.5k|    size = H264_MAX_FRAME_WIDTH * MAX_NUM_RES_LYRS * sizeof(WORD16);
  ------------------
  |  |   39|  22.5k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                  size = H264_MAX_FRAME_WIDTH * MAX_NUM_RES_LYRS * sizeof(WORD16);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2380|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2381|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2382|  22.5k|    memset(pv_buf, 0, size);
 2383|  22.5k|    pi2_mem = (WORD16 *) pv_buf;
 2384|       |
 2385|       |    /* loop over NUM resolution layers */
 2386|  90.3k|    for(i4_res_id = 0; i4_res_id < MAX_NUM_RES_LYRS; i4_res_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2386:24): [True: 67.7k, False: 22.5k]
  ------------------
 2387|  67.7k|    {
 2388|  67.7k|        ps_lyr_mem = &ps_mode_motion->as_res_lyr_mem[i4_res_id];
 2389|       |
 2390|       |        /* initialise the pointers */
 2391|  67.7k|        ps_lyr_mem->pi2_ref_loc_x = pi2_mem;
 2392|       |
 2393|       |        /* increment the buffer pointer */
 2394|  67.7k|        pi2_mem += H264_MAX_FRAME_WIDTH;
  ------------------
  |  |   39|  67.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
 2395|       |
 2396|  67.7k|    } /* end of loop over num resolution layers */
 2397|       |
 2398|       |    /* projected locations Y */
 2399|  22.5k|    size = H264_MAX_FRAME_HEIGHT * MAX_NUM_RES_LYRS * sizeof(WORD16);
  ------------------
  |  |   40|  22.5k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                  size = H264_MAX_FRAME_HEIGHT * MAX_NUM_RES_LYRS * sizeof(WORD16);
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2400|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2401|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2402|  22.5k|    memset(pv_buf, 0, size);
 2403|  22.5k|    pi2_mem = (WORD16 *) pv_buf;
 2404|       |    /* loop over NUM resolution layers */
 2405|  90.3k|    for(i4_res_id = 0; i4_res_id < MAX_NUM_RES_LYRS; i4_res_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2405:24): [True: 67.7k, False: 22.5k]
  ------------------
 2406|  67.7k|    {
 2407|  67.7k|        ps_lyr_mem = &ps_mode_motion->as_res_lyr_mem[i4_res_id];
 2408|       |
 2409|       |        /* initialise the pointers */
 2410|  67.7k|        ps_lyr_mem->pi2_ref_loc_y = pi2_mem;
 2411|       |        /* increment the buffer pointer */
 2412|  67.7k|        pi2_mem += H264_MAX_FRAME_HEIGHT;
  ------------------
  |  |   40|  67.7k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
 2413|       |
 2414|  67.7k|    } /* end of loop over num resolution layers */
 2415|       |
 2416|  22.5k|    size = sizeof(ref_lyr_scaled_offset_t) * MAX_NUM_RES_LYRS * MAX_NUM_PIC_BUFS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
                  size = sizeof(ref_lyr_scaled_offset_t) * MAX_NUM_RES_LYRS * MAX_NUM_PIC_BUFS;
  ------------------
  |  |   99|  22.5k|#define MAX_NUM_PIC_BUFS (32 + 1)
  ------------------
 2417|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2418|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2419|  22.5k|    memset(pv_buf, 0, size);
 2420|  22.5k|    ps_svcd_ctxt->pv_ref_lyr_offset = pv_buf;
 2421|       |
 2422|       |    /* loop over NUM resolution layers */
 2423|  22.5k|    ps_ref_pic_offsets = (ref_lyr_scaled_offset_t *) ps_svcd_ctxt->pv_ref_lyr_offset;
 2424|       |
 2425|  90.3k|    for(i4_res_id = 0; i4_res_id < MAX_NUM_RES_LYRS; i4_res_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2425:24): [True: 67.7k, False: 22.5k]
  ------------------
 2426|  67.7k|    {
 2427|  67.7k|        ps_lyr_mem = &ps_mode_motion->as_res_lyr_mem[i4_res_id];
 2428|       |
 2429|       |        /* store the current resolution layer pic offset start pointer */
 2430|  67.7k|        ps_lyr_mem->ps_ref_pic_lyr_offsets = ps_ref_pic_offsets + (i4_res_id * MAX_NUM_PIC_BUFS);
  ------------------
  |  |   99|  67.7k|#define MAX_NUM_PIC_BUFS (32 + 1)
  ------------------
 2431|       |
 2432|  67.7k|    } /* end of loop over num resolution layers */
 2433|       |
 2434|  22.5k|    ps_svcd_ctxt->pv_mode_mv_sample_ctxt = ps_mode_motion;
 2435|       |
 2436|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2436:26): [True: 67.7k, False: 22.5k]
  ------------------
 2437|  67.7k|    {
 2438|  67.7k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 2439|  67.7k|        ps_svc_lyr_dec->pv_mode_mv_sample_ctxt = ps_svcd_ctxt->pv_mode_mv_sample_ctxt;
 2440|  67.7k|        ps_svc_lyr_dec->pv_ref_lyr_offset = ps_svcd_ctxt->pv_ref_lyr_offset;
 2441|  67.7k|    }
 2442|  22.5k|    return IV_SUCCESS;
 2443|  22.5k|}
isvcd_allocate_static_bufs:
 2465|  22.6k|{
 2466|  22.6k|    isvcd_create_ip_t *ps_create_ip;
 2467|  22.6k|    isvcd_create_op_t *ps_create_op;
 2468|  22.6k|    void *pv_buf;
 2469|  22.6k|    UWORD8 *pu1_buf;
 2470|  22.6k|    dec_struct_t *ps_dec;
 2471|  22.6k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 2472|  22.6k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 2473|  22.6k|    void *(*pf_aligned_alloc)(void *pv_mem_ctxt, WORD32 alignment, WORD32 size);
 2474|  22.6k|    void (*pf_aligned_free)(void *pv_mem_ctxt, void *pv_buf);
 2475|  22.6k|    void *pv_mem_ctxt;
 2476|  22.6k|    WORD32 size;
 2477|  22.6k|    UWORD8 u1_layer_id, u1_sps_ctr;
 2478|  22.6k|    UWORD8 u1_chroma_format;
 2479|  22.6k|    WORD32 ret;
 2480|       |
 2481|  22.6k|    ps_create_ip = (isvcd_create_ip_t *) pv_api_ip;
 2482|  22.6k|    ps_create_op = (isvcd_create_op_t *) pv_api_op;
 2483|       |
 2484|  22.6k|    ps_create_op->s_ivd_create_op_t.u4_error_code = 0;
 2485|  22.6k|    pf_aligned_alloc = ps_create_ip->s_ivd_create_ip_t.pf_aligned_alloc;
 2486|  22.6k|    pf_aligned_free = ps_create_ip->s_ivd_create_ip_t.pf_aligned_free;
 2487|  22.6k|    pv_mem_ctxt = ps_create_ip->s_ivd_create_ip_t.pv_mem_ctxt;
 2488|  22.6k|    u1_chroma_format = (UWORD8) (ps_create_ip->s_ivd_create_ip_t.e_output_format);
 2489|       |
 2490|  22.6k|    if((u1_chroma_format != IV_YUV_420P) && (u1_chroma_format != IV_YUV_420SP_UV) &&
  ------------------
  |  Branch (2490:8): [True: 12.8k, False: 9.72k]
  |  Branch (2490:45): [True: 5.99k, False: 6.87k]
  ------------------
 2491|  5.99k|       (u1_chroma_format != IV_YUV_420SP_VU))
  ------------------
  |  Branch (2491:8): [True: 9, False: 5.99k]
  ------------------
 2492|      9|    {
 2493|      9|        ps_create_op->s_ivd_create_op_t.pv_handle = NULL;
 2494|       |
 2495|      9|        return IV_FAIL;
 2496|      9|    }
 2497|       |
 2498|       |    /* Initialize return handle to NULL */
 2499|  22.5k|    ps_create_op->s_ivd_create_op_t.pv_handle = NULL;
 2500|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, sizeof(iv_obj_t));
 2501|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2502|  22.5k|    memset(pv_buf, 0, sizeof(iv_obj_t));
 2503|  22.5k|    *dec_hdl = (iv_obj_t *) pv_buf;
 2504|  22.5k|    ps_create_op->s_ivd_create_op_t.pv_handle = *dec_hdl;
 2505|       |
 2506|  22.5k|    (*dec_hdl)->pv_codec_handle = NULL;
 2507|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, sizeof(svc_dec_ctxt_t));
 2508|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2509|  22.5k|    (*dec_hdl)->pv_codec_handle = (svc_dec_ctxt_t *) pv_buf;
 2510|  22.5k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) pv_buf;
 2511|       |
 2512|  22.5k|    memset(ps_svcd_ctxt, 0, sizeof(svc_dec_ctxt_t));
 2513|       |
 2514|  22.5k|    ps_svcd_ctxt->u1_prev_num_res_layers = UINT8_MAX;
 2515|  22.5k|    ps_svcd_ctxt->u1_pre_parse_in_flush = 1;
 2516|       |    /* set default to maximum values supported */
 2517|  22.5k|    ps_svcd_ctxt->u1_tgt_dep_id = MAX_DEPENDENCY_ID;
  ------------------
  |  |  103|  22.5k|#define MAX_DEPENDENCY_ID 4
  ------------------
 2518|  22.5k|    ps_svcd_ctxt->u1_tgt_quality_id = MAX_QUALITY_ID;
  ------------------
  |  |  102|  22.5k|#define MAX_QUALITY_ID 0
  ------------------
 2519|  22.5k|    ps_svcd_ctxt->u1_tgt_temp_id = MAX_TEMPORAL_ID;
  ------------------
  |  |  104|  22.5k|#define MAX_TEMPORAL_ID 7
  ------------------
 2520|  22.5k|    ps_svcd_ctxt->u1_tgt_priority_id = MAX_PRIORITY_ID;
  ------------------
  |  |  105|  22.5k|#define MAX_PRIORITY_ID 63
  ------------------
 2521|       |
 2522|       |    /* two sets of MAX_NUM_SEQ_PARAMS are created one for sps-base layer;  one for
 2523|       |     * subset_sps- enhancement*/
 2524|  22.5k|    size = ((sizeof(dec_seq_params_t)) * MAX_NUM_SEQ_PARAMS * 2);
  ------------------
  |  |  521|  22.5k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 2525|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2526|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2527|  22.5k|    memset(pv_buf, 0, size);
 2528|  22.5k|    ps_svcd_ctxt->ps_sps = pv_buf;
 2529|       |
 2530|       |    /* two sets of MAX_NUM_SEQ_PARAMS are created one for sps-base layer;  one for
 2531|       |     * subset_sps- enhancement*/
 2532|  22.5k|    size = ((sizeof(dec_svc_seq_params_t)) * MAX_NUM_SEQ_PARAMS * 2);
  ------------------
  |  |  521|  22.5k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 2533|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2534|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2535|  22.5k|    memset(pv_buf, 0, size);
 2536|  22.5k|    ps_svcd_ctxt->ps_subset_sps = pv_buf;
 2537|       |
 2538|  1.46M|    for(u1_sps_ctr = 0; u1_sps_ctr < (2 * MAX_NUM_SEQ_PARAMS); u1_sps_ctr++)
  ------------------
  |  |  521|  1.46M|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  |  Branch (2538:25): [True: 1.44M, False: 22.5k]
  ------------------
 2539|  1.44M|    {
 2540|  1.44M|        ps_svcd_ctxt->ps_subset_sps[u1_sps_ctr].ps_seq = &ps_svcd_ctxt->ps_sps[u1_sps_ctr];
 2541|  1.44M|    }
 2542|       |
 2543|  22.5k|    size = sizeof(sei);
 2544|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2545|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2546|  22.5k|    memset(pv_buf, 0, size);
 2547|  22.5k|    ps_svcd_ctxt->ps_sei = (sei *) pv_buf;
 2548|       |
 2549|  22.5k|    size = sizeof(sei);
 2550|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2551|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2552|  22.5k|    memset(pv_buf, 0, size);
 2553|  22.5k|    ps_svcd_ctxt->ps_sei_parse = (sei *) pv_buf;
 2554|       |
 2555|  22.5k|    size = (sizeof(dec_pic_params_t)) * MAX_NUM_PIC_PARAMS;
  ------------------
  |  |  524|  22.5k|#define MAX_NUM_PIC_PARAMS 256
  ------------------
 2556|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2557|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2558|  22.5k|    memset(pv_buf, 0, size);
 2559|  22.5k|    ps_svcd_ctxt->ps_pps = pv_buf;
 2560|       |
 2561|  22.5k|    size = (sizeof(svc_dec_lyr_struct_t)) * MAX_NUM_RES_LYRS;
  ------------------
  |  |   94|  22.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
 2562|  22.5k|    pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2563|  22.5k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  22.5k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 22.5k]
  |  |  ------------------
  ------------------
 2564|  22.5k|    memset(pv_buf, 0, size);
 2565|  22.5k|    ps_svcd_ctxt->ps_svc_dec_lyr = pv_buf;
 2566|  22.5k|    ps_svcd_ctxt->u1_target_layer_id = 0;
 2567|  22.5k|    ps_svcd_ctxt->u1_cur_layer_id = 0;
 2568|  22.5k|    ps_svcd_ctxt->i4_eos_flag = 0;
 2569|       |
 2570|  22.5k|    ret = isvcd_mode_mv_resample_ctxt_create(ps_svcd_ctxt, pv_api_ip, pv_api_op);
 2571|  22.5k|    if(ret != IV_SUCCESS)
  ------------------
  |  Branch (2571:8): [True: 0, False: 22.5k]
  ------------------
 2572|      0|    {
 2573|      0|        return ret;
 2574|      0|    }
 2575|  22.5k|    ret = isvcd_intra_resample_ctxt_create(ps_svcd_ctxt, pv_api_ip, pv_api_op);
 2576|  22.5k|    if(ret != IV_SUCCESS)
  ------------------
  |  Branch (2576:8): [True: 0, False: 22.5k]
  ------------------
 2577|      0|    {
 2578|      0|        return ret;
 2579|      0|    }
 2580|  22.5k|    ret = isvcd_residual_resample_ctxt_create(ps_svcd_ctxt, pv_api_ip, pv_api_op);
 2581|  22.5k|    if(ret != IV_SUCCESS)
  ------------------
  |  Branch (2581:8): [True: 0, False: 22.5k]
  ------------------
 2582|      0|    {
 2583|      0|        return ret;
 2584|      0|    }
 2585|  22.5k|    ret = isvcd_nal_parse_ctxt_create(ps_svcd_ctxt, pv_api_ip, pv_api_op);
 2586|  22.5k|    if(ret != IV_SUCCESS)
  ------------------
  |  Branch (2586:8): [True: 0, False: 22.5k]
  ------------------
 2587|      0|    {
 2588|      0|        return ret;
 2589|      0|    }
 2590|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (2590:26): [True: 67.7k, False: 22.5k]
  ------------------
 2591|  67.7k|    {
 2592|  67.7k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 2593|  67.7k|        ps_dec = &ps_svc_lyr_dec->s_dec;
 2594|       |
 2595|  67.7k|        ps_svc_lyr_dec->ps_svcd_ctxt = ps_svcd_ctxt;
 2596|  67.7k|        ps_svc_lyr_dec->u1_layer_id = u1_layer_id;
 2597|  67.7k|        ps_svc_lyr_dec->u1_dyadic_flag = 1;
 2598|  67.7k|        ps_svc_lyr_dec->u1_restricted_res_change_flag = 1;
 2599|  67.7k|        ps_svc_lyr_dec->u1_base_res_flag = 1;
 2600|  67.7k|        ps_svc_lyr_dec->u1_ref_layer_id = u1_layer_id - 1;
 2601|  67.7k|        ps_svc_lyr_dec->ps_dec_svc_ref_layer =
 2602|  67.7k|            &ps_svcd_ctxt->ps_svc_dec_lyr[ps_svc_lyr_dec->u1_ref_layer_id];
 2603|  67.7k|        ps_svc_lyr_dec->u4_pps_id_for_layer = UINT32_MAX;
 2604|       |
 2605|  67.7k|#ifndef LOGO_EN
 2606|  67.7k|        ps_dec->u4_share_disp_buf = ps_create_ip->s_ivd_create_ip_t.u4_share_disp_buf;
 2607|       |#else
 2608|       |        ps_dec->u4_share_disp_buf = 0;
 2609|       |#endif
 2610|       |
 2611|  67.7k|        ps_dec->u1_chroma_format = (UWORD8) (ps_create_ip->s_ivd_create_ip_t.e_output_format);
 2612|       |
 2613|  67.7k|        if((ps_dec->u1_chroma_format != IV_YUV_420P) &&
  ------------------
  |  Branch (2613:12): [True: 38.5k, False: 29.1k]
  ------------------
 2614|  38.5k|           (ps_dec->u1_chroma_format != IV_YUV_420SP_UV) &&
  ------------------
  |  Branch (2614:12): [True: 17.9k, False: 20.6k]
  ------------------
 2615|  17.9k|           (ps_dec->u1_chroma_format != IV_YUV_420SP_VU))
  ------------------
  |  Branch (2615:12): [True: 0, False: 17.9k]
  ------------------
 2616|      0|        {
 2617|      0|            ps_dec->u4_share_disp_buf = 0;
 2618|      0|        }
 2619|       |
 2620|  67.7k|        ps_dec->u1_enable_mb_info = ps_create_ip->u4_enable_frame_info;
 2621|  67.7k|        ps_dec->pf_aligned_alloc = pf_aligned_alloc;
 2622|  67.7k|        ps_dec->pf_aligned_free = pf_aligned_free;
 2623|  67.7k|        ps_dec->pv_mem_ctxt = pv_mem_ctxt;
 2624|       |
 2625|  67.7k|        ps_dec->ps_sps = ps_svcd_ctxt->ps_sps;
 2626|  67.7k|        ps_svc_lyr_dec->ps_subset_sps = ps_svcd_ctxt->ps_subset_sps;
 2627|  67.7k|        ps_dec->ps_pps = ps_svcd_ctxt->ps_pps;
 2628|  67.7k|        ps_dec->ps_sei = ps_svcd_ctxt->ps_sei;
 2629|  67.7k|        ps_dec->ps_sei_parse = ps_svcd_ctxt->ps_sei_parse;
 2630|       |
 2631|  67.7k|        size = ithread_get_handle_size();
 2632|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2633|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2634|  67.7k|        memset(pv_buf, 0, size);
 2635|  67.7k|        ps_dec->pv_dec_thread_handle = pv_buf;
 2636|       |
 2637|  67.7k|        size = ithread_get_handle_size();
 2638|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2639|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2640|  67.7k|        memset(pv_buf, 0, size);
 2641|  67.7k|        ps_dec->pv_bs_deblk_thread_handle = pv_buf;
 2642|       |
 2643|       |#ifdef KEEP_THREADS_ACTIVE
 2644|       |        {
 2645|       |            UWORD32 i;
 2646|       |            /* Request memory to hold mutex (start/done) for both threads */
 2647|       |            size = ithread_get_mutex_lock_size() << 2;
 2648|       |            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 8, size);
 2649|       |            RETURN_IF((NULL == pv_buf), IV_FAIL);
 2650|       |            memset(pv_buf, 0, size);
 2651|       |
 2652|       |            // init mutex variable for both the threads
 2653|       |            // 1. ih264d_decode_picture_thread
 2654|       |            // 2. ih264d_recon_deblk_thread
 2655|       |            for(i = 0; i < 2; i++)
 2656|       |            {
 2657|       |                WORD32 ret;
 2658|       |                WORD32 mutex_size = ithread_get_mutex_lock_size();
 2659|       |
 2660|       |                ps_dec->apv_proc_start_mutex[i] = (UWORD8 *) pv_buf + (2 * i * mutex_size);
 2661|       |                ps_dec->apv_proc_done_mutex[i] = (UWORD8 *) pv_buf + ((2 * i + 1) * mutex_size);
 2662|       |
 2663|       |                ret = ithread_mutex_init(ps_dec->apv_proc_start_mutex[0]);
 2664|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2665|       |
 2666|       |                ret = ithread_mutex_init(ps_dec->apv_proc_done_mutex[i]);
 2667|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2668|       |            }
 2669|       |
 2670|       |            size = ithread_get_cond_struct_size() << 2;
 2671|       |            pv_buf = pf_aligned_alloc(pv_mem_ctxt, 8, size);
 2672|       |            RETURN_IF((NULL == pv_buf), IV_FAIL);
 2673|       |            memset(pv_buf, 0, size);
 2674|       |
 2675|       |            // init condition variable for both the threads
 2676|       |            for(i = 0; i < 2; i++)
 2677|       |            {
 2678|       |                WORD32 ret;
 2679|       |                WORD32 cond_size = ithread_get_cond_struct_size();
 2680|       |                ps_dec->apv_proc_start_condition[i] = (UWORD8 *) pv_buf + (2 * i * cond_size);
 2681|       |                ps_dec->apv_proc_done_condition[i] = (UWORD8 *) pv_buf + ((2 * i + 1) * cond_size);
 2682|       |
 2683|       |                ret = ithread_cond_init(ps_dec->apv_proc_start_condition[i]);
 2684|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2685|       |
 2686|       |                ret = ithread_cond_init(ps_dec->apv_proc_done_condition[i]);
 2687|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2688|       |            }
 2689|       |        }
 2690|       |#endif
 2691|  67.7k|        size = sizeof(dpb_manager_t);
 2692|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2693|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2694|  67.7k|        memset(pv_buf, 0, size);
 2695|  67.7k|        ps_dec->ps_dpb_mgr = pv_buf;
 2696|       |
 2697|  67.7k|        size = sizeof(pred_info_t) * 2 * 32;
 2698|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2699|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2700|  67.7k|        memset(pv_buf, 0, size);
 2701|  67.7k|        ps_dec->ps_pred = pv_buf;
 2702|       |
 2703|  67.7k|        size = sizeof(disp_mgr_t);
 2704|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2705|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2706|  67.7k|        memset(pv_buf, 0, size);
 2707|  67.7k|        ps_dec->pv_disp_buf_mgr = pv_buf;
 2708|       |
 2709|  67.7k|        size = ih264_buf_mgr_size();
 2710|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2711|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2712|  67.7k|        memset(pv_buf, 0, size);
 2713|  67.7k|        ps_dec->pv_pic_buf_mgr = pv_buf;
 2714|       |
 2715|  67.7k|        size = sizeof(struct pic_buffer_t) * (H264_MAX_REF_PICS * 2);
  ------------------
  |  |  534|  67.7k|#define H264_MAX_REF_PICS         16
  ------------------
 2716|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2717|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2718|  67.7k|        memset(pv_buf, 0, size);
 2719|  67.7k|        ps_dec->ps_pic_buf_base = pv_buf;
 2720|       |
 2721|  67.7k|        size = sizeof(dec_err_status_t);
 2722|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2723|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2724|  67.7k|        memset(pv_buf, 0, size);
 2725|  67.7k|        ps_dec->ps_dec_err_status = (dec_err_status_t *) pv_buf;
 2726|       |
 2727|  67.7k|        size = sizeof(dpb_commands_t);
 2728|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2729|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2730|  67.7k|        memset(pv_buf, 0, size);
 2731|  67.7k|        ps_dec->ps_dpb_cmds = (dpb_commands_t *) pv_buf;
 2732|       |
 2733|  67.7k|        size = sizeof(dec_bit_stream_t);
 2734|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2735|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2736|  67.7k|        memset(pv_buf, 0, size);
 2737|  67.7k|        ps_dec->ps_bitstrm = (dec_bit_stream_t *) pv_buf;
 2738|       |
 2739|  67.7k|        size = sizeof(dec_nal_unit_svc_ext_params_t);
 2740|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2741|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2742|  67.7k|        memset(pv_buf, 0, size);
 2743|  67.7k|        ps_svc_lyr_dec->ps_nal_svc_ext = (dec_nal_unit_svc_ext_params_t *) pv_buf;
 2744|       |
 2745|  67.7k|        size = sizeof(dec_slice_params_t);
 2746|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2747|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2748|  67.7k|        memset(pv_buf, 0, size);
 2749|  67.7k|        ps_dec->ps_cur_slice = (dec_slice_params_t *) pv_buf;
 2750|       |
 2751|  67.7k|        size = MAX(sizeof(dec_seq_params_t), sizeof(dec_pic_params_t));
  ------------------
  |  |   60|  67.7k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 67.7k, Folded]
  |  |  ------------------
  ------------------
 2752|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2753|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2754|  67.7k|        memset(pv_buf, 0, size);
 2755|  67.7k|        ps_dec->pv_scratch_sps_pps = pv_buf;
 2756|       |
 2757|  67.7k|        size = sizeof(dec_svc_seq_params_t);
 2758|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2759|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2760|  67.7k|        memset(pv_buf, 0, size);
 2761|  67.7k|        ps_svc_lyr_dec->pv_scratch_subset_sps = pv_buf;
 2762|       |
 2763|  67.7k|        ps_dec->u4_static_bits_buf_size = 256000;
 2764|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, ps_dec->u4_static_bits_buf_size);
 2765|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2766|  67.7k|        memset(pv_buf, 0, ps_dec->u4_static_bits_buf_size);
 2767|  67.7k|        ps_dec->pu1_bits_buf_static = pv_buf;
 2768|       |
 2769|  67.7k|        size = ((TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) * sizeof(void *));
  ------------------
  |  |   95|  67.7k|#define TOTAL_LIST_ENTRIES      6 * POC_LIST_L0_TO_L1_DIFF//BOT_LIST_FLD_L1 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17+17+17
  |  |  ------------------
  |  |  |  |   86|  67.7k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|  67.7k|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
                      size = ((TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) * sizeof(void *));
  ------------------
  |  |  100|  67.7k|#define PAD_MAP_IDX_POC             (1)
  ------------------
 2770|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2771|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2772|  67.7k|        ps_dec->ppv_map_ref_idx_to_poc_base = pv_buf;
 2773|  67.7k|        memset(ps_dec->ppv_map_ref_idx_to_poc_base, 0, size);
 2774|       |
 2775|  67.7k|        ps_dec->ppv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc_base + OFFSET_MAP_IDX_POC;
  ------------------
  |  |  103|  67.7k|#define OFFSET_MAP_IDX_POC          (1)
  ------------------
 2776|       |
 2777|  67.7k|        size = (sizeof(bin_ctxt_model_t) * NUM_CABAC_CTXTS_SVC);
  ------------------
  |  |   46|  67.7k|#define NUM_CABAC_CTXTS_SVC 467
  ------------------
 2778|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2779|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2780|  67.7k|        memset(pv_buf, 0, size);
 2781|  67.7k|        ps_dec->p_cabac_ctxt_table_t = pv_buf;
 2782|       |
 2783|  67.7k|        size = sizeof(ctxt_inc_mb_info_t);
 2784|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2785|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2786|  67.7k|        memset(pv_buf, 0, size);
 2787|  67.7k|        ps_dec->ps_left_mb_ctxt_info = pv_buf;
 2788|       |
 2789|  67.7k|        size = MAX_REF_BUF_SIZE * 2;
  ------------------
  |  |   68|  67.7k|#define MAX_REF_BUF_SIZE       (3776*2*2)
  ------------------
 2790|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2791|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2792|  67.7k|        memset(pv_buf, 0, size);
 2793|  67.7k|        ps_dec->pu1_ref_buff_base = pv_buf;
 2794|  67.7k|        ps_dec->pu1_ref_buff = ps_dec->pu1_ref_buff_base + MAX_REF_BUF_SIZE;
  ------------------
  |  |   68|  67.7k|#define MAX_REF_BUF_SIZE       (3776*2*2)
  ------------------
 2795|       |
 2796|  67.7k|        size = ((sizeof(WORD16)) * PRED_BUFFER_WIDTH * PRED_BUFFER_HEIGHT * 2);
  ------------------
  |  |   55|  67.7k|#define PRED_BUFFER_WIDTH   24*2
  ------------------
                      size = ((sizeof(WORD16)) * PRED_BUFFER_WIDTH * PRED_BUFFER_HEIGHT * 2);
  ------------------
  |  |   56|  67.7k|#define PRED_BUFFER_HEIGHT  24*2
  ------------------
 2797|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2798|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2799|  67.7k|        memset(pv_buf, 0, size);
 2800|  67.7k|        ps_dec->pi2_pred1 = pv_buf;
 2801|       |
 2802|  67.7k|        size = sizeof(UWORD8) * (MB_LUM_SIZE);
  ------------------
  |  |  563|  67.7k|#define MB_LUM_SIZE                   256
  ------------------
 2803|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2804|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2805|  67.7k|        memset(pv_buf, 0, size);
 2806|  67.7k|        ps_dec->pu1_temp_mc_buffer = pv_buf;
 2807|       |
 2808|  67.7k|        size = 8 * MAX_REF_BUFS * sizeof(struct pic_buffer_t);
  ------------------
  |  |   75|  67.7k|#define MAX_REF_BUFS    32
  ------------------
 2809|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2810|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2811|  67.7k|        memset(pv_buf, 0, size);
 2812|       |
 2813|  67.7k|        ps_dec->pu1_init_dpb_base = pv_buf;
 2814|  67.7k|        pu1_buf = pv_buf;
 2815|  67.7k|        ps_dec->ps_dpb_mgr->ps_init_dpb[0][0] = (struct pic_buffer_t *) pu1_buf;
 2816|       |
 2817|  67.7k|        pu1_buf += size / 2;
 2818|  67.7k|        ps_dec->ps_dpb_mgr->ps_init_dpb[1][0] = (struct pic_buffer_t *) pu1_buf;
 2819|       |
 2820|  67.7k|        size = (sizeof(UWORD32) * 2 * 3 * ((MAX_FRAMES << 1) * (MAX_FRAMES << 1)) * 2);
  ------------------
  |  |  600|  67.7k|#define MAX_FRAMES              16
  ------------------
                      size = (sizeof(UWORD32) * 2 * 3 * ((MAX_FRAMES << 1) * (MAX_FRAMES << 1)) * 2);
  ------------------
  |  |  600|  67.7k|#define MAX_FRAMES              16
  ------------------
 2821|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2822|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2823|  67.7k|        memset(pv_buf, 0, size);
 2824|  67.7k|        ps_dec->pu4_mbaff_wt_mat = pv_buf;
 2825|       |
 2826|  67.7k|        size = sizeof(UWORD32) * 2 * 3 * ((MAX_FRAMES << 1) * (MAX_FRAMES << 1));
  ------------------
  |  |  600|  67.7k|#define MAX_FRAMES              16
  ------------------
                      size = sizeof(UWORD32) * 2 * 3 * ((MAX_FRAMES << 1) * (MAX_FRAMES << 1));
  ------------------
  |  |  600|  67.7k|#define MAX_FRAMES              16
  ------------------
 2827|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2828|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2829|  67.7k|        memset(pv_buf, 0, size);
 2830|  67.7k|        ps_dec->pu4_wts_ofsts_mat = pv_buf;
 2831|       |
 2832|  67.7k|        size = (sizeof(neighbouradd_t) << 2);
 2833|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2834|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2835|  67.7k|        memset(pv_buf, 0, size);
 2836|  67.7k|        ps_dec->ps_left_mvpred_addr = pv_buf;
 2837|       |
 2838|  67.7k|        size = ih264_buf_mgr_size();
 2839|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2840|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2841|  67.7k|        memset(pv_buf, 0, size);
 2842|  67.7k|        ps_dec->pv_mv_buf_mgr = pv_buf;
 2843|       |
 2844|  67.7k|        size = sizeof(col_mv_buf_t) * (H264_MAX_REF_PICS * 2);
  ------------------
  |  |  534|  67.7k|#define H264_MAX_REF_PICS         16
  ------------------
 2845|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2846|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2847|  67.7k|        ps_dec->ps_col_mv_base = pv_buf;
 2848|  67.7k|        memset(ps_dec->ps_col_mv_base, 0, size);
 2849|       |
 2850|  67.7k|        size = ((MB_SIZE * MB_SIZE * 3) >> 1) + MB_SIZE;
  ------------------
  |  |  554|  67.7k|#define MB_SIZE             16
  ------------------
                      size = ((MB_SIZE * MB_SIZE * 3) >> 1) + MB_SIZE;
  ------------------
  |  |  554|  67.7k|#define MB_SIZE             16
  ------------------
                      size = ((MB_SIZE * MB_SIZE * 3) >> 1) + MB_SIZE;
  ------------------
  |  |  554|  67.7k|#define MB_SIZE             16
  ------------------
 2851|  67.7k|        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
 2852|  67.7k|        RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  67.7k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 67.7k]
  |  |  ------------------
  ------------------
 2853|  67.7k|        ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma = pv_buf;
 2854|  67.7k|        ps_svc_lyr_dec->pu1_ii_resamp_buffer_chroma =
 2855|  67.7k|            ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma + (MB_SIZE * MB_SIZE);
  ------------------
  |  |  554|  67.7k|#define MB_SIZE             16
  ------------------
                          ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma + (MB_SIZE * MB_SIZE);
  ------------------
  |  |  554|  67.7k|#define MB_SIZE             16
  ------------------
 2856|  67.7k|        memset(ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma, 0, size);
 2857|       |
 2858|  67.7k|        isvcd_init_decoder(ps_svc_lyr_dec);
 2859|  67.7k|    }
 2860|  22.5k|    return IV_SUCCESS;
 2861|  22.5k|}
isvcd_create:
 2884|  22.6k|{
 2885|  22.6k|    isvcd_create_ip_t *ps_create_ip;
 2886|  22.6k|    isvcd_create_op_t *ps_create_op;
 2887|  22.6k|    WORD32 ret;
 2888|       |
 2889|  22.6k|    ps_create_ip = (isvcd_create_ip_t *) pv_api_ip;
 2890|  22.6k|    ps_create_op = (isvcd_create_op_t *) pv_api_op;
 2891|       |
 2892|  22.6k|    ps_create_op->s_ivd_create_op_t.u4_error_code = 0;
 2893|  22.6k|    dec_hdl = NULL;
 2894|  22.6k|    ret = isvcd_allocate_static_bufs(&dec_hdl, pv_api_ip, pv_api_op);
 2895|       |
 2896|       |    /* If allocation of some buffer fails, then free buffers allocated till then */
 2897|  22.6k|    if(IV_FAIL == ret)
  ------------------
  |  Branch (2897:8): [True: 9, False: 22.5k]
  ------------------
 2898|      9|    {
 2899|      9|        if(dec_hdl)
  ------------------
  |  Branch (2899:12): [True: 0, False: 9]
  ------------------
 2900|      0|        {
 2901|      0|            if(dec_hdl->pv_codec_handle)
  ------------------
  |  Branch (2901:16): [True: 0, False: 0]
  ------------------
 2902|      0|            {
 2903|      0|                isvcd_free_static_bufs(dec_hdl);
 2904|      0|            }
 2905|      0|            else
 2906|      0|            {
 2907|      0|                void (*pf_aligned_free)(void *pv_mem_ctxt, void *pv_buf);
 2908|      0|                void *pv_mem_ctxt;
 2909|       |
 2910|      0|                pf_aligned_free = ps_create_ip->s_ivd_create_ip_t.pf_aligned_free;
 2911|      0|                pv_mem_ctxt = ps_create_ip->s_ivd_create_ip_t.pv_mem_ctxt;
 2912|      0|                pf_aligned_free(pv_mem_ctxt, dec_hdl);
 2913|      0|            }
 2914|      0|        }
 2915|      9|        ps_create_op->s_ivd_create_op_t.u4_error_code = IVD_MEM_ALLOC_FAILED;
 2916|      9|        ps_create_op->s_ivd_create_op_t.u4_error_code |= 1 << IVD_FATALERROR;
 2917|      9|        return IV_FAIL;
 2918|      9|    }
 2919|       |
 2920|  22.5k|    return IV_SUCCESS;
 2921|  22.6k|}
isvcd_update_dqid:
 2951|   227k|{
 2952|   227k|    vcl_node_t *ps_vcl_node;
 2953|       |
 2954|       |    /* sanity checks */
 2955|   227k|    if((NULL == ps_cur_lyr_node) || (NULL == pps_bot_lyr_node))
  ------------------
  |  Branch (2955:8): [True: 0, False: 227k]
  |  Branch (2955:37): [True: 0, False: 227k]
  ------------------
 2956|      0|    {
 2957|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 2958|      0|    }
 2959|       |
 2960|   227k|    ps_vcl_node = ps_cur_lyr_node->ps_bot_node;
 2961|   227k|    while(NULL != ps_vcl_node)
  ------------------
  |  Branch (2961:11): [True: 72.5k, False: 154k]
  ------------------
 2962|  72.5k|    {
 2963|  72.5k|        WORD32 i4_dqid;
 2964|       |
 2965|  72.5k|        i4_dqid = (ps_vcl_node->i4_dependency_id << 4) + ps_vcl_node->i4_quality_id;
 2966|       |
 2967|       |        /* if reference layer DQ ID matches */
 2968|       |        /* or reference layer is a layer below reference dq id layer */
 2969|  72.5k|        if((i4_dqid == i4_ref_lyr_dqid) ||
  ------------------
  |  Branch (2969:12): [True: 63.2k, False: 9.35k]
  ------------------
 2970|  9.35k|           (ps_vcl_node->i4_quality_id < (i4_ref_lyr_dqid & 0x0F)) ||
  ------------------
  |  Branch (2970:12): [True: 9.18k, False: 170]
  ------------------
 2971|    170|           (ps_vcl_node->i4_dependency_id < (i4_ref_lyr_dqid >> 4)))
  ------------------
  |  Branch (2971:12): [True: 84, False: 86]
  ------------------
 2972|  72.5k|        {
 2973|  72.5k|            break;
 2974|  72.5k|        }
 2975|     86|        ps_vcl_node = ps_vcl_node->ps_bot_node;
 2976|     86|    }
 2977|       |
 2978|       |    /* Update the top and bottom node of ref layer and current layer nodes */
 2979|       |
 2980|   227k|    if(NULL != ps_vcl_node)
  ------------------
  |  Branch (2980:8): [True: 72.5k, False: 154k]
  ------------------
 2981|  72.5k|    {
 2982|  72.5k|        ps_cur_lyr_node->ps_bot_node = ps_vcl_node;
 2983|  72.5k|        ps_vcl_node->ps_top_node = ps_cur_lyr_node;
 2984|  72.5k|    }
 2985|       |
 2986|       |    /* Update pointer to bottom VCL node */
 2987|   227k|    *pps_bot_lyr_node = ps_vcl_node;
 2988|   227k|    return (OK);
  ------------------
  |  |  114|   227k|#define OK        0
  ------------------
 2989|   227k|}
isvcd_detect_res_change:
 3017|   227k|{
 3018|   227k|    UWORD16 u2_scaled_ref_width_sps;
 3019|   227k|    UWORD16 u2_scaled_ref_ht_sps;
 3020|   227k|    UNUSED(ps_prev_subset_sps);
  ------------------
  |  |   45|   227k|#define UNUSED(x) ((void)(x))
  ------------------
 3021|       |
 3022|   227k|    if(NULL == ps_prev_sps)
  ------------------
  |  Branch (3022:8): [True: 154k, False: 72.4k]
  ------------------
 3023|   154k|    {
 3024|       |        /* indicates bottom most layer in Access unit */
 3025|   154k|        return (SVCD_FALSE);
  ------------------
  |  |   45|   154k|#define SVCD_FALSE 0
  ------------------
 3026|   154k|    }
 3027|       |    /* Check for the ESS idc */
 3028|  72.4k|    if(2 == ps_curr_subset_sps->s_sps_svc_ext.u1_extended_spatial_scalability_idc)
  ------------------
  |  Branch (3028:8): [True: 311, False: 72.1k]
  ------------------
 3029|    311|    {
 3030|    311|        return (SVCD_TRUE);
  ------------------
  |  |   46|    311|#define SVCD_TRUE 1
  ------------------
 3031|    311|    }
 3032|       |
 3033|       |    /* Calculate the scaled reference width and height */
 3034|  72.1k|    u2_scaled_ref_width_sps = (ps_curr_sps->u2_frm_wd_in_mbs << 4);
 3035|  72.1k|    u2_scaled_ref_width_sps -=
 3036|  72.1k|        (ps_curr_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_left_offset +
 3037|  72.1k|         ps_curr_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_right_offset);
 3038|       |
 3039|  72.1k|    u2_scaled_ref_ht_sps = (ps_curr_sps->u2_frm_ht_in_mbs << 4);
 3040|  72.1k|    u2_scaled_ref_ht_sps -=
 3041|  72.1k|        (ps_curr_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_top_offset +
 3042|  72.1k|         ps_curr_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_bottom_offset);
 3043|       |
 3044|       |    /* Check for frame width being different */
 3045|  72.1k|    if(u2_scaled_ref_width_sps != (ps_prev_sps->u2_frm_wd_in_mbs << 4))
  ------------------
  |  Branch (3045:8): [True: 50.9k, False: 21.2k]
  ------------------
 3046|  50.9k|    {
 3047|  50.9k|        return (SVCD_TRUE);
  ------------------
  |  |   46|  50.9k|#define SVCD_TRUE 1
  ------------------
 3048|  50.9k|    }
 3049|       |
 3050|       |    /* Check for frame height being different */
 3051|  21.2k|    if(u2_scaled_ref_ht_sps != (ps_prev_sps->u2_frm_ht_in_mbs << 4))
  ------------------
  |  Branch (3051:8): [True: 1.10k, False: 20.1k]
  ------------------
 3052|  1.10k|    {
 3053|  1.10k|        return (SVCD_TRUE);
  ------------------
  |  |   46|  1.10k|#define SVCD_TRUE 1
  ------------------
 3054|  1.10k|    }
 3055|       |
 3056|       |    /* check for crop offset not MB aligned */
 3057|  20.1k|    if((0 != (ps_curr_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_left_offset & 15)) ||
  ------------------
  |  Branch (3057:8): [True: 0, False: 20.1k]
  ------------------
 3058|  20.1k|       (0 != (ps_curr_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_top_offset & 15)))
  ------------------
  |  Branch (3058:8): [True: 0, False: 20.1k]
  ------------------
 3059|      0|    {
 3060|      0|        return (SVCD_TRUE);
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 3061|      0|    }
 3062|       |
 3063|       |    /* check for chroma Phase Y being different */
 3064|  20.1k|    if(ps_curr_subset_sps->s_sps_svc_ext.u1_chroma_phase_x_plus1_flag !=
  ------------------
  |  Branch (3064:8): [True: 920, False: 19.1k]
  ------------------
 3065|  20.1k|       ps_curr_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_x_plus1_flag)
 3066|    920|    {
 3067|    920|        return (SVCD_TRUE);
  ------------------
  |  |   46|    920|#define SVCD_TRUE 1
  ------------------
 3068|    920|    }
 3069|       |
 3070|       |    /* check for chroma Phase Y being different */
 3071|  19.1k|    if(ps_curr_subset_sps->s_sps_svc_ext.u1_chroma_phase_y_plus1 !=
  ------------------
  |  Branch (3071:8): [True: 150, False: 19.0k]
  ------------------
 3072|  19.1k|       ps_curr_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_y_plus1)
 3073|    150|    {
 3074|    150|        return (SVCD_TRUE);
  ------------------
  |  |   46|    150|#define SVCD_TRUE 1
  ------------------
 3075|    150|    }
 3076|       |
 3077|       |    /* If none of the above are true then there is no resolution change */
 3078|  19.0k|    return (SVCD_FALSE);
  ------------------
  |  |   45|  19.0k|#define SVCD_FALSE 0
  ------------------
 3079|  19.1k|}
isvcd_parse_ref_pic_list_modify:
 3107|  46.1k|{
 3108|  46.1k|    WORD32 i4_mod_flag;
 3109|  46.1k|    UWORD16 ui_nextUev;
 3110|  46.1k|    WORD32 i4_num_sets_ctr = 0;
 3111|  46.1k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 3112|  46.1k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 3113|       |
 3114|  46.1k|    if(I_SLICE != ps_slice_prms->u1_slice_type)
  ------------------
  |  |  370|  46.1k|#define I_SLICE  2
  ------------------
  |  Branch (3114:8): [True: 37.4k, False: 8.65k]
  ------------------
 3115|  37.4k|    {
 3116|       |        /* ref_pic_list_modification_flag_l0 */
 3117|  37.4k|        i4_mod_flag = ih264d_get_bit_h264(ps_bitstrm);
 3118|       |
 3119|  37.4k|        if(0 != i4_mod_flag)
  ------------------
  |  Branch (3119:12): [True: 15.2k, False: 22.2k]
  ------------------
 3120|  15.2k|        {
 3121|  15.2k|            WORD32 i4_mod_pic_num_idc;
 3122|       |
 3123|  15.2k|            i4_num_sets_ctr = 0;
 3124|  15.2k|            do
 3125|   102k|            {
 3126|       |                /* modification_of_pic_nums_idc */
 3127|   102k|                i4_mod_pic_num_idc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3128|       |
 3129|   102k|                if((i4_mod_pic_num_idc > 3) || (i4_mod_pic_num_idc < 0))
  ------------------
  |  Branch (3129:20): [True: 7.90k, False: 94.4k]
  |  Branch (3129:48): [True: 261, False: 94.1k]
  ------------------
 3130|  8.16k|                {
 3131|  8.16k|                    return ERROR_INV_SLICE_HDR_T;
 3132|  8.16k|                }
 3133|  94.1k|                if(3 != i4_mod_pic_num_idc)
  ------------------
  |  Branch (3133:20): [True: 92.1k, False: 2.06k]
  ------------------
 3134|  92.1k|                {
 3135|       |                    /* i4_mod_pic_num_idc = 0,1 ==> abs_diff_pic_num_minus1 */
 3136|       |                    /* i4_mod_pic_num_idc = 2 ==> long_term_pic_num */
 3137|       |
 3138|  92.1k|                    ui_nextUev = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3139|  92.1k|                    if(ui_nextUev > (ps_curr_sps->u2_u4_max_pic_num_minus1 + 1))
  ------------------
  |  Branch (3139:24): [True: 1.55k, False: 90.5k]
  ------------------
 3140|  1.55k|                        return ERROR_INV_SLICE_HDR_T;
 3141|  92.1k|                }
 3142|       |
 3143|  92.6k|                i4_num_sets_ctr++;
 3144|       |
 3145|       |                /* if the number of commands recieved exceeds max limit */
 3146|  92.6k|                if((H264_MAX_REF_PICS) == i4_num_sets_ctr) break;
  ------------------
  |  |  534|  92.6k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (3146:20): [True: 3.45k, False: 89.1k]
  ------------------
 3147|       |
 3148|  92.6k|            } while(3 != i4_mod_pic_num_idc);
  ------------------
  |  Branch (3148:21): [True: 87.1k, False: 2.03k]
  ------------------
 3149|  15.2k|        }
 3150|       |
 3151|       |        /*********** if (I_SLICE != u1_slice_type) ***************************/
 3152|  37.4k|    }
 3153|       |
 3154|  36.3k|    if(B_SLICE != ps_slice_prms->u1_slice_type)
  ------------------
  |  |  369|  36.3k|#define B_SLICE  1
  ------------------
  |  Branch (3154:8): [True: 24.2k, False: 12.1k]
  ------------------
 3155|  24.2k|    {
 3156|  24.2k|        return (OK);
  ------------------
  |  |  114|  24.2k|#define OK        0
  ------------------
 3157|  24.2k|    }
 3158|       |
 3159|       |    /* ref_pic_list_modification_flag_l1 */
 3160|  12.1k|    i4_mod_flag = ih264d_get_bit_h264(ps_bitstrm);
 3161|       |
 3162|  12.1k|    if(0 != i4_mod_flag)
  ------------------
  |  Branch (3162:8): [True: 3.76k, False: 8.39k]
  ------------------
 3163|  3.76k|    {
 3164|  3.76k|        WORD32 i4_mod_pic_num_idc;
 3165|       |
 3166|  3.76k|        do
 3167|  27.7k|        {
 3168|       |            /* modification_of_pic_nums_idc */
 3169|  27.7k|            i4_mod_pic_num_idc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3170|       |
 3171|  27.7k|            if((i4_mod_pic_num_idc > 3) || (i4_mod_pic_num_idc < 0))
  ------------------
  |  Branch (3171:16): [True: 2.77k, False: 24.9k]
  |  Branch (3171:44): [True: 52, False: 24.9k]
  ------------------
 3172|  2.82k|            {
 3173|  2.82k|                return ERROR_INV_SLICE_HDR_T;
 3174|  2.82k|            }
 3175|  24.9k|            if(3 != i4_mod_pic_num_idc)
  ------------------
  |  Branch (3175:16): [True: 24.7k, False: 229]
  ------------------
 3176|  24.7k|            {
 3177|       |                /* i4_mod_pic_num_idc = 0,1 ==> abs_diff_pic_num_minus1 */
 3178|       |                /* i4_mod_pic_num_idc = 2 ==> long_term_pic_num */
 3179|       |
 3180|  24.7k|                ui_nextUev = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3181|  24.7k|                if(ui_nextUev > (ps_curr_sps->u2_u4_max_pic_num_minus1 + 1))
  ------------------
  |  Branch (3181:20): [True: 593, False: 24.1k]
  ------------------
 3182|    593|                    return ERROR_INV_SLICE_HDR_T;
 3183|  24.7k|            }
 3184|       |
 3185|  24.3k|            i4_num_sets_ctr++;
 3186|       |
 3187|       |            /* if the number of commands recieved exceeds max limit */
 3188|  24.3k|            if((H264_MAX_REF_PICS) == i4_num_sets_ctr) break;
  ------------------
  |  |  534|  24.3k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (3188:16): [True: 113, False: 24.2k]
  ------------------
 3189|       |
 3190|  24.3k|        } while(3 != i4_mod_pic_num_idc);
  ------------------
  |  Branch (3190:17): [True: 23.9k, False: 229]
  ------------------
 3191|  3.76k|    }
 3192|       |
 3193|  8.74k|    return (OK);
  ------------------
  |  |  114|  8.74k|#define OK        0
  ------------------
 3194|  12.1k|}
isvcd_parse_slice_hdr_refdq_id:
 3228|  54.6k|{
 3229|  54.6k|    UWORD8 u1_pps_id;
 3230|  54.6k|    WORD32 i_temp;
 3231|  54.6k|    UWORD32 u4_temp;
 3232|  54.6k|    WORD32 i4_nal_unit_type;
 3233|  54.6k|    WORD32 i4_nal_ref_idc, i4_quality_id;
 3234|  54.6k|    WORD32 i4_use_ref_base, i4_idr_pic_flag;
 3235|  54.6k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 3236|  54.6k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 3237|  54.6k|    dec_svc_seq_params_t *ps_subset_sps = NULL;
 3238|  54.6k|    WORD32 ret = OK;
  ------------------
  |  |  114|  54.6k|#define OK        0
  ------------------
 3239|       |
 3240|  54.6k|    i4_nal_unit_type = ps_vcl_node->i4_nal_unit_type;
 3241|  54.6k|    i4_nal_ref_idc = ps_vcl_node->i4_nal_ref_idc;
 3242|  54.6k|    i4_quality_id = ps_vcl_node->i4_quality_id;
 3243|  54.6k|    i4_use_ref_base = ps_vcl_node->i4_use_ref_base;
 3244|  54.6k|    i4_idr_pic_flag = ps_vcl_node->i4_idr_pic_flag;
 3245|       |
 3246|       |    /*-----------------------------------------------------------------------*/
 3247|       |    /*--------------------- first mb in slice -------------------------------*/
 3248|       |    /*-----------------------------------------------------------------------*/
 3249|  54.6k|    ps_slice_prms->u2_first_mb_in_slice = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3250|       |
 3251|       |    /*-----------------------------------------------------------------------*/
 3252|       |    /*---------------------------- slice type -------------------------------*/
 3253|       |    /*-----------------------------------------------------------------------*/
 3254|  54.6k|    ps_slice_prms->u1_slice_type = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3255|  54.6k|    if(ps_slice_prms->u1_slice_type > 4)
  ------------------
  |  Branch (3255:8): [True: 2.01k, False: 52.5k]
  ------------------
 3256|  2.01k|    {
 3257|  2.01k|        ps_slice_prms->u1_slice_type -= 5;
 3258|  2.01k|    }
 3259|       |
 3260|       |    /*-----------------------------------------------------------------------*/
 3261|       |    /*----------------------------- PPS id ----------------------------------*/
 3262|       |    /*-----------------------------------------------------------------------*/
 3263|  54.6k|    u1_pps_id = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3264|       |
 3265|       |    /* set correspoding sps and pps id also */
 3266|  54.6k|    ps_pps += u1_pps_id;
 3267|  54.6k|    if(FALSE == ps_pps->u1_is_valid)
  ------------------
  |  |  592|  54.6k|#define FALSE   0
  ------------------
  |  Branch (3267:8): [True: 3.01k, False: 51.6k]
  ------------------
 3268|  3.01k|    {
 3269|  3.01k|        return ERROR_INV_SLICE_HDR_T;
 3270|  3.01k|    }
 3271|  51.6k|    ps_sps = ps_pps->ps_sps;
 3272|  51.6k|    ps_subset_sps = &ps_svcd_ctxt->ps_subset_sps[ps_sps->u1_seq_parameter_set_id];
 3273|  51.6k|    if(CODED_SLICE_EXTENSION_NAL == i4_nal_unit_type)
  ------------------
  |  |   66|  51.6k|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (3273:8): [True: 50.7k, False: 841]
  ------------------
 3274|  50.7k|    {
 3275|  50.7k|        ps_sps += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  50.7k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 3276|  50.7k|        ps_subset_sps =
 3277|  50.7k|            &ps_svcd_ctxt->ps_subset_sps[MAX_NUM_SEQ_PARAMS + ps_sps->u1_seq_parameter_set_id];
  ------------------
  |  |  521|  50.7k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 3278|  50.7k|    }
 3279|       |    /*-----------------------------------------------------------------------*/
 3280|       |    /*--------------------------- frm num -----------------------------------*/
 3281|       |    /*-----------------------------------------------------------------------*/
 3282|  51.6k|    if(!ps_sps) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (3282:8): [True: 0, False: 51.6k]
  ------------------
 3283|  51.6k|    if(FALSE == ps_sps->u1_is_valid) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  592|  51.6k|#define FALSE   0
  ------------------
  |  Branch (3283:8): [True: 2.43k, False: 49.1k]
  ------------------
 3284|       |
 3285|  49.1k|    ps_slice_prms->u2_frame_num = ih264d_get_bits_h264(ps_bitstrm, ps_sps->u1_bits_in_frm_num);
 3286|       |
 3287|       |    /*-----------------------------------------------------------------------*/
 3288|       |    /*------------------ field pic flag and bottom field flag ---------------*/
 3289|       |    /*-----------------------------------------------------------------------*/
 3290|  49.1k|    if(SVCD_TRUE != ps_sps->u1_frame_mbs_only_flag)
  ------------------
  |  |   46|  49.1k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3290:8): [True: 243, False: 48.9k]
  ------------------
 3291|    243|    {
 3292|    243|        return ERROR_INV_SLICE_HDR_T;
 3293|    243|    }
 3294|       |    /*-----------------------------------------------------------------------*/
 3295|       |    /*--------------------------- IDR pic id --------------------------------*/
 3296|       |    /*-----------------------------------------------------------------------*/
 3297|  48.9k|    if(SVCD_TRUE == i4_idr_pic_flag)
  ------------------
  |  |   46|  48.9k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3297:8): [True: 41.8k, False: 7.04k]
  ------------------
 3298|  41.8k|    {
 3299|  41.8k|        UWORD32 u4_idr_pic_id = 0;
 3300|  41.8k|        u4_idr_pic_id = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3301|  41.8k|        if(u4_idr_pic_id > 65535) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (3301:12): [True: 1.09k, False: 40.7k]
  ------------------
 3302|  41.8k|    }
 3303|       |
 3304|       |    /*-----------------------------------------------------------------------*/
 3305|       |    /*----------------- poc lsb and delts_poc_bottom ------------------------*/
 3306|       |    /*-----------------------------------------------------------------------*/
 3307|  47.8k|    if(0 == ps_sps->u1_pic_order_cnt_type)
  ------------------
  |  Branch (3307:8): [True: 43.4k, False: 4.35k]
  ------------------
 3308|  43.4k|    {
 3309|  43.4k|        i_temp = ih264d_get_bits_h264(ps_bitstrm, ps_sps->u1_log2_max_pic_order_cnt_lsb_minus);
 3310|       |
 3311|  43.4k|        if(i_temp < 0 || i_temp >= ps_sps->i4_max_pic_order_cntLsb) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (3311:12): [True: 0, False: 43.4k]
  |  Branch (3311:26): [True: 0, False: 43.4k]
  ------------------
 3312|  43.4k|        if(SVCD_TRUE == ps_pps->u1_pic_order_present_flag)
  ------------------
  |  |   46|  43.4k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3312:12): [True: 7.38k, False: 36.0k]
  ------------------
 3313|  7.38k|        {
 3314|  7.38k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3315|  7.38k|        }
 3316|  43.4k|    }
 3317|       |
 3318|       |    /*-----------------------------------------------------------------------*/
 3319|       |    /*---------------- delta_poc_count[0] and [1] ---------------------------*/
 3320|       |    /*-----------------------------------------------------------------------*/
 3321|  47.8k|    if((1 == ps_sps->u1_pic_order_cnt_type) && (!ps_sps->u1_delta_pic_order_always_zero_flag))
  ------------------
  |  Branch (3321:8): [True: 3.84k, False: 43.9k]
  |  Branch (3321:48): [True: 2.66k, False: 1.18k]
  ------------------
 3322|  2.66k|    {
 3323|  2.66k|        i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3324|       |
 3325|  2.66k|        if(ps_pps->u1_pic_order_present_flag)
  ------------------
  |  Branch (3325:12): [True: 1.92k, False: 739]
  ------------------
 3326|  1.92k|        {
 3327|  1.92k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3328|  1.92k|        }
 3329|  2.66k|    }
 3330|       |
 3331|       |    /*-----------------------------------------------------------------------*/
 3332|       |    /*---------------------- redundant pic cnt ------------------------------*/
 3333|       |    /*-----------------------------------------------------------------------*/
 3334|  47.8k|    if(ps_pps->u1_redundant_pic_cnt_present_flag)
  ------------------
  |  Branch (3334:8): [True: 18.1k, False: 29.6k]
  ------------------
 3335|  18.1k|    {
 3336|  18.1k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3337|  18.1k|        if(u4_temp > MAX_REDUNDANT_PIC_CNT) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  611|  18.1k|#define MAX_REDUNDANT_PIC_CNT       127
  ------------------
  |  Branch (3337:12): [True: 853, False: 17.2k]
  ------------------
 3338|  18.1k|    }
 3339|       |    /*-----------------------------------------------------------------------*/
 3340|       |    /*-----------------Direct_spatial_mv_pred_flag --------------------------*/
 3341|       |    /*-----------------num ref active override flag -------------------------*/
 3342|       |    /*-----------------num_ref_idx_active_l0&1 ------------------------------*/
 3343|       |    /*-----------------------------------------------------------------------*/
 3344|  46.9k|    if(0 == i4_quality_id)
  ------------------
  |  Branch (3344:8): [True: 46.9k, False: 0]
  ------------------
 3345|  46.9k|    {
 3346|  46.9k|        if(B_SLICE == ps_slice_prms->u1_slice_type)
  ------------------
  |  |  369|  46.9k|#define B_SLICE  1
  ------------------
  |  Branch (3346:12): [True: 17.1k, False: 29.7k]
  ------------------
 3347|  17.1k|        {
 3348|  17.1k|            ps_slice_prms->u1_direct_spatial_mv_pred_flag = ih264d_get_bit_h264(ps_bitstrm);
 3349|  17.1k|        }
 3350|       |
 3351|  46.9k|        if((P_SLICE == ps_slice_prms->u1_slice_type) || (B_SLICE == ps_slice_prms->u1_slice_type))
  ------------------
  |  |  368|  46.9k|#define P_SLICE  0
  ------------------
                      if((P_SLICE == ps_slice_prms->u1_slice_type) || (B_SLICE == ps_slice_prms->u1_slice_type))
  ------------------
  |  |  369|  26.1k|#define B_SLICE  1
  ------------------
  |  Branch (3351:12): [True: 20.8k, False: 26.1k]
  |  Branch (3351:57): [True: 17.1k, False: 8.96k]
  ------------------
 3352|  38.0k|        {
 3353|  38.0k|            WORD8 i1_over_ride_flag;
 3354|  38.0k|            i1_over_ride_flag = ih264d_get_bit_h264(ps_bitstrm);
 3355|       |
 3356|  38.0k|            ps_slice_prms->u1_num_ref_idx_active_override_flag = i1_over_ride_flag;
 3357|       |
 3358|  38.0k|            if(SVCD_TRUE == i1_over_ride_flag)
  ------------------
  |  |   46|  38.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3358:16): [True: 21.4k, False: 16.5k]
  ------------------
 3359|  21.4k|            {
 3360|  21.4k|                UWORD8 u8_ref_idx_l0;
 3361|  21.4k|                u8_ref_idx_l0 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3362|  21.4k|                if(u8_ref_idx_l0 > H264_MAX_REF_PICS)
  ------------------
  |  |  534|  21.4k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (3362:20): [True: 582, False: 20.8k]
  ------------------
 3363|    582|                {
 3364|    582|                    return ERROR_NUM_REF;
 3365|    582|                }
 3366|  20.8k|                if(B_SLICE == ps_slice_prms->u1_slice_type)
  ------------------
  |  |  369|  20.8k|#define B_SLICE  1
  ------------------
  |  Branch (3366:20): [True: 8.81k, False: 12.0k]
  ------------------
 3367|  8.81k|                {
 3368|  8.81k|                    UWORD8 u8_ref_idx_l1;
 3369|  8.81k|                    u8_ref_idx_l1 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3370|  8.81k|                    if(u8_ref_idx_l1 > H264_MAX_REF_PICS)
  ------------------
  |  |  534|  8.81k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (3370:24): [True: 284, False: 8.53k]
  ------------------
 3371|    284|                    {
 3372|    284|                        return ERROR_NUM_REF;
 3373|    284|                    }
 3374|  8.81k|                }
 3375|  20.8k|            }
 3376|  38.0k|        }
 3377|       |
 3378|       |        /*-----------------------------------------------------------------------*/
 3379|       |        /*---------------------- ref pic list modification ----------------------*/
 3380|       |        /*-----------------------------------------------------------------------*/
 3381|  46.1k|        {
 3382|  46.1k|            ret = isvcd_parse_ref_pic_list_modify(ps_bitstrm, ps_slice_prms, ps_sps);
 3383|  46.1k|            if(OK != ret) return ret;
  ------------------
  |  |  114|  46.1k|#define OK        0
  ------------------
  |  Branch (3383:16): [True: 13.1k, False: 32.9k]
  ------------------
 3384|  46.1k|        }
 3385|       |
 3386|  32.9k|        if(((1 == ps_pps->u1_wted_pred_flag) && (P_SLICE == ps_slice_prms->u1_slice_type)) ||
  ------------------
  |  |  368|  6.43k|#define P_SLICE  0
  ------------------
  |  Branch (3386:13): [True: 6.43k, False: 26.5k]
  |  Branch (3386:49): [True: 2.37k, False: 4.05k]
  ------------------
 3387|  30.5k|           ((B_SLICE == ps_slice_prms->u1_slice_type) && (1 == ps_pps->u1_wted_bipred_idc)))
  ------------------
  |  |  369|  30.5k|#define B_SLICE  1
  ------------------
  |  Branch (3387:13): [True: 8.74k, False: 21.8k]
  |  Branch (3387:58): [True: 3.90k, False: 4.83k]
  ------------------
 3388|  6.27k|        {
 3389|  6.27k|            if((ps_slice_prms->u1_num_ref_idx_lx_active[0] >= H264_MAX_REF_IDX) ||
  ------------------
  |  |  535|  6.27k|#define H264_MAX_REF_IDX          32
  ------------------
  |  Branch (3389:16): [True: 0, False: 6.27k]
  ------------------
 3390|  6.27k|               (ps_slice_prms->u1_num_ref_idx_lx_active[1] >= H264_MAX_REF_IDX))
  ------------------
  |  |  535|  6.27k|#define H264_MAX_REF_IDX          32
  ------------------
  |  Branch (3390:16): [True: 0, False: 6.27k]
  ------------------
 3391|      0|            {
 3392|      0|                return ERROR_NUM_REF;
 3393|      0|            }
 3394|       |            /*-------------------------------------------------------------------*/
 3395|       |            /*------------------------- Pred weight table -----------------------*/
 3396|       |            /*-------------------------------------------------------------------*/
 3397|  6.27k|            if(CODED_SLICE_EXTENSION_NAL == i4_nal_unit_type)
  ------------------
  |  |   66|  6.27k|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (3397:16): [True: 6.10k, False: 171]
  ------------------
 3398|  6.10k|            {
 3399|  6.10k|                WORD32 i4_base_pred_wt_tbl_flag = 1;
 3400|       |
 3401|       |                /* base_pred_weight_table_flag */
 3402|  6.10k|                if(0 == i4_no_int_lyr_pred)
  ------------------
  |  Branch (3402:20): [True: 6.04k, False: 67]
  ------------------
 3403|  6.04k|                {
 3404|  6.04k|                    i4_base_pred_wt_tbl_flag = ih264d_get_bit_h264(ps_bitstrm);
 3405|  6.04k|                }
 3406|       |
 3407|  6.10k|                if((1 == i4_no_int_lyr_pred) || (0 == i4_base_pred_wt_tbl_flag))
  ------------------
  |  Branch (3407:20): [True: 67, False: 6.04k]
  |  Branch (3407:49): [True: 4.92k, False: 1.11k]
  ------------------
 3408|  4.98k|                {
 3409|  4.98k|                    ret = ih264d_parse_pred_weight_table(ps_slice_prms, ps_bitstrm);
 3410|  4.98k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  4.98k|#define OK        0
  ------------------
  |  Branch (3410:24): [True: 2.56k, False: 2.41k]
  ------------------
 3411|  4.98k|                }
 3412|  6.10k|            }
 3413|    171|            else
 3414|    171|            {
 3415|    171|                ret = ih264d_parse_pred_weight_table(ps_slice_prms, ps_bitstrm);
 3416|    171|                if(ret != OK) return ret;
  ------------------
  |  |  114|    171|#define OK        0
  ------------------
  |  Branch (3416:20): [True: 74, False: 97]
  ------------------
 3417|    171|            }
 3418|  6.27k|        }
 3419|       |
 3420|       |        /*-----------------------------------------------------------------------*/
 3421|       |        /*------------------------- ref pic marking -----------------------------*/
 3422|       |        /*-----------------------------------------------------------------------*/
 3423|  30.3k|        if(0 != i4_nal_ref_idc)
  ------------------
  |  Branch (3423:12): [True: 25.5k, False: 4.79k]
  ------------------
 3424|  25.5k|        {
 3425|  25.5k|            dec_struct_t *ps_dec;
 3426|  25.5k|            svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 3427|  25.5k|            UWORD8 u1_store_ref_base_pic;
 3428|  25.5k|            ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr;
 3429|  25.5k|            ps_dec = &ps_svc_lyr_dec->s_dec;
 3430|  25.5k|            {
 3431|  25.5k|                dec_seq_params_t *ps_sps_tmp = ps_pps->ps_sps;
 3432|       |
 3433|  25.5k|                ps_dec->u1_nal_unit_type = i4_nal_unit_type;
 3434|  25.5k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag = i4_idr_pic_flag;
 3435|  25.5k|                ps_dec->ps_cur_sps = ps_sps;
 3436|  25.5k|                ps_dec->ps_cur_pps = ps_pps;
 3437|  25.5k|                ps_pps->ps_sps = ps_sps;
 3438|       |
 3439|  25.5k|                if(ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag)
  ------------------
  |  Branch (3439:20): [True: 20.8k, False: 4.71k]
  ------------------
 3440|  20.8k|                    ps_dec->u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  20.8k|#define IDR_SLICE_NAL                   5
  ------------------
 3441|       |
 3442|  25.5k|                i_temp = ih264d_read_mmco_commands(ps_dec);
 3443|  25.5k|                ps_pps->ps_sps = ps_sps_tmp;
 3444|  25.5k|                ps_dec->u1_nal_unit_type = i4_nal_unit_type;
 3445|  25.5k|                if(i_temp < 0)
  ------------------
  |  Branch (3445:20): [True: 182, False: 25.3k]
  ------------------
 3446|    182|                {
 3447|    182|                    return ERROR_DBP_MANAGER_T;
 3448|    182|                }
 3449|  25.3k|                ps_dec->u4_bitoffset = i_temp;
 3450|  25.3k|            }
 3451|       |
 3452|  25.3k|            if(0 == ps_subset_sps->s_sps_svc_ext.u1_slice_header_restriction_flag)
  ------------------
  |  Branch (3452:16): [True: 18.9k, False: 6.42k]
  ------------------
 3453|  18.9k|            {
 3454|       |                /* store_ref_base_pic_flag */
 3455|  18.9k|                u1_store_ref_base_pic = ih264d_get_bit_h264(ps_bitstrm);
 3456|  18.9k|                if(0 != u1_store_ref_base_pic)
  ------------------
  |  Branch (3456:20): [True: 4.10k, False: 14.8k]
  ------------------
 3457|  4.10k|                {
 3458|  4.10k|                    return ERROR_INV_SLICE_HDR_T;
 3459|  4.10k|                }
 3460|       |
 3461|  14.8k|                if(((1 == i4_use_ref_base) || (1 == u1_store_ref_base_pic)) &&
  ------------------
  |  Branch (3461:21): [True: 0, False: 14.8k]
  |  Branch (3461:47): [True: 0, False: 14.8k]
  ------------------
 3462|      0|                   (SVCD_FALSE == i4_idr_pic_flag))
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  |  Branch (3462:20): [True: 0, False: 0]
  ------------------
 3463|      0|                {
 3464|      0|                    i_temp = isvcd_dec_ref_base_pic_marking(
 3465|      0|                        &ps_svc_lyr_dec->s_svc_slice_params.s_ref_base_pic_marking_svc_ext,
 3466|      0|                        ps_bitstrm);
 3467|      0|                    if(i_temp != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (3467:24): [True: 0, False: 0]
  ------------------
 3468|      0|                    {
 3469|      0|                        return i_temp;
 3470|      0|                    }
 3471|      0|                }
 3472|       |                /******* End of if (SVC_VCL_NAL == i4_nal_unit_type) *********/
 3473|  14.8k|            }
 3474|       |            /******** End of if(0 != i4_nal_ref_idc) *************************/
 3475|  25.3k|        }
 3476|       |        /************* End of if(0 == i4_quality_id) *************************/
 3477|  30.3k|    }
 3478|       |
 3479|       |    /*-----------------------------------------------------------------------*/
 3480|       |    /*--------------------------- cabac int idc -----------------------------*/
 3481|       |    /*-----------------------------------------------------------------------*/
 3482|  26.0k|    if((ps_pps->u1_entropy_coding_mode == CABAC) && (I_SLICE != ps_slice_prms->u1_slice_type))
  ------------------
  |  |  339|  26.0k|#define CABAC  1
  ------------------
                  if((ps_pps->u1_entropy_coding_mode == CABAC) && (I_SLICE != ps_slice_prms->u1_slice_type))
  ------------------
  |  |  370|  4.69k|#define I_SLICE  2
  ------------------
  |  Branch (3482:8): [True: 4.69k, False: 21.3k]
  |  Branch (3482:53): [True: 3.00k, False: 1.69k]
  ------------------
 3483|  3.00k|    {
 3484|  3.00k|        ps_slice_prms->u1_cabac_init_idc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3485|  3.00k|        if(ps_slice_prms->u1_cabac_init_idc > MAX_CABAC_INIT_IDC)
  ------------------
  |  |  537|  3.00k|#define MAX_CABAC_INIT_IDC        2
  ------------------
  |  Branch (3485:12): [True: 1.97k, False: 1.03k]
  ------------------
 3486|  1.97k|        {
 3487|  1.97k|            return ERROR_INV_SLICE_HDR_T;
 3488|  1.97k|        }
 3489|  3.00k|    }
 3490|       |
 3491|       |    /*-----------------------------------------------------------------------*/
 3492|       |    /*--------------------------- slice qp delta ----------------------------*/
 3493|       |    /*-----------------------------------------------------------------------*/
 3494|  24.0k|    {
 3495|  24.0k|        WORD8 i1_slice_qp_delta;
 3496|  24.0k|        i1_slice_qp_delta = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3497|  24.0k|        i1_slice_qp_delta += ps_pps->u1_pic_init_qp;
 3498|  24.0k|        if((i1_slice_qp_delta < MIN_H264_QP) || (i1_slice_qp_delta > MAX_H264_QP))
  ------------------
  |  |  629|  24.0k|#define MIN_H264_QP 0
  ------------------
                      if((i1_slice_qp_delta < MIN_H264_QP) || (i1_slice_qp_delta > MAX_H264_QP))
  ------------------
  |  |  634|  22.8k|#define MAX_H264_QP 51
  ------------------
  |  Branch (3498:12): [True: 1.16k, False: 22.8k]
  |  Branch (3498:49): [True: 489, False: 22.4k]
  ------------------
 3499|  1.65k|        {
 3500|  1.65k|            return ERROR_INV_RANGE_QP_T;
 3501|  1.65k|        }
 3502|  22.4k|        ps_slice_prms->u1_slice_qp = (UWORD8) i1_slice_qp_delta;
 3503|  22.4k|    }
 3504|       |
 3505|       |    /*-----------------------------------------------------------------------*/
 3506|       |    /*--------------------------- disable dblk filter idc -------------------*/
 3507|       |    /*-----------------------------------------------------------------------*/
 3508|       |    /* Set to default value */
 3509|       |
 3510|      0|    ps_slice_prms->u1_disable_dblk_filter_idc = 0;
 3511|  22.4k|    if(SVCD_TRUE == ps_pps->u1_deblocking_filter_parameters_present_flag)
  ------------------
  |  |   46|  22.4k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3511:8): [True: 11.3k, False: 11.0k]
  ------------------
 3512|  11.3k|    {
 3513|  11.3k|        ps_slice_prms->u1_disable_dblk_filter_idc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3514|       |
 3515|  11.3k|        if(ps_slice_prms->u1_disable_dblk_filter_idc > SLICE_BOUNDARY_DBLK_DISABLED)
  ------------------
  |  |  547|  11.3k|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (3515:12): [True: 2.82k, False: 8.54k]
  ------------------
 3516|  2.82k|        {
 3517|  2.82k|            return ERROR_INV_SLICE_HDR_T;
 3518|  2.82k|        }
 3519|       |        /*-------------------------------------------------------------------*/
 3520|       |        /*--------------------------- slice_alpha_c0_offset_div2 ------------*/
 3521|       |        /*--------------------------- slice_beta_offset_div2 ----------------*/
 3522|       |        /*-------------------------------------------------------------------*/
 3523|  8.54k|        if(1 != ps_slice_prms->u1_disable_dblk_filter_idc)
  ------------------
  |  Branch (3523:12): [True: 6.78k, False: 1.76k]
  ------------------
 3524|  6.78k|        {
 3525|       |            /* slice_alpha_c0_offset_div2 */
 3526|  6.78k|            ps_slice_prms->i1_slice_alpha_c0_offset = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3527|  6.78k|            if((MIN_DBLK_FIL_OFF > ps_slice_prms->i1_slice_alpha_c0_offset) ||
  ------------------
  |  |  550|  6.78k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
  |  Branch (3527:16): [True: 203, False: 6.58k]
  ------------------
 3528|  6.58k|               (ps_slice_prms->i1_slice_alpha_c0_offset > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  6.58k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (3528:16): [True: 137, False: 6.44k]
  ------------------
 3529|    340|            {
 3530|    340|                return ERROR_INV_SLICE_HDR_T;
 3531|    340|            }
 3532|  6.44k|            ps_slice_prms->i1_slice_beta_offset = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3533|  6.44k|            if((MIN_DBLK_FIL_OFF > ps_slice_prms->i1_slice_beta_offset) ||
  ------------------
  |  |  550|  6.44k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
  |  Branch (3533:16): [True: 155, False: 6.29k]
  ------------------
 3534|  6.29k|               (ps_slice_prms->i1_slice_beta_offset > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  6.29k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (3534:16): [True: 248, False: 6.04k]
  ------------------
 3535|    403|            {
 3536|    403|                return ERROR_INV_SLICE_HDR_T;
 3537|    403|            }
 3538|  6.44k|        }
 3539|  8.54k|    }
 3540|       |
 3541|  18.8k|    *pi4_ref_dq_id = -1;
 3542|       |
 3543|  18.8k|    if((0 == i4_no_int_lyr_pred) && (0 == i4_quality_id))
  ------------------
  |  Branch (3543:8): [True: 18.5k, False: 320]
  |  Branch (3543:37): [True: 18.5k, False: 0]
  ------------------
 3544|  18.5k|    {
 3545|  18.5k|        WORD32 i4_inter_lyr_dblk_idc;
 3546|  18.5k|        WORD32 i4_inter_lyr_alpha_c0_offset;
 3547|  18.5k|        WORD32 i4_inter_lyr_beta_offset;
 3548|       |
 3549|       |        /*-------------------------------------------------------------------*/
 3550|       |        /*--------------------------- ref_layer_dq_id -----------------------*/
 3551|       |        /*-------------------------------------------------------------------*/
 3552|  18.5k|        *pi4_ref_dq_id = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3553|       |
 3554|  18.5k|        if(*pi4_ref_dq_id > MAX_REF_DEP_ID)
  ------------------
  |  |  106|  18.5k|#define MAX_REF_DEP_ID ((MAX_DEPENDENCY_ID << 4) | MAX_QUALITY_ID)
  |  |  ------------------
  |  |  |  |  103|  18.5k|#define MAX_DEPENDENCY_ID 4
  |  |  ------------------
  |  |               #define MAX_REF_DEP_ID ((MAX_DEPENDENCY_ID << 4) | MAX_QUALITY_ID)
  |  |  ------------------
  |  |  |  |  102|  18.5k|#define MAX_QUALITY_ID 0
  |  |  ------------------
  ------------------
  |  Branch (3554:12): [True: 1.38k, False: 17.1k]
  ------------------
 3555|  1.38k|        {
 3556|  1.38k|            return ERROR_INV_SLICE_HDR_T;
 3557|  1.38k|        }
 3558|       |        /* ------------------------------------------- */
 3559|       |        /* ---- Inter layer de-blocking parameters ---- */
 3560|       |        /* ------------------------------------------- */
 3561|  17.1k|        i4_inter_lyr_dblk_idc = 0;
 3562|  17.1k|        i4_inter_lyr_alpha_c0_offset = 0;
 3563|  17.1k|        i4_inter_lyr_beta_offset = 0;
 3564|  17.1k|        if(SVCD_TRUE ==
  ------------------
  |  |   46|  17.1k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3564:12): [True: 2.72k, False: 14.4k]
  ------------------
 3565|  17.1k|           ps_subset_sps->s_sps_svc_ext.u1_inter_layer_deblocking_filter_control_present_flag)
 3566|  2.72k|        {
 3567|  2.72k|            i4_inter_lyr_dblk_idc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3568|       |
 3569|  2.72k|            if(i4_inter_lyr_dblk_idc > 6)
  ------------------
  |  Branch (3569:16): [True: 344, False: 2.38k]
  ------------------
 3570|    344|            {
 3571|    344|                return ERROR_INV_SLICE_HDR_T;
 3572|    344|            }
 3573|  2.38k|            if(1 != i4_inter_lyr_dblk_idc)
  ------------------
  |  Branch (3573:16): [True: 1.94k, False: 437]
  ------------------
 3574|  1.94k|            {
 3575|       |                /* Alpha Offset */
 3576|  1.94k|                i4_inter_lyr_alpha_c0_offset = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3577|  1.94k|                if(i4_inter_lyr_alpha_c0_offset > 6 || i4_inter_lyr_alpha_c0_offset < -6)
  ------------------
  |  Branch (3577:20): [True: 201, False: 1.74k]
  |  Branch (3577:56): [True: 124, False: 1.61k]
  ------------------
 3578|    325|                {
 3579|    325|                    return ERROR_INV_SLICE_HDR_T;
 3580|    325|                }
 3581|  1.61k|                i4_inter_lyr_alpha_c0_offset <<= 1;
 3582|       |
 3583|       |                /* Beta Offset */
 3584|  1.61k|                i4_inter_lyr_beta_offset = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3585|  1.61k|                if(i4_inter_lyr_beta_offset > 6 || i4_inter_lyr_beta_offset < -6)
  ------------------
  |  Branch (3585:20): [True: 366, False: 1.25k]
  |  Branch (3585:52): [True: 250, False: 1.00k]
  ------------------
 3586|    616|                {
 3587|    616|                    return ERROR_INV_SLICE_HDR_T;
 3588|    616|                }
 3589|  1.00k|                i4_inter_lyr_beta_offset <<= 1;
 3590|  1.00k|            }
 3591|  2.38k|        }
 3592|  15.8k|        ps_vcl_node->i4_inter_lyr_dblk_idc = i4_inter_lyr_dblk_idc;
 3593|  15.8k|        ps_vcl_node->i4_inter_lyr_beta_offset = i4_inter_lyr_beta_offset;
 3594|  15.8k|        ps_vcl_node->i4_inter_lyr_alpha_c0_offset = i4_inter_lyr_alpha_c0_offset;
 3595|  15.8k|    }
 3596|       |
 3597|  16.1k|    return (0);
 3598|  18.8k|}
isvcd_get_ref_lyr_dqid:
 3631|  78.6k|{
 3632|  78.6k|    WORD32 i4_status;
 3633|  78.6k|    WORD32 ai4_ref_dq_id[2] = {0};
 3634|  78.6k|    WORD32 i4_num_slc_dec;
 3635|       |
 3636|       |    /* local structures */
 3637|  78.6k|    dec_slice_params_t s_slice_prms = {0};
 3638|       |
 3639|       |    /* vcl buffer */
 3640|  78.6k|    vcl_buf_hdr_t *ps_vcl_buf;
 3641|  78.6k|    dec_struct_t *ps_dec;
 3642|  78.6k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr;
 3643|  78.6k|    UNUSED(i4_prev_au_dqid);
  ------------------
  |  |   45|  78.6k|#define UNUSED(x) ((void)(x))
  ------------------
 3644|  78.6k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 3645|       |    /* Sanity checks */
 3646|  78.6k|    if((NULL == ps_vcl_node) || (NULL == ps_sps) || (NULL == ps_pps) || (NULL == pi4_ref_lyr_dqid))
  ------------------
  |  Branch (3646:8): [True: 0, False: 78.6k]
  |  Branch (3646:33): [True: 0, False: 78.6k]
  |  Branch (3646:53): [True: 0, False: 78.6k]
  |  Branch (3646:73): [True: 0, False: 78.6k]
  ------------------
 3647|      0|    {
 3648|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3649|      0|    }
 3650|       |
 3651|  78.6k|    i4_num_slc_dec = 0;
 3652|  78.6k|    ps_vcl_buf = ps_vcl_node->ps_first_vcl_nal;
 3653|  78.6k|    i4_status = NOT_OK;
  ------------------
  |  |  116|  78.6k|#define NOT_OK    -1
  ------------------
 3654|       |
 3655|  78.6k|    while(NULL != ps_vcl_buf)
  ------------------
  |  Branch (3655:11): [True: 54.6k, False: 24.0k]
  ------------------
 3656|  54.6k|    {
 3657|  54.6k|        WORD32 i4_error;
 3658|       |
 3659|       |        /* Fill the stream context structure */
 3660|  54.6k|        ps_dec->ps_bitstrm->u4_ofst = 0;
 3661|  54.6k|        ps_dec->ps_bitstrm->pu4_buffer =
 3662|  54.6k|            (UWORD32 *) ((UWORD8 *) ps_vcl_buf + ps_vcl_buf->i4_buf_offset +
 3663|  54.6k|                         ps_vcl_buf->i4_slice_offset);
 3664|  54.6k|        ps_dec->ps_bitstrm->u4_max_ofst = ps_vcl_buf->u4_max_bits;
 3665|       |
 3666|       |        /* call the function which decodes the slice header */
 3667|  54.6k|        i4_error = isvcd_parse_slice_hdr_refdq_id(ps_vcl_node, &s_slice_prms, ps_dec->ps_bitstrm,
 3668|  54.6k|                                                  &ai4_ref_dq_id[i4_num_slc_dec], ps_sps, ps_pps,
 3669|  54.6k|                                                  ps_vcl_buf->i4_no_int_lyr_pred, ps_svcd_ctxt);
 3670|       |
 3671|       |        /* store the first error encountered */
 3672|  54.6k|        if(0 == *pi4_err_code)
  ------------------
  |  Branch (3672:12): [True: 14.1k, False: 40.4k]
  ------------------
 3673|  14.1k|        {
 3674|  14.1k|            *pi4_err_code = i4_error;
 3675|  14.1k|        }
 3676|  54.6k|        if(i4_error != 0)
  ------------------
  |  Branch (3676:12): [True: 38.4k, False: 16.1k]
  ------------------
 3677|  38.4k|        {
 3678|       |            /* check on the Error returned */
 3679|  38.4k|            return NOT_OK;
  ------------------
  |  |  116|  38.4k|#define NOT_OK    -1
  ------------------
 3680|  38.4k|        }
 3681|       |
 3682|       |        /* set the return status */
 3683|  16.1k|        i4_status = OK;
  ------------------
  |  |  114|  16.1k|#define OK        0
  ------------------
 3684|  16.1k|        break;
 3685|       |
 3686|       |        /* go to the next slice header */
 3687|      0|        ps_vcl_buf = ps_vcl_buf->ps_next;
 3688|      0|    }
 3689|       |
 3690|       |    /* set the appropriate reference dqid of the first slice */
 3691|  40.2k|    *pi4_ref_lyr_dqid = ai4_ref_dq_id[0];
 3692|       |
 3693|  40.2k|    return (i4_status);
 3694|  78.6k|}
isvcd_conceal_node_params:
 3719|  72.6k|{
 3720|  72.6k|    vcl_node_t *ps_node;
 3721|  72.6k|    WORD32 i4_conceal_lyrs;
 3722|  72.6k|    WORD32 i4_no_gaps_flag;
 3723|       |
 3724|       |    /* get the bottom node */
 3725|  72.6k|    ps_node = ps_vcl_nal->ps_bot_node;
 3726|  72.6k|    i4_conceal_lyrs = SVCD_FALSE;
  ------------------
  |  |   45|  72.6k|#define SVCD_FALSE 0
  ------------------
 3727|  72.6k|    i4_no_gaps_flag = SVCD_FALSE;
  ------------------
  |  |   45|  72.6k|#define SVCD_FALSE 0
  ------------------
 3728|       |
 3729|       |    /* loop over all nodes present in the current AU */
 3730|   218k|    while(NULL != ps_node)
  ------------------
  |  Branch (3730:11): [True: 145k, False: 72.6k]
  ------------------
 3731|   145k|    {
 3732|   145k|        WORD32 i4_dep_id = 0;
 3733|   145k|        WORD32 i4_qua_id = 0;
 3734|   145k|        UWORD16 u2_frm_num_dep = 0;
 3735|   145k|        WORD32 i4_idr_pic_flag = 0;
 3736|   145k|        WORD32 i4_idr_pic_num = 0;
 3737|   145k|        WORD32 i4_nal_ref_idc = 0;
 3738|   145k|        WORD32 i4_poc_syntax = 0;
 3739|   145k|        WORD32 i4_qua_zero_lyr_sts = 0;
 3740|       |
 3741|   145k|        i4_dep_id = ps_node->i4_dependency_id;
 3742|   145k|        i4_qua_id = ps_node->i4_quality_id;
 3743|       |
 3744|       |        /* reset the quality 0 layer updated status */
 3745|   145k|        if(0 == i4_qua_id)
  ------------------
  |  Branch (3745:12): [True: 145k, False: 0]
  ------------------
 3746|   145k|        {
 3747|   145k|            i4_qua_zero_lyr_sts = SVCD_FALSE;
  ------------------
  |  |   45|   145k|#define SVCD_FALSE 0
  ------------------
 3748|   145k|        }
 3749|       |
 3750|       |        /* process the quality id 0 layers */
 3751|   145k|        if((0 == i4_qua_id) && (NULL != ps_node->ps_first_vcl_nal))
  ------------------
  |  Branch (3751:12): [True: 145k, False: 0]
  |  Branch (3751:32): [True: 117k, False: 27.9k]
  ------------------
 3752|   117k|        {
 3753|       |            /* if current and previous are reference pictures */
 3754|   117k|            if((0 != ps_prev_au_prms[i4_dep_id].i4_nal_ref_id) && (0 != ps_node->i4_nal_ref_idc))
  ------------------
  |  Branch (3754:16): [True: 91.0k, False: 26.3k]
  |  Branch (3754:67): [True: 84.4k, False: 6.65k]
  ------------------
 3755|  84.4k|            {
 3756|  84.4k|                if(ps_prev_au_prms[i4_dep_id].u2_frm_num == ps_node->u2_frm_num)
  ------------------
  |  Branch (3756:20): [True: 25.3k, False: 59.1k]
  ------------------
 3757|  25.3k|                {
 3758|       |                    /* frame number is concealed */
 3759|  25.3k|                    ps_node->u2_frm_num++;
 3760|  25.3k|                    i4_conceal_lyrs = SVCD_TRUE;
  ------------------
  |  |   46|  25.3k|#define SVCD_TRUE 1
  ------------------
 3761|  25.3k|                }
 3762|  59.1k|                else if((SVCD_TRUE == i4_conceal_lyrs) || (SVCD_TRUE == i4_no_gaps_flag))
  ------------------
  |  |   46|  59.1k|#define SVCD_TRUE 1
  ------------------
                              else if((SVCD_TRUE == i4_conceal_lyrs) || (SVCD_TRUE == i4_no_gaps_flag))
  ------------------
  |  |   46|  50.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3762:25): [True: 9.06k, False: 50.0k]
  |  Branch (3762:59): [True: 6.65k, False: 43.3k]
  ------------------
 3763|  15.7k|                {
 3764|       |                    /* if the current au frm_num is less than prev */
 3765|       |                    /* or the difference is greater than 1         */
 3766|  15.7k|                    if((ps_prev_au_prms[i4_dep_id].u2_frm_num > ps_node->u2_frm_num) ||
  ------------------
  |  Branch (3766:24): [True: 535, False: 15.1k]
  ------------------
 3767|  15.1k|                       ((ps_node->u2_frm_num - ps_prev_au_prms[i4_dep_id].u2_frm_num) > 1))
  ------------------
  |  Branch (3767:24): [True: 15.0k, False: 167]
  ------------------
 3768|  15.5k|                    {
 3769|       |                        /* frame number is concealed */
 3770|  15.5k|                        ps_node->u2_frm_num = ps_prev_au_prms[i4_dep_id].u2_frm_num + 1;
 3771|  15.5k|                    }
 3772|  15.7k|                }
 3773|       |
 3774|       |                /* set the no gaps flag */
 3775|  84.4k|                if(1 == (ps_node->u2_frm_num - ps_prev_au_prms[i4_dep_id].u2_frm_num))
  ------------------
  |  Branch (3775:20): [True: 56.0k, False: 28.3k]
  ------------------
 3776|  56.0k|                {
 3777|  56.0k|                    i4_no_gaps_flag = SVCD_TRUE;
  ------------------
  |  |   46|  56.0k|#define SVCD_TRUE 1
  ------------------
 3778|  56.0k|                }
 3779|  84.4k|            }
 3780|       |
 3781|       |            /* store the final frame number */
 3782|   117k|            u2_frm_num_dep = ps_node->u2_frm_num;
 3783|   117k|            i4_idr_pic_flag = ps_node->i4_idr_pic_flag;
 3784|   117k|            i4_idr_pic_num = ps_node->i4_idr_pic_num;
 3785|   117k|            i4_nal_ref_idc = ps_node->i4_nal_ref_idc;
 3786|   117k|            i4_poc_syntax = ps_node->i4_poc_syntax;
 3787|   117k|            i4_qua_zero_lyr_sts = SVCD_TRUE;
  ------------------
  |  |   46|   117k|#define SVCD_TRUE 1
  ------------------
 3788|   117k|        }
 3789|  27.9k|        else
 3790|  27.9k|        {
 3791|  27.9k|            if(SVCD_TRUE == i4_qua_zero_lyr_sts)
  ------------------
  |  |   46|  27.9k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3791:16): [True: 0, False: 27.9k]
  ------------------
 3792|      0|            {
 3793|       |                /* for higher quality layers store the same value */
 3794|       |                /* present in the quality id 0 layer              */
 3795|      0|                ps_node->u2_frm_num = u2_frm_num_dep;
 3796|      0|                ps_node->i4_idr_pic_flag = i4_idr_pic_flag;
 3797|      0|                ps_node->i4_idr_pic_num = i4_idr_pic_num;
 3798|      0|                ps_node->i4_nal_ref_idc = i4_nal_ref_idc;
 3799|      0|                ps_node->i4_poc_syntax = i4_poc_syntax;
 3800|      0|            }
 3801|  27.9k|        }
 3802|       |
 3803|       |        /* get the upper node pointer */
 3804|   145k|        ps_node = ps_node->ps_top_node;
 3805|   145k|    }
 3806|  72.6k|}
isvcd_refine_dep_list:
 3840|   156k|{
 3841|   156k|    vcl_nal_t *ps_vcl_nal;
 3842|   156k|    vcl_node_t *ps_vcl_node;
 3843|   156k|    WORD32 i4_idr_pic_flag;
 3844|   156k|    WORD32 i4_nal_ref_idc;
 3845|   156k|    WORD32 i4_idr_pic_num;
 3846|   156k|    WORD32 i4_num_res_lyrs_bup;
 3847|   156k|    WORD32 i4_restore_prms_flag;
 3848|   156k|    vcl_node_t *ps_node_bup;
 3849|   156k|    WORD32 ai4_dep_id[MAX_NUM_RES_LYRS] = {0};
 3850|       |
 3851|       |    /* used for checking the init prms */
 3852|   156k|    dec_seq_params_t *ps_sps_tgt_minus1_lyr = NULL;
 3853|   156k|    dec_seq_params_t *ps_sps_tgt_minus2_lyr = NULL;
 3854|   156k|    UNUSED(pi4_err_code);
  ------------------
  |  |   45|   156k|#define UNUSED(x) ((void)(x))
  ------------------
 3855|       |    /* sanity checks */
 3856|   156k|    if((NULL == pv_out_vcl_ctxt) || (NULL == ps_sps) || (NULL == ps_pps))
  ------------------
  |  Branch (3856:8): [True: 0, False: 156k]
  |  Branch (3856:37): [True: 0, False: 156k]
  |  Branch (3856:57): [True: 0, False: 156k]
  ------------------
 3857|      0|    {
 3858|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3859|      0|    }
 3860|       |
 3861|   156k|    ps_vcl_nal = (vcl_nal_t *) pv_out_vcl_ctxt;
 3862|       |
 3863|       |    /*  no node is present */
 3864|   156k|    if(NULL == ps_vcl_nal->ps_bot_node)
  ------------------
  |  Branch (3864:8): [True: 1.46k, False: 155k]
  ------------------
 3865|  1.46k|    {
 3866|  1.46k|        return (NOT_OK);
  ------------------
  |  |  116|  1.46k|#define NOT_OK    -1
  ------------------
 3867|  1.46k|    }
 3868|       |
 3869|       |    /* set the single layer flag if top node and bottom node are same */
 3870|   155k|    if((ps_vcl_nal->ps_bot_node == ps_vcl_nal->ps_top_node) &&
  ------------------
  |  Branch (3870:8): [True: 82.7k, False: 72.6k]
  ------------------
 3871|  82.7k|       (0 == ps_vcl_nal->ps_bot_node->i4_dependency_id))
  ------------------
  |  Branch (3871:8): [True: 82.7k, False: 0]
  ------------------
 3872|  82.7k|    {
 3873|  82.7k|    }
 3874|  72.6k|    else
 3875|  72.6k|    {
 3876|       |        /* call the function which corrects the frame number of each node */
 3877|       |        /* based on previous access unit frame number                     */
 3878|  72.6k|        isvcd_conceal_node_params(ps_vcl_nal, ps_prev_au_prms);
 3879|  72.6k|    }
 3880|       |    /* get the top most node */
 3881|   155k|    ps_vcl_node = ps_vcl_nal->ps_top_node;
 3882|       |
 3883|       |    /* get the IDR picture flag for top most layer in current AU */
 3884|       |    /* if not valid then set the value present in the first valid node */
 3885|   155k|    {
 3886|   155k|        vcl_node_t *ps_node;
 3887|   155k|        WORD32 i4_node_present_flag;
 3888|       |
 3889|   155k|        ps_node = ps_vcl_node;
 3890|   155k|        i4_node_present_flag = SVCD_FALSE;
  ------------------
  |  |   45|   155k|#define SVCD_FALSE 0
  ------------------
 3891|       |
 3892|       |        /* store default values */
 3893|   155k|        i4_idr_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|   155k|#define SVCD_FALSE 0
  ------------------
 3894|   155k|        i4_nal_ref_idc = 0;
 3895|   155k|        i4_idr_pic_num = 0;
 3896|       |
 3897|       |        /* loop until valid node */
 3898|   158k|        while(NULL != ps_node)
  ------------------
  |  Branch (3898:15): [True: 157k, False: 507]
  ------------------
 3899|   157k|        {
 3900|   157k|            if(NULL != ps_node->ps_first_vcl_nal)
  ------------------
  |  Branch (3900:16): [True: 130k, False: 27.5k]
  ------------------
 3901|   130k|            {
 3902|   130k|                i4_idr_pic_flag = ps_node->i4_idr_pic_flag;
 3903|   130k|                i4_nal_ref_idc = ps_node->i4_nal_ref_idc;
 3904|   130k|                i4_idr_pic_num = ps_node->i4_idr_pic_num;
 3905|   130k|                i4_node_present_flag = SVCD_TRUE;
  ------------------
  |  |   46|   130k|#define SVCD_TRUE 1
  ------------------
 3906|   130k|                break;
 3907|   130k|            }
 3908|  27.5k|            else if(SVCD_TRUE == ps_node->i4_idr_pic_flag)
  ------------------
  |  |   46|  27.5k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3908:21): [True: 24.8k, False: 2.65k]
  ------------------
 3909|  24.8k|            {
 3910|  24.8k|                i4_idr_pic_flag = ps_node->i4_idr_pic_flag;
 3911|  24.8k|                i4_nal_ref_idc = ps_node->i4_nal_ref_idc;
 3912|  24.8k|                i4_idr_pic_num = ps_node->i4_idr_pic_num;
 3913|  24.8k|                i4_node_present_flag = SVCD_TRUE;
  ------------------
  |  |   46|  24.8k|#define SVCD_TRUE 1
  ------------------
 3914|  24.8k|                break;
 3915|  24.8k|            }
 3916|       |            /* point to next node */
 3917|  2.65k|            ps_node = ps_node->ps_bot_node;
 3918|  2.65k|        }
 3919|       |
 3920|       |        /* alteast one node should be present */
 3921|   155k|        if(SVCD_FALSE == i4_node_present_flag)
  ------------------
  |  |   45|   155k|#define SVCD_FALSE 0
  ------------------
  |  Branch (3921:12): [True: 507, False: 154k]
  ------------------
 3922|    507|        {
 3923|    507|            return (NOT_OK);
  ------------------
  |  |  116|    507|#define NOT_OK    -1
  ------------------
 3924|    507|        }
 3925|   155k|    }
 3926|       |
 3927|       |    /* initially the access unit is considered to have a single resolution */
 3928|   154k|    ai4_dep_id[0] = 0;
 3929|   154k|    ps_vcl_nal->i4_num_res_lyrs = 1;
 3930|   154k|    i4_restore_prms_flag = SVCD_FALSE;
  ------------------
  |  |   45|   154k|#define SVCD_FALSE 0
  ------------------
 3931|       |
 3932|       |    /*-----------------------------------------------------------------------*/
 3933|       |    /* loop until all the nodes are processed                                */
 3934|       |    /*-----------------------------------------------------------------------*/
 3935|   382k|    while(NULL != ps_vcl_node)
  ------------------
  |  Branch (3935:11): [True: 227k, False: 154k]
  ------------------
 3936|   227k|    {
 3937|   227k|        WORD32 i4_ref_lyr_dqid, i4_status;
 3938|   227k|        vcl_node_t *ps_bot_vcl_node;
 3939|   227k|        WORD32 i4_res_chnge_flag = SVCD_FALSE;
  ------------------
  |  |   45|   227k|#define SVCD_FALSE 0
  ------------------
 3940|   227k|        WORD32 i4_dep_id, i4_qua_id;
 3941|   227k|        WORD32 i4_prev_sps_pps_valid;
 3942|   227k|        WORD32 i4_prev_au_prms_valid;
 3943|       |
 3944|       |        /* set the reference layer DQID to -1 */
 3945|   227k|        i4_ref_lyr_dqid = -1;
 3946|       |
 3947|       |        /* get the current layer dependency and quality id */
 3948|   227k|        i4_dep_id = ps_vcl_node->i4_dependency_id;
 3949|   227k|        i4_qua_id = ps_vcl_node->i4_quality_id;
 3950|       |
 3951|       |        /* get the valid status of prev access unit params */
 3952|   227k|        i4_prev_au_prms_valid = ps_prev_au_prms[i4_dep_id].i4_updated_sts;
 3953|   227k|        i4_prev_sps_pps_valid = ps_pps_sps_prev[(i4_dep_id << 4) + i4_qua_id].i4_updated_sts;
 3954|       |
 3955|       |        /* missing layer handling */
 3956|   227k|        if(NULL == ps_vcl_node->ps_first_vcl_nal)
  ------------------
  |  Branch (3956:12): [True: 30.7k, False: 196k]
  ------------------
 3957|  30.7k|        {
 3958|       |            /* store the params appropriately */
 3959|  30.7k|            ps_vcl_node->i4_idr_pic_flag = i4_idr_pic_flag;
 3960|  30.7k|            ps_vcl_node->i4_nal_ref_idc = i4_nal_ref_idc;
 3961|  30.7k|            ps_vcl_node->i4_idr_pic_num = i4_idr_pic_num;
 3962|  30.7k|            ps_vcl_node->i4_num_slices = 0;
 3963|  30.7k|            ps_vcl_node->i4_use_ref_base = 0;
 3964|  30.7k|            ps_vcl_node->i4_temporal_id = 0;
 3965|       |
 3966|  30.7k|            if((0 != i4_dep_id) || (0 != i4_qua_id))
  ------------------
  |  Branch (3966:16): [True: 24.0k, False: 6.71k]
  |  Branch (3966:36): [True: 0, False: 6.71k]
  ------------------
 3967|  24.0k|            {
 3968|  24.0k|                ps_vcl_node->i4_nal_unit_type = CODED_SLICE_EXTENSION_NAL;
  ------------------
  |  |   66|  24.0k|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
 3969|  24.0k|                ps_vcl_node->u1_acc_no_int_pred = 0;
 3970|  24.0k|            }
 3971|  6.71k|            else if(SVCD_TRUE == i4_idr_pic_flag)
  ------------------
  |  |   46|  6.71k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3971:21): [True: 5.89k, False: 819]
  ------------------
 3972|  5.89k|            {
 3973|  5.89k|                ps_vcl_node->i4_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  5.89k|#define IDR_SLICE_NAL                   5
  ------------------
 3974|  5.89k|                ps_vcl_node->u1_acc_no_int_pred = 1;
 3975|  5.89k|            }
 3976|    819|            else
 3977|    819|            {
 3978|    819|                ps_vcl_node->i4_nal_unit_type = SLICE_NAL;
  ------------------
  |  |  324|    819|#define SLICE_NAL                       1
  ------------------
 3979|    819|                ps_vcl_node->u1_acc_no_int_pred = 1;
 3980|    819|            }
 3981|       |
 3982|  30.7k|            if(SVCD_FALSE == i4_idr_pic_flag)
  ------------------
  |  |   45|  30.7k|#define SVCD_FALSE 0
  ------------------
  |  Branch (3982:16): [True: 1.84k, False: 28.9k]
  ------------------
 3983|  1.84k|            {
 3984|       |                /* pick the other params form previous access unit */
 3985|  1.84k|                if(SVCD_TRUE == i4_prev_sps_pps_valid)
  ------------------
  |  |   46|  1.84k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3985:20): [True: 1.84k, False: 0]
  ------------------
 3986|  1.84k|                {
 3987|  1.84k|                    ps_vcl_node->u1_pps_id =
 3988|  1.84k|                        ps_pps_sps_prev[(i4_dep_id << 4) + i4_qua_id].u1_pps_id;
 3989|       |
 3990|  1.84k|                    ps_vcl_node->u1_sps_id =
 3991|  1.84k|                        ps_pps_sps_prev[(i4_dep_id << 4) + i4_qua_id].u1_sps_id;
 3992|  1.84k|                }
 3993|       |
 3994|  1.84k|                if(SVCD_TRUE == i4_prev_au_prms_valid)
  ------------------
  |  |   46|  1.84k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3994:20): [True: 1.83k, False: 11]
  ------------------
 3995|  1.83k|                {
 3996|  1.83k|                    if(0 == ps_vcl_node->i4_nal_ref_idc)
  ------------------
  |  Branch (3996:24): [True: 644, False: 1.18k]
  ------------------
 3997|    644|                    {
 3998|    644|                        ps_vcl_node->u2_frm_num = ps_prev_au_prms[i4_dep_id].u2_frm_num;
 3999|    644|                    }
 4000|  1.18k|                    else
 4001|  1.18k|                    {
 4002|  1.18k|                        ps_vcl_node->u2_frm_num = ps_prev_au_prms[i4_dep_id].u2_frm_num + 1;
 4003|  1.18k|                    }
 4004|  1.83k|                }
 4005|  1.84k|            }
 4006|  30.7k|        }
 4007|       |
 4008|       |        /* SPS id cannot change unless its an IDR pic */
 4009|   227k|        if(SVCD_FALSE == ps_vcl_node->i4_idr_pic_flag)
  ------------------
  |  |   45|   227k|#define SVCD_FALSE 0
  ------------------
  |  Branch (4009:12): [True: 39.8k, False: 187k]
  ------------------
 4010|  39.8k|        {
 4011|  39.8k|            if(SVCD_TRUE == i4_prev_sps_pps_valid)
  ------------------
  |  |   46|  39.8k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4011:16): [True: 39.7k, False: 69]
  ------------------
 4012|  39.7k|            {
 4013|       |                /* store the SPS id of the current layer */
 4014|  39.7k|                ps_vcl_node->u1_sps_id = ps_pps_sps_prev[(i4_dep_id << 4) + i4_qua_id].u1_sps_id;
 4015|  39.7k|            }
 4016|  39.8k|        }
 4017|       |
 4018|       |        /* store the PPS id and SPS id of the current layer */
 4019|   227k|        ps_pps_sps_prev[(i4_dep_id << 4) + i4_qua_id].u1_pps_id = ps_vcl_node->u1_pps_id;
 4020|   227k|        ps_pps_sps_prev[(i4_dep_id << 4) + i4_qua_id].u1_sps_id = ps_vcl_node->u1_sps_id;
 4021|   227k|        ps_pps_sps_prev[(i4_dep_id << 4) + i4_qua_id].i4_updated_sts = SVCD_TRUE;
  ------------------
  |  |   46|   227k|#define SVCD_TRUE 1
  ------------------
 4022|       |
 4023|       |        /* handling of no_inter_layer_pred_flag 1 cases */
 4024|   227k|        if((1 == ps_vcl_node->u1_acc_no_int_pred) && (NULL != ps_vcl_node->ps_bot_node))
  ------------------
  |  Branch (4024:12): [True: 148k, False: 78.6k]
  |  Branch (4024:54): [True: 331, False: 148k]
  ------------------
 4025|    331|        {
 4026|    331|            if(SVCD_TRUE == i4_idr_pic_flag)
  ------------------
  |  |   46|    331|#define SVCD_TRUE 1
  ------------------
  |  Branch (4026:16): [True: 235, False: 96]
  ------------------
 4027|    235|            {
 4028|       |                /* take a back up of the parameters till the current node. */
 4029|       |                /* these parameters will be restored at the end of loop */
 4030|       |
 4031|    235|                if(SVCD_FALSE == i4_restore_prms_flag)
  ------------------
  |  |   45|    235|#define SVCD_FALSE 0
  ------------------
  |  Branch (4031:20): [True: 235, False: 0]
  ------------------
 4032|    235|                {
 4033|       |                    /* get the number of resolution detected so far */
 4034|    235|                    i4_num_res_lyrs_bup = ps_vcl_nal->i4_num_res_lyrs;
 4035|       |
 4036|    235|                    ps_node_bup = ps_vcl_node;
 4037|       |
 4038|       |                    /* set the restore params flag */
 4039|    235|                    i4_restore_prms_flag = SVCD_TRUE;
  ------------------
  |  |   46|    235|#define SVCD_TRUE 1
  ------------------
 4040|    235|                }
 4041|    235|            }
 4042|     96|            else
 4043|     96|            {
 4044|     96|                ps_vcl_node->i4_ref_dq_id = -1;
 4045|     96|                ps_vcl_node->i4_res_change_flag = i4_res_chnge_flag;
 4046|       |
 4047|       |                /* store the reference DQID for current dependency */
 4048|     96|                ps_prev_au_prms[i4_dep_id].i4_ref_dq_id = -1;
 4049|     96|                ps_prev_au_prms[i4_dep_id].u2_frm_num = ps_vcl_node->u2_frm_num;
 4050|     96|                ps_prev_au_prms[i4_dep_id].i4_nal_ref_id = ps_vcl_node->i4_nal_ref_idc;
 4051|       |
 4052|       |                /* the bottom node is set to NULL */
 4053|     96|                ps_vcl_node->ps_bot_node = NULL;
 4054|     96|                break;
 4055|     96|            }
 4056|    331|        }
 4057|       |
 4058|       |        /* derive the reference layer DQID for quality id equal to 0 */
 4059|   227k|        if(0 == i4_qua_id)
  ------------------
  |  Branch (4059:12): [True: 227k, False: 0]
  ------------------
 4060|   227k|        {
 4061|   227k|            dec_seq_params_t *ps_curr_sps;
 4062|   227k|            dec_svc_seq_params_t *ps_curr_subset_sps;
 4063|       |
 4064|       |            /* derive current SPS */
 4065|   227k|            ps_curr_sps = ps_sps + ps_vcl_node->u1_sps_id;
 4066|   227k|            ps_curr_subset_sps = ps_subset_sps + ps_vcl_node->u1_sps_id;
 4067|       |
 4068|   227k|            {
 4069|   227k|                WORD32 i4_max_frm_num;
 4070|       |
 4071|       |                /* get the maximum value of frame number */
 4072|   227k|                i4_max_frm_num = (1 << (ps_curr_sps->u1_bits_in_frm_num + 1));
 4073|   227k|                ps_vcl_node->u2_frm_num = ps_vcl_node->u2_frm_num % i4_max_frm_num;
 4074|   227k|                if(SVCD_TRUE == ps_vcl_node->i4_idr_pic_flag)
  ------------------
  |  |   46|   227k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4074:20): [True: 187k, False: 39.7k]
  ------------------
 4075|   187k|                {
 4076|       |                    /* if idr then frm num should be 0 */
 4077|   187k|                    ps_vcl_node->u2_frm_num = 0;
 4078|   187k|                }
 4079|   227k|            }
 4080|       |
 4081|       |            /* store default params to inter layer deblocking params */
 4082|   227k|            ps_vcl_node->i4_inter_lyr_dblk_idc = 0;
 4083|   227k|            ps_vcl_node->i4_inter_lyr_beta_offset = 0;
 4084|   227k|            ps_vcl_node->i4_inter_lyr_alpha_c0_offset = 0;
 4085|       |            /* No SEI support for scalability info*/
 4086|   227k|            i4_status = NOT_OK;
  ------------------
  |  |  116|   227k|#define NOT_OK    -1
  ------------------
 4087|       |
 4088|       |            /* if no inter layer pred flag is present set the   */
 4089|       |            /* status to fail since the slices will not contain */
 4090|       |            /* reference layer Dqid                             */
 4091|   227k|            if(1 == ps_vcl_node->u1_acc_no_int_pred)
  ------------------
  |  Branch (4091:16): [True: 148k, False: 78.6k]
  ------------------
 4092|   148k|            {
 4093|   148k|                i4_status = NOT_OK;
  ------------------
  |  |  116|   148k|#define NOT_OK    -1
  ------------------
 4094|   148k|            }
 4095|  78.6k|            else
 4096|  78.6k|            {
 4097|  78.6k|                WORD32 *pi4_ref_dq_id;
 4098|  78.6k|                WORD32 i4_ref_dq_id_temp;
 4099|       |
 4100|       |                /* check if the SEI message has given the ref_dq_id */
 4101|  78.6k|                if(NOT_OK == i4_status)
  ------------------
  |  |  116|  78.6k|#define NOT_OK    -1
  ------------------
  |  Branch (4101:20): [True: 78.6k, False: 0]
  ------------------
 4102|  78.6k|                {
 4103|  78.6k|                    pi4_ref_dq_id = &i4_ref_lyr_dqid;
 4104|  78.6k|                }
 4105|      0|                else
 4106|      0|                {
 4107|      0|                    pi4_ref_dq_id = &i4_ref_dq_id_temp;
 4108|      0|                }
 4109|       |
 4110|  78.6k|                i4_status = isvcd_get_ref_lyr_dqid(ps_vcl_node, ps_sps, ps_pps, pi4_ref_dq_id,
 4111|  78.6k|                                                   ps_prev_au_prms[i4_dep_id].i4_ref_dq_id,
 4112|  78.6k|                                                   &ps_svcd_ctxt->i4_error_code, ps_svcd_ctxt);
 4113|  78.6k|            }
 4114|       |
 4115|       |            /* no slice in the layer has been successfully decoded */
 4116|   227k|            if(NOT_OK == i4_status)
  ------------------
  |  |  116|   227k|#define NOT_OK    -1
  ------------------
  |  Branch (4116:16): [True: 211k, False: 16.1k]
  ------------------
 4117|   211k|            {
 4118|       |                /* check for IDR picture */
 4119|   211k|                if(SVCD_TRUE == i4_idr_pic_flag)
  ------------------
  |  |   46|   211k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4119:20): [True: 172k, False: 38.1k]
  ------------------
 4120|   172k|                {
 4121|       |                    /* set the next lower layer as the reference layer */
 4122|   172k|                    if(NULL != ps_vcl_node->ps_bot_node)
  ------------------
  |  Branch (4122:24): [True: 50.9k, False: 121k]
  ------------------
 4123|  50.9k|                    {
 4124|  50.9k|                        i4_ref_lyr_dqid = ps_vcl_node->ps_bot_node->i4_dependency_id << 4;
 4125|       |
 4126|  50.9k|                        i4_ref_lyr_dqid += ps_vcl_node->ps_bot_node->i4_quality_id;
 4127|  50.9k|                    }
 4128|   121k|                    else
 4129|   121k|                    {
 4130|   121k|                        i4_ref_lyr_dqid = -1;
 4131|   121k|                    }
 4132|   172k|                }
 4133|  38.1k|                else
 4134|  38.1k|                {
 4135|       |                    /* take the reference dq id from previous access unit */
 4136|  38.1k|                    i4_ref_lyr_dqid = ps_prev_au_prms[i4_dep_id].i4_ref_dq_id;
 4137|  38.1k|                }
 4138|   211k|            }
 4139|       |
 4140|       |            /* Update the DQID list based on ref DQID.     */
 4141|       |            /* This routine also updates the ref_dq_id     */
 4142|       |            /* in case the actual layer is completely lost */
 4143|   227k|            i4_status = isvcd_update_dqid(i4_ref_lyr_dqid, ps_vcl_node, &ps_bot_vcl_node);
 4144|       |
 4145|   227k|            if(!(OK == i4_status))
  ------------------
  |  |  114|   227k|#define OK        0
  ------------------
  |  Branch (4145:16): [True: 0, False: 227k]
  ------------------
 4146|      0|            {
 4147|      0|                return i4_status;
 4148|      0|            }
 4149|       |
 4150|       |            /* store the reference DQID for current depedency and */
 4151|       |            /* quality id 0 layer                                 */
 4152|   227k|            ps_prev_au_prms[i4_dep_id].i4_ref_dq_id = i4_ref_lyr_dqid;
 4153|   227k|            ps_prev_au_prms[i4_dep_id].i4_nal_ref_id = ps_vcl_node->i4_nal_ref_idc;
 4154|   227k|            ps_prev_au_prms[i4_dep_id].u2_frm_num = ps_vcl_node->u2_frm_num;
 4155|   227k|            ps_prev_au_prms[i4_dep_id].i4_updated_sts = SVCD_TRUE;
  ------------------
  |  |   46|   227k|#define SVCD_TRUE 1
  ------------------
 4156|       |
 4157|       |            /* ------- Detect Resolution Change ---------------- */
 4158|   227k|            {
 4159|   227k|                dec_seq_params_t *ps_lower_sps = NULL;
 4160|   227k|                dec_svc_seq_params_t *ps_lower_subset_sps = NULL;
 4161|       |
 4162|   227k|                if(NULL != ps_bot_vcl_node)
  ------------------
  |  Branch (4162:20): [True: 72.5k, False: 154k]
  ------------------
 4163|  72.5k|                {
 4164|  72.5k|                    if((NULL != ps_bot_vcl_node->ps_first_vcl_nal) ||
  ------------------
  |  Branch (4164:24): [True: 68.6k, False: 3.87k]
  ------------------
 4165|  3.87k|                       (SVCD_TRUE == i4_idr_pic_flag))
  ------------------
  |  |   46|  3.87k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4165:24): [True: 2.98k, False: 886]
  ------------------
 4166|  71.6k|                    {
 4167|       |                        /* get the SPS of layer */
 4168|  71.6k|                        ps_lower_sps = ps_sps + ps_bot_vcl_node->u1_sps_id;
 4169|  71.6k|                        ps_lower_subset_sps = ps_subset_sps + ps_bot_vcl_node->u1_sps_id;
 4170|  71.6k|                    }
 4171|    886|                    else
 4172|    886|                    {
 4173|       |                        /* if the bottom layer is completely missed */
 4174|    886|                        WORD32 i4_bot_dep_id, i4_bot_qua_id;
 4175|    886|                        UWORD8 u1_sps_id = 0;
 4176|       |
 4177|       |                        /* sps id is picked from previous access unit */
 4178|    886|                        i4_bot_dep_id = ps_bot_vcl_node->i4_dependency_id;
 4179|    886|                        i4_bot_qua_id = ps_bot_vcl_node->i4_quality_id;
 4180|       |
 4181|    886|                        if(SVCD_TRUE ==
  ------------------
  |  |   46|    886|#define SVCD_TRUE 1
  ------------------
  |  Branch (4181:28): [True: 819, False: 67]
  ------------------
 4182|    886|                           ps_pps_sps_prev[(i4_bot_dep_id << 4) + i4_bot_qua_id].i4_updated_sts)
 4183|    819|                        {
 4184|    819|                            u1_sps_id =
 4185|    819|                                ps_pps_sps_prev[(i4_bot_dep_id << 4) + i4_bot_qua_id].u1_sps_id;
 4186|    819|                        }
 4187|     67|                        else
 4188|     67|                        {
 4189|       |                            /* should not enter here */
 4190|     67|                            return NOT_OK;
  ------------------
  |  |  116|     67|#define NOT_OK    -1
  ------------------
 4191|     67|                        }
 4192|       |
 4193|       |                        /* get the SPS of lower layer */
 4194|    819|                        ps_lower_sps = ps_sps + u1_sps_id;
 4195|    819|                        ps_lower_subset_sps = ps_subset_sps + u1_sps_id;
 4196|    819|                    }
 4197|  72.5k|                }
 4198|       |
 4199|       |                /* call the function which detects resolution change */
 4200|   227k|                i4_res_chnge_flag = isvcd_detect_res_change(
 4201|   227k|                    ps_curr_sps, ps_lower_sps, ps_curr_subset_sps, ps_lower_subset_sps);
 4202|       |
 4203|       |                /* if a resolution exists below current resolution */
 4204|   227k|                if(SVCD_TRUE == i4_res_chnge_flag)
  ------------------
  |  |   46|   227k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4204:20): [True: 53.4k, False: 173k]
  ------------------
 4205|  53.4k|                {
 4206|       |                    /* if current picture id IDR */
 4207|  53.4k|                    if(SVCD_TRUE == i4_idr_pic_flag)
  ------------------
  |  |   46|  53.4k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4207:24): [True: 47.3k, False: 6.00k]
  ------------------
 4208|  47.3k|                    {
 4209|       |                        /* store the depedency id of bottom most layer in current resolution */
 4210|  47.3k|                        ai4_dep_id[ps_vcl_nal->i4_num_res_lyrs - 1] = i4_dep_id;
 4211|  47.3k|                    }
 4212|       |
 4213|       |                    /* increment the num resolution layer counter */
 4214|  53.4k|                    ps_vcl_nal->i4_num_res_lyrs++;
 4215|       |
 4216|       |                    /* store the SPS of target -1 and -2 resolution layers */
 4217|  53.4k|                    if(2 == ps_vcl_nal->i4_num_res_lyrs)
  ------------------
  |  Branch (4217:24): [True: 53.4k, False: 0]
  ------------------
 4218|  53.4k|                    {
 4219|  53.4k|                        ps_sps_tgt_minus1_lyr = ps_curr_sps;
 4220|  53.4k|                    }
 4221|      0|                    else if(3 == ps_vcl_nal->i4_num_res_lyrs)
  ------------------
  |  Branch (4221:29): [True: 0, False: 0]
  ------------------
 4222|      0|                    {
 4223|      0|                        ps_sps_tgt_minus2_lyr = ps_curr_sps;
 4224|      0|                    }
 4225|      0|                    else if(ps_vcl_nal->i4_num_res_lyrs > MAX_NUM_RES_LYRS)
  ------------------
  |  |   94|      0|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (4225:29): [True: 0, False: 0]
  ------------------
 4226|      0|                    {
 4227|      0|                        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 4228|      0|                    }
 4229|  53.4k|                }
 4230|   227k|            }
 4231|       |
 4232|       |            /* -------- end of resolution change detection -------- */
 4233|   227k|        }
 4234|      0|        else
 4235|      0|        {
 4236|      0|            i4_ref_lyr_dqid = (i4_dep_id << 4);
 4237|      0|            i4_ref_lyr_dqid += (i4_qua_id - 1);
 4238|       |
 4239|       |            /* Update the DQID list based on ref DQID.     */
 4240|       |            /* This routine also updates the ref_dq_id     */
 4241|       |            /* in case the actual layer is completely lost */
 4242|      0|            i4_status = isvcd_update_dqid(i4_ref_lyr_dqid, ps_vcl_node, &ps_bot_vcl_node);
 4243|       |
 4244|      0|            if(!(OK == i4_status))
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (4244:16): [True: 0, False: 0]
  ------------------
 4245|      0|            {
 4246|      0|                return i4_status;
 4247|      0|            }
 4248|      0|            if(SVCD_TRUE == ps_vcl_node->i4_idr_pic_flag)
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
  |  Branch (4248:16): [True: 0, False: 0]
  ------------------
 4249|      0|            {
 4250|       |                /* if idr then frm num should be 0 */
 4251|      0|                ps_vcl_node->u2_frm_num = 0;
 4252|      0|            }
 4253|      0|        }
 4254|       |
 4255|       |        /* Update resolution change flag inside VCL    */
 4256|       |        /* node structure. This parameter is later used*/
 4257|       |        /* in detecting the top most layer in the      */
 4258|       |        /* resolution currently being decoded          */
 4259|   227k|        ps_vcl_node->i4_res_change_flag = i4_res_chnge_flag;
 4260|   227k|        ps_vcl_node->i4_ref_dq_id = i4_ref_lyr_dqid;
 4261|       |
 4262|       |        /* go to the next node */
 4263|   227k|        ps_vcl_node = ps_bot_vcl_node;
 4264|   227k|    }
 4265|       |
 4266|       |    /* update the Dependency array for each resolution */
 4267|   154k|    if(SVCD_TRUE == i4_idr_pic_flag)
  ------------------
  |  |   46|   154k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4267:8): [True: 122k, False: 32.0k]
  ------------------
 4268|   122k|    {
 4269|   122k|        WORD32 i4_idx;
 4270|       |
 4271|   122k|        ai4_dep_id[ps_vcl_nal->i4_num_res_lyrs - 1] = 0;
 4272|       |
 4273|       |        /* loop over number of resolutions detected */
 4274|   292k|        for(i4_idx = 0; i4_idx < ps_vcl_nal->i4_num_res_lyrs; i4_idx++)
  ------------------
  |  Branch (4274:25): [True: 170k, False: 122k]
  ------------------
 4275|   170k|        {
 4276|   170k|            pi4_dep_id_map[i4_idx] = ai4_dep_id[ps_vcl_nal->i4_num_res_lyrs - 1 - i4_idx];
 4277|   170k|        }
 4278|   122k|    }
 4279|       |
 4280|   154k|    if(SVCD_TRUE == i4_restore_prms_flag)
  ------------------
  |  |   46|   154k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4280:8): [True: 235, False: 154k]
  ------------------
 4281|    235|    {
 4282|       |        /* restore the number of resolutions */
 4283|    235|        ps_vcl_nal->i4_num_res_lyrs = i4_num_res_lyrs_bup;
 4284|       |
 4285|    235|        ps_vcl_node = ps_node_bup;
 4286|       |
 4287|       |        /* set the bottom node to NULL */
 4288|    235|        ps_vcl_node->ps_bot_node = NULL;
 4289|       |
 4290|    235|        ps_vcl_node->i4_ref_dq_id = -1;
 4291|    235|        ps_vcl_node->i4_res_change_flag = SVCD_FALSE;
  ------------------
  |  |   45|    235|#define SVCD_FALSE 0
  ------------------
 4292|       |
 4293|       |        /* store the reference DQID for current dependency */
 4294|    235|        ps_prev_au_prms[ps_vcl_node->i4_dependency_id].i4_ref_dq_id = -1;
 4295|       |
 4296|    235|        ps_prev_au_prms[ps_vcl_node->i4_dependency_id].u2_frm_num = ps_vcl_node->u2_frm_num;
 4297|       |
 4298|    235|        ps_prev_au_prms[ps_vcl_node->i4_dependency_id].i4_nal_ref_id = ps_vcl_node->i4_nal_ref_idc;
 4299|    235|    }
 4300|       |
 4301|       |    /* Finally update the bottom most node in the current access unit */
 4302|   154k|    ps_vcl_node = ps_vcl_nal->ps_top_node;
 4303|       |
 4304|   227k|    while(NULL != ps_vcl_node->ps_bot_node)
  ------------------
  |  Branch (4304:11): [True: 72.2k, False: 154k]
  ------------------
 4305|  72.2k|    {
 4306|  72.2k|        ps_vcl_node = ps_vcl_node->ps_bot_node;
 4307|  72.2k|    }
 4308|       |
 4309|   154k|    ps_vcl_nal->ps_bot_node = ps_vcl_node;
 4310|       |
 4311|       |    /* check on validity of Target Layer -1 and -2 dimensions */
 4312|   154k|    if((NULL != ps_sps_tgt_minus1_lyr) && (0 == ps_sps_tgt_minus1_lyr->u1_is_valid))
  ------------------
  |  Branch (4312:8): [True: 53.4k, False: 101k]
  |  Branch (4312:43): [True: 4.15k, False: 49.2k]
  ------------------
 4313|  4.15k|    {
 4314|  4.15k|        if((H264_MAX_FRAME_WIDTH < (WORD32) (ps_sps_tgt_minus1_lyr->u2_frm_wd_in_mbs << 4)) ||
  ------------------
  |  |   39|  4.15k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (4314:12): [True: 0, False: 4.15k]
  ------------------
 4315|  4.15k|           (H264_MAX_FRAME_HEIGHT < (WORD32) (ps_sps_tgt_minus1_lyr->u2_frm_ht_in_mbs << 4)))
  ------------------
  |  |   40|  4.15k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (4315:12): [True: 0, False: 4.15k]
  ------------------
 4316|      0|        {
 4317|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 4318|      0|        }
 4319|  4.15k|    }
 4320|       |
 4321|   154k|    if((NULL != ps_sps_tgt_minus2_lyr) && (0 == ps_sps_tgt_minus2_lyr->u1_is_valid))
  ------------------
  |  Branch (4321:8): [True: 0, False: 154k]
  |  Branch (4321:43): [True: 0, False: 0]
  ------------------
 4322|      0|    {
 4323|      0|        if((H264_MAX_FRAME_WIDTH < (WORD32) (ps_sps_tgt_minus2_lyr->u2_frm_wd_in_mbs << 4)) ||
  ------------------
  |  |   39|      0|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (4323:12): [True: 0, False: 0]
  ------------------
 4324|      0|           (H264_MAX_FRAME_HEIGHT < (WORD32) (ps_sps_tgt_minus2_lyr->u2_frm_ht_in_mbs << 4)))
  ------------------
  |  |   40|      0|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (4324:12): [True: 0, False: 0]
  ------------------
 4325|      0|        {
 4326|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 4327|      0|        }
 4328|      0|    }
 4329|       |
 4330|   154k|    return (OK);
  ------------------
  |  |  114|   154k|#define OK        0
  ------------------
 4331|   154k|}
isvcd_dec_non_vcl:
 4361|   145k|{
 4362|       |    /* local variables */
 4363|   145k|    non_vcl_nal_t *ps_non_vcl;
 4364|   145k|    WORD32 i4_unit_indx;
 4365|   145k|    non_vcl_buf_hdr_t *ps_non_vcl_buf;
 4366|   145k|    WORD32 i_status = OK;
  ------------------
  |  |  114|   145k|#define OK        0
  ------------------
 4367|   145k|    dec_struct_t *ps_dec;
 4368|   145k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 4369|   145k|    dec_bit_stream_t *ps_bitstrm;
 4370|       |
 4371|   145k|    if((NULL == pv_out_non_vcl) || (NULL == pv_seq_params) || (NULL == pv_pic_params))
  ------------------
  |  Branch (4371:8): [True: 0, False: 145k]
  |  Branch (4371:36): [True: 0, False: 145k]
  |  Branch (4371:63): [True: 0, False: 145k]
  ------------------
 4372|      0|    {
 4373|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 4374|      0|    }
 4375|   145k|    UNUSED(pv_seq_params);
  ------------------
  |  |   45|   145k|#define UNUSED(x) ((void)(x))
  ------------------
 4376|   145k|    UNUSED(pv_pic_params);
  ------------------
  |  |   45|   145k|#define UNUSED(x) ((void)(x))
  ------------------
 4377|       |
 4378|       |    /* currently SEI decoding is not supported */
 4379|       |    /* derive the local variables */
 4380|   145k|    ps_non_vcl = (non_vcl_nal_t *) pv_out_non_vcl;
 4381|   145k|    ps_non_vcl_buf = ps_non_vcl->ps_first_non_vcl_nal;
 4382|   145k|    if(NULL == ps_non_vcl_buf) return (NOT_OK);
  ------------------
  |  |  116|  30.8k|#define NOT_OK    -1
  ------------------
  |  Branch (4382:8): [True: 30.8k, False: 114k]
  ------------------
 4383|       |
 4384|       |    /* loop until all NON VCL NAL are decoded */
 4385|   378k|    for(i4_unit_indx = 0; i4_unit_indx < ps_non_vcl->i4_num_non_vcl_nals; i4_unit_indx++)
  ------------------
  |  Branch (4385:27): [True: 293k, False: 85.3k]
  ------------------
 4386|   293k|    {
 4387|   293k|        UWORD32 u4_nal_unit_type;
 4388|   293k|        ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr;
 4389|   293k|        ps_dec = &ps_svc_lyr_dec->s_dec;
 4390|   293k|        if(NULL == ps_non_vcl_buf) return (NOT_OK);
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (4390:12): [True: 0, False: 293k]
  ------------------
 4391|       |        /* get the current NAL unit type */
 4392|   293k|        u4_nal_unit_type = (UWORD32) ps_non_vcl_buf->i4_nal_unit_type;
 4393|   293k|        if(u4_nal_unit_type > MAX_SVC_NAL_UNIT_TYPE) return (NOT_OK);
  ------------------
  |  |   73|   293k|#define MAX_SVC_NAL_UNIT_TYPE 31
  ------------------
                      if(u4_nal_unit_type > MAX_SVC_NAL_UNIT_TYPE) return (NOT_OK);
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (4393:12): [True: 0, False: 293k]
  ------------------
 4394|   293k|        ps_dec->u1_nal_unit_type = u4_nal_unit_type;
 4395|       |
 4396|   293k|        ps_dec->ps_bitstrm->pu4_buffer =
 4397|   293k|            (UWORD32 *) ((UWORD8 *) ps_non_vcl_buf + ps_non_vcl_buf->i4_buf_offset);
 4398|   293k|        ps_dec->ps_bitstrm->u4_ofst = 0;
 4399|   293k|        ps_dec->ps_bitstrm->u4_max_ofst = isvcd_nal_rbsp_to_sodb(
 4400|   293k|            (UWORD8 *) ps_dec->ps_bitstrm->pu4_buffer, ps_non_vcl_buf->i4_buf_size, 0);
 4401|   293k|        if(ps_dec->ps_bitstrm->u4_max_ofst <= 0) return (NOT_OK);
  ------------------
  |  |  116|  1.93k|#define NOT_OK    -1
  ------------------
  |  Branch (4401:12): [True: 1.93k, False: 291k]
  ------------------
 4402|       |
 4403|   291k|        ps_bitstrm = ps_dec->ps_bitstrm;
 4404|       |
 4405|       |        /* call the processing module based on nal unit type */
 4406|   291k|        switch(u4_nal_unit_type)
 4407|   291k|        {
 4408|  92.4k|            case SEQ_PARAM_NAL:
  ------------------
  |  |  330|  92.4k|#define SEQ_PARAM_NAL                   7
  ------------------
  |  Branch (4408:13): [True: 92.4k, False: 198k]
  ------------------
 4409|       |
 4410|  92.4k|                i_status = isvcd_parse_sps(ps_svc_lyr_dec, ps_bitstrm);
 4411|       |
 4412|  92.4k|                if(!i_status)
  ------------------
  |  Branch (4412:20): [True: 81.2k, False: 11.1k]
  ------------------
 4413|  81.2k|                {
 4414|  81.2k|                    ps_dec->i4_header_decoded |= 0x1;
 4415|  81.2k|                    ps_svcd_ctxt->u4_num_sps_ctr++;
 4416|       |
 4417|  81.2k|                    if(ps_svcd_ctxt->pic_width < ps_svc_lyr_dec->pic_width)
  ------------------
  |  Branch (4417:24): [True: 13.0k, False: 68.2k]
  ------------------
 4418|  13.0k|                    {
 4419|  13.0k|                        ps_svcd_ctxt->pic_width = ps_svc_lyr_dec->pic_width;
 4420|  13.0k|                    }
 4421|  81.2k|                    if(ps_svcd_ctxt->pic_height < ps_svc_lyr_dec->pic_height)
  ------------------
  |  Branch (4421:24): [True: 13.1k, False: 68.1k]
  ------------------
 4422|  13.1k|                    {
 4423|  13.1k|                        ps_svcd_ctxt->pic_height = ps_svc_lyr_dec->pic_height;
 4424|  13.1k|                    }
 4425|  81.2k|                }
 4426|       |
 4427|  92.4k|                if(i_status) return i_status;
  ------------------
  |  Branch (4427:20): [True: 11.1k, False: 81.2k]
  ------------------
 4428|       |
 4429|  81.2k|                break;
 4430|  81.2k|            case SUBSET_SPS_NAL:
  ------------------
  |  |   65|  24.0k|#define SUBSET_SPS_NAL 15
  ------------------
  |  Branch (4430:13): [True: 24.0k, False: 267k]
  ------------------
 4431|       |
 4432|  24.0k|                i_status = isvcd_parse_subset_sps(ps_svc_lyr_dec, ps_bitstrm);
 4433|       |
 4434|  24.0k|                if(!i_status)
  ------------------
  |  Branch (4434:20): [True: 17.3k, False: 6.71k]
  ------------------
 4435|  17.3k|                {
 4436|  17.3k|                    ps_svcd_ctxt->u4_num_sps_ctr++;
 4437|  17.3k|                    ps_dec->i4_header_decoded |= 0x1;
 4438|       |
 4439|  17.3k|                    if(ps_svcd_ctxt->pic_width < ps_svc_lyr_dec->pic_width)
  ------------------
  |  Branch (4439:24): [True: 7.27k, False: 10.0k]
  ------------------
 4440|  7.27k|                    {
 4441|  7.27k|                        ps_svcd_ctxt->pic_width = ps_svc_lyr_dec->pic_width;
 4442|  7.27k|                    }
 4443|  17.3k|                    if(ps_svcd_ctxt->pic_height < ps_svc_lyr_dec->pic_height)
  ------------------
  |  Branch (4443:24): [True: 7.32k, False: 9.98k]
  ------------------
 4444|  7.32k|                    {
 4445|  7.32k|                        ps_svcd_ctxt->pic_height = ps_svc_lyr_dec->pic_height;
 4446|  7.32k|                    }
 4447|  17.3k|                }
 4448|  24.0k|                if(i_status) return i_status;
  ------------------
  |  Branch (4448:20): [True: 6.71k, False: 17.3k]
  ------------------
 4449|       |
 4450|  17.3k|                break;
 4451|       |
 4452|  72.7k|            case PIC_PARAM_NAL:
  ------------------
  |  |  331|  72.7k|#define PIC_PARAM_NAL                   8
  ------------------
  |  Branch (4452:13): [True: 72.7k, False: 218k]
  ------------------
 4453|       |
 4454|  72.7k|                i_status = isvcd_parse_pps(ps_svc_lyr_dec, ps_bitstrm);
 4455|  72.7k|                if(!i_status)
  ------------------
  |  Branch (4455:20): [True: 69.8k, False: 2.91k]
  ------------------
 4456|  69.8k|                {
 4457|  69.8k|                    ps_dec->i4_header_decoded |= 0x2;
 4458|  69.8k|                    ps_svcd_ctxt->u4_num_pps_ctr++;
 4459|  69.8k|                }
 4460|  72.7k|                if(i_status) return i_status;
  ------------------
  |  Branch (4460:20): [True: 2.91k, False: 69.8k]
  ------------------
 4461|  69.8k|                break;
 4462|  69.8k|            case SEI_NAL:
  ------------------
  |  |  329|  8.73k|#define SEI_NAL                         6
  ------------------
  |  Branch (4462:13): [True: 8.73k, False: 282k]
  ------------------
 4463|  8.73k|            {
 4464|  8.73k|                i_status = ih264d_parse_sei_message(ps_dec, ps_bitstrm);
 4465|  8.73k|                if(i_status) return i_status;
  ------------------
  |  Branch (4465:20): [True: 6.48k, False: 2.24k]
  ------------------
 4466|  2.24k|                ih264d_parse_sei(ps_dec, ps_bitstrm);
 4467|  2.24k|            }
 4468|      0|            break;
 4469|  93.3k|            default:
  ------------------
  |  Branch (4469:13): [True: 93.3k, False: 197k]
  ------------------
 4470|       |                /* no other NON VCL UNIT is supported */
 4471|  93.3k|                break;
 4472|   291k|        }
 4473|       |
 4474|       |        /* get the next non vcl buffer */
 4475|   264k|        ps_non_vcl_buf = ps_non_vcl_buf->ps_next;
 4476|       |
 4477|   264k|    } /* end of loop over all NAL units */
 4478|       |
 4479|  85.3k|    return (OK);
  ------------------
  |  |  114|  85.3k|#define OK        0
  ------------------
 4480|   114k|}
isvcd_seq_hdr_dec:
 4505|   110k|{
 4506|   110k|    WORD32 i4_status;
 4507|       |
 4508|       |    /* Decode all non VCL NAL till first VCL NAL is encountered */
 4509|   110k|    ps_svcd_ctxt->s_non_vcl_nal.i4_num_non_vcl_nals = 0;
 4510|   110k|    i4_status = isvcd_nal_parse_non_vcl_nal(
 4511|   110k|        ps_svcd_ctxt->pv_nal_parse_ctxt, ps_in_bufs->pv_stream_buffer, &ps_svcd_ctxt->s_non_vcl_nal,
 4512|   110k|        pu4_bytes_consumed, &ps_in_bufs->u4_num_Bytes);
 4513|       |
 4514|       |    /* Note: The bitstream extraction module expects updated  */
 4515|       |    /* pointer whenever a new call to this module has been    */
 4516|       |    /* made. Hence the buffer pointer has to be incremented   */
 4517|       |    /* by bytes consumed                                      */
 4518|   110k|    ps_in_bufs->u4_num_Bytes -= *pu4_bytes_consumed;
 4519|       |
 4520|       |    /* ------------------------------------------------------ */
 4521|       |    /* Decoding of non VCL data. As current implementation it */
 4522|       |    /* decodes the following:                                 */
 4523|       |    /*          1. Sequence parameter set                     */
 4524|       |    /*          2. Picture parameter set                      */
 4525|       |    /*          3. SEI message                                */
 4526|       |    /* ------------------------------------------------------ */
 4527|   110k|    i4_status = isvcd_dec_non_vcl(&ps_svcd_ctxt->s_non_vcl_nal, ps_svcd_ctxt->ps_sps, ps_svcd_ctxt->ps_pps,
 4528|   110k|                      ps_svcd_ctxt);
 4529|       |
 4530|   110k|    return (i4_status);
 4531|   110k|}
isvcd_pre_parse_refine_au:
 4560|   652k|{
 4561|   652k|    WORD32 i4_status = 0, i4_non_vcl_status;
 4562|   652k|    UWORD32 u4_bytes_consumed = 0;
 4563|   652k|    dec_struct_t *ps_dec;
 4564|   652k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 4565|   652k|    ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr;
 4566|   652k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 4567|       |
 4568|       |    /* Sequence header decode:                                 */
 4569|       |    /* If sequence header is not decoded then decode the       */
 4570|       |    /* sequence header                                         */
 4571|       |
 4572|   652k|    if(SVCD_FALSE == ps_dec->i4_header_decoded)
  ------------------
  |  |   45|   652k|#define SVCD_FALSE 0
  ------------------
  |  Branch (4572:8): [True: 110k, False: 541k]
  ------------------
 4573|   110k|    {
 4574|   110k|        i4_status = isvcd_seq_hdr_dec(ps_svcd_ctxt, ps_in_bufs, &u4_bytes_consumed);
 4575|       |
 4576|   110k|        if((VCL_NAL_FOUND_TRUE == i4_status) && (ps_svcd_ctxt->u4_num_sps_ctr != 0) &&
  ------------------
  |  Branch (4576:12): [True: 0, False: 110k]
  |  Branch (4576:49): [True: 0, False: 0]
  ------------------
 4577|      0|           (ps_svcd_ctxt->u4_num_pps_ctr != 0))
  ------------------
  |  Branch (4577:12): [True: 0, False: 0]
  ------------------
 4578|      0|        {
 4579|       |            /* set the header decoded flag */
 4580|      0|            ps_dec->i4_header_decoded = 3;
 4581|      0|        }
 4582|   110k|    }
 4583|   652k|    *pu4_bytes_consumed = u4_bytes_consumed;
 4584|   652k|    if (i4_status)
  ------------------
  |  Branch (4584:9): [True: 49.9k, False: 602k]
  ------------------
 4585|  49.9k|    {
 4586|  49.9k|        return NOT_OK;
  ------------------
  |  |  116|  49.9k|#define NOT_OK    -1
  ------------------
 4587|  49.9k|    }
 4588|   602k|    if(1 == ps_dec->i4_decode_header)
  ------------------
  |  Branch (4588:8): [True: 423k, False: 179k]
  ------------------
 4589|   423k|    {
 4590|   423k|        return OK;
  ------------------
  |  |  114|   423k|#define OK        0
  ------------------
 4591|   423k|    }
 4592|       |    /* Bit-stream Parsing. It performs following tasks:        */
 4593|       |    /*          1. NAL header decoder                          */
 4594|       |    /*          2. Emulation prevention and byte swap          */
 4595|       |    /*             (During this process data to moved to output*/
 4596|       |    /*              buffer)                                    */
 4597|       |    /*          3. Dependency list creation based on NAL header*/
 4598|       |    /*          4. Detection of picture boundary               */
 4599|       |    /* NOTE1:                                                  */
 4600|       |    /*       Output buffers for VCL and non VCL data are       */
 4601|       |    /*       different. VCL data can be retrieved through      */
 4602|       |    /*       dependency list. Whereas non VCL data is stored in*/
 4603|       |    /*       one single buffer, which is accessed through NON  */
 4604|       |    /*       VCL structure                                     */
 4605|       |    /* NOTE2:Partial input case for nal parsing requires a     */
 4606|       |    /*       flush API to be called when end of bitstream      */
 4607|       |    /*       occurs                                            */
 4608|       |
 4609|   179k|    if(SVCD_FALSE == ps_svcd_ctxt->i4_eos_flag)
  ------------------
  |  |   45|   179k|#define SVCD_FALSE 0
  ------------------
  |  Branch (4609:8): [True: 179k, False: 0]
  ------------------
 4610|   179k|    {
 4611|   179k|        if(ps_dec->i4_header_decoded == 3)
  ------------------
  |  Branch (4611:12): [True: 161k, False: 17.8k]
  ------------------
 4612|   161k|        {
 4613|   161k|            i4_status = isvcd_nal_parse_vcl_nal_partial(
 4614|   161k|                ps_svcd_ctxt->pv_nal_parse_ctxt, ps_in_bufs->pv_stream_buffer,
 4615|   161k|                &ps_svcd_ctxt->s_non_vcl_nal, &ps_svcd_ctxt->s_vcl_nal, &u4_bytes_consumed,
 4616|   161k|                &ps_in_bufs->u4_num_Bytes);
 4617|   161k|        }
 4618|  17.8k|        else
 4619|  17.8k|        {
 4620|  17.8k|            return NOT_OK;
  ------------------
  |  |  116|  17.8k|#define NOT_OK    -1
  ------------------
 4621|  17.8k|        }
 4622|   179k|    }
 4623|      0|    else
 4624|      0|    {
 4625|      0|        void *pv_nal_parse_ctxt;
 4626|      0|        pv_nal_parse_ctxt = ps_svcd_ctxt->pv_nal_parse_ctxt;
 4627|       |
 4628|      0|        i4_status = isvcd_nal_parse_partial_signal_eos(pv_nal_parse_ctxt, &ps_svcd_ctxt->s_vcl_nal,
 4629|      0|                                                       &ps_svcd_ctxt->s_non_vcl_nal);
 4630|       |
 4631|      0|        u4_bytes_consumed = 0;
 4632|      0|    }
 4633|       |
 4634|   161k|    *pu4_bytes_consumed += u4_bytes_consumed;
 4635|       |
 4636|       |    /* Picture Boundary detected: Go ahead and do the decoding  */
 4637|       |    /* Picture boundary not detected: Otherwsie retrun from this*/
 4638|       |    /* function and update the bytes consumed variable. This    */
 4639|       |    /* should be repeated till we get a picture boundary        */
 4640|       |
 4641|   161k|    if(PIC_BOUNDARY_FALSE == i4_status)
  ------------------
  |  Branch (4641:8): [True: 4.80k, False: 156k]
  ------------------
 4642|  4.80k|    {
 4643|  4.80k|        return (NOT_OK);
  ------------------
  |  |  116|  4.80k|#define NOT_OK    -1
  ------------------
 4644|  4.80k|    }
 4645|       |
 4646|   156k|    else if(FLUSH_DECODED_PICTURE == i4_status)
  ------------------
  |  Branch (4646:13): [True: 14, False: 156k]
  ------------------
 4647|     14|    {
 4648|       |        /* No more data is expected to come. Pictures decoded   */
 4649|       |        /* so far needs to be sent for display                  */
 4650|     14|        return (FLUSH);
  ------------------
  |  |   50|     14|#define FLUSH 2
  ------------------
 4651|     14|    }
 4652|       |
 4653|   156k|    if(PIC_BOUNDARY_TRUE != i4_status)
  ------------------
  |  Branch (4653:8): [True: 0, False: 156k]
  ------------------
 4654|      0|    {
 4655|      0|        return (NOT_OK);
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 4656|      0|    }
 4657|       |
 4658|       |    /* check if the application has set any of the skip modes       */
 4659|       |    /* add the support for P and B skip modes                       */
 4660|       |    /* if(ps_dec_ctxt->s_dyn_prms.u1_frame_skip_mode)               */
 4661|       |
 4662|       |    /* Parse slice header to decode reference layer dQId and refine */
 4663|       |    /* the dependency list                                          */
 4664|       |    /* NOTE: Yes, this processing could be moved into NAL parsing   */
 4665|       |    /*       routine to avoid unnecessary emulation prevention and  */
 4666|       |    /*       byte swapping over discardable data. This Optimization */
 4667|       |    /*       has been deferred for some time. In future if we found */
 4668|       |    /*       that there are many such streams which doesn't set     */
 4669|       |    /*       'discard_flag' correctly in NAL header, we will take a */
 4670|       |    /*       hit to optimize it.                                    */
 4671|       |
 4672|       |    /* At present this routine also performs the following          */
 4673|       |    /* 1. Refine DQID list based on reference layer DQID            */
 4674|       |    /* 2. Calculates the POC for the target layer                   */
 4675|       |
 4676|   156k|    {
 4677|   156k|        i4_status = isvcd_refine_dep_list(
 4678|   156k|            &ps_svcd_ctxt->s_vcl_nal, ps_svcd_ctxt->ps_sps, ps_svcd_ctxt->ps_subset_sps,
 4679|   156k|            ps_svcd_ctxt->ps_pps, &ps_svcd_ctxt->ai4_dq_id_map[0], &ps_svcd_ctxt->as_au_prms_dep[0],
 4680|   156k|            &ps_svcd_ctxt->as_pps_sps_prev_au[0], &ps_svcd_ctxt->i4_error_code, ps_svcd_ctxt);
 4681|   156k|    }
 4682|       |
 4683|   156k|    if(0 != ps_svcd_ctxt->s_non_vcl_nal.i4_num_non_vcl_nals)
  ------------------
  |  Branch (4683:8): [True: 34.4k, False: 122k]
  ------------------
 4684|  34.4k|    {
 4685|       |        /* Decoding of non VCL data. In current implementation it  */
 4686|       |        /* decodes the following:                                  */
 4687|       |        /*          1. Sequence parameter set                      */
 4688|       |        /*          2. Picture parameter set                       */
 4689|       |        /*          3. SEI message                                 */
 4690|  34.4k|        i4_non_vcl_status = isvcd_dec_non_vcl(&ps_svcd_ctxt->s_non_vcl_nal, ps_svcd_ctxt->ps_sps,
 4691|  34.4k|                                              ps_svcd_ctxt->ps_pps, ps_svcd_ctxt);
 4692|       |
 4693|  34.4k|        if(OK != i4_non_vcl_status) return i4_non_vcl_status;
  ------------------
  |  |  114|  34.4k|#define OK        0
  ------------------
  |  Branch (4693:12): [True: 10.2k, False: 24.2k]
  ------------------
 4694|  34.4k|    }
 4695|   146k|    if(OK != i4_status) return (i4_status);
  ------------------
  |  |  114|   146k|#define OK        0
  ------------------
  |  Branch (4695:8): [True: 1.61k, False: 145k]
  ------------------
 4696|   145k|    return (OK);
  ------------------
  |  |  114|   145k|#define OK        0
  ------------------
 4697|   146k|}
isvcd_video_decode:
 4721|   723k|{
 4722|   723k|    dec_struct_t *ps_dec;
 4723|   723k|    dec_struct_t *ps_dec_zero_lyr;
 4724|   723k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 4725|   723k|    svc_dec_lyr_struct_t *ps_svc_lyr_zero_dec;
 4726|       |
 4727|   723k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 4728|   723k|    WORD32 i4_err_status = 0;
 4729|       |
 4730|   723k|    UWORD32 bytes_consumed = 0;
 4731|   723k|    WORD32 ret = 0, api_ret_value = IV_SUCCESS;
 4732|   723k|    isvcd_video_decode_ip_t *ps_h264d_dec_ip;
 4733|   723k|    isvcd_video_decode_op_t *ps_h264d_dec_op;
 4734|   723k|    ivd_video_decode_ip_t *ps_dec_ip;
 4735|   723k|    ivd_video_decode_op_t *ps_dec_op;
 4736|   723k|    UWORD8 u1_res_id;
 4737|       |
 4738|   723k|    ithread_set_name((void *) "Parse_thread");
 4739|       |
 4740|   723k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) (dec_hdl->pv_codec_handle);
 4741|   723k|    ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[0];
 4742|   723k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 4743|       |
 4744|   723k|    ps_h264d_dec_ip = (isvcd_video_decode_ip_t *) pv_api_ip;
 4745|   723k|    ps_h264d_dec_op = (isvcd_video_decode_op_t *) pv_api_op;
 4746|   723k|    ps_dec_ip = &ps_h264d_dec_ip->s_ivd_video_decode_ip_t;
 4747|   723k|    ps_dec_op = &ps_h264d_dec_op->s_ivd_video_decode_op_t;
 4748|       |
 4749|   723k|    {
 4750|   723k|        UWORD32 u4_size;
 4751|   723k|        u4_size = ps_dec_op->u4_size;
 4752|   723k|        memset(ps_h264d_dec_op, 0, sizeof(isvcd_video_decode_op_t));
 4753|   723k|        ps_dec_op->u4_size = u4_size;
 4754|   723k|    }
 4755|       |
 4756|   723k|    ps_dec->pv_dec_out = ps_dec_op;
 4757|   723k|    if(ps_dec->init_done != 1)
  ------------------
  |  Branch (4757:8): [True: 0, False: 723k]
  ------------------
 4758|      0|    {
 4759|      0|        return IV_FAIL;
 4760|      0|    }
 4761|       |
 4762|       |    /* Data memory barrier instruction, so that the bitstream write by the
 4763|       |     * application is complete */
 4764|   723k|    DATA_SYNC();
  ------------------
  |  |  116|   723k|#define DATA_SYNC()  __sync_synchronize()
  ------------------
 4765|       |
 4766|   723k|    if(0 == ps_dec->u1_flushfrm)
  ------------------
  |  Branch (4766:8): [True: 723k, False: 0]
  ------------------
 4767|   723k|    {
 4768|   723k|        if(ps_dec_ip->pv_stream_buffer == NULL)
  ------------------
  |  Branch (4768:12): [True: 0, False: 723k]
  ------------------
 4769|      0|        {
 4770|      0|            ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
 4771|      0|            ps_dec_op->u4_error_code |= IVD_DEC_FRM_BS_BUF_NULL;
 4772|      0|            return IV_FAIL;
 4773|      0|        }
 4774|   723k|        if(ps_dec_ip->u4_num_Bytes <= 16)
  ------------------
  |  Branch (4774:12): [True: 70.6k, False: 652k]
  ------------------
 4775|  70.6k|        {
 4776|  70.6k|            ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
 4777|  70.6k|            ps_dec_op->u4_error_code |= IVD_DEC_NUMBYTES_INV;
 4778|  70.6k|            return IV_FAIL;
 4779|  70.6k|        }
 4780|   723k|    }
 4781|       |#ifdef KEEP_THREADS_ACTIVE
 4782|       |    {
 4783|       |        UWORD32 i;
 4784|       |        ps_dec->i4_break_threads = 0;
 4785|       |        for(i = 0; i < 2; i++)
 4786|       |        {
 4787|       |            ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[i]);
 4788|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 4789|       |
 4790|       |            ps_dec->ai4_process_start[i] = PROC_INIT;
 4791|       |
 4792|       |            ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[i]);
 4793|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 4794|       |        }
 4795|       |    }
 4796|       |#else
 4797|   652k|    ps_dec->u4_dec_thread_created = 0;
 4798|   652k|    ps_dec->u4_bs_deblk_thread_created = 0;
 4799|   652k|#endif
 4800|   652k|    ps_dec_op->u4_num_bytes_consumed = 0;
 4801|   652k|    ps_dec_op->i4_reorder_depth = -1;
 4802|   652k|    ps_dec_op->i4_display_index = DEFAULT_POC;
  ------------------
  |  |   45|   652k|#define DEFAULT_POC 0x7FFFFFFF
  ------------------
 4803|       |
 4804|   652k|    ps_dec->ps_out_buffer = NULL;
 4805|   652k|    if(ps_dec_ip->u4_size >= offsetof(ivd_video_decode_ip_t, s_out_buffer))
  ------------------
  |  Branch (4805:8): [True: 652k, False: 0]
  ------------------
 4806|   652k|        ps_dec->ps_out_buffer = &ps_dec_ip->s_out_buffer;
 4807|       |
 4808|   652k|    if(0 == ps_dec->u4_share_disp_buf && ps_dec->i4_decode_header == 0)
  ------------------
  |  Branch (4808:8): [True: 652k, False: 0]
  |  Branch (4808:42): [True: 190k, False: 462k]
  ------------------
 4809|   190k|    {
 4810|   190k|        UWORD32 i;
 4811|   190k|        if((ps_dec->ps_out_buffer->u4_num_bufs == 0) ||
  ------------------
  |  Branch (4811:12): [True: 0, False: 190k]
  ------------------
 4812|   190k|           (ps_dec->ps_out_buffer->u4_num_bufs > IVD_VIDDEC_MAX_IO_BUFFERS))
  ------------------
  |  |   45|   190k|#define IVD_VIDDEC_MAX_IO_BUFFERS 64
  ------------------
  |  Branch (4812:12): [True: 0, False: 190k]
  ------------------
 4813|      0|        {
 4814|      0|            ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
 4815|      0|            ps_dec_op->u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS;
 4816|      0|            return IV_FAIL;
 4817|      0|        }
 4818|       |
 4819|   667k|        for(i = 0; i < ps_dec->ps_out_buffer->u4_num_bufs; i++)
  ------------------
  |  Branch (4819:20): [True: 476k, False: 190k]
  ------------------
 4820|   476k|        {
 4821|   476k|            if(ps_dec->ps_out_buffer->pu1_bufs[i] == NULL)
  ------------------
  |  Branch (4821:16): [True: 0, False: 476k]
  ------------------
 4822|      0|            {
 4823|      0|                ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
 4824|      0|                ps_dec_op->u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL;
 4825|      0|                return IV_FAIL;
 4826|      0|            }
 4827|       |
 4828|   476k|            if(ps_dec->ps_out_buffer->u4_min_out_buf_size[i] == 0)
  ------------------
  |  Branch (4828:16): [True: 0, False: 476k]
  ------------------
 4829|      0|            {
 4830|      0|                ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
 4831|      0|                ps_dec_op->u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
 4832|      0|                return IV_FAIL;
 4833|      0|            }
 4834|   476k|        }
 4835|   190k|    }
 4836|       |
 4837|   652k|    if(ps_dec->u4_total_frames_decoded >= NUM_FRAMES_LIMIT)
  ------------------
  |  |  157|   652k|#define NUM_FRAMES_LIMIT 0x7FFFFFFF
  ------------------
  |  Branch (4837:8): [True: 0, False: 652k]
  ------------------
 4838|      0|    {
 4839|      0|        ps_dec_op->u4_error_code = ERROR_FRAME_LIMIT_OVER;
 4840|      0|        return IV_FAIL;
 4841|      0|    }
 4842|       |
 4843|   652k|    ps_dec_op->u4_error_code = 0;
 4844|   652k|    ps_dec_op->e_pic_type = IV_NA_FRAME;
 4845|   652k|    ps_dec_op->u4_output_present = 0;
 4846|   652k|    ps_dec_op->u4_frame_decoded_flag = 0;
 4847|       |
 4848|       |    /* In case the decoder is not in flush mode(in shared mode),
 4849|       |     then decoder has to pick up a buffer to write current frame.
 4850|       |     Check if a frame is available in such cases */
 4851|   652k|    if(ps_dec->u1_init_dec_flag == 1 && ps_dec->u4_share_disp_buf == 1 && ps_dec->u1_flushfrm == 0)
  ------------------
  |  Branch (4851:8): [True: 115k, False: 537k]
  |  Branch (4851:41): [True: 0, False: 115k]
  |  Branch (4851:75): [True: 0, False: 0]
  ------------------
 4852|      0|    {
 4853|      0|        UWORD32 i;
 4854|      0|        WORD32 disp_avail = 0, free_id;
 4855|       |
 4856|       |        /* Check if at least one buffer is available with the codec */
 4857|       |        /* If not then return to application with error */
 4858|      0|        for(i = 0; i < ps_dec->u1_pic_bufs; i++)
  ------------------
  |  Branch (4858:20): [True: 0, False: 0]
  ------------------
 4859|      0|        {
 4860|      0|            if(0 == ps_dec->u4_disp_buf_mapping[i] || 1 == ps_dec->u4_disp_buf_to_be_freed[i])
  ------------------
  |  Branch (4860:16): [True: 0, False: 0]
  |  Branch (4860:55): [True: 0, False: 0]
  ------------------
 4861|      0|            {
 4862|      0|                disp_avail = 1;
 4863|      0|                break;
 4864|      0|            }
 4865|      0|        }
 4866|       |
 4867|      0|        if(0 == disp_avail)
  ------------------
  |  Branch (4867:12): [True: 0, False: 0]
  ------------------
 4868|      0|        {
 4869|       |            /* If something is queued for display wait for that buffer to be returned
 4870|       |             */
 4871|       |
 4872|      0|            ps_dec_op->u4_error_code = IVD_DEC_REF_BUF_NULL;
 4873|      0|            ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
 4874|      0|            return (IV_FAIL);
 4875|      0|        }
 4876|       |
 4877|      0|        while(1)
  ------------------
  |  Branch (4877:15): [True: 0, Folded]
  ------------------
 4878|      0|        {
 4879|      0|            pic_buffer_t *ps_pic_buf;
 4880|      0|            ps_pic_buf = (pic_buffer_t *) ih264_buf_mgr_get_next_free(
 4881|      0|                (buf_mgr_t *) ps_dec->pv_pic_buf_mgr, &free_id);
 4882|       |
 4883|      0|            if(ps_pic_buf == NULL)
  ------------------
  |  Branch (4883:16): [True: 0, False: 0]
  ------------------
 4884|      0|            {
 4885|      0|                UWORD32 display_queued = 0;
 4886|       |
 4887|       |                /* check if any buffer was given for display which is not returned yet */
 4888|      0|                for(i = 0; i < (MAX_DISP_BUFS_NEW); i++)
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (4888:28): [True: 0, False: 0]
  ------------------
 4889|      0|                {
 4890|      0|                    if(0 != ps_dec->u4_disp_buf_mapping[i])
  ------------------
  |  Branch (4890:24): [True: 0, False: 0]
  ------------------
 4891|      0|                    {
 4892|      0|                        display_queued = 1;
 4893|      0|                        break;
 4894|      0|                    }
 4895|      0|                }
 4896|       |                /* If some buffer is queued for display, then codec has to signal an
 4897|       |                 error and wait for that buffer to be returned. If nothing is queued for
 4898|       |                 display then codec has ownership of all display buffers and it can
 4899|       |                 reuse any of the existing buffers and continue decoding */
 4900|       |
 4901|      0|                if(1 == display_queued)
  ------------------
  |  Branch (4901:20): [True: 0, False: 0]
  ------------------
 4902|      0|                {
 4903|       |                    /* If something is queued for display wait for that buffer to be
 4904|       |                     * returned */
 4905|      0|                    ps_dec_op->u4_error_code = IVD_DEC_REF_BUF_NULL;
 4906|      0|                    ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
 4907|      0|                    return (IV_FAIL);
 4908|      0|                }
 4909|      0|            }
 4910|      0|            else
 4911|      0|            {
 4912|       |                /* If the buffer is with display, then mark it as in use and then look
 4913|       |                 * for a buffer again */
 4914|      0|                if(1 == ps_dec->u4_disp_buf_mapping[free_id])
  ------------------
  |  Branch (4914:20): [True: 0, False: 0]
  ------------------
 4915|      0|                {
 4916|      0|                    ih264_buf_mgr_set_status((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, free_id,
 4917|      0|                                             BUF_MGR_IO);
  ------------------
  |  |   53|      0|#define BUF_MGR_IO           (1 << 3)
  ------------------
 4918|      0|                }
 4919|      0|                else
 4920|      0|                {
 4921|       |                    /**
 4922|       |                     *  Found a free buffer for present call. Release it now.
 4923|       |                     *  Will be again obtained later.
 4924|       |                     */
 4925|      0|                    ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, free_id,
 4926|      0|                                          BUF_MGR_IO);
  ------------------
  |  |   53|      0|#define BUF_MGR_IO           (1 << 3)
  ------------------
 4927|      0|                    break;
 4928|      0|                }
 4929|      0|            }
 4930|      0|        }
 4931|      0|    }
 4932|       |
 4933|   652k|    if(ps_dec->u1_enable_mb_info && (ps_dec->i4_header_decoded & DECODED_SPS_MASK))
  ------------------
  |  |  125|      0|#define DECODED_SPS_MASK 1
  ------------------
  |  Branch (4933:8): [True: 0, False: 652k]
  |  Branch (4933:37): [True: 0, False: 0]
  ------------------
 4934|      0|    {
 4935|      0|        UWORD32 blk_qp_map_size = ps_h264d_dec_ip->u4_8x8_blk_qp_map_size;
 4936|      0|        UWORD32 blk_type_map_size = ps_h264d_dec_ip->u4_8x8_blk_type_map_size;
 4937|      0|        UWORD32 blk_8x8_map_size = ps_dec->u4_total_mbs << 2;
 4938|      0|        if((ps_h264d_dec_ip->pu1_8x8_blk_qp_map && blk_qp_map_size < blk_8x8_map_size) ||
  ------------------
  |  Branch (4938:13): [True: 0, False: 0]
  |  Branch (4938:52): [True: 0, False: 0]
  ------------------
 4939|      0|           (ps_h264d_dec_ip->pu1_8x8_blk_type_map && blk_type_map_size < blk_8x8_map_size))
  ------------------
  |  Branch (4939:13): [True: 0, False: 0]
  |  Branch (4939:54): [True: 0, False: 0]
  ------------------
 4940|      0|        {
 4941|      0|            ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
 4942|      0|            ps_dec_op->u4_error_code |= IH264D_INSUFFICIENT_METADATA_BUFFER;
 4943|      0|            return IV_FAIL;
 4944|      0|        }
 4945|      0|    }
 4946|       |
 4947|   652k|    if(ps_dec->u1_flushfrm && (1 == ps_svcd_ctxt->u1_pre_parse_in_flush))
  ------------------
  |  Branch (4947:8): [True: 0, False: 652k]
  |  Branch (4947:31): [True: 0, False: 0]
  ------------------
 4948|      0|    {
 4949|      0|        if(ps_dec->u1_init_dec_flag == 0)
  ------------------
  |  Branch (4949:12): [True: 0, False: 0]
  ------------------
 4950|      0|        {
 4951|      0|            ps_dec->u1_flushfrm = 0;
 4952|      0|            return (IV_FAIL);
 4953|      0|        }
 4954|       |
 4955|      0|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[ps_svcd_ctxt->s_vcl_nal.i4_num_res_lyrs - 1];
 4956|      0|        ps_dec = &ps_svc_lyr_dec->s_dec;
 4957|      0|        ps_dec->u4_fmt_conv_cur_row = 0;
 4958|      0|        ps_dec->u4_output_present = 0;
 4959|      0|        ps_dec->s_disp_op.u4_error_code = 1;
 4960|       |
 4961|      0|        ps_dec->ps_out_buffer = NULL;
 4962|      0|        if(ps_dec_ip->u4_size >= offsetof(ivd_video_decode_ip_t, s_out_buffer))
  ------------------
  |  Branch (4962:12): [True: 0, False: 0]
  ------------------
 4963|      0|        {
 4964|      0|            ps_dec->ps_out_buffer = &ps_dec_ip->s_out_buffer;
 4965|      0|        }
 4966|      0|        ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer, &(ps_dec->s_disp_op));
 4967|      0|        if(0 == ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (4967:12): [True: 0, False: 0]
  ------------------
 4968|      0|        {
 4969|       |            /* check output buffer size given by the application */
 4970|      0|            if(check_app_out_buf_size(ps_dec) != IV_SUCCESS)
  ------------------
  |  Branch (4970:16): [True: 0, False: 0]
  ------------------
 4971|      0|            {
 4972|      0|                ps_dec_op->u4_error_code = IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
 4973|      0|                return (IV_FAIL);
 4974|      0|            }
 4975|       |
 4976|      0|            ps_dec->u4_fmt_conv_cur_row = 0;
 4977|      0|            ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht;
 4978|      0|            ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), ps_dec->u4_fmt_conv_cur_row,
 4979|      0|                                  ps_dec->u4_fmt_conv_num_rows);
 4980|      0|            ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
 4981|      0|            ps_dec->u4_output_present = 1;
 4982|      0|            if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (4982:16): [True: 0, False: 0]
  ------------------
 4983|      0|            {
 4984|      0|                UWORD32 disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id;
 4985|      0|                if(ps_h264d_dec_ip->pu1_8x8_blk_qp_map)
  ------------------
  |  Branch (4985:20): [True: 0, False: 0]
  ------------------
 4986|      0|                {
 4987|      0|                    ps_h264d_dec_op->pu1_8x8_blk_qp_map = ps_h264d_dec_ip->pu1_8x8_blk_qp_map;
 4988|      0|                    ps_h264d_dec_op->u4_8x8_blk_qp_map_size = ps_dec->u4_total_mbs << 2;
 4989|      0|                    ih264_memcpy(ps_h264d_dec_op->pu1_8x8_blk_qp_map,
 4990|      0|                                 ps_dec->as_buf_id_info_map[disp_buf_id].pu1_qp_map,
 4991|      0|                                 ps_dec->u4_total_mbs << 2);
 4992|      0|                }
 4993|      0|                if(ps_h264d_dec_ip->pu1_8x8_blk_type_map)
  ------------------
  |  Branch (4993:20): [True: 0, False: 0]
  ------------------
 4994|      0|                {
 4995|      0|                    ps_h264d_dec_op->pu1_8x8_blk_type_map = ps_h264d_dec_ip->pu1_8x8_blk_type_map;
 4996|      0|                    ps_h264d_dec_op->u4_8x8_blk_type_map_size = ps_dec->u4_total_mbs << 2;
 4997|      0|                    ih264_memcpy(ps_h264d_dec_op->pu1_8x8_blk_type_map,
 4998|      0|                                 ps_dec->as_buf_id_info_map[disp_buf_id].pu1_mb_type_map,
 4999|      0|                                 ps_dec->u4_total_mbs << 2);
 5000|      0|                }
 5001|      0|            }
 5002|      0|        }
 5003|      0|        ih264d_export_sei_params(&ps_dec_op->s_sei_decode_op, ps_dec);
 5004|       |
 5005|      0|        ih264d_release_display_field(ps_dec, &(ps_dec->s_disp_op));
 5006|       |
 5007|      0|        ps_dec_op->u4_pic_wd = (UWORD32) ps_dec->u2_disp_width;
 5008|      0|        ps_dec_op->u4_pic_ht = (UWORD32) ps_dec->u2_disp_height;
 5009|      0|        ps_dec_op->i4_reorder_depth = ps_dec->i4_reorder_depth;
 5010|      0|        ps_dec_op->i4_display_index = ps_dec->i4_display_index;
 5011|      0|        ps_dec_op->u4_new_seq = 0;
 5012|       |
 5013|      0|        ps_dec_op->u4_output_present = ps_dec->u4_output_present;
 5014|      0|        ps_dec_op->u4_progressive_frame_flag = ps_dec->s_disp_op.u4_progressive_frame_flag;
 5015|      0|        ps_dec_op->e_output_format = ps_dec->s_disp_op.e_output_format;
 5016|      0|        ps_dec_op->s_disp_frm_buf = ps_dec->s_disp_op.s_disp_frm_buf;
 5017|      0|        ps_dec_op->e4_fld_type = ps_dec->s_disp_op.e4_fld_type;
 5018|      0|        ps_dec_op->u4_ts = ps_dec->s_disp_op.u4_ts;
 5019|      0|        ps_dec_op->u4_disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id;
 5020|       |
 5021|       |        /*In the case of flush ,since no frame is decoded set pic type as invalid*/
 5022|      0|        ps_dec_op->u4_is_ref_flag = UINT32_MAX;
 5023|      0|        ps_dec_op->e_pic_type = IV_NA_FRAME;
 5024|      0|        ps_dec_op->u4_frame_decoded_flag = 0;
 5025|       |
 5026|      0|        if(0 == ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (5026:12): [True: 0, False: 0]
  ------------------
 5027|      0|        {
 5028|      0|            return (IV_SUCCESS);
 5029|      0|        }
 5030|      0|        else
 5031|      0|            return (IV_FAIL);
 5032|      0|    }
 5033|       |
 5034|   652k|    if(ps_dec->u1_res_changed == 1)
  ------------------
  |  Branch (5034:8): [True: 3.28k, False: 649k]
  ------------------
 5035|  3.28k|    {
 5036|       |        /*if resolution has changed and all buffers have been flushed, reset
 5037|       |         * decoder*/
 5038|  3.28k|        if(((buf_mgr_t *) ps_dec->pv_pic_buf_mgr)->pv_mutex != NULL)
  ------------------
  |  Branch (5038:12): [True: 143, False: 3.14k]
  ------------------
 5039|    143|            ih264_buf_mgr_free(ps_dec->pv_pic_buf_mgr);
 5040|  3.28k|        if(((buf_mgr_t *) ps_dec->pv_mv_buf_mgr)->pv_mutex != NULL)
  ------------------
  |  Branch (5040:12): [True: 143, False: 3.14k]
  ------------------
 5041|    143|            ih264_buf_mgr_free(ps_dec->pv_mv_buf_mgr);
 5042|       |
 5043|  3.28k|        isvcd_init_decoder(ps_svc_lyr_dec);
 5044|  3.28k|    }
 5045|       |
 5046|   652k|    DEBUG_THREADS_PRINTF(" Starting process call\n");
 5047|       |
 5048|   652k|    {
 5049|   652k|        vcl_node_t *ps_cur_node;
 5050|   652k|        UWORD8 u1_num_res_lyrs;
 5051|   652k|        vcl_buf_hdr_t *ps_vcl_buf;
 5052|   652k|        UWORD8 flush_decode = 1;
 5053|   652k|        ps_svcd_ctxt->u1_pre_parse_in_flush = 0;
 5054|       |
 5055|   652k|        ret = isvcd_pre_parse_refine_au(ps_svcd_ctxt, ps_dec_ip, &ps_dec_op->u4_num_bytes_consumed);
 5056|   652k|        ps_svcd_ctxt->u1_pre_parse_in_flush = (ret == FLUSH);
  ------------------
  |  |   50|   652k|#define FLUSH 2
  ------------------
 5057|       |
 5058|   652k|        if(ret != OK)
  ------------------
  |  |  114|   652k|#define OK        0
  ------------------
  |  Branch (5058:12): [True: 84.4k, False: 568k]
  ------------------
 5059|  84.4k|        {
 5060|  84.4k|            UWORD32 error = ih264d_map_error((UWORD32) ret);
 5061|  84.4k|            if(ret != NOT_OK)
  ------------------
  |  |  116|  84.4k|#define NOT_OK    -1
  ------------------
  |  Branch (5061:16): [True: 9.39k, False: 75.0k]
  ------------------
 5062|  9.39k|            {
 5063|  9.39k|                ps_dec_op->u4_error_code = error | ret;
 5064|  9.39k|            }
 5065|  84.4k|            if((ps_dec_op->u4_error_code >> IVD_FATALERROR) & 1)
  ------------------
  |  Branch (5065:16): [True: 1.07k, False: 83.3k]
  ------------------
 5066|  1.07k|            {
 5067|  1.07k|                ps_svcd_ctxt->u1_exit_till_next_IDR = 1;
 5068|  1.07k|            }
 5069|  84.4k|            api_ret_value = IV_FAIL;
 5070|  84.4k|            if((ret == IVD_RES_CHANGED) || (ret == IVD_MEM_ALLOC_FAILED) ||
  ------------------
  |  Branch (5070:16): [True: 6.26k, False: 78.1k]
  |  Branch (5070:44): [True: 0, False: 78.1k]
  ------------------
 5071|  78.1k|               (ret == ERROR_UNAVAIL_PICBUF_T) || (ret == ERROR_UNAVAIL_MVBUF_T) ||
  ------------------
  |  Branch (5071:16): [True: 0, False: 78.1k]
  |  Branch (5071:51): [True: 0, False: 78.1k]
  ------------------
 5072|  78.1k|               (ret == ERROR_INV_SPS_PPS_T) || (ret == ERROR_FEATURE_UNAVAIL) ||
  ------------------
  |  Branch (5072:16): [True: 970, False: 77.2k]
  |  Branch (5072:48): [True: 566, False: 76.6k]
  ------------------
 5073|  76.6k|               (ret == IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED) ||
  ------------------
  |  Branch (5073:16): [True: 513, False: 76.1k]
  ------------------
 5074|  76.1k|               (ret == IVD_DISP_FRM_ZERO_OP_BUF_SIZE))
  ------------------
  |  Branch (5074:16): [True: 0, False: 76.1k]
  ------------------
 5075|  8.31k|            {
 5076|  8.31k|                ps_dec->u4_slice_start_code_found = 0;
 5077|  8.31k|            }
 5078|  84.4k|            if((ret == ERROR_INCOMPLETE_FRAME) || (ret == ERROR_DANGLING_FIELD_IN_PIC))
  ------------------
  |  Branch (5078:16): [True: 0, False: 84.4k]
  |  Branch (5078:51): [True: 0, False: 84.4k]
  ------------------
 5079|      0|            {
 5080|      0|                api_ret_value = IV_FAIL;
 5081|      0|            }
 5082|       |
 5083|  84.4k|            if(ret == ERROR_IN_LAST_SLICE_OF_PIC)
  ------------------
  |  Branch (5083:16): [True: 0, False: 84.4k]
  ------------------
 5084|      0|            {
 5085|      0|                api_ret_value = IV_FAIL;
 5086|      0|            }
 5087|  84.4k|        }
 5088|       |
 5089|   652k|        if(NOT_OK == ret)
  ------------------
  |  |  116|   652k|#define NOT_OK    -1
  ------------------
  |  Branch (5089:12): [True: 75.0k, False: 577k]
  ------------------
 5090|  75.0k|        {
 5091|  75.0k|            if(ps_dec->u4_pic_buf_got == 0)
  ------------------
  |  Branch (5091:16): [True: 68.6k, False: 6.37k]
  ------------------
 5092|  68.6k|            {
 5093|  68.6k|                ps_dec->i4_error_code = ERROR_START_CODE_NOT_FOUND;
 5094|  68.6k|                ps_dec_op->u4_error_code |= 1 << IVD_INSUFFICIENTDATA;
 5095|       |
 5096|  68.6k|                isvcd_fill_output_struct_from_context(ps_svc_lyr_dec, ps_dec_op);
 5097|       |
 5098|  68.6k|                ps_dec_op->u4_error_code = ps_dec->i4_error_code;
 5099|  68.6k|                ps_dec_op->u4_frame_decoded_flag = 0;
 5100|  68.6k|                return (IV_FAIL);
 5101|  68.6k|            }
 5102|  6.37k|            return (IV_SUCCESS);
 5103|  75.0k|        }
 5104|       |
 5105|   577k|        u1_num_res_lyrs = ps_svcd_ctxt->s_vcl_nal.i4_num_res_lyrs;
 5106|       |
 5107|       |        /* error concealment: exit till next IDR if any of Non Target layers are
 5108|       |         * corrupted */
 5109|   577k|        {
 5110|   577k|            ps_cur_node = ps_svcd_ctxt->s_vcl_nal.ps_bot_node;
 5111|       |
 5112|   577k|            if(NULL != ps_cur_node)
  ------------------
  |  Branch (5112:16): [True: 154k, False: 423k]
  ------------------
 5113|   154k|            {
 5114|   154k|                if(!ps_cur_node->i4_idr_pic_flag)
  ------------------
  |  Branch (5114:20): [True: 32.1k, False: 121k]
  ------------------
 5115|  32.1k|                {
 5116|  32.1k|                    if(u1_num_res_lyrs != ps_svcd_ctxt->u1_prev_num_res_layers)
  ------------------
  |  Branch (5116:24): [True: 4.74k, False: 27.3k]
  ------------------
 5117|  4.74k|                    {
 5118|  4.74k|                        ps_svcd_ctxt->u1_exit_till_next_IDR = 1;
 5119|  4.74k|                        ps_dec_op->u4_error_code = ERROR_UNKNOWN_NAL;
 5120|  4.74k|                        return IV_FAIL;
 5121|  4.74k|                    }
 5122|  32.1k|                }
 5123|   121k|                else
 5124|   121k|                {
 5125|   121k|                    if(u1_num_res_lyrs != ps_svcd_ctxt->u1_prev_num_res_layers)
  ------------------
  |  Branch (5125:24): [True: 44.3k, False: 77.6k]
  ------------------
 5126|  44.3k|                    {
 5127|  44.3k|                        ps_svcd_ctxt->u1_prev_num_res_layers = u1_num_res_lyrs;
 5128|  44.3k|                    }
 5129|   121k|                }
 5130|   154k|            }
 5131|   577k|        }
 5132|   572k|        if(ps_svcd_ctxt->u1_prev_num_res_layers != u1_num_res_lyrs && (u1_num_res_lyrs != 0))
  ------------------
  |  Branch (5132:12): [True: 423k, False: 149k]
  |  Branch (5132:71): [True: 71, False: 423k]
  ------------------
 5133|     71|        {
 5134|     71|            ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr + u1_num_res_lyrs - 1;
 5135|     71|            ps_dec = &ps_svc_lyr_dec->s_dec;
 5136|       |
 5137|     71|            if(ps_dec->u1_init_dec_flag == 1)
  ------------------
  |  Branch (5137:16): [True: 3, False: 68]
  ------------------
 5138|      3|            {
 5139|      3|                ih264d_release_pics_in_dpb((void *) ps_dec, ps_dec->u1_pic_bufs);
 5140|      3|                ih264d_release_display_bufs(ps_dec);
 5141|      3|                ih264_disp_mgr_init((disp_mgr_t *) ps_dec->pv_disp_buf_mgr);
 5142|       |
 5143|      3|                ih264_buf_mgr_reset(ps_dec->pv_pic_buf_mgr);
 5144|      3|                ih264_buf_mgr_reset(ps_dec->pv_mv_buf_mgr);
 5145|      3|                ih264d_init_ref_bufs(ps_dec->ps_dpb_mgr);
 5146|      3|            }
 5147|       |
 5148|       |            // ps_svcd_ctxt->u1_prev_num_res_layers = u1_num_res_lyrs;
 5149|     71|        }
 5150|   572k|        ps_svcd_ctxt->u1_parse_nal_unit_error = 0;
 5151|       |
 5152|   572k|        if((1 == ps_svcd_ctxt->u1_exit_till_next_IDR) &&
  ------------------
  |  Branch (5152:12): [True: 53.7k, False: 519k]
  ------------------
 5153|  53.7k|           (ps_svcd_ctxt->s_vcl_nal.ps_bot_node != NULL))
  ------------------
  |  Branch (5153:12): [True: 53.4k, False: 287]
  ------------------
 5154|  53.4k|        {
 5155|  53.4k|            if(1 == ps_svcd_ctxt->s_vcl_nal.ps_bot_node->i4_idr_pic_flag)
  ------------------
  |  Branch (5155:16): [True: 48.3k, False: 5.11k]
  ------------------
 5156|  48.3k|            {
 5157|  48.3k|                ps_svcd_ctxt->u1_exit_till_next_IDR = 0;
 5158|       |
 5159|   126k|                for(u1_res_id = 0; u1_res_id < u1_num_res_lyrs; u1_res_id++)
  ------------------
  |  Branch (5159:36): [True: 77.8k, False: 48.3k]
  ------------------
 5160|  77.8k|                {
 5161|  77.8k|                    ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr + u1_res_id;
 5162|  77.8k|                    ps_dec = &ps_svc_lyr_dec->s_dec;
 5163|  77.8k|                    ih264_buf_mgr_reset(ps_dec->pv_pic_buf_mgr);
 5164|  77.8k|                    ih264_buf_mgr_reset(ps_dec->pv_mv_buf_mgr);
 5165|  77.8k|                }
 5166|  48.3k|            }
 5167|  5.11k|            else
 5168|  5.11k|            {
 5169|  5.11k|                ps_dec_op->u4_error_code = ERROR_UNKNOWN_NAL;
 5170|  5.11k|                return IV_FAIL;
 5171|  5.11k|            }
 5172|  53.4k|        }
 5173|       |
 5174|   567k|        if((0 == ps_dec->i4_decode_header) && (OK == ret))
  ------------------
  |  |  114|   144k|#define OK        0
  ------------------
  |  Branch (5174:12): [True: 144k, False: 423k]
  |  Branch (5174:47): [True: 135k, False: 8.80k]
  ------------------
 5175|   135k|        {
 5176|   135k|            flush_decode = 0;
 5177|   135k|            ps_cur_node = ps_svcd_ctxt->s_vcl_nal.ps_bot_node;
 5178|   135k|            ps_svc_lyr_zero_dec = ps_svcd_ctxt->ps_svc_dec_lyr;
 5179|   135k|            ps_dec_zero_lyr = &ps_svc_lyr_zero_dec->s_dec;
 5180|       |            /* master loop */
 5181|       |
 5182|   262k|            for(u1_res_id = 0; u1_res_id < u1_num_res_lyrs; u1_res_id++)
  ------------------
  |  Branch (5182:32): [True: 174k, False: 87.7k]
  ------------------
 5183|   174k|            {
 5184|   174k|                UWORD8 u1_layer_nal_data_present = 0;
 5185|   174k|                ps_svcd_ctxt->u1_cur_layer_id = u1_res_id;
 5186|   174k|                ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr + u1_res_id;
 5187|   174k|                ps_svc_lyr_dec->u1_res_init_done = 0;
 5188|   174k|                ps_dec = &ps_svc_lyr_dec->s_dec;
 5189|       |
 5190|   174k|                ps_dec->i4_decode_header = ps_dec_zero_lyr->i4_decode_header;
 5191|   174k|                ps_dec->i4_header_decoded = ps_dec_zero_lyr->i4_header_decoded;
 5192|   174k|                ps_dec->u1_pic_decode_done = 0;
 5193|   174k|                ps_dec->u4_fmt_conv_cur_row = 0;
 5194|       |
 5195|   174k|                ps_dec->u4_output_present = 0;
 5196|   174k|                ps_dec->s_disp_op.u4_error_code = 1;
 5197|   174k|                ps_dec->u4_fmt_conv_num_rows = FMT_CONV_NUM_ROWS;
  ------------------
  |  |   46|   174k|#define FMT_CONV_NUM_ROWS       16
  ------------------
 5198|   174k|                ps_dec->u4_ts = ps_dec_ip->u4_ts;
 5199|   174k|                ps_dec->i4_frametype = IV_NA_FRAME;
 5200|   174k|                ps_dec->i4_content_type = IV_CONTENTTYPE_NA;
 5201|       |
 5202|   174k|                ps_dec->u4_slice_start_code_found = 0;
 5203|   174k|                ps_dec->u4_cur_mb_addr = 0;
 5204|   174k|                ps_dec->u4_total_mbs_coded = 0;
 5205|   174k|                ps_dec->u2_cur_slice_num = 0;
 5206|   174k|                ps_dec->cur_dec_mb_num = 0;
 5207|   174k|                ps_dec->cur_recon_mb_num = 0;
 5208|   174k|                ps_dec->u4_first_slice_in_pic = 1;
 5209|   174k|                ps_dec->u1_slice_header_done = 0;
 5210|   174k|                ps_dec->u1_dangling_field = 0;
 5211|       |
 5212|   174k|                ps_dec->u4_dec_thread_created = 0;
 5213|   174k|                ps_dec->u4_bs_deblk_thread_created = 0;
 5214|   174k|                ps_dec->u4_cur_bs_mb_num = 0;
 5215|   174k|                ps_dec->u4_cur_deblk_mb_num = 0;
 5216|   174k|                ps_dec->u4_start_recon_deblk = 0;
 5217|   174k|                ps_dec->u4_sps_cnt_in_process = 0;
 5218|   174k|                ps_dec->u4_pic_buf_got = 0;
 5219|   174k|                ps_dec->pv_dec_out = ps_dec_op;
 5220|       |
 5221|   174k|                if(ps_dec_ip->u4_size >= offsetof(ivd_video_decode_ip_t, s_out_buffer))
  ------------------
  |  Branch (5221:20): [True: 174k, False: 0]
  ------------------
 5222|   174k|                    ps_dec->ps_out_buffer = &ps_dec_ip->s_out_buffer;
 5223|       |
 5224|   174k|                ps_dec->u1_nal_unit_type = ps_cur_node->i4_nal_unit_type;
 5225|   174k|                ps_dec->u1_separate_parse = 0;
 5226|   174k|                if(u1_res_id == (u1_num_res_lyrs - 1))
  ------------------
  |  Branch (5226:20): [True: 125k, False: 48.8k]
  ------------------
 5227|   125k|                {
 5228|   125k|                    ps_svc_lyr_dec->u1_layer_identifier = TARGET_LAYER;
  ------------------
  |  |  110|   125k|#define TARGET_LAYER 2
  ------------------
 5229|   125k|                    if(ps_dec->u4_num_cores >= 2)
  ------------------
  |  Branch (5229:24): [True: 64.7k, False: 60.9k]
  ------------------
 5230|  64.7k|                    {
 5231|  64.7k|                        ps_dec->u4_num_cores = 2;
 5232|  64.7k|                        ps_dec->u1_separate_parse = 1;
 5233|  64.7k|                    }
 5234|   125k|                }
 5235|  48.8k|                else if(u1_res_id == 0)
  ------------------
  |  Branch (5235:25): [True: 48.8k, False: 0]
  ------------------
 5236|  48.8k|                {
 5237|  48.8k|                    ps_svc_lyr_dec->u1_layer_identifier = BASE_LAYER;
  ------------------
  |  |  108|  48.8k|#define BASE_LAYER 0
  ------------------
 5238|  48.8k|                    ps_dec->u1_separate_parse = 0;
 5239|  48.8k|                    ps_dec->u4_num_cores = 1;
 5240|  48.8k|                }
 5241|      0|                else if(u1_res_id != 0)
  ------------------
  |  Branch (5241:25): [True: 0, False: 0]
  ------------------
 5242|      0|                {
 5243|      0|                    ps_svc_lyr_dec->u1_layer_identifier = MEDIAL_ENHANCEMENT_LAYER;
  ------------------
  |  |  109|      0|#define MEDIAL_ENHANCEMENT_LAYER 1
  ------------------
 5244|      0|                    ps_dec->u1_separate_parse = 0;
 5245|      0|                    ps_dec->u4_num_cores = 1;
 5246|      0|                }
 5247|      0|                else
 5248|      0|                {
 5249|      0|                    return IV_FAIL;
 5250|      0|                }
 5251|       |
 5252|   174k|                ps_svc_lyr_dec->u1_base_res_flag = (0 == u1_res_id);
 5253|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag = ps_cur_node->i4_idr_pic_flag;
 5254|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_dependency_id = ps_cur_node->i4_dependency_id;
 5255|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_priority_id = ps_cur_node->i4_priority_id;
 5256|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_no_inter_layer_pred_flag =
 5257|   174k|                    ps_cur_node->u1_acc_no_int_pred;
 5258|       |
 5259|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id = ps_cur_node->i4_quality_id;
 5260|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_temporal_id = ps_cur_node->i4_temporal_id;
 5261|       |
 5262|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_use_ref_base_pic_flag =
 5263|   174k|                    ps_cur_node->i4_use_ref_base;
 5264|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_discardable_flag = 0;
 5265|   174k|                ps_svc_lyr_dec->ps_nal_svc_ext->u1_svc_ext_flag = (u1_res_id > 1);
 5266|   174k|                ps_svc_lyr_dec->u4_pps_id_for_layer = UINT32_MAX;
 5267|   174k|                ps_vcl_buf = ps_cur_node->ps_first_vcl_nal;
 5268|   174k|                ps_svc_lyr_dec->u1_error_in_cur_frame = 0;
 5269|       |
 5270|       |                /* Only for Non target Layers*/
 5271|   174k|                if(NULL != ps_cur_node->ps_top_node)
  ------------------
  |  Branch (5271:20): [True: 62.2k, False: 112k]
  ------------------
 5272|  62.2k|                {
 5273|  62.2k|                    ps_svc_lyr_dec->u1_inter_lyr_disable_dblk_filter_idc =
 5274|  62.2k|                        ps_cur_node->ps_top_node->i4_inter_lyr_dblk_idc;
 5275|  62.2k|                    ps_svc_lyr_dec->i1_inter_lyr_slice_alpha_c0_offset =
 5276|  62.2k|                        ps_cur_node->ps_top_node->i4_inter_lyr_alpha_c0_offset;
 5277|  62.2k|                    ps_svc_lyr_dec->i1_inter_lyr_slice_beta_offset =
 5278|  62.2k|                        ps_cur_node->ps_top_node->i4_inter_lyr_beta_offset;
 5279|  62.2k|                }
 5280|       |
 5281|   240k|                while(NULL != ps_vcl_buf)
  ------------------
  |  Branch (5281:23): [True: 172k, False: 67.1k]
  ------------------
 5282|   172k|                {
 5283|   172k|                    u1_layer_nal_data_present = 1;
 5284|   172k|                    ps_dec->ps_bitstrm->u4_ofst = 0;
 5285|   172k|                    ps_dec->ps_bitstrm->pu4_buffer =
 5286|   172k|                        (UWORD32 *) ((UWORD8 *) ps_vcl_buf + ps_vcl_buf->i4_buf_offset +
 5287|   172k|                                     ps_vcl_buf->i4_slice_offset);
 5288|       |
 5289|   172k|                    ps_dec->ps_bitstrm->u4_max_ofst = ps_vcl_buf->u4_max_bits;
 5290|       |
 5291|   172k|                    ps_dec_op->u4_frame_decoded_flag = 0;
 5292|   172k|                    ret = isvcd_parse_nal_unit(ps_svc_lyr_dec, ps_cur_node->i4_nal_ref_idc);
 5293|   172k|                    if(ret != OK)
  ------------------
  |  |  114|   172k|#define OK        0
  ------------------
  |  Branch (5293:24): [True: 107k, False: 65.6k]
  ------------------
 5294|   107k|                    {
 5295|   107k|                        ps_svcd_ctxt->u1_parse_nal_unit_error = 1;
 5296|   107k|                        break;
 5297|   107k|                    }
 5298|       |
 5299|       |                    /* go to the next slice */
 5300|  65.6k|                    ps_vcl_buf = ps_vcl_buf->ps_next;
 5301|  65.6k|                }
 5302|       |                /* error concealment: exit till next IDR if a Layer data is missing */
 5303|   174k|                if(0 == u1_layer_nal_data_present)
  ------------------
  |  Branch (5303:20): [True: 6.76k, False: 167k]
  ------------------
 5304|  6.76k|                {
 5305|  6.76k|                    ps_svcd_ctxt->u1_exit_till_next_IDR = 1;
 5306|  6.76k|                    ps_dec_op->u4_error_code = ERROR_UNKNOWN_NAL;
 5307|  6.76k|                    return IV_FAIL;
 5308|  6.76k|                }
 5309|       |                /* error concealment: exit till next IDR if any of Non Target layers are
 5310|       |                 * corrupted */
 5311|   167k|                if((ret != OK) && (u1_res_id != (u1_num_res_lyrs - 1)))
  ------------------
  |  |  114|   167k|#define OK        0
  ------------------
  |  Branch (5311:20): [True: 107k, False: 60.3k]
  |  Branch (5311:35): [True: 9.25k, False: 98.0k]
  ------------------
 5312|  9.25k|                {
 5313|  9.25k|                    ps_svcd_ctxt->u1_exit_till_next_IDR = 1;
 5314|  9.25k|                    ps_dec_op->u4_error_code = ERROR_UNKNOWN_NAL;
 5315|  9.25k|                    return IV_FAIL;
 5316|  9.25k|                }
 5317|       |
 5318|   158k|                if((ret != OK) && (u1_res_id == (u1_num_res_lyrs - 1)))
  ------------------
  |  |  114|   158k|#define OK        0
  ------------------
  |  Branch (5318:20): [True: 98.0k, False: 60.3k]
  |  Branch (5318:35): [True: 98.0k, False: 0]
  ------------------
 5319|  98.0k|                {
 5320|  98.0k|                    ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr + u1_num_res_lyrs - 1;
 5321|  98.0k|                    ps_dec = &ps_svc_lyr_dec->s_dec;
 5322|       |
 5323|  98.0k|                    if((0 == ps_svcd_ctxt->u4_num_sps_ctr) || (0 == ps_svcd_ctxt->u4_num_pps_ctr) ||
  ------------------
  |  Branch (5323:24): [True: 0, False: 98.0k]
  |  Branch (5323:63): [True: 0, False: 98.0k]
  ------------------
 5324|  98.0k|                       (NULL == ps_dec->ps_cur_pps) || (ps_svc_lyr_dec->u1_res_init_done == 0))
  ------------------
  |  Branch (5324:24): [True: 13.9k, False: 84.1k]
  |  Branch (5324:56): [True: 17.4k, False: 66.7k]
  ------------------
 5325|  31.3k|                    {
 5326|  31.3k|                        ps_svcd_ctxt->u1_exit_till_next_IDR = 1;
 5327|  31.3k|                        ps_dec_op->u4_error_code = ERROR_UNKNOWN_NAL;
 5328|  31.3k|                        ih264d_signal_decode_thread(ps_dec);
 5329|  31.3k|                        return IV_FAIL;
 5330|  31.3k|                    }
 5331|  98.0k|                }
 5332|   127k|                ps_cur_node = ps_cur_node->ps_top_node;
 5333|       |
 5334|   127k|                if((ps_dec->u4_pic_buf_got == 1) && (ret != IVD_MEM_ALLOC_FAILED) &&
  ------------------
  |  Branch (5334:20): [True: 127k, False: 0]
  |  Branch (5334:53): [True: 127k, False: 0]
  ------------------
 5335|   127k|                   ps_dec->u4_total_mbs_coded < ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
  ------------------
  |  Branch (5335:20): [True: 94.3k, False: 32.7k]
  ------------------
 5336|  94.3k|                {
 5337|       |                    // last slice - missing/corruption
 5338|  94.3k|                    WORD32 num_mb_skipped;
 5339|  94.3k|                    WORD32 prev_slice_err;
 5340|  94.3k|                    pocstruct_t temp_poc;
 5341|  94.3k|                    WORD32 ret1;
 5342|  94.3k|                    WORD32 ht_in_mbs;
 5343|  94.3k|                    ht_in_mbs = ps_dec->u2_pic_ht >> (4 + ps_dec->ps_cur_slice->u1_field_pic_flag);
 5344|  94.3k|                    num_mb_skipped =
 5345|  94.3k|                        (ht_in_mbs * ps_dec->u2_frm_wd_in_mbs) - ps_dec->u4_total_mbs_coded;
 5346|       |
 5347|  94.3k|                    if(ps_dec->u4_first_slice_in_pic && (ps_dec->u4_pic_buf_got == 0))
  ------------------
  |  Branch (5347:24): [True: 0, False: 94.3k]
  |  Branch (5347:57): [True: 0, False: 0]
  ------------------
 5348|      0|                        prev_slice_err = 1;
 5349|  94.3k|                    else
 5350|  94.3k|                        prev_slice_err = 2;
 5351|       |
 5352|  94.3k|                    if(ps_dec->u4_total_mbs_coded == 0)
  ------------------
  |  Branch (5352:24): [True: 49.5k, False: 44.7k]
  ------------------
 5353|  49.5k|                    {
 5354|  49.5k|                        prev_slice_err = 1;
 5355|  49.5k|                    }
 5356|  94.3k|                    ret1 = isvcd_mark_err_slice_skip(
 5357|  94.3k|                        ps_svc_lyr_dec, num_mb_skipped, ps_dec->u1_nal_unit_type == IDR_SLICE_NAL,
  ------------------
  |  |  328|  94.3k|#define IDR_SLICE_NAL                   5
  ------------------
 5358|  94.3k|                        ps_dec->ps_cur_slice->u2_frame_num, &temp_poc, prev_slice_err);
 5359|       |
 5360|  94.3k|                    if((ret1 == ERROR_UNAVAIL_PICBUF_T) || (ret1 == ERROR_UNAVAIL_MVBUF_T) ||
  ------------------
  |  Branch (5360:24): [True: 0, False: 94.3k]
  |  Branch (5360:60): [True: 0, False: 94.3k]
  ------------------
 5361|  94.3k|                       (ret1 == ERROR_INV_SPS_PPS_T) || (ret1 == ERROR_CORRUPTED_SLICE) ||
  ------------------
  |  Branch (5361:24): [True: 0, False: 94.3k]
  |  Branch (5361:57): [True: 0, False: 94.3k]
  ------------------
 5362|  94.3k|                       (ret == NOT_OK))
  ------------------
  |  |  116|  94.3k|#define NOT_OK    -1
  ------------------
  |  Branch (5362:24): [True: 1.69k, False: 92.6k]
  ------------------
 5363|  1.69k|                    {
 5364|  1.69k|                        ret = ret1;
 5365|  1.69k|                    }
 5366|  94.3k|                }
 5367|       |
 5368|   127k|                if((ret == IVD_RES_CHANGED) || (ret == IVD_MEM_ALLOC_FAILED) ||
  ------------------
  |  Branch (5368:20): [True: 0, False: 127k]
  |  Branch (5368:48): [True: 0, False: 127k]
  ------------------
 5369|   127k|                   (ret == ERROR_UNAVAIL_PICBUF_T) || (ret == ERROR_UNAVAIL_MVBUF_T) ||
  ------------------
  |  Branch (5369:20): [True: 0, False: 127k]
  |  Branch (5369:55): [True: 0, False: 127k]
  ------------------
 5370|   127k|                   (ret == ERROR_INV_SPS_PPS_T) || (ret == ERROR_CORRUPTED_SLICE) ||
  ------------------
  |  Branch (5370:20): [True: 0, False: 127k]
  |  Branch (5370:52): [True: 615, False: 126k]
  ------------------
 5371|   126k|                   (ret == IVD_DISP_FRM_ZERO_OP_BUF_SIZE) || (ret == NOT_OK))
  ------------------
  |  |  116|   126k|#define NOT_OK    -1
  ------------------
  |  Branch (5371:20): [True: 0, False: 126k]
  |  Branch (5371:62): [True: 84, False: 126k]
  ------------------
 5372|    699|                {
 5373|    699|                    ps_svcd_ctxt->u1_exit_till_next_IDR = 1;
 5374|       |                    /* signal the decode thread */
 5375|    699|                    ih264d_signal_decode_thread(ps_dec);
 5376|       |                    /* dont consume bitstream for change in resolution case */
 5377|    699|                    if(ret == IVD_RES_CHANGED)
  ------------------
  |  Branch (5377:24): [True: 0, False: 699]
  ------------------
 5378|      0|                    {
 5379|      0|                        ps_dec_op->u4_num_bytes_consumed -= bytes_consumed;
 5380|      0|                    }
 5381|    699|                    return IV_FAIL;
 5382|    699|                }
 5383|       |
 5384|       |                /* Multi thread - for target Layer decoding*/
 5385|   126k|                if((ps_dec->u1_separate_parse) &&
  ------------------
  |  Branch (5385:20): [True: 50.5k, False: 75.8k]
  ------------------
 5386|  50.5k|                   (ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER) &&
  ------------------
  |  |  110|  50.5k|#define TARGET_LAYER 2
  ------------------
  |  Branch (5386:20): [True: 50.5k, False: 0]
  ------------------
 5387|  50.5k|                   (0 == ps_svc_lyr_dec->u1_error_in_cur_frame))
  ------------------
  |  Branch (5387:20): [True: 6.88k, False: 43.7k]
  ------------------
 5388|  6.88k|                {
 5389|       |                    /* If Format conversion is not complete,
 5390|       |                     complete it here */
 5391|  6.88k|                    if(ps_dec->u4_num_cores == 2)
  ------------------
  |  Branch (5391:24): [True: 6.88k, False: 0]
  ------------------
 5392|  6.88k|                    {
 5393|       |                        /*do deblocking of all mbs*/
 5394|  6.88k|                        if((ps_dec->u4_nmb_deblk == 0) && (ps_dec->u4_start_recon_deblk == 1) &&
  ------------------
  |  Branch (5394:28): [True: 6.88k, False: 0]
  |  Branch (5394:59): [True: 6.88k, False: 0]
  ------------------
 5395|  6.88k|                           (ps_dec->ps_cur_sps->u1_mb_aff_flag == 0))
  ------------------
  |  Branch (5395:28): [True: 6.88k, False: 0]
  ------------------
 5396|  6.88k|                        {
 5397|  6.88k|                            UWORD8 u1_end_of_row = 0;
 5398|  6.88k|                            UWORD32 u4_max_addr;
 5399|  6.88k|                            tfr_ctxt_t s_tfr_ctxt = {0};
 5400|  6.88k|                            tfr_ctxt_t *ps_tfr_cxt = &s_tfr_ctxt;
 5401|  6.88k|                            pad_mgr_t *ps_pad_mgr = &ps_dec->s_pad_mgr;
 5402|  6.88k|                            UWORD32 u4_slice_end = 0;
 5403|       |
 5404|       |                            /*BS is done for all mbs while parsing*/
 5405|  6.88k|                            u4_max_addr = (ps_dec->u2_frm_wd_in_mbs * ps_dec->u2_frm_ht_in_mbs) - 1;
 5406|       |                            /* BS is moved post recon gen in SVC ext*/
 5407|       |
 5408|  6.88k|                            ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt,
 5409|  6.88k|                                                       ps_dec->u2_frm_wd_in_mbs, 0);
 5410|       |
 5411|  6.88k|                            {
 5412|   200M|                                while(u4_slice_end != 1)
  ------------------
  |  Branch (5412:39): [True: 200M, False: 6.88k]
  ------------------
 5413|   200M|                                {
 5414|   200M|                                    dec_mb_info_t *p_cur_mb;
 5415|   200M|                                    WORD32 i, bs_mb_grp;
 5416|   200M|                                    bs_mb_grp = ps_dec->cur_dec_mb_num - ps_dec->u4_cur_bs_mb_num;
 5417|       |
 5418|   202M|                                    for(i = 0; i < bs_mb_grp; i++)
  ------------------
  |  Branch (5418:48): [True: 1.07M, False: 200M]
  ------------------
 5419|  1.07M|                                    {
 5420|  1.07M|                                        p_cur_mb =
 5421|  1.07M|                                            &ps_dec->ps_frm_mb_info[ps_dec->u4_cur_bs_mb_num];
 5422|       |
 5423|  1.07M|                                        DEBUG_THREADS_PRINTF("ps_dec->u4_cur_bs_mb_num = %d\n",
 5424|  1.07M|                                                             ps_dec->u4_cur_bs_mb_num);
 5425|  1.07M|                                        isvcd_compute_bs_non_mbaff_thread(ps_svc_lyr_dec, p_cur_mb,
 5426|  1.07M|                                                                          ps_dec->u4_cur_bs_mb_num);
 5427|       |
 5428|  1.07M|                                        ps_dec->u4_cur_bs_mb_num++;
 5429|  1.07M|                                        ps_dec->u4_bs_cur_slice_num_mbs++;
 5430|  1.07M|                                    }
 5431|   200M|                                    if(ps_dec->u4_cur_bs_mb_num > u4_max_addr)
  ------------------
  |  Branch (5431:40): [True: 6.88k, False: 200M]
  ------------------
 5432|  6.88k|                                    {
 5433|  6.88k|                                        u4_slice_end = 1;
 5434|  6.88k|                                        u1_end_of_row = 1;
 5435|  6.88k|                                    }
 5436|       |                                    /*deblock MB group*/
 5437|   200M|                                    {
 5438|   200M|                                        UWORD32 u4_num_mbs;
 5439|       |
 5440|   200M|                                        if(ps_dec->u4_cur_bs_mb_num > ps_dec->u4_cur_deblk_mb_num)
  ------------------
  |  Branch (5440:44): [True: 183M, False: 17.5M]
  ------------------
 5441|   183M|                                        {
 5442|   183M|                                            if(u1_end_of_row)
  ------------------
  |  Branch (5442:48): [True: 6.88k, False: 183M]
  ------------------
 5443|  6.88k|                                            {
 5444|  6.88k|                                                u4_num_mbs = ps_dec->u4_cur_bs_mb_num -
 5445|  6.88k|                                                             ps_dec->u4_cur_deblk_mb_num;
 5446|  6.88k|                                            }
 5447|   183M|                                            else
 5448|   183M|                                            {
 5449|   183M|                                                u4_num_mbs = ps_dec->u4_cur_bs_mb_num -
 5450|   183M|                                                             ps_dec->u4_cur_deblk_mb_num - 1;
 5451|   183M|                                            }
 5452|   183M|                                        }
 5453|  17.5M|                                        else
 5454|  17.5M|                                            u4_num_mbs = 0;
 5455|       |
 5456|   200M|                                        ih264d_check_mb_map_deblk(ps_dec, u4_num_mbs, ps_tfr_cxt,
 5457|   200M|                                                                  0);
 5458|   200M|                                    }
 5459|   200M|                                }
 5460|  6.88k|                            }
 5461|  6.88k|                        }
 5462|  6.88k|                    }
 5463|       |
 5464|       |                    /*signal the decode thread*/
 5465|  6.88k|                    ih264d_signal_decode_thread(ps_dec);
 5466|  6.88k|                }
 5467|   119k|                else if((ps_dec->u1_separate_parse) &&
  ------------------
  |  Branch (5467:25): [True: 43.7k, False: 75.8k]
  ------------------
 5468|  43.7k|                        (ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER))
  ------------------
  |  |  110|  43.7k|#define TARGET_LAYER 2
  ------------------
  |  Branch (5468:25): [True: 43.7k, False: 0]
  ------------------
 5469|  43.7k|                {
 5470|       |                    /*signal the decode thread*/
 5471|  43.7k|                    ih264d_signal_decode_thread(ps_dec);
 5472|  43.7k|                }
 5473|       |
 5474|   126k|                DATA_SYNC();
  ------------------
  |  |  116|   126k|#define DATA_SYNC()  __sync_synchronize()
  ------------------
 5475|       |
 5476|   126k|                if((ps_dec_op->u4_error_code & 0xff) != ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED)
  ------------------
  |  Branch (5476:20): [True: 126k, False: 0]
  ------------------
 5477|   126k|                {
 5478|   126k|                    ps_dec_op->u4_pic_wd = (UWORD32) ps_dec->u2_disp_width;
 5479|   126k|                    ps_dec_op->u4_pic_ht = (UWORD32) ps_dec->u2_disp_height;
 5480|   126k|                    ps_dec_op->i4_reorder_depth = ps_dec->i4_reorder_depth;
 5481|   126k|                }
 5482|       |
 5483|       |                // Report if header (sps and pps) has not been decoded yet
 5484|   126k|                if(ps_dec->i4_decode_header == 1 && ps_dec->i4_header_decoded != 3)
  ------------------
  |  Branch (5484:20): [True: 0, False: 126k]
  |  Branch (5484:53): [True: 0, False: 0]
  ------------------
 5485|      0|                {
 5486|      0|                    ps_dec_op->u4_error_code |= (1 << IVD_INSUFFICIENTDATA);
 5487|      0|                    api_ret_value = IV_FAIL;
 5488|      0|                }
 5489|       |
 5490|   126k|                if((ps_dec->u4_pic_buf_got == 1) && (ERROR_DANGLING_FIELD_IN_PIC != i4_err_status))
  ------------------
  |  Branch (5490:20): [True: 126k, False: 0]
  |  Branch (5490:53): [True: 126k, False: 0]
  ------------------
 5491|   126k|                {
 5492|       |                    /* For field pictures, set bottom and top picture decoded u4_flag correctly */
 5493|       |
 5494|   126k|                    if(ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (5494:24): [True: 0, False: 126k]
  ------------------
 5495|      0|                    {
 5496|      0|                        if(1 == ps_dec->ps_cur_slice->u1_bottom_field_flag)
  ------------------
  |  Branch (5496:28): [True: 0, False: 0]
  ------------------
 5497|      0|                        {
 5498|      0|                            ps_dec->u1_top_bottom_decoded |= BOT_FIELD_ONLY;
  ------------------
  |  |   66|      0|#define BOT_FIELD_ONLY      0x01
  ------------------
 5499|      0|                        }
 5500|      0|                        else
 5501|      0|                        {
 5502|      0|                            ps_dec->u1_top_bottom_decoded |= TOP_FIELD_ONLY;
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
 5503|      0|                        }
 5504|      0|                    }
 5505|   126k|                    else
 5506|   126k|                    {
 5507|   126k|                        ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY;
  ------------------
  |  |   65|   126k|#define TOP_FIELD_ONLY      0x02
  ------------------
                                      ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY;
  ------------------
  |  |   66|   126k|#define BOT_FIELD_ONLY      0x01
  ------------------
 5508|   126k|                    }
 5509|       |
 5510|       |                    /* if new frame is not found (if we are still getting slices from
 5511|       |                     * previous frame) ih264d_deblock_display is not called. Such frames
 5512|       |                     * will not be added to reference /display
 5513|       |                     */
 5514|   126k|                    if((ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC) == 0)
  ------------------
  |  |  602|   126k|#define REJECT_CUR_PIC    (0x01)
  ------------------
  |  Branch (5514:24): [True: 126k, False: 0]
  ------------------
 5515|   126k|                    {
 5516|       |                        /* Calling Function to deblock Picture and Display */
 5517|   126k|                        ret = ih264d_deblock_display(ps_dec);
 5518|   126k|                    }
 5519|       |
 5520|       |                    /*set to complete ,as we dont support partial frame decode*/
 5521|   126k|                    if(ps_dec->i4_header_decoded == 3)
  ------------------
  |  Branch (5521:24): [True: 126k, False: 0]
  ------------------
 5522|   126k|                    {
 5523|   126k|                        ps_dec->u4_total_mbs_coded = ps_dec->ps_cur_sps->u4_max_mb_addr + 1;
 5524|   126k|                    }
 5525|       |
 5526|       |                    /*Update the i4_frametype at the end of picture*/
 5527|   126k|                    if(ps_dec->ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL)
  ------------------
  |  |  328|   126k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (5527:24): [True: 105k, False: 21.3k]
  ------------------
 5528|   105k|                    {
 5529|   105k|                        ps_dec->i4_frametype = IV_IDR_FRAME;
 5530|   105k|                    }
 5531|  21.3k|                    else if(ps_dec->i4_pic_type == B_SLICE)
  ------------------
  |  |  369|  21.3k|#define B_SLICE  1
  ------------------
  |  Branch (5531:29): [True: 15.3k, False: 6.00k]
  ------------------
 5532|  15.3k|                    {
 5533|  15.3k|                        ps_dec->i4_frametype = IV_B_FRAME;
 5534|  15.3k|                    }
 5535|  6.00k|                    else if(ps_dec->i4_pic_type == P_SLICE)
  ------------------
  |  |  368|  6.00k|#define P_SLICE  0
  ------------------
  |  Branch (5535:29): [True: 3.51k, False: 2.49k]
  ------------------
 5536|  3.51k|                    {
 5537|  3.51k|                        ps_dec->i4_frametype = IV_P_FRAME;
 5538|  3.51k|                    }
 5539|  2.49k|                    else if(ps_dec->i4_pic_type == I_SLICE)
  ------------------
  |  |  370|  2.49k|#define I_SLICE  2
  ------------------
  |  Branch (5539:29): [True: 2.26k, False: 232]
  ------------------
 5540|  2.26k|                    {
 5541|  2.26k|                        ps_dec->i4_frametype = IV_I_FRAME;
 5542|  2.26k|                    }
 5543|    232|                    else
 5544|    232|                    {
 5545|    232|                        H264_DEC_DEBUG_PRINT("Shouldn't come here\n");
  ------------------
  |  |   39|    232|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 5546|    232|                    }
 5547|       |
 5548|       |                    // Update the content type
 5549|   126k|                    ps_dec->i4_content_type = ps_dec->ps_cur_slice->u1_field_pic_flag;
 5550|       |
 5551|   126k|                    ps_dec->u4_total_frames_decoded = ps_dec->u4_total_frames_decoded + 2;
 5552|   126k|                    ps_dec->u4_total_frames_decoded =
 5553|   126k|                        ps_dec->u4_total_frames_decoded - ps_dec->ps_cur_slice->u1_field_pic_flag;
 5554|   126k|                }
 5555|       |
 5556|       |                /* In case the decoder is configured to run in low delay mode,
 5557|       |                 * then get display buffer and then format convert.
 5558|       |                 * Note in this mode, format conversion does not run in parallel in a
 5559|       |                 * thread and adds to the codec cycles
 5560|       |                 */
 5561|   126k|                if((IVD_DECODE_FRAME_OUT == ps_dec->e_frm_out_mode) && ps_dec->u1_init_dec_flag)
  ------------------
  |  Branch (5561:20): [True: 0, False: 126k]
  |  Branch (5561:72): [True: 0, False: 0]
  ------------------
 5562|      0|                {
 5563|      0|                    ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer,
 5564|      0|                                                  &(ps_dec->s_disp_op));
 5565|       |
 5566|      0|                    if(0 == ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (5566:24): [True: 0, False: 0]
  ------------------
 5567|      0|                    {
 5568|      0|                        ps_dec->u4_fmt_conv_cur_row = 0;
 5569|      0|                        ps_dec->u4_output_present = 1;
 5570|      0|                    }
 5571|      0|                    else
 5572|      0|                    {
 5573|      0|                        ps_dec->u4_output_present = 0;
 5574|      0|                    }
 5575|      0|                }
 5576|       |
 5577|   126k|                isvcd_fill_output_struct_from_context(ps_svc_lyr_dec, ps_dec_op);
 5578|       |
 5579|       |                /* If Format conversion is not complete,
 5580|       |                 complete it here */
 5581|       |                /* For non-target layers, buffers are retrieved but not displayed */
 5582|       |
 5583|   126k|                if((ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER) &&
  ------------------
  |  |  110|   126k|#define TARGET_LAYER 2
  ------------------
  |  Branch (5583:20): [True: 87.7k, False: 38.6k]
  ------------------
 5584|  87.7k|                   ps_dec->u4_output_present &&
  ------------------
  |  Branch (5584:20): [True: 23.9k, False: 63.7k]
  ------------------
 5585|  23.9k|                   (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
  ------------------
  |  Branch (5585:20): [True: 9.09k, False: 14.8k]
  ------------------
 5586|  9.09k|                {
 5587|  9.09k|                    ps_dec->u4_fmt_conv_num_rows =
 5588|  9.09k|                        ps_dec->s_disp_frame_info.u4_y_ht - ps_dec->u4_fmt_conv_cur_row;
 5589|  9.09k|                    ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), ps_dec->u4_fmt_conv_cur_row,
 5590|  9.09k|                                          ps_dec->u4_fmt_conv_num_rows);
 5591|  9.09k|                    ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
 5592|  9.09k|                }
 5593|       |
 5594|   126k|                ih264d_release_display_field(ps_dec, &(ps_dec->s_disp_op));
 5595|       |
 5596|   126k|                if(ps_dec->i4_decode_header == 1 && (ps_dec->i4_header_decoded & 1) == 1)
  ------------------
  |  Branch (5596:20): [True: 0, False: 126k]
  |  Branch (5596:53): [True: 0, False: 0]
  ------------------
 5597|      0|                {
 5598|      0|                    ps_dec_op->u4_progressive_frame_flag = 1;
 5599|      0|                    if((NULL != ps_dec->ps_cur_sps) && (1 == (ps_dec->ps_cur_sps->u1_is_valid)))
  ------------------
  |  Branch (5599:24): [True: 0, False: 0]
  |  Branch (5599:56): [True: 0, False: 0]
  ------------------
 5600|      0|                    {
 5601|      0|                        if((0 == ps_dec->ps_sps->u1_frame_mbs_only_flag) &&
  ------------------
  |  Branch (5601:28): [True: 0, False: 0]
  ------------------
 5602|      0|                           (0 == ps_dec->ps_sps->u1_mb_aff_flag))
  ------------------
  |  Branch (5602:28): [True: 0, False: 0]
  ------------------
 5603|      0|                            ps_dec_op->u4_progressive_frame_flag = 0;
 5604|      0|                    }
 5605|      0|                }
 5606|       |
 5607|   126k|                if((TOP_FIELD_ONLY | BOT_FIELD_ONLY) == ps_dec->u1_top_bottom_decoded)
  ------------------
  |  |   65|   126k|#define TOP_FIELD_ONLY      0x02
  ------------------
                              if((TOP_FIELD_ONLY | BOT_FIELD_ONLY) == ps_dec->u1_top_bottom_decoded)
  ------------------
  |  |   66|   126k|#define BOT_FIELD_ONLY      0x01
  ------------------
  |  Branch (5607:20): [True: 126k, False: 0]
  ------------------
 5608|   126k|                {
 5609|   126k|                    ps_dec->u1_top_bottom_decoded = 0;
 5610|   126k|                }
 5611|       |                /*--------------------------------------------------------------------*/
 5612|       |                /* Do End of Pic processing.                                          */
 5613|       |                /* Should be called only if frame was decoded in previous process call*/
 5614|       |                /*--------------------------------------------------------------------*/
 5615|   126k|                if(ps_dec->u4_pic_buf_got == 1)
  ------------------
  |  Branch (5615:20): [True: 126k, False: 0]
  ------------------
 5616|   126k|                {
 5617|   126k|                    if(1 == ps_dec->u1_last_pic_not_decoded)
  ------------------
  |  Branch (5617:24): [True: 0, False: 126k]
  ------------------
 5618|      0|                    {
 5619|      0|                        ret = ih264d_end_of_pic_dispbuf_mgr(ps_dec);
 5620|       |
 5621|      0|                        if(ret != OK) return ret;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (5621:28): [True: 0, False: 0]
  ------------------
 5622|       |
 5623|      0|                        ret = ih264d_end_of_pic(ps_dec);
 5624|      0|                        if(ret != OK) return ret;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (5624:28): [True: 0, False: 0]
  ------------------
 5625|      0|                    }
 5626|   126k|                    else
 5627|   126k|                    {
 5628|   126k|                        ret = ih264d_end_of_pic(ps_dec);
 5629|   126k|                        if(ret != OK) return ret;
  ------------------
  |  |  114|   126k|#define OK        0
  ------------------
  |  Branch (5629:28): [True: 0, False: 126k]
  ------------------
 5630|   126k|                    }
 5631|   126k|                }
 5632|       |
 5633|   126k|                if(ps_dec->u1_enable_mb_info && ps_dec->u4_output_present)
  ------------------
  |  Branch (5633:20): [True: 0, False: 126k]
  |  Branch (5633:49): [True: 0, False: 0]
  ------------------
 5634|      0|                {
 5635|      0|                    UWORD32 disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id;
 5636|      0|                    if(ps_h264d_dec_ip->pu1_8x8_blk_qp_map)
  ------------------
  |  Branch (5636:24): [True: 0, False: 0]
  ------------------
 5637|      0|                    {
 5638|      0|                        ps_h264d_dec_op->pu1_8x8_blk_qp_map = ps_h264d_dec_ip->pu1_8x8_blk_qp_map;
 5639|      0|                        ps_h264d_dec_op->u4_8x8_blk_qp_map_size = ps_dec->u4_total_mbs << 2;
 5640|      0|                        ih264_memcpy(ps_h264d_dec_op->pu1_8x8_blk_qp_map,
 5641|      0|                                     ps_dec->as_buf_id_info_map[disp_buf_id].pu1_qp_map,
 5642|      0|                                     ps_dec->u4_total_mbs << 2);
 5643|      0|                    }
 5644|      0|                    if(ps_h264d_dec_ip->pu1_8x8_blk_type_map)
  ------------------
  |  Branch (5644:24): [True: 0, False: 0]
  ------------------
 5645|      0|                    {
 5646|      0|                        ps_h264d_dec_op->pu1_8x8_blk_type_map =
 5647|      0|                            ps_h264d_dec_ip->pu1_8x8_blk_type_map;
 5648|      0|                        ps_h264d_dec_op->u4_8x8_blk_type_map_size = ps_dec->u4_total_mbs << 2;
 5649|      0|                        ih264_memcpy(ps_h264d_dec_op->pu1_8x8_blk_type_map,
 5650|      0|                                     ps_dec->as_buf_id_info_map[disp_buf_id].pu1_mb_type_map,
 5651|      0|                                     ps_dec->u4_total_mbs << 2);
 5652|      0|                    }
 5653|      0|                }
 5654|       |                /*Data memory barrier instruction,so that yuv write by the library is
 5655|       |                 * complete*/
 5656|   126k|                DATA_SYNC();
  ------------------
  |  |  116|   126k|#define DATA_SYNC()  __sync_synchronize()
  ------------------
 5657|       |
 5658|   126k|                H264_DEC_DEBUG_PRINT("The num bytes consumed: %d\n",
  ------------------
  |  |   39|   126k|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 5659|   126k|                                     ps_dec_op->u4_num_bytes_consumed);
 5660|   126k|            }
 5661|   135k|        }
 5662|       |        /* highest layer for flush validation */
 5663|       |
 5664|   519k|        if((ps_dec->u1_flushfrm) && (1 == flush_decode))
  ------------------
  |  Branch (5664:12): [True: 0, False: 519k]
  |  Branch (5664:37): [True: 0, False: 0]
  ------------------
 5665|      0|        {
 5666|      0|            u1_res_id = u1_num_res_lyrs - 1;
 5667|      0|            ps_svc_lyr_dec = ps_svcd_ctxt->ps_svc_dec_lyr + u1_res_id;
 5668|      0|            ps_dec = &ps_svc_lyr_dec->s_dec;
 5669|       |
 5670|      0|            ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer, &(ps_dec->s_disp_op));
 5671|      0|            if(0 == ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (5671:16): [True: 0, False: 0]
  ------------------
 5672|      0|            {
 5673|       |                /* check output buffer size given by the application */
 5674|      0|                if(check_app_out_buf_size(ps_dec) != IV_SUCCESS)
  ------------------
  |  Branch (5674:20): [True: 0, False: 0]
  ------------------
 5675|      0|                {
 5676|      0|                    ps_dec_op->u4_error_code = IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
 5677|      0|                    return (IV_FAIL);
 5678|      0|                }
 5679|       |
 5680|      0|                ps_dec->u4_fmt_conv_cur_row = 0;
 5681|      0|                ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht;
 5682|      0|                ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), ps_dec->u4_fmt_conv_cur_row,
 5683|      0|                                      ps_dec->u4_fmt_conv_num_rows);
 5684|      0|                ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
 5685|      0|                ps_dec->u4_output_present = 1;
 5686|      0|            }
 5687|      0|            else
 5688|      0|            {
 5689|      0|                ps_dec->u4_output_present = 0;
 5690|      0|            }
 5691|      0|            ih264d_export_sei_params(&ps_dec_op->s_sei_decode_op, ps_dec);
 5692|       |
 5693|      0|            ih264d_release_display_field(ps_dec, &(ps_dec->s_disp_op));
 5694|       |
 5695|      0|            ps_dec_op->u4_pic_wd = (UWORD32) ps_dec->u2_disp_width;
 5696|      0|            ps_dec_op->u4_pic_ht = (UWORD32) ps_dec->u2_disp_height;
 5697|      0|            ps_dec_op->i4_reorder_depth = ps_dec->i4_reorder_depth;
 5698|      0|            ps_dec_op->i4_display_index = ps_dec->i4_display_index;
 5699|       |
 5700|      0|            ps_dec_op->u4_new_seq = 0;
 5701|      0|            ps_dec_op->u4_output_present = (ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|      0|#define TARGET_LAYER 2
  ------------------
  |  Branch (5701:44): [True: 0, False: 0]
  ------------------
 5702|      0|                                               ? ps_dec->u4_output_present
 5703|      0|                                               : 0;
 5704|      0|            ps_dec_op->u4_progressive_frame_flag = ps_dec->s_disp_op.u4_progressive_frame_flag;
 5705|      0|            ps_dec_op->e_output_format = ps_dec->s_disp_op.e_output_format;
 5706|      0|            ps_dec_op->s_disp_frm_buf = ps_dec->s_disp_op.s_disp_frm_buf;
 5707|      0|            ps_dec_op->e4_fld_type = ps_dec->s_disp_op.e4_fld_type;
 5708|      0|            ps_dec_op->u4_ts = ps_dec->s_disp_op.u4_ts;
 5709|      0|            ps_dec_op->u4_disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id;
 5710|       |
 5711|       |            /*In the case of flush ,since no frame is decoded set pic type as invalid*/
 5712|      0|            ps_dec_op->u4_is_ref_flag = UINT32_MAX;
 5713|      0|            ps_dec_op->e_pic_type = IV_NA_FRAME;
 5714|      0|            ps_dec_op->u4_frame_decoded_flag = 0;
 5715|       |
 5716|      0|            if(0 == ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (5716:16): [True: 0, False: 0]
  ------------------
 5717|      0|            {
 5718|      0|                return (IV_SUCCESS);
 5719|      0|            }
 5720|      0|            else
 5721|      0|                return (IV_FAIL);
 5722|      0|        }
 5723|   519k|    }
 5724|       |
 5725|   519k|    if((ps_dec_op->u4_error_code & 0xff) != ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED)
  ------------------
  |  Branch (5725:8): [True: 519k, False: 0]
  ------------------
 5726|   519k|    {
 5727|   519k|        ps_dec_op->u4_pic_wd = (UWORD32) ps_dec->u2_disp_width;
 5728|   519k|        ps_dec_op->u4_pic_ht = (UWORD32) ps_dec->u2_disp_height;
 5729|   519k|        ps_dec_op->i4_reorder_depth = ps_dec->i4_reorder_depth;
 5730|   519k|    }
 5731|   519k|    return api_ret_value;
 5732|   519k|}
isvcd_set_params:
 6163|  45.1k|{
 6164|  45.1k|    dec_struct_t *ps_dec;
 6165|  45.1k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 6166|  45.1k|    WORD32 ret = IV_SUCCESS;
 6167|  45.1k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 6168|  45.1k|    WORD32 u1_layer_id;
 6169|       |
 6170|  45.1k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
 6171|  45.1k|    ps_svcd_ctxt->i4_eos_flag = 0;
 6172|   180k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|   180k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (6172:26): [True: 135k, False: 45.1k]
  ------------------
 6173|   135k|    {
 6174|   135k|        isvcd_ctl_set_config_ip_t *ps_h264d_ctl_ip = (isvcd_ctl_set_config_ip_t *) pv_api_ip;
 6175|   135k|        isvcd_ctl_set_config_op_t *ps_h264d_ctl_op = (isvcd_ctl_set_config_op_t *) pv_api_op;
 6176|   135k|        ivd_ctl_set_config_ip_t *ps_ctl_ip = &ps_h264d_ctl_ip->s_ivd_ctl_set_config_ip_t;
 6177|   135k|        ivd_ctl_set_config_op_t *ps_ctl_op = &ps_h264d_ctl_op->s_ivd_ctl_set_config_op_t;
 6178|       |
 6179|   135k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 6180|   135k|        ps_dec = &ps_svc_lyr_dec->s_dec;
 6181|       |
 6182|   135k|        ps_dec->u1_flushfrm = 0;
 6183|   135k|        ps_dec->u4_skip_frm_mask = 0;
 6184|   135k|        ps_ctl_op->u4_error_code = 0;
 6185|       |
 6186|   135k|        if(ps_ctl_ip->e_frm_skip_mode != IVD_SKIP_NONE)
  ------------------
  |  Branch (6186:12): [True: 0, False: 135k]
  ------------------
 6187|      0|        {
 6188|      0|            ps_ctl_op->u4_error_code = (1 << IVD_UNSUPPORTEDPARAM);
 6189|      0|            ret = IV_FAIL;
 6190|      0|        }
 6191|       |
 6192|   135k|        if(ps_ctl_ip->u4_disp_wd >= ps_dec->u2_disp_width)
  ------------------
  |  Branch (6192:12): [True: 135k, False: 0]
  ------------------
 6193|   135k|        {
 6194|   135k|            ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
 6195|   135k|        }
 6196|      0|        else if(0 == ps_dec->i4_header_decoded)
  ------------------
  |  Branch (6196:17): [True: 0, False: 0]
  ------------------
 6197|      0|        {
 6198|      0|            ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
 6199|      0|        }
 6200|      0|        else if(ps_ctl_ip->u4_disp_wd == 0)
  ------------------
  |  Branch (6200:17): [True: 0, False: 0]
  ------------------
 6201|      0|        {
 6202|      0|            ps_dec->u4_app_disp_width = 0;
 6203|      0|        }
 6204|      0|        else
 6205|      0|        {
 6206|       |            /*
 6207|       |             * Set the display width to zero. This will ensure that the wrong value we
 6208|       |             * had stored (0xFFFFFFFF) does not propogate.
 6209|       |             */
 6210|      0|            ps_dec->u4_app_disp_width = 0;
 6211|      0|            ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
 6212|      0|            ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID;
 6213|      0|            ret = IV_FAIL;
 6214|      0|        }
 6215|       |
 6216|   135k|        if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_FRAME)
  ------------------
  |  Branch (6216:12): [True: 67.7k, False: 67.7k]
  ------------------
 6217|  67.7k|            ps_dec->i4_decode_header = 0;
 6218|  67.7k|        else if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_HEADER)
  ------------------
  |  Branch (6218:17): [True: 67.7k, False: 0]
  ------------------
 6219|  67.7k|            ps_dec->i4_decode_header = 1;
 6220|      0|        else
 6221|      0|        {
 6222|      0|            ps_ctl_op->u4_error_code = (1 << IVD_UNSUPPORTEDPARAM);
 6223|      0|            ps_dec->i4_decode_header = 1;
 6224|      0|            ret = IV_FAIL;
 6225|      0|        }
 6226|   135k|        ps_dec->e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
 6227|       |
 6228|   135k|        if((ps_ctl_ip->e_frm_out_mode != IVD_DECODE_FRAME_OUT) &&
  ------------------
  |  Branch (6228:12): [True: 135k, False: 0]
  ------------------
 6229|   135k|           (ps_ctl_ip->e_frm_out_mode != IVD_DISPLAY_FRAME_OUT))
  ------------------
  |  Branch (6229:12): [True: 0, False: 135k]
  ------------------
 6230|      0|        {
 6231|      0|            ps_ctl_op->u4_error_code = (1 << IVD_UNSUPPORTEDPARAM);
 6232|      0|            ret = IV_FAIL;
 6233|      0|        }
 6234|   135k|        ps_dec->e_frm_out_mode = ps_ctl_ip->e_frm_out_mode;
 6235|   135k|    }
 6236|  45.1k|    return ret;
 6237|  45.1k|}
isvcd_set_target_layer:
 6260|  22.5k|{
 6261|  22.5k|    WORD32 ret = IV_SUCCESS;
 6262|       |
 6263|  22.5k|    isvcd_set_target_layer_ip_t *ps_ip;
 6264|  22.5k|    isvcd_set_target_layer_op_t *ps_op;
 6265|  22.5k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 6266|  22.5k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
 6267|       |
 6268|  22.5k|    ps_ip = (isvcd_set_target_layer_ip_t *) pv_api_ip;
 6269|  22.5k|    ps_op = (isvcd_set_target_layer_op_t *) pv_api_op;
 6270|       |
 6271|  22.5k|    ps_svcd_ctxt->u1_tgt_dep_id = ps_ip->u1_tgt_dep_id;
 6272|  22.5k|    ps_svcd_ctxt->u1_tgt_quality_id = ps_ip->u1_tgt_quality_id;
 6273|  22.5k|    ps_svcd_ctxt->u1_tgt_temp_id = ps_ip->u1_tgt_temp_id;
 6274|  22.5k|    ps_svcd_ctxt->u1_tgt_priority_id = ps_ip->u1_tgt_priority_id;
 6275|       |
 6276|  22.5k|    ret = isvcd_nal_parse_set_target_attr(ps_ip->u1_tgt_quality_id, ps_ip->u1_tgt_dep_id,
 6277|  22.5k|                                          ps_ip->u1_tgt_temp_id, ps_ip->u1_tgt_priority_id,
 6278|  22.5k|                                          ps_svcd_ctxt->pv_nal_parse_ctxt);
 6279|  22.5k|    ps_op->u4_error_code = 0;
 6280|       |
 6281|  22.5k|    return ret;
 6282|  22.5k|}
isvcd_delete:
 6351|  22.5k|{
 6352|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 6353|  22.5k|    isvcd_delete_ip_t *ps_ip = (isvcd_delete_ip_t *) pv_api_ip;
 6354|  22.5k|    isvcd_delete_op_t *ps_op = (isvcd_delete_op_t *) pv_api_op;
 6355|       |
 6356|  22.5k|    UWORD8 u1_layer_id;
 6357|  22.5k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 6358|  22.5k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
 6359|  22.5k|    UNUSED(ps_ip);
  ------------------
  |  |   45|  22.5k|#define UNUSED(x) ((void)(x))
  ------------------
 6360|       |
 6361|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (6361:26): [True: 67.7k, False: 22.5k]
  ------------------
 6362|  67.7k|    {
 6363|  67.7k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 6364|  67.7k|        isvcd_free_dynamic_bufs(ps_svc_lyr_dec);
 6365|  67.7k|    }
 6366|  22.5k|    isvcd_free_static_bufs(dec_hdl);
 6367|  22.5k|    ps_op->s_ivd_delete_op_t.u4_error_code = 0;
 6368|       |
 6369|  22.5k|    return IV_SUCCESS;
 6370|  22.5k|}
isvcd_reset:
 6393|  4.14k|{
 6394|  4.14k|    dec_struct_t *ps_dec;
 6395|  4.14k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 6396|  4.14k|    ivd_ctl_reset_op_t *ps_ctl_op = (ivd_ctl_reset_op_t *) pv_api_op;
 6397|  4.14k|    UWORD8 u1_layer_id;
 6398|  4.14k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 6399|  4.14k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
 6400|  4.14k|    UNUSED(pv_api_ip);
  ------------------
  |  |   45|  4.14k|#define UNUSED(x) ((void)(x))
  ------------------
 6401|  4.14k|    ps_ctl_op->u4_error_code = 0;
 6402|       |
 6403|  4.14k|    ps_svcd_ctxt->i4_eos_flag = 0;
 6404|  4.14k|    ps_svcd_ctxt->u4_num_sps_ctr = 0;
 6405|  4.14k|    ps_svcd_ctxt->u4_num_pps_ctr = 0;
 6406|  4.14k|    ps_svcd_ctxt->u1_pre_parse_in_flush = 1;
 6407|  16.5k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  16.5k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (6407:26): [True: 12.4k, False: 4.14k]
  ------------------
 6408|  12.4k|    {
 6409|  12.4k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 6410|  12.4k|        ps_dec = &ps_svc_lyr_dec->s_dec;
 6411|  12.4k|        if(ps_dec != NULL)
  ------------------
  |  Branch (6411:12): [True: 12.4k, False: 0]
  ------------------
 6412|  12.4k|        {
 6413|  12.4k|            if(((buf_mgr_t *) ps_dec->pv_pic_buf_mgr)->pv_mutex != NULL)
  ------------------
  |  Branch (6413:16): [True: 787, False: 11.6k]
  ------------------
 6414|    787|                ih264_buf_mgr_free(ps_dec->pv_pic_buf_mgr);
 6415|  12.4k|            if(((buf_mgr_t *) ps_dec->pv_mv_buf_mgr)->pv_mutex != NULL)
  ------------------
  |  Branch (6415:16): [True: 787, False: 11.6k]
  ------------------
 6416|    787|                ih264_buf_mgr_free(ps_dec->pv_mv_buf_mgr);
 6417|       |
 6418|  12.4k|            isvcd_init_decoder(ps_svc_lyr_dec);
 6419|  12.4k|            ps_dec->u1_flushfrm = 0;
 6420|  12.4k|        }
 6421|      0|        else
 6422|      0|        {
 6423|      0|            H264_DEC_DEBUG_PRINT("\nReset called without Initializing the decoder\n");
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 6424|      0|            ps_ctl_op->u4_error_code = ERROR_INIT_NOT_DONE;
 6425|      0|        }
 6426|  12.4k|    }
 6427|  4.14k|    return IV_SUCCESS;
 6428|  4.14k|}
isvcd_ctl:
 6451|   117k|{
 6452|   117k|    ivd_ctl_set_config_ip_t *ps_ctl_ip;
 6453|   117k|    ivd_ctl_set_config_op_t *ps_ctl_op;
 6454|   117k|    WORD32 ret = IV_SUCCESS;
 6455|   117k|    UWORD32 subcommand;
 6456|   117k|    dec_struct_t *ps_dec;
 6457|   117k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 6458|   117k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 6459|   117k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
 6460|   117k|    ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[ps_svcd_ctxt->u1_target_layer_id];
 6461|   117k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 6462|   117k|    if(ps_dec->init_done != 1)
  ------------------
  |  Branch (6462:8): [True: 0, False: 117k]
  ------------------
 6463|      0|    {
 6464|      0|        return IV_FAIL;
 6465|      0|    }
 6466|   117k|    ps_ctl_ip = (ivd_ctl_set_config_ip_t *) pv_api_ip;
 6467|   117k|    ps_ctl_op = (ivd_ctl_set_config_op_t *) pv_api_op;
 6468|   117k|    ps_ctl_op->u4_error_code = 0;
 6469|   117k|    subcommand = ps_ctl_ip->e_sub_cmd;
 6470|       |
 6471|   117k|    switch(subcommand)
 6472|   117k|    {
 6473|      0|        case IVD_CMD_CTL_GETPARAMS:
  ------------------
  |  Branch (6473:9): [True: 0, False: 117k]
  ------------------
 6474|      0|            ret = isvcd_get_status(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6475|      0|            break;
 6476|  45.1k|        case IVD_CMD_CTL_SETPARAMS:
  ------------------
  |  Branch (6476:9): [True: 45.1k, False: 71.9k]
  ------------------
 6477|  45.1k|            ret = isvcd_set_params(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6478|  45.1k|            break;
 6479|  4.14k|        case IVD_CMD_CTL_RESET:
  ------------------
  |  Branch (6479:9): [True: 4.14k, False: 112k]
  ------------------
 6480|  4.14k|            ret = isvcd_reset(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6481|  4.14k|            break;
 6482|      0|        case IVD_CMD_CTL_SETDEFAULT:
  ------------------
  |  Branch (6482:9): [True: 0, False: 117k]
  ------------------
 6483|      0|            ret = isvcd_set_default_params(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6484|      0|            break;
 6485|      0|        case IVD_CMD_CTL_FLUSH:
  ------------------
  |  Branch (6485:9): [True: 0, False: 117k]
  ------------------
 6486|      0|            ret = isvcd_set_flush_mode(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6487|      0|            break;
 6488|      0|        case IVD_CMD_CTL_GETBUFINFO:
  ------------------
  |  Branch (6488:9): [True: 0, False: 117k]
  ------------------
 6489|      0|            ret = isvcd_get_buf_info(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6490|      0|            break;
 6491|      0|        case IVD_CMD_CTL_GETVERSION:
  ------------------
  |  Branch (6491:9): [True: 0, False: 117k]
  ------------------
 6492|      0|            ret = ih264d_get_version(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6493|      0|            break;
 6494|      0|        case IH264D_CMD_CTL_DEGRADE:
  ------------------
  |  Branch (6494:9): [True: 0, False: 117k]
  ------------------
 6495|      0|            ret = isvcd_set_degrade(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6496|      0|            break;
 6497|       |
 6498|  22.5k|        case IH264D_CMD_CTL_SET_NUM_CORES:
  ------------------
  |  Branch (6498:9): [True: 22.5k, False: 94.5k]
  ------------------
 6499|  22.5k|            ret = isvcd_set_num_cores(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6500|  22.5k|            break;
 6501|      0|        case IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS:
  ------------------
  |  Branch (6501:9): [True: 0, False: 117k]
  ------------------
 6502|      0|            ret = isvcd_get_frame_dimensions(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6503|      0|            break;
 6504|      0|        case IH264D_CMD_CTL_GET_VUI_PARAMS:
  ------------------
  |  Branch (6504:9): [True: 0, False: 117k]
  ------------------
 6505|      0|            ret = isvcd_get_vui_params(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6506|      0|            break;
 6507|      0|        case IH264D_CMD_CTL_GET_SEI_MDCV_PARAMS:
  ------------------
  |  Branch (6507:9): [True: 0, False: 117k]
  ------------------
 6508|      0|            ret = isvcd_get_sei_mdcv_params(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6509|      0|            break;
 6510|      0|        case IH264D_CMD_CTL_GET_SEI_CLL_PARAMS:
  ------------------
  |  Branch (6510:9): [True: 0, False: 117k]
  ------------------
 6511|      0|            ret = isvcd_get_sei_cll_params(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6512|      0|            break;
 6513|      0|        case IH264D_CMD_CTL_GET_SEI_AVE_PARAMS:
  ------------------
  |  Branch (6513:9): [True: 0, False: 117k]
  ------------------
 6514|      0|            ret = isvcd_get_sei_ave_params(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6515|      0|            break;
 6516|      0|        case IH264D_CMD_CTL_GET_SEI_CCV_PARAMS:
  ------------------
  |  Branch (6516:9): [True: 0, False: 117k]
  ------------------
 6517|      0|            ret = isvcd_get_sei_ccv_params(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6518|      0|            break;
 6519|  22.5k|        case IH264D_CMD_CTL_SET_PROCESSOR:
  ------------------
  |  Branch (6519:9): [True: 22.5k, False: 94.5k]
  ------------------
 6520|  22.5k|            ret = isvcd_set_processor(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6521|  22.5k|            break;
 6522|  22.5k|        case ISVCD_CMD_CTL_SET_TGT_LAYER:
  ------------------
  |  Branch (6522:9): [True: 22.5k, False: 94.5k]
  ------------------
 6523|  22.5k|            ret = isvcd_set_target_layer(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 6524|  22.5k|            break;
 6525|      0|        default:
  ------------------
  |  Branch (6525:9): [True: 0, False: 117k]
  ------------------
 6526|      0|            H264_DEC_DEBUG_PRINT("\ndo nothing\n");
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 6527|      0|            break;
 6528|   117k|    }
 6529|       |
 6530|   117k|    return ret;
 6531|   117k|}
isvcd_set_num_cores:
 7120|  22.5k|{
 7121|  22.5k|    UWORD8 u1_layer_id;
 7122|  22.5k|    isvcd_ctl_set_num_cores_ip_t *ps_ip;
 7123|  22.5k|    isvcd_ctl_set_num_cores_op_t *ps_op;
 7124|  22.5k|    dec_struct_t *ps_dec;
 7125|  22.5k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 7126|  22.5k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 7127|  22.5k|    ps_svcd_ctxt = (svc_dec_ctxt_t *) dec_hdl->pv_codec_handle;
 7128|       |
 7129|  22.5k|    ps_ip = (isvcd_ctl_set_num_cores_ip_t *) pv_api_ip;
 7130|  22.5k|    ps_op = (isvcd_ctl_set_num_cores_op_t *) pv_api_op;
 7131|  22.5k|    ps_op->u4_error_code = 0;
 7132|  90.3k|    for(u1_layer_id = 0; u1_layer_id < MAX_NUM_RES_LYRS; u1_layer_id++)
  ------------------
  |  |   94|  90.3k|#define MAX_NUM_RES_LYRS 3
  ------------------
  |  Branch (7132:26): [True: 67.7k, False: 22.5k]
  ------------------
 7133|  67.7k|    {
 7134|  67.7k|        ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[u1_layer_id];
 7135|  67.7k|        ps_dec = &ps_svc_lyr_dec->s_dec;
 7136|  67.7k|        ps_dec->u4_num_cores = ps_ip->u4_num_cores;
 7137|       |
 7138|  67.7k|        if(ps_dec->u4_num_cores == 1)
  ------------------
  |  Branch (7138:12): [True: 33.8k, False: 33.9k]
  ------------------
 7139|  33.8k|        {
 7140|  33.8k|            ps_dec->u1_separate_parse = 0;
 7141|  33.8k|        }
 7142|  33.9k|        else
 7143|  33.9k|        {
 7144|  33.9k|            ps_dec->u1_separate_parse = 1;
 7145|  33.9k|        }
 7146|       |
 7147|       |        /*using only upto three threads currently*/
 7148|  67.7k|        if(ps_dec->u4_num_cores > 3) ps_dec->u4_num_cores = 3;
  ------------------
  |  Branch (7148:12): [True: 17.3k, False: 50.4k]
  ------------------
 7149|  67.7k|    }
 7150|  22.5k|    return IV_SUCCESS;
 7151|  22.5k|}
isvcd_fill_output_struct_from_context:
 7175|   195k|{
 7176|   195k|    dec_struct_t *ps_dec;
 7177|   195k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 7178|   195k|    if((ps_dec_op->u4_error_code & 0xff) != ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED)
  ------------------
  |  Branch (7178:8): [True: 195k, False: 0]
  ------------------
 7179|   195k|    {
 7180|   195k|        ps_dec_op->u4_pic_wd = (UWORD32) ps_dec->u2_disp_width;
 7181|   195k|        ps_dec_op->u4_pic_ht = (UWORD32) ps_dec->u2_disp_height;
 7182|   195k|    }
 7183|   195k|    ps_dec_op->i4_reorder_depth = ps_dec->i4_reorder_depth;
 7184|   195k|    ps_dec_op->i4_display_index = ps_dec->i4_display_index;
 7185|   195k|    ps_dec_op->e_pic_type = ps_dec->i4_frametype;
 7186|       |
 7187|   195k|    ps_dec_op->u4_new_seq = 0;
 7188|   195k|    ps_dec_op->u4_output_present =
 7189|   195k|        (ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER) ? ps_dec->u4_output_present : 0;
  ------------------
  |  |  110|   195k|#define TARGET_LAYER 2
  ------------------
  |  Branch (7189:9): [True: 88.9k, False: 106k]
  ------------------
 7190|   195k|    ps_dec_op->u4_progressive_frame_flag = ps_dec->s_disp_op.u4_progressive_frame_flag;
 7191|       |
 7192|   195k|    ps_dec_op->u4_is_ref_flag = 1;
 7193|   195k|    if(ps_dec_op->u4_frame_decoded_flag)
  ------------------
  |  Branch (7193:8): [True: 126k, False: 68.6k]
  ------------------
 7194|   126k|    {
 7195|   126k|        if(ps_dec->ps_cur_slice->u1_nal_ref_idc == 0) ps_dec_op->u4_is_ref_flag = 0;
  ------------------
  |  Branch (7195:12): [True: 16.6k, False: 109k]
  ------------------
 7196|   126k|    }
 7197|       |
 7198|   195k|    ps_dec_op->e_output_format = ps_dec->s_disp_op.e_output_format;
 7199|   195k|    ps_dec_op->s_disp_frm_buf = ps_dec->s_disp_op.s_disp_frm_buf;
 7200|   195k|    ps_dec_op->e4_fld_type = ps_dec->s_disp_op.e4_fld_type;
 7201|   195k|    ps_dec_op->u4_ts = ps_dec->s_disp_op.u4_ts;
 7202|   195k|    ps_dec_op->u4_disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id;
 7203|       |
 7204|   195k|    ih264d_export_sei_params(&ps_dec_op->s_sei_decode_op, ps_dec);
 7205|   195k|}
isvcd_api_function:
 7228|   886k|{
 7229|   886k|    UWORD32 command;
 7230|   886k|    UWORD32 *pu2_ptr_cmd;
 7231|   886k|    UWORD32 u4_api_ret;
 7232|   886k|    IV_API_CALL_STATUS_T e_status;
 7233|   886k|    e_status = api_check_struct_sanity(dec_hdl, pv_api_ip, pv_api_op);
 7234|       |
 7235|   886k|    if(e_status != IV_SUCCESS)
  ------------------
  |  Branch (7235:8): [True: 570, False: 885k]
  ------------------
 7236|    570|    {
 7237|    570|        UWORD32 *ptr_err;
 7238|       |
 7239|    570|        ptr_err = (UWORD32 *) pv_api_op;
 7240|    570|        UNUSED(ptr_err);
  ------------------
  |  |   45|    570|#define UNUSED(x) ((void)(x))
  ------------------
 7241|    570|        H264_DEC_DEBUG_PRINT("error code = %d\n", *(ptr_err + 1));
  ------------------
  |  |   39|    570|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 7242|    570|        return IV_FAIL;
 7243|    570|    }
 7244|       |
 7245|   885k|    pu2_ptr_cmd = (UWORD32 *) pv_api_ip;
 7246|   885k|    pu2_ptr_cmd++;
 7247|       |
 7248|   885k|    command = *pu2_ptr_cmd;
 7249|   885k|    switch(command)
 7250|   885k|    {
 7251|  22.6k|        case IVD_CMD_CREATE:
  ------------------
  |  Branch (7251:9): [True: 22.6k, False: 863k]
  ------------------
 7252|  22.6k|            u4_api_ret = isvcd_create(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 7253|  22.6k|            break;
 7254|  22.5k|        case IVD_CMD_DELETE:
  ------------------
  |  Branch (7254:9): [True: 22.5k, False: 863k]
  ------------------
 7255|  22.5k|            u4_api_ret = isvcd_delete(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 7256|  22.5k|            break;
 7257|       |
 7258|   723k|        case IVD_CMD_VIDEO_DECODE:
  ------------------
  |  Branch (7258:9): [True: 723k, False: 162k]
  ------------------
 7259|   723k|            u4_api_ret = isvcd_video_decode(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 7260|   723k|            break;
 7261|       |
 7262|      0|        case IVD_CMD_GET_DISPLAY_FRAME:
  ------------------
  |  Branch (7262:9): [True: 0, False: 885k]
  ------------------
 7263|      0|            u4_api_ret = ih264d_get_display_frame(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 7264|       |
 7265|      0|            break;
 7266|       |
 7267|      0|        case IVD_CMD_SET_DISPLAY_FRAME:
  ------------------
  |  Branch (7267:9): [True: 0, False: 885k]
  ------------------
 7268|      0|            u4_api_ret = isvcd_set_display_frame(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 7269|       |
 7270|      0|            break;
 7271|       |
 7272|      0|        case IVD_CMD_REL_DISPLAY_FRAME:
  ------------------
  |  Branch (7272:9): [True: 0, False: 885k]
  ------------------
 7273|      0|            u4_api_ret = isvcd_rel_display_frame(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 7274|      0|            break;
 7275|       |
 7276|   117k|        case IVD_CMD_VIDEO_CTL:
  ------------------
  |  Branch (7276:9): [True: 117k, False: 768k]
  ------------------
 7277|   117k|            u4_api_ret = isvcd_ctl(dec_hdl, (void *) pv_api_ip, (void *) pv_api_op);
 7278|   117k|            break;
 7279|      0|        default:
  ------------------
  |  Branch (7279:9): [True: 0, False: 885k]
  ------------------
 7280|      0|            u4_api_ret = IV_FAIL;
 7281|      0|            break;
 7282|   885k|    }
 7283|       |
 7284|   885k|    return u4_api_ret;
 7285|   885k|}
isvcd_api.c:api_check_struct_sanity:
  188|   886k|{
  189|   886k|    IVD_API_COMMAND_TYPE_T e_cmd;
  190|   886k|    UWORD32 *pu4_api_ip;
  191|   886k|    UWORD32 *pu4_api_op;
  192|   886k|    UWORD32 i;
  193|       |
  194|   886k|    if(NULL == pv_api_op) return (IV_FAIL);
  ------------------
  |  Branch (194:8): [True: 0, False: 886k]
  ------------------
  195|       |
  196|   886k|    if(NULL == pv_api_ip) return (IV_FAIL);
  ------------------
  |  Branch (196:8): [True: 0, False: 886k]
  ------------------
  197|       |
  198|   886k|    pu4_api_ip = (UWORD32 *) pv_api_ip;
  199|   886k|    pu4_api_op = (UWORD32 *) pv_api_op;
  200|   886k|    e_cmd = *(pu4_api_ip + 1);
  201|       |
  202|       |    /* error checks on handle */
  203|   886k|    switch((WORD32) e_cmd)
  204|   886k|    {
  205|  22.6k|        case IVD_CMD_CREATE:
  ------------------
  |  Branch (205:9): [True: 22.6k, False: 863k]
  ------------------
  206|  22.6k|            break;
  207|       |
  208|      0|        case IVD_CMD_REL_DISPLAY_FRAME:
  ------------------
  |  Branch (208:9): [True: 0, False: 886k]
  ------------------
  209|      0|        case IVD_CMD_SET_DISPLAY_FRAME:
  ------------------
  |  Branch (209:9): [True: 0, False: 886k]
  ------------------
  210|      0|        case IVD_CMD_GET_DISPLAY_FRAME:
  ------------------
  |  Branch (210:9): [True: 0, False: 886k]
  ------------------
  211|   723k|        case IVD_CMD_VIDEO_DECODE:
  ------------------
  |  Branch (211:9): [True: 723k, False: 162k]
  ------------------
  212|   746k|        case IVD_CMD_DELETE:
  ------------------
  |  Branch (212:9): [True: 22.6k, False: 863k]
  ------------------
  213|   863k|        case IVD_CMD_VIDEO_CTL:
  ------------------
  |  Branch (213:9): [True: 117k, False: 769k]
  ------------------
  214|   863k|            if(ps_handle == NULL)
  ------------------
  |  Branch (214:16): [True: 566, False: 863k]
  ------------------
  215|    566|            {
  216|    566|                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
  217|    566|                *(pu4_api_op + 1) |= IVD_HANDLE_NULL;
  218|    566|                return IV_FAIL;
  219|    566|            }
  220|       |
  221|   863k|            if(ps_handle->u4_size != sizeof(iv_obj_t))
  ------------------
  |  Branch (221:16): [True: 0, False: 863k]
  ------------------
  222|      0|            {
  223|      0|                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
  224|      0|                *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT;
  225|      0|                return IV_FAIL;
  226|      0|            }
  227|       |
  228|   863k|            if(ps_handle->pv_fxns != isvcd_api_function)
  ------------------
  |  Branch (228:16): [True: 0, False: 863k]
  ------------------
  229|      0|            {
  230|      0|                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
  231|      0|                *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL;
  232|      0|                return IV_FAIL;
  233|      0|            }
  234|       |
  235|   863k|            if(ps_handle->pv_codec_handle == NULL)
  ------------------
  |  Branch (235:16): [True: 0, False: 863k]
  ------------------
  236|      0|            {
  237|      0|                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
  238|      0|                *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL;
  239|      0|                return IV_FAIL;
  240|      0|            }
  241|   863k|            break;
  242|   863k|        default:
  ------------------
  |  Branch (242:9): [True: 0, False: 886k]
  ------------------
  243|      0|            *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
  244|      0|            *(pu4_api_op + 1) |= IVD_INVALID_API_CMD;
  245|      0|            return IV_FAIL;
  246|   886k|    }
  247|       |
  248|   885k|    switch((WORD32) e_cmd)
  ------------------
  |  Branch (248:12): [True: 885k, False: 0]
  ------------------
  249|   885k|    {
  250|  22.6k|        case IVD_CMD_CREATE:
  ------------------
  |  Branch (250:9): [True: 22.6k, False: 863k]
  ------------------
  251|  22.6k|        {
  252|  22.6k|            isvcd_create_ip_t *ps_ip = (isvcd_create_ip_t *) pv_api_ip;
  253|  22.6k|            isvcd_create_op_t *ps_op = (isvcd_create_op_t *) pv_api_op;
  254|       |
  255|  22.6k|            ps_op->s_ivd_create_op_t.u4_error_code = 0;
  256|       |
  257|  22.6k|            if((ps_ip->s_ivd_create_ip_t.u4_size > sizeof(isvcd_create_ip_t)) ||
  ------------------
  |  Branch (257:16): [True: 0, False: 22.6k]
  ------------------
  258|  22.6k|               (ps_ip->s_ivd_create_ip_t.u4_size < sizeof(ivd_create_ip_t)))
  ------------------
  |  Branch (258:16): [True: 0, False: 22.6k]
  ------------------
  259|      0|            {
  260|      0|                ps_op->s_ivd_create_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  261|      0|                ps_op->s_ivd_create_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  262|      0|                H264_DEC_DEBUG_PRINT("\n");
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  263|      0|                return (IV_FAIL);
  264|      0|            }
  265|       |
  266|  22.6k|            if((ps_op->s_ivd_create_op_t.u4_size != sizeof(isvcd_create_op_t)) &&
  ------------------
  |  Branch (266:16): [True: 0, False: 22.6k]
  ------------------
  267|      0|               (ps_op->s_ivd_create_op_t.u4_size != sizeof(ivd_create_op_t)))
  ------------------
  |  Branch (267:16): [True: 0, False: 0]
  ------------------
  268|      0|            {
  269|      0|                ps_op->s_ivd_create_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  270|      0|                ps_op->s_ivd_create_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  271|      0|                H264_DEC_DEBUG_PRINT("\n");
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  272|      0|                return (IV_FAIL);
  273|      0|            }
  274|       |
  275|  22.6k|            if((ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_420P) &&
  ------------------
  |  Branch (275:16): [True: 12.8k, False: 9.72k]
  ------------------
  276|  12.8k|               (ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_422ILE) &&
  ------------------
  |  Branch (276:16): [True: 12.8k, False: 6]
  ------------------
  277|  12.8k|               (ps_ip->s_ivd_create_ip_t.e_output_format != IV_RGB_565) &&
  ------------------
  |  Branch (277:16): [True: 12.8k, False: 3]
  ------------------
  278|  12.8k|               (ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_420SP_UV) &&
  ------------------
  |  Branch (278:16): [True: 5.99k, False: 6.87k]
  ------------------
  279|  5.99k|               (ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_420SP_VU))
  ------------------
  |  Branch (279:16): [True: 4, False: 5.99k]
  ------------------
  280|      4|            {
  281|      4|                ps_op->s_ivd_create_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  282|      4|                ps_op->s_ivd_create_op_t.u4_error_code |= IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED;
  283|      4|                H264_DEC_DEBUG_PRINT("\n");
  ------------------
  |  |   39|      4|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  284|      4|                return (IV_FAIL);
  285|      4|            }
  286|  22.6k|        }
  287|  22.6k|        break;
  288|       |
  289|  22.6k|        case IVD_CMD_GET_DISPLAY_FRAME:
  ------------------
  |  Branch (289:9): [True: 0, False: 885k]
  ------------------
  290|      0|        {
  291|      0|            isvcd_get_display_frame_ip_t *ps_ip = (isvcd_get_display_frame_ip_t *) pv_api_ip;
  292|      0|            isvcd_get_display_frame_op_t *ps_op = (isvcd_get_display_frame_op_t *) pv_api_op;
  293|       |
  294|      0|            ps_op->s_ivd_get_display_frame_op_t.u4_error_code = 0;
  295|       |
  296|      0|            if((ps_ip->s_ivd_get_display_frame_ip_t.u4_size !=
  ------------------
  |  Branch (296:16): [True: 0, False: 0]
  ------------------
  297|      0|                sizeof(isvcd_get_display_frame_ip_t)) &&
  298|      0|               (ps_ip->s_ivd_get_display_frame_ip_t.u4_size != sizeof(ivd_get_display_frame_ip_t)))
  ------------------
  |  Branch (298:16): [True: 0, False: 0]
  ------------------
  299|      0|            {
  300|      0|                ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  301|      0|                ps_op->s_ivd_get_display_frame_op_t.u4_error_code |=
  302|      0|                    IVD_IP_API_STRUCT_SIZE_INCORRECT;
  303|      0|                return (IV_FAIL);
  304|      0|            }
  305|       |
  306|      0|            if((ps_op->s_ivd_get_display_frame_op_t.u4_size !=
  ------------------
  |  Branch (306:16): [True: 0, False: 0]
  ------------------
  307|      0|                sizeof(isvcd_get_display_frame_op_t)) &&
  308|      0|               (ps_op->s_ivd_get_display_frame_op_t.u4_size != sizeof(ivd_get_display_frame_op_t)))
  ------------------
  |  Branch (308:16): [True: 0, False: 0]
  ------------------
  309|      0|            {
  310|      0|                ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  311|      0|                ps_op->s_ivd_get_display_frame_op_t.u4_error_code |=
  312|      0|                    IVD_OP_API_STRUCT_SIZE_INCORRECT;
  313|      0|                return (IV_FAIL);
  314|      0|            }
  315|      0|        }
  316|      0|        break;
  317|       |
  318|      0|        case IVD_CMD_REL_DISPLAY_FRAME:
  ------------------
  |  Branch (318:9): [True: 0, False: 885k]
  ------------------
  319|      0|        {
  320|      0|            isvcd_rel_display_frame_ip_t *ps_ip = (isvcd_rel_display_frame_ip_t *) pv_api_ip;
  321|      0|            isvcd_rel_display_frame_op_t *ps_op = (isvcd_rel_display_frame_op_t *) pv_api_op;
  322|       |
  323|      0|            ps_op->s_ivd_rel_display_frame_op_t.u4_error_code = 0;
  324|       |
  325|      0|            if((ps_ip->s_ivd_rel_display_frame_ip_t.u4_size !=
  ------------------
  |  Branch (325:16): [True: 0, False: 0]
  ------------------
  326|      0|                sizeof(isvcd_rel_display_frame_ip_t)) &&
  327|      0|               (ps_ip->s_ivd_rel_display_frame_ip_t.u4_size != sizeof(ivd_rel_display_frame_ip_t)))
  ------------------
  |  Branch (327:16): [True: 0, False: 0]
  ------------------
  328|      0|            {
  329|      0|                ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  330|      0|                ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |=
  331|      0|                    IVD_IP_API_STRUCT_SIZE_INCORRECT;
  332|      0|                return (IV_FAIL);
  333|      0|            }
  334|       |
  335|      0|            if((ps_op->s_ivd_rel_display_frame_op_t.u4_size !=
  ------------------
  |  Branch (335:16): [True: 0, False: 0]
  ------------------
  336|      0|                sizeof(isvcd_rel_display_frame_op_t)) &&
  337|      0|               (ps_op->s_ivd_rel_display_frame_op_t.u4_size != sizeof(ivd_rel_display_frame_op_t)))
  ------------------
  |  Branch (337:16): [True: 0, False: 0]
  ------------------
  338|      0|            {
  339|      0|                ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  340|      0|                ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |=
  341|      0|                    IVD_OP_API_STRUCT_SIZE_INCORRECT;
  342|      0|                return (IV_FAIL);
  343|      0|            }
  344|      0|        }
  345|      0|        break;
  346|       |
  347|      0|        case IVD_CMD_SET_DISPLAY_FRAME:
  ------------------
  |  Branch (347:9): [True: 0, False: 885k]
  ------------------
  348|      0|        {
  349|      0|            isvcd_set_display_frame_ip_t *ps_ip = (isvcd_set_display_frame_ip_t *) pv_api_ip;
  350|      0|            isvcd_set_display_frame_op_t *ps_op = (isvcd_set_display_frame_op_t *) pv_api_op;
  351|      0|            UWORD32 j;
  352|       |
  353|      0|            ps_op->s_ivd_set_display_frame_op_t.u4_error_code = 0;
  354|       |
  355|      0|            if((ps_ip->s_ivd_set_display_frame_ip_t.u4_size !=
  ------------------
  |  Branch (355:16): [True: 0, False: 0]
  ------------------
  356|      0|                sizeof(isvcd_set_display_frame_ip_t)) &&
  357|      0|               (ps_ip->s_ivd_set_display_frame_ip_t.u4_size != sizeof(ivd_set_display_frame_ip_t)))
  ------------------
  |  Branch (357:16): [True: 0, False: 0]
  ------------------
  358|      0|            {
  359|      0|                ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  360|      0|                ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
  361|      0|                    IVD_IP_API_STRUCT_SIZE_INCORRECT;
  362|      0|                return (IV_FAIL);
  363|      0|            }
  364|       |
  365|      0|            if((ps_op->s_ivd_set_display_frame_op_t.u4_size !=
  ------------------
  |  Branch (365:16): [True: 0, False: 0]
  ------------------
  366|      0|                sizeof(isvcd_set_display_frame_op_t)) &&
  367|      0|               (ps_op->s_ivd_set_display_frame_op_t.u4_size != sizeof(ivd_set_display_frame_op_t)))
  ------------------
  |  Branch (367:16): [True: 0, False: 0]
  ------------------
  368|      0|            {
  369|      0|                ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  370|      0|                ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
  371|      0|                    IVD_OP_API_STRUCT_SIZE_INCORRECT;
  372|      0|                return (IV_FAIL);
  373|      0|            }
  374|       |
  375|      0|            if(ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs == 0)
  ------------------
  |  Branch (375:16): [True: 0, False: 0]
  ------------------
  376|      0|            {
  377|      0|                ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  378|      0|                ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS;
  379|      0|                return IV_FAIL;
  380|      0|            }
  381|       |
  382|      0|            for(j = 0; j < ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs; j++)
  ------------------
  |  Branch (382:24): [True: 0, False: 0]
  ------------------
  383|      0|            {
  384|      0|                if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs == 0)
  ------------------
  |  Branch (384:20): [True: 0, False: 0]
  ------------------
  385|      0|                {
  386|      0|                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  387|      0|                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS;
  388|      0|                    return IV_FAIL;
  389|      0|                }
  390|       |
  391|      0|                for(i = 0; i < ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs;
  ------------------
  |  Branch (391:28): [True: 0, False: 0]
  ------------------
  392|      0|                    i++)
  393|      0|                {
  394|      0|                    if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].pu1_bufs[i] == NULL)
  ------------------
  |  Branch (394:24): [True: 0, False: 0]
  ------------------
  395|      0|                    {
  396|      0|                        ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
  397|      0|                            1 << IVD_UNSUPPORTEDPARAM;
  398|      0|                        ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
  399|      0|                            IVD_DISP_FRM_OP_BUF_NULL;
  400|      0|                        return IV_FAIL;
  401|      0|                    }
  402|       |
  403|      0|                    if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j]
  ------------------
  |  Branch (403:24): [True: 0, False: 0]
  ------------------
  404|      0|                           .u4_min_out_buf_size[i] == 0)
  405|      0|                    {
  406|      0|                        ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
  407|      0|                            1 << IVD_UNSUPPORTEDPARAM;
  408|      0|                        ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
  409|      0|                            IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
  410|      0|                        return IV_FAIL;
  411|      0|                    }
  412|      0|                }
  413|      0|            }
  414|      0|        }
  415|      0|        break;
  416|       |
  417|   723k|        case IVD_CMD_VIDEO_DECODE:
  ------------------
  |  Branch (417:9): [True: 723k, False: 162k]
  ------------------
  418|   723k|        {
  419|   723k|            isvcd_video_decode_ip_t *ps_ip = (isvcd_video_decode_ip_t *) pv_api_ip;
  420|   723k|            isvcd_video_decode_op_t *ps_op = (isvcd_video_decode_op_t *) pv_api_op;
  421|       |
  422|   723k|            H264_DEC_DEBUG_PRINT("The input bytes is: %d",
  ------------------
  |  |   39|   723k|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  423|   723k|                                 ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes);
  424|   723k|            ps_op->s_ivd_video_decode_op_t.u4_error_code = 0;
  425|       |
  426|   723k|            if(ps_ip->s_ivd_video_decode_ip_t.u4_size != sizeof(isvcd_video_decode_ip_t) &&
  ------------------
  |  Branch (426:16): [True: 0, False: 723k]
  ------------------
  427|      0|               ps_ip->s_ivd_video_decode_ip_t.u4_size !=
  ------------------
  |  Branch (427:16): [True: 0, False: 0]
  ------------------
  428|      0|                   offsetof(ivd_video_decode_ip_t, s_out_buffer))
  429|      0|            {
  430|      0|                ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  431|      0|                ps_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  432|      0|                return (IV_FAIL);
  433|      0|            }
  434|       |
  435|   723k|            if(ps_op->s_ivd_video_decode_op_t.u4_size != sizeof(isvcd_video_decode_op_t) &&
  ------------------
  |  Branch (435:16): [True: 0, False: 723k]
  ------------------
  436|      0|               ps_op->s_ivd_video_decode_op_t.u4_size !=
  ------------------
  |  Branch (436:16): [True: 0, False: 0]
  ------------------
  437|      0|                   offsetof(ivd_video_decode_op_t, u4_output_present))
  438|      0|            {
  439|      0|                ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  440|      0|                ps_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  441|      0|                return (IV_FAIL);
  442|      0|            }
  443|   723k|            {
  444|   723k|                svc_dec_ctxt_t *ps_svcd_ctxt;
  445|   723k|                svc_dec_lyr_struct_t *ps_svc_lyr_dec;
  446|   723k|                dec_struct_t *ps_dec;
  447|   723k|                ps_svcd_ctxt = (svc_dec_ctxt_t *) (ps_handle->pv_codec_handle);
  448|   723k|                ps_svc_lyr_dec = &ps_svcd_ctxt->ps_svc_dec_lyr[0];
  449|   723k|                ps_dec = &ps_svc_lyr_dec->s_dec;
  450|   723k|                if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (450:20): [True: 0, False: 723k]
  ------------------
  451|      0|                {
  452|      0|                    if(!ps_ip->pu1_8x8_blk_qp_map && !ps_ip->pu1_8x8_blk_type_map)
  ------------------
  |  Branch (452:24): [True: 0, False: 0]
  |  Branch (452:54): [True: 0, False: 0]
  ------------------
  453|      0|                    {
  454|      0|                        ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  455|      0|                        ps_op->s_ivd_video_decode_op_t.u4_error_code |=
  456|      0|                            IH264D_FRAME_INFO_OP_BUF_NULL;
  457|      0|                        return IV_FAIL;
  458|      0|                    }
  459|      0|                }
  460|   723k|            }
  461|   723k|        }
  462|   723k|        break;
  463|       |
  464|   723k|        case IVD_CMD_DELETE:
  ------------------
  |  Branch (464:9): [True: 22.5k, False: 863k]
  ------------------
  465|  22.5k|        {
  466|  22.5k|            isvcd_delete_ip_t *ps_ip = (isvcd_delete_ip_t *) pv_api_ip;
  467|  22.5k|            isvcd_delete_op_t *ps_op = (isvcd_delete_op_t *) pv_api_op;
  468|       |
  469|  22.5k|            ps_op->s_ivd_delete_op_t.u4_error_code = 0;
  470|       |
  471|  22.5k|            if(ps_ip->s_ivd_delete_ip_t.u4_size != sizeof(isvcd_delete_ip_t))
  ------------------
  |  Branch (471:16): [True: 0, False: 22.5k]
  ------------------
  472|      0|            {
  473|      0|                ps_op->s_ivd_delete_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  474|      0|                ps_op->s_ivd_delete_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  475|      0|                return (IV_FAIL);
  476|      0|            }
  477|       |
  478|  22.5k|            if(ps_op->s_ivd_delete_op_t.u4_size != sizeof(isvcd_delete_op_t))
  ------------------
  |  Branch (478:16): [True: 0, False: 22.5k]
  ------------------
  479|      0|            {
  480|      0|                ps_op->s_ivd_delete_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  481|      0|                ps_op->s_ivd_delete_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  482|      0|                return (IV_FAIL);
  483|      0|            }
  484|  22.5k|        }
  485|  22.5k|        break;
  486|       |
  487|   117k|        case IVD_CMD_VIDEO_CTL:
  ------------------
  |  Branch (487:9): [True: 117k, False: 768k]
  ------------------
  488|   117k|        {
  489|   117k|            UWORD32 *pu4_ptr_cmd;
  490|   117k|            UWORD32 sub_command;
  491|       |
  492|   117k|            pu4_ptr_cmd = (UWORD32 *) pv_api_ip;
  493|   117k|            pu4_ptr_cmd += 2;
  494|   117k|            sub_command = *pu4_ptr_cmd;
  495|       |
  496|   117k|            switch(sub_command)
  497|   117k|            {
  498|  45.1k|                case IVD_CMD_CTL_SETPARAMS:
  ------------------
  |  Branch (498:17): [True: 45.1k, False: 71.9k]
  ------------------
  499|  45.1k|                {
  500|  45.1k|                    isvcd_ctl_set_config_ip_t *ps_ip;
  501|  45.1k|                    isvcd_ctl_set_config_op_t *ps_op;
  502|  45.1k|                    ps_ip = (isvcd_ctl_set_config_ip_t *) pv_api_ip;
  503|  45.1k|                    ps_op = (isvcd_ctl_set_config_op_t *) pv_api_op;
  504|       |
  505|  45.1k|                    if(ps_ip->s_ivd_ctl_set_config_ip_t.u4_size !=
  ------------------
  |  Branch (505:24): [True: 0, False: 45.1k]
  ------------------
  506|  45.1k|                       sizeof(isvcd_ctl_set_config_ip_t))
  507|      0|                    {
  508|      0|                        ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  509|      0|                        ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |=
  510|      0|                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
  511|      0|                        return IV_FAIL;
  512|      0|                    }
  513|  45.1k|                }
  514|  45.1k|                break;
  515|       |
  516|  45.1k|                case IVD_CMD_CTL_SETDEFAULT:
  ------------------
  |  Branch (516:17): [True: 0, False: 117k]
  ------------------
  517|      0|                {
  518|      0|                    isvcd_ctl_set_config_op_t *ps_op;
  519|      0|                    ps_op = (isvcd_ctl_set_config_op_t *) pv_api_op;
  520|      0|                    if(ps_op->s_ivd_ctl_set_config_op_t.u4_size !=
  ------------------
  |  Branch (520:24): [True: 0, False: 0]
  ------------------
  521|      0|                       sizeof(isvcd_ctl_set_config_op_t))
  522|      0|                    {
  523|      0|                        ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  524|      0|                        ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |=
  525|      0|                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
  526|      0|                        return IV_FAIL;
  527|      0|                    }
  528|      0|                }
  529|      0|                break;
  530|       |
  531|      0|                case IVD_CMD_CTL_GETPARAMS:
  ------------------
  |  Branch (531:17): [True: 0, False: 117k]
  ------------------
  532|      0|                {
  533|      0|                    isvcd_ctl_getstatus_ip_t *ps_ip;
  534|      0|                    isvcd_ctl_getstatus_op_t *ps_op;
  535|       |
  536|      0|                    ps_ip = (isvcd_ctl_getstatus_ip_t *) pv_api_ip;
  537|      0|                    ps_op = (isvcd_ctl_getstatus_op_t *) pv_api_op;
  538|      0|                    if(ps_ip->s_ivd_ctl_getstatus_ip_t.u4_size != sizeof(isvcd_ctl_getstatus_ip_t))
  ------------------
  |  Branch (538:24): [True: 0, False: 0]
  ------------------
  539|      0|                    {
  540|      0|                        ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  541|      0|                        ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |=
  542|      0|                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
  543|      0|                        return IV_FAIL;
  544|      0|                    }
  545|      0|                    if(ps_op->s_ivd_ctl_getstatus_op_t.u4_size != sizeof(isvcd_ctl_getstatus_op_t))
  ------------------
  |  Branch (545:24): [True: 0, False: 0]
  ------------------
  546|      0|                    {
  547|      0|                        ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  548|      0|                        ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |=
  549|      0|                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
  550|      0|                        return IV_FAIL;
  551|      0|                    }
  552|      0|                }
  553|      0|                break;
  554|       |
  555|      0|                case IVD_CMD_CTL_GETBUFINFO:
  ------------------
  |  Branch (555:17): [True: 0, False: 117k]
  ------------------
  556|      0|                {
  557|      0|                    isvcd_ctl_getbufinfo_ip_t *ps_ip;
  558|      0|                    isvcd_ctl_getbufinfo_op_t *ps_op;
  559|      0|                    ps_ip = (isvcd_ctl_getbufinfo_ip_t *) pv_api_ip;
  560|      0|                    ps_op = (isvcd_ctl_getbufinfo_op_t *) pv_api_op;
  561|       |
  562|      0|                    if(ps_ip->s_ivd_ctl_getbufinfo_ip_t.u4_size !=
  ------------------
  |  Branch (562:24): [True: 0, False: 0]
  ------------------
  563|      0|                       sizeof(isvcd_ctl_getbufinfo_ip_t))
  564|      0|                    {
  565|      0|                        ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  566|      0|                        ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |=
  567|      0|                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
  568|      0|                        return IV_FAIL;
  569|      0|                    }
  570|      0|                    if(ps_op->s_ivd_ctl_getbufinfo_op_t.u4_size !=
  ------------------
  |  Branch (570:24): [True: 0, False: 0]
  ------------------
  571|      0|                       sizeof(isvcd_ctl_getbufinfo_op_t))
  572|      0|                    {
  573|      0|                        ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  574|      0|                        ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |=
  575|      0|                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
  576|      0|                        return IV_FAIL;
  577|      0|                    }
  578|      0|                }
  579|      0|                break;
  580|       |
  581|      0|                case IVD_CMD_CTL_GETVERSION:
  ------------------
  |  Branch (581:17): [True: 0, False: 117k]
  ------------------
  582|      0|                {
  583|      0|                    isvcd_ctl_getversioninfo_ip_t *ps_ip;
  584|      0|                    isvcd_ctl_getversioninfo_op_t *ps_op;
  585|      0|                    ps_ip = (isvcd_ctl_getversioninfo_ip_t *) pv_api_ip;
  586|      0|                    ps_op = (isvcd_ctl_getversioninfo_op_t *) pv_api_op;
  587|      0|                    if(ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_size !=
  ------------------
  |  Branch (587:24): [True: 0, False: 0]
  ------------------
  588|      0|                       sizeof(isvcd_ctl_getversioninfo_ip_t))
  589|      0|                    {
  590|      0|                        ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |=
  591|      0|                            1 << IVD_UNSUPPORTEDPARAM;
  592|      0|                        ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |=
  593|      0|                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
  594|      0|                        return IV_FAIL;
  595|      0|                    }
  596|      0|                    if(ps_op->s_ivd_ctl_getversioninfo_op_t.u4_size !=
  ------------------
  |  Branch (596:24): [True: 0, False: 0]
  ------------------
  597|      0|                       sizeof(isvcd_ctl_getversioninfo_op_t))
  598|      0|                    {
  599|      0|                        ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |=
  600|      0|                            1 << IVD_UNSUPPORTEDPARAM;
  601|      0|                        ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |=
  602|      0|                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
  603|      0|                        return IV_FAIL;
  604|      0|                    }
  605|      0|                }
  606|      0|                break;
  607|       |
  608|      0|                case IVD_CMD_CTL_FLUSH:
  ------------------
  |  Branch (608:17): [True: 0, False: 117k]
  ------------------
  609|      0|                {
  610|      0|                    isvcd_ctl_flush_ip_t *ps_ip;
  611|      0|                    isvcd_ctl_flush_op_t *ps_op;
  612|      0|                    ps_ip = (isvcd_ctl_flush_ip_t *) pv_api_ip;
  613|      0|                    ps_op = (isvcd_ctl_flush_op_t *) pv_api_op;
  614|      0|                    if(ps_ip->s_ivd_ctl_flush_ip_t.u4_size != sizeof(isvcd_ctl_flush_ip_t))
  ------------------
  |  Branch (614:24): [True: 0, False: 0]
  ------------------
  615|      0|                    {
  616|      0|                        ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  617|      0|                        ps_op->s_ivd_ctl_flush_op_t.u4_error_code |=
  618|      0|                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
  619|      0|                        return IV_FAIL;
  620|      0|                    }
  621|      0|                    if(ps_op->s_ivd_ctl_flush_op_t.u4_size != sizeof(isvcd_ctl_flush_op_t))
  ------------------
  |  Branch (621:24): [True: 0, False: 0]
  ------------------
  622|      0|                    {
  623|      0|                        ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  624|      0|                        ps_op->s_ivd_ctl_flush_op_t.u4_error_code |=
  625|      0|                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
  626|      0|                        return IV_FAIL;
  627|      0|                    }
  628|      0|                }
  629|      0|                break;
  630|       |
  631|  4.14k|                case IVD_CMD_CTL_RESET:
  ------------------
  |  Branch (631:17): [True: 4.14k, False: 112k]
  ------------------
  632|  4.14k|                {
  633|  4.14k|                    isvcd_ctl_reset_ip_t *ps_ip;
  634|  4.14k|                    isvcd_ctl_reset_op_t *ps_op;
  635|  4.14k|                    ps_ip = (isvcd_ctl_reset_ip_t *) pv_api_ip;
  636|  4.14k|                    ps_op = (isvcd_ctl_reset_op_t *) pv_api_op;
  637|  4.14k|                    if(ps_ip->s_ivd_ctl_reset_ip_t.u4_size != sizeof(isvcd_ctl_reset_ip_t))
  ------------------
  |  Branch (637:24): [True: 0, False: 4.14k]
  ------------------
  638|      0|                    {
  639|      0|                        ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  640|      0|                        ps_op->s_ivd_ctl_reset_op_t.u4_error_code |=
  641|      0|                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
  642|      0|                        return IV_FAIL;
  643|      0|                    }
  644|  4.14k|                    if(ps_op->s_ivd_ctl_reset_op_t.u4_size != sizeof(isvcd_ctl_reset_op_t))
  ------------------
  |  Branch (644:24): [True: 0, False: 4.14k]
  ------------------
  645|      0|                    {
  646|      0|                        ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  647|      0|                        ps_op->s_ivd_ctl_reset_op_t.u4_error_code |=
  648|      0|                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
  649|      0|                        return IV_FAIL;
  650|      0|                    }
  651|  4.14k|                }
  652|  4.14k|                break;
  653|       |
  654|  4.14k|                case IH264D_CMD_CTL_DEGRADE:
  ------------------
  |  Branch (654:17): [True: 0, False: 117k]
  ------------------
  655|      0|                {
  656|      0|                    isvcd_ctl_degrade_ip_t *ps_ip;
  657|      0|                    isvcd_ctl_degrade_op_t *ps_op;
  658|       |
  659|      0|                    ps_ip = (isvcd_ctl_degrade_ip_t *) pv_api_ip;
  660|      0|                    ps_op = (isvcd_ctl_degrade_op_t *) pv_api_op;
  661|       |
  662|      0|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_degrade_ip_t))
  ------------------
  |  Branch (662:24): [True: 0, False: 0]
  ------------------
  663|      0|                    {
  664|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  665|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  666|      0|                        return IV_FAIL;
  667|      0|                    }
  668|       |
  669|      0|                    if(ps_op->u4_size != sizeof(isvcd_ctl_degrade_op_t))
  ------------------
  |  Branch (669:24): [True: 0, False: 0]
  ------------------
  670|      0|                    {
  671|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  672|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  673|      0|                        return IV_FAIL;
  674|      0|                    }
  675|       |
  676|      0|                    if((ps_ip->i4_degrade_pics < 0) || (ps_ip->i4_degrade_pics > 4) ||
  ------------------
  |  Branch (676:24): [True: 0, False: 0]
  |  Branch (676:56): [True: 0, False: 0]
  ------------------
  677|      0|                       (ps_ip->i4_nondegrade_interval < 0) || (ps_ip->i4_degrade_type < 0) ||
  ------------------
  |  Branch (677:24): [True: 0, False: 0]
  |  Branch (677:63): [True: 0, False: 0]
  ------------------
  678|      0|                       (ps_ip->i4_degrade_type > 15))
  ------------------
  |  Branch (678:24): [True: 0, False: 0]
  ------------------
  679|      0|                    {
  680|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  681|      0|                        return IV_FAIL;
  682|      0|                    }
  683|       |
  684|      0|                    break;
  685|      0|                }
  686|       |
  687|      0|                case IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS:
  ------------------
  |  Branch (687:17): [True: 0, False: 117k]
  ------------------
  688|      0|                {
  689|      0|                    isvcd_ctl_get_frame_dimensions_ip_t *ps_ip;
  690|      0|                    isvcd_ctl_get_frame_dimensions_op_t *ps_op;
  691|       |
  692|      0|                    ps_ip = (isvcd_ctl_get_frame_dimensions_ip_t *) pv_api_ip;
  693|      0|                    ps_op = (isvcd_ctl_get_frame_dimensions_op_t *) pv_api_op;
  694|       |
  695|      0|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_get_frame_dimensions_ip_t))
  ------------------
  |  Branch (695:24): [True: 0, False: 0]
  ------------------
  696|      0|                    {
  697|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  698|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  699|      0|                        return IV_FAIL;
  700|      0|                    }
  701|       |
  702|      0|                    if(ps_op->u4_size != sizeof(isvcd_ctl_get_frame_dimensions_op_t))
  ------------------
  |  Branch (702:24): [True: 0, False: 0]
  ------------------
  703|      0|                    {
  704|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  705|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  706|      0|                        return IV_FAIL;
  707|      0|                    }
  708|       |
  709|      0|                    break;
  710|      0|                }
  711|      0|                case IH264D_CMD_CTL_GET_VUI_PARAMS:
  ------------------
  |  Branch (711:17): [True: 0, False: 117k]
  ------------------
  712|      0|                {
  713|      0|                    isvcd_ctl_get_vui_params_ip_t *ps_ip;
  714|      0|                    isvcd_ctl_get_vui_params_op_t *ps_op;
  715|       |
  716|      0|                    ps_ip = (isvcd_ctl_get_vui_params_ip_t *) pv_api_ip;
  717|      0|                    ps_op = (isvcd_ctl_get_vui_params_op_t *) pv_api_op;
  718|       |
  719|      0|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_get_vui_params_ip_t))
  ------------------
  |  Branch (719:24): [True: 0, False: 0]
  ------------------
  720|      0|                    {
  721|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  722|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  723|      0|                        return IV_FAIL;
  724|      0|                    }
  725|       |
  726|      0|                    if(ps_op->u4_size != sizeof(isvcd_ctl_get_vui_params_op_t))
  ------------------
  |  Branch (726:24): [True: 0, False: 0]
  ------------------
  727|      0|                    {
  728|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  729|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  730|      0|                        return IV_FAIL;
  731|      0|                    }
  732|       |
  733|      0|                    break;
  734|      0|                }
  735|      0|                case IH264D_CMD_CTL_GET_SEI_MDCV_PARAMS:
  ------------------
  |  Branch (735:17): [True: 0, False: 117k]
  ------------------
  736|      0|                {
  737|      0|                    isvcd_ctl_get_sei_mdcv_params_ip_t *ps_ip;
  738|      0|                    isvcd_ctl_get_sei_mdcv_params_op_t *ps_op;
  739|       |
  740|      0|                    ps_ip = (isvcd_ctl_get_sei_mdcv_params_ip_t *) pv_api_ip;
  741|      0|                    ps_op = (isvcd_ctl_get_sei_mdcv_params_op_t *) pv_api_op;
  742|       |
  743|      0|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_get_sei_mdcv_params_ip_t))
  ------------------
  |  Branch (743:24): [True: 0, False: 0]
  ------------------
  744|      0|                    {
  745|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  746|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  747|      0|                        return IV_FAIL;
  748|      0|                    }
  749|       |
  750|      0|                    if(ps_op->u4_size != sizeof(isvcd_ctl_get_sei_mdcv_params_op_t))
  ------------------
  |  Branch (750:24): [True: 0, False: 0]
  ------------------
  751|      0|                    {
  752|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  753|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  754|      0|                        return IV_FAIL;
  755|      0|                    }
  756|       |
  757|      0|                    break;
  758|      0|                }
  759|       |
  760|      0|                case IH264D_CMD_CTL_GET_SEI_CLL_PARAMS:
  ------------------
  |  Branch (760:17): [True: 0, False: 117k]
  ------------------
  761|      0|                {
  762|      0|                    isvcd_ctl_get_sei_cll_params_ip_t *ps_ip;
  763|      0|                    isvcd_ctl_get_sei_cll_params_op_t *ps_op;
  764|       |
  765|      0|                    ps_ip = (isvcd_ctl_get_sei_cll_params_ip_t *) pv_api_ip;
  766|      0|                    ps_op = (isvcd_ctl_get_sei_cll_params_op_t *) pv_api_op;
  767|       |
  768|      0|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_get_sei_cll_params_ip_t))
  ------------------
  |  Branch (768:24): [True: 0, False: 0]
  ------------------
  769|      0|                    {
  770|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  771|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  772|      0|                        return IV_FAIL;
  773|      0|                    }
  774|       |
  775|      0|                    if(ps_op->u4_size != sizeof(isvcd_ctl_get_sei_cll_params_op_t))
  ------------------
  |  Branch (775:24): [True: 0, False: 0]
  ------------------
  776|      0|                    {
  777|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  778|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  779|      0|                        return IV_FAIL;
  780|      0|                    }
  781|       |
  782|      0|                    break;
  783|      0|                }
  784|       |
  785|      0|                case IH264D_CMD_CTL_GET_SEI_AVE_PARAMS:
  ------------------
  |  Branch (785:17): [True: 0, False: 117k]
  ------------------
  786|      0|                {
  787|      0|                    isvcd_ctl_get_sei_ave_params_ip_t *ps_ip;
  788|      0|                    isvcd_ctl_get_sei_ave_params_op_t *ps_op;
  789|       |
  790|      0|                    ps_ip = (isvcd_ctl_get_sei_ave_params_ip_t *) pv_api_ip;
  791|      0|                    ps_op = (isvcd_ctl_get_sei_ave_params_op_t *) pv_api_op;
  792|       |
  793|      0|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_get_sei_ave_params_ip_t))
  ------------------
  |  Branch (793:24): [True: 0, False: 0]
  ------------------
  794|      0|                    {
  795|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  796|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  797|      0|                        return IV_FAIL;
  798|      0|                    }
  799|       |
  800|      0|                    if(ps_op->u4_size != sizeof(isvcd_ctl_get_sei_ave_params_op_t))
  ------------------
  |  Branch (800:24): [True: 0, False: 0]
  ------------------
  801|      0|                    {
  802|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  803|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  804|      0|                        return IV_FAIL;
  805|      0|                    }
  806|       |
  807|      0|                    break;
  808|      0|                }
  809|       |
  810|      0|                case IH264D_CMD_CTL_GET_SEI_CCV_PARAMS:
  ------------------
  |  Branch (810:17): [True: 0, False: 117k]
  ------------------
  811|      0|                {
  812|      0|                    isvcd_ctl_get_sei_ccv_params_ip_t *ps_ip;
  813|      0|                    isvcd_ctl_get_sei_ccv_params_op_t *ps_op;
  814|       |
  815|      0|                    ps_ip = (isvcd_ctl_get_sei_ccv_params_ip_t *) pv_api_ip;
  816|      0|                    ps_op = (isvcd_ctl_get_sei_ccv_params_op_t *) pv_api_op;
  817|       |
  818|      0|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_get_sei_ccv_params_ip_t))
  ------------------
  |  Branch (818:24): [True: 0, False: 0]
  ------------------
  819|      0|                    {
  820|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  821|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  822|      0|                        return IV_FAIL;
  823|      0|                    }
  824|       |
  825|      0|                    if(ps_op->u4_size != sizeof(isvcd_ctl_get_sei_ccv_params_op_t))
  ------------------
  |  Branch (825:24): [True: 0, False: 0]
  ------------------
  826|      0|                    {
  827|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  828|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  829|      0|                        return IV_FAIL;
  830|      0|                    }
  831|       |
  832|      0|                    break;
  833|      0|                }
  834|       |
  835|  22.5k|                case IH264D_CMD_CTL_SET_NUM_CORES:
  ------------------
  |  Branch (835:17): [True: 22.5k, False: 94.5k]
  ------------------
  836|  22.5k|                {
  837|  22.5k|                    isvcd_ctl_set_num_cores_ip_t *ps_ip;
  838|  22.5k|                    isvcd_ctl_set_num_cores_op_t *ps_op;
  839|       |
  840|  22.5k|                    ps_ip = (isvcd_ctl_set_num_cores_ip_t *) pv_api_ip;
  841|  22.5k|                    ps_op = (isvcd_ctl_set_num_cores_op_t *) pv_api_op;
  842|       |
  843|  22.5k|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_set_num_cores_ip_t))
  ------------------
  |  Branch (843:24): [True: 0, False: 22.5k]
  ------------------
  844|      0|                    {
  845|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  846|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  847|      0|                        return IV_FAIL;
  848|      0|                    }
  849|       |
  850|  22.5k|                    if(ps_op->u4_size != sizeof(isvcd_ctl_set_num_cores_op_t))
  ------------------
  |  Branch (850:24): [True: 0, False: 22.5k]
  ------------------
  851|      0|                    {
  852|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  853|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  854|      0|                        return IV_FAIL;
  855|      0|                    }
  856|       |
  857|  22.5k|                    if((ps_ip->u4_num_cores != 1) && (ps_ip->u4_num_cores != 2) &&
  ------------------
  |  Branch (857:24): [True: 11.3k, False: 11.2k]
  |  Branch (857:54): [True: 7.53k, False: 3.77k]
  ------------------
  858|  7.53k|                       (ps_ip->u4_num_cores != 3) && (ps_ip->u4_num_cores != 4))
  ------------------
  |  Branch (858:24): [True: 5.77k, False: 1.75k]
  |  Branch (858:54): [True: 0, False: 5.77k]
  ------------------
  859|      0|                    {
  860|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  861|      0|                        return IV_FAIL;
  862|      0|                    }
  863|  22.5k|                    break;
  864|  22.5k|                }
  865|  22.5k|                case IH264D_CMD_CTL_SET_PROCESSOR:
  ------------------
  |  Branch (865:17): [True: 22.5k, False: 94.5k]
  ------------------
  866|  22.5k|                {
  867|  22.5k|                    isvcd_ctl_set_processor_ip_t *ps_ip;
  868|  22.5k|                    isvcd_ctl_set_processor_op_t *ps_op;
  869|       |
  870|  22.5k|                    ps_ip = (isvcd_ctl_set_processor_ip_t *) pv_api_ip;
  871|  22.5k|                    ps_op = (isvcd_ctl_set_processor_op_t *) pv_api_op;
  872|       |
  873|  22.5k|                    if(ps_ip->u4_size != sizeof(isvcd_ctl_set_processor_ip_t))
  ------------------
  |  Branch (873:24): [True: 0, False: 22.5k]
  ------------------
  874|      0|                    {
  875|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  876|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  877|      0|                        return IV_FAIL;
  878|      0|                    }
  879|       |
  880|  22.5k|                    if(ps_op->u4_size != sizeof(isvcd_ctl_set_processor_op_t))
  ------------------
  |  Branch (880:24): [True: 0, False: 22.5k]
  ------------------
  881|      0|                    {
  882|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  883|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  884|      0|                        return IV_FAIL;
  885|      0|                    }
  886|       |
  887|  22.5k|                    break;
  888|  22.5k|                }
  889|       |
  890|  22.5k|                case ISVCD_CMD_CTL_SET_TGT_LAYER:
  ------------------
  |  Branch (890:17): [True: 22.5k, False: 94.5k]
  ------------------
  891|  22.5k|                {
  892|  22.5k|                    isvcd_set_target_layer_ip_t *ps_ip;
  893|  22.5k|                    isvcd_set_target_layer_op_t *ps_op;
  894|       |
  895|  22.5k|                    ps_ip = (isvcd_set_target_layer_ip_t *) pv_api_ip;
  896|  22.5k|                    ps_op = (isvcd_set_target_layer_op_t *) pv_api_op;
  897|       |
  898|  22.5k|                    if(ps_ip->u4_size != sizeof(isvcd_set_target_layer_ip_t))
  ------------------
  |  Branch (898:24): [True: 0, False: 22.5k]
  ------------------
  899|      0|                    {
  900|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  901|      0|                        ps_op->u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
  902|      0|                        return IV_FAIL;
  903|      0|                    }
  904|       |
  905|  22.5k|                    if(ps_ip->u1_tgt_dep_id > MAX_DEPENDENCY_ID)
  ------------------
  |  |  103|  22.5k|#define MAX_DEPENDENCY_ID 4
  ------------------
  |  Branch (905:24): [True: 0, False: 22.5k]
  ------------------
  906|      0|                    {
  907|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  908|      0|                        return IV_FAIL;
  909|      0|                    }
  910|       |
  911|  22.5k|                    if(ps_ip->u1_tgt_temp_id > MAX_TEMPORAL_ID)
  ------------------
  |  |  104|  22.5k|#define MAX_TEMPORAL_ID 7
  ------------------
  |  Branch (911:24): [True: 0, False: 22.5k]
  ------------------
  912|      0|                    {
  913|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  914|      0|                        return IV_FAIL;
  915|      0|                    }
  916|       |
  917|  22.5k|                    if(ps_ip->u1_tgt_quality_id > MAX_QUALITY_ID)
  ------------------
  |  |  102|  22.5k|#define MAX_QUALITY_ID 0
  ------------------
  |  Branch (917:24): [True: 0, False: 22.5k]
  ------------------
  918|      0|                    {
  919|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  920|      0|                        return IV_FAIL;
  921|      0|                    }
  922|       |
  923|  22.5k|                    if(ps_ip->u1_tgt_priority_id > MAX_PRIORITY_ID)
  ------------------
  |  |  105|  22.5k|#define MAX_PRIORITY_ID 63
  ------------------
  |  Branch (923:24): [True: 0, False: 22.5k]
  ------------------
  924|      0|                    {
  925|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  926|      0|                        return IV_FAIL;
  927|      0|                    }
  928|       |
  929|  22.5k|                    if(ps_op->u4_size != sizeof(isvcd_set_target_layer_op_t))
  ------------------
  |  Branch (929:24): [True: 0, False: 22.5k]
  ------------------
  930|      0|                    {
  931|      0|                        ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
  932|      0|                        ps_op->u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
  933|      0|                        return IV_FAIL;
  934|      0|                    }
  935|       |
  936|  22.5k|                    break;
  937|  22.5k|                }
  938|       |
  939|  22.5k|                default:
  ------------------
  |  Branch (939:17): [True: 0, False: 117k]
  ------------------
  940|      0|                    *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
  941|      0|                    *(pu4_api_op + 1) |= IVD_UNSUPPORTED_API_CMD;
  942|      0|                    return IV_FAIL;
  943|      0|                    break;
  944|   117k|            }
  945|   117k|        }
  946|   117k|        break;
  947|   885k|    }
  948|       |
  949|   885k|    return IV_SUCCESS;
  950|   885k|}

isvcd_init_cabac_contexts:
   77|  6.23k|{
   78|  6.23k|    bin_ctxt_model_t *p_cabac_ctxt_table_t = ps_dec->p_cabac_ctxt_table_t;
   79|  6.23k|    UWORD8 u1_qp_y = ps_dec->ps_cur_slice->u1_slice_qp;
   80|  6.23k|    UWORD8 u1_cabac_init_Idc = 0;
   81|       |
   82|  6.23k|    if(I_SLICE != u1_slice_type)
  ------------------
  |  |  370|  6.23k|#define I_SLICE  2
  ------------------
  |  Branch (82:8): [True: 5.06k, False: 1.16k]
  ------------------
   83|  5.06k|    {
   84|  5.06k|        u1_cabac_init_Idc = ps_dec->ps_cur_slice->u1_cabac_init_idc;
   85|  5.06k|    }
   86|       |
   87|  6.23k|    {
   88|       |        /* MAKING ps_dec->p_ctxt_inc_mb_map a scratch buffer */
   89|       |        /* 0th entry of CtxtIncMbMap will be always be containing default values
   90|       |         for CABAC context representing MB not available */
   91|  6.23k|        ctxt_inc_mb_info_t *p_DefCtxt = ps_dec->p_ctxt_inc_mb_map - 1;
   92|  6.23k|        UWORD8 *pu1_temp;
   93|  6.23k|        WORD8 i;
   94|  6.23k|        p_DefCtxt->u1_mb_type = CAB_SKIP;
  ------------------
  |  |  402|  6.23k|#define CAB_SKIP          0x10 /* 0001 0000 */
  ------------------
   95|  6.23k|        p_DefCtxt->u1_cbp = 0x0f;
   96|  6.23k|        p_DefCtxt->u1_intra_chroma_pred_mode = 0;
   97|  6.23k|        p_DefCtxt->u1_yuv_dc_csbp = 0x7;
   98|  6.23k|        p_DefCtxt->u1_transform8x8_ctxt = 0;
   99|       |
  100|  6.23k|        pu1_temp = (UWORD8 *) p_DefCtxt->i1_ref_idx;
  101|  31.1k|        for(i = 0; i < 4; i++, pu1_temp++) (*pu1_temp) = 0;
  ------------------
  |  Branch (101:20): [True: 24.9k, False: 6.23k]
  ------------------
  102|  6.23k|        pu1_temp = (UWORD8 *) p_DefCtxt->u1_mv;
  103|   105k|        for(i = 0; i < 16; i++, pu1_temp++) (*pu1_temp) = 0;
  ------------------
  |  Branch (103:20): [True: 99.7k, False: 6.23k]
  ------------------
  104|  6.23k|        ps_dec->ps_def_ctxt_mb_info = p_DefCtxt;
  105|  6.23k|    }
  106|       |
  107|  6.23k|    if(u1_slice_type == I_SLICE)
  ------------------
  |  |  370|  6.23k|#define I_SLICE  2
  ------------------
  |  Branch (107:8): [True: 1.16k, False: 5.06k]
  ------------------
  108|  1.16k|    {
  109|  1.16k|        u1_cabac_init_Idc = 3;
  110|  1.16k|        ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_I_SLICE;
  111|  1.16k|    }
  112|  5.06k|    else if(u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  5.06k|#define P_SLICE  0
  ------------------
  |  Branch (112:13): [True: 2.64k, False: 2.42k]
  ------------------
  113|  2.64k|    {
  114|  2.64k|        ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_P_SLICE;
  115|  2.64k|        ps_dec->p_mb_skip_flag_t = p_cabac_ctxt_table_t + MB_SKIP_FLAG_P_SLICE;
  116|  2.64k|        ps_dec->p_sub_mb_type_t = p_cabac_ctxt_table_t + SUB_MB_TYPE_P_SLICE;
  117|  2.64k|    }
  118|  2.42k|    else if(u1_slice_type == B_SLICE)
  ------------------
  |  |  369|  2.42k|#define B_SLICE  1
  ------------------
  |  Branch (118:13): [True: 2.42k, False: 0]
  ------------------
  119|  2.42k|    {
  120|  2.42k|        ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_B_SLICE;
  121|  2.42k|        ps_dec->p_mb_skip_flag_t = p_cabac_ctxt_table_t + MB_SKIP_FLAG_B_SLICE;
  122|  2.42k|        ps_dec->p_sub_mb_type_t = p_cabac_ctxt_table_t + SUB_MB_TYPE_B_SLICE;
  123|  2.42k|    }
  124|  6.23k|    {
  125|  6.23k|        bin_ctxt_model_t *p_cabac_ctxt_table_t_tmp = p_cabac_ctxt_table_t;
  126|  6.23k|        if(ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (126:12): [True: 0, False: 6.23k]
  ------------------
  127|      0|        {
  128|      0|            p_cabac_ctxt_table_t_tmp += SIGNIFICANT_COEFF_FLAG_FLD;
  129|      0|        }
  130|  6.23k|        else
  131|  6.23k|        {
  132|  6.23k|            p_cabac_ctxt_table_t_tmp += SIGNIFICANT_COEFF_FLAG_FRAME;
  133|  6.23k|        }
  134|  6.23k|        {
  135|  6.23k|            bin_ctxt_model_t **p_significant_coeff_flag_t = ps_dec->p_significant_coeff_flag_t;
  136|  6.23k|            p_significant_coeff_flag_t[0] = p_cabac_ctxt_table_t_tmp + SIG_COEFF_CTXT_CAT_0_OFFSET;
  137|  6.23k|            p_significant_coeff_flag_t[1] = p_cabac_ctxt_table_t_tmp + SIG_COEFF_CTXT_CAT_1_OFFSET;
  138|  6.23k|            p_significant_coeff_flag_t[2] = p_cabac_ctxt_table_t_tmp + SIG_COEFF_CTXT_CAT_2_OFFSET;
  139|  6.23k|            p_significant_coeff_flag_t[3] = p_cabac_ctxt_table_t_tmp + SIG_COEFF_CTXT_CAT_3_OFFSET;
  140|  6.23k|            p_significant_coeff_flag_t[4] = p_cabac_ctxt_table_t_tmp + SIG_COEFF_CTXT_CAT_4_OFFSET;
  141|       |
  142|  6.23k|            p_significant_coeff_flag_t[5] = p_cabac_ctxt_table_t_tmp + SIG_COEFF_CTXT_CAT_5_OFFSET;
  143|  6.23k|        }
  144|  6.23k|    }
  145|       |
  146|  6.23k|    memcpy(p_cabac_ctxt_table_t, gau1_isvcd_cabac_ctxt_init_table[u1_cabac_init_Idc][u1_qp_y],
  147|  6.23k|           NUM_CABAC_CTXTS_SVC * sizeof(bin_ctxt_model_t));
  ------------------
  |  |   46|  6.23k|#define NUM_CABAC_CTXTS_SVC 467
  ------------------
  148|  6.23k|}

isvcd_deblk_extract_bit_flags:
   65|  57.8k|{
   66|  57.8k|    WORD32 i4_i;
   67|  57.8k|    WORD32 i4_bit_mask;
   68|  57.8k|    UWORD16 u2_result = 0;
   69|       |
   70|  57.8k|    i4_bit_mask = i4_initial_bit_mask;
   71|       |
   72|   289k|    for(i4_i = 0; i4_i < NUM_SUB_MB_PARTS; i4_i++)
  ------------------
  |  |   60|   289k|#define NUM_SUB_MB_PARTS 4
  ------------------
  |  Branch (72:19): [True: 231k, False: 57.8k]
  ------------------
   73|   231k|    {
   74|   231k|        WORD32 i4_bit;
   75|       |        /* extract the bits of the last column 4x4 blocks */
   76|   231k|        if(0 == (i4_bit_mask & u2_bit_field))
  ------------------
  |  Branch (76:12): [True: 223k, False: 7.89k]
  ------------------
   77|   223k|        {
   78|   223k|            i4_bit = 0;
   79|   223k|        }
   80|  7.89k|        else
   81|  7.89k|        {
   82|  7.89k|            i4_bit = 1;
   83|  7.89k|        }
   84|       |        /* store the result */
   85|   231k|        u2_result |= i4_bit << i4_i;
   86|   231k|        i4_bit_mask <<= 4;
   87|       |
   88|   231k|    } /* end of loop over num sub Mb parts */
   89|  57.8k|    return (u2_result);
   90|  57.8k|}
isvcd_fill_bs_ibl:
   96|  1.18M|{
   97|       |    /*! Flow of the module is as follows                                  */
   98|       |    /*! 1. checks if MB edge is falling on IBL boundary                   */
   99|       |    /*! 2. if only Mb edge then it fills the BS based on INTRA or INTER
  100|       |           stauts                                                         */
  101|       |    /*! 3. if the current MB is IBL and neighbours are also neighbours
  102|       |           then it uses the current layer t_coeff flag to decide the
  103|       |           BS of a particular edge                                        */
  104|       |    /*!4. fills the BS for all the edges in curretn MB if IBL             */
  105|       |
  106|  1.18M|    UWORD16 u2_top_horz_nnz;
  107|  1.18M|    UWORD8 u1_top_mb_ibl, u1_left_mb_ibl;
  108|  1.18M|    UWORD32 i4_i, i4_edge;
  109|  1.18M|    UWORD8 u1_bs;
  110|  1.18M|    UWORD8 u1_cnd;
  111|  1.18M|    UWORD8 u1_top_intra;
  112|  1.18M|    UWORD8 u1_left_intra;
  113|  1.18M|    UWORD8 u1_p_nnz, u1_q_nnz;
  114|  1.18M|    UWORD8 u1_curr_mb_ibl;
  115|  1.18M|    UWORD32 *pu4_bs_table;
  116|  1.18M|    UWORD16 u2_curr_nnz;
  117|  1.18M|    UWORD8 u1_left_mb_nnz = 0, u1_left_nnz;
  118|  1.18M|    WORD32 i4_horz_start = 0;
  119|  1.18M|    WORD32 i4_vertical_start = 0;
  120|       |
  121|  1.18M|    pu4_bs_table = &(ps_deblk_mb->u4_bs_table[0]);
  122|       |
  123|  1.18M|    u1_top_mb_ibl = u1_top_mb_type & D_INTRA_IBL;
  ------------------
  |  |   72|  1.18M|#define D_INTRA_IBL 16
  ------------------
  124|  1.18M|    u1_left_mb_ibl = u1_left_mb_type & D_INTRA_IBL;
  ------------------
  |  |   72|  1.18M|#define D_INTRA_IBL 16
  ------------------
  125|       |
  126|  1.18M|    u1_curr_mb_ibl = ps_deblk_mb->u1_mb_type & D_INTRA_IBL;
  ------------------
  |  |   72|  1.18M|#define D_INTRA_IBL 16
  ------------------
  127|       |
  128|  1.18M|    u1_top_intra = u1_top_mb_type & D_INTRA_MB;
  ------------------
  |  |  382|  1.18M|#define D_INTRA_MB        1
  ------------------
  129|  1.18M|    u1_left_intra = u1_left_mb_type & D_INTRA_MB;
  ------------------
  |  |  382|  1.18M|#define D_INTRA_MB        1
  ------------------
  130|       |
  131|       |    /* return if none of the current top and left is IBL */
  132|  1.18M|    if((0 == u1_curr_mb_ibl) && (0 == u1_top_mb_ibl) && (0 == u1_left_mb_ibl))
  ------------------
  |  Branch (132:8): [True: 1.13M, False: 45.5k]
  |  Branch (132:33): [True: 1.12M, False: 8.56k]
  |  Branch (132:57): [True: 1.12M, False: 3.80k]
  ------------------
  133|  1.12M|    {
  134|  1.12M|        return;
  135|  1.12M|    }
  136|       |
  137|       |    /* set up the vertical and horz MB edge skip flags */
  138|  57.8k|    if(0 != u1_curr_mb_ibl)
  ------------------
  |  Branch (138:8): [True: 45.5k, False: 12.3k]
  ------------------
  139|  45.5k|    {
  140|       |        /* if top is not IBL */
  141|  45.5k|        if(0 == u1_top_mb_ibl)
  ------------------
  |  Branch (141:12): [True: 16.4k, False: 29.0k]
  ------------------
  142|  16.4k|        {
  143|  16.4k|            i4_horz_start = 1;
  144|  16.4k|        }
  145|       |
  146|       |        /* if left in not IBL */
  147|  45.5k|        if(0 == u1_left_mb_ibl)
  ------------------
  |  Branch (147:12): [True: 20.1k, False: 25.3k]
  ------------------
  148|  20.1k|        {
  149|  20.1k|            i4_vertical_start = 1;
  150|  20.1k|        }
  151|  45.5k|    }
  152|       |
  153|       |    /*******************************************************/
  154|       |    /* Fill BS for mb egdex assuming non IBL case          */
  155|       |    /*******************************************************/
  156|       |
  157|       |    /* only the  MB edges fall across IBL boundary */
  158|  57.8k|    if((0 != u1_curr_mb_ibl) || (0 != u1_top_mb_ibl) || (0 != u1_left_mb_ibl))
  ------------------
  |  Branch (158:8): [True: 45.5k, False: 12.3k]
  |  Branch (158:33): [True: 8.56k, False: 3.80k]
  |  Branch (158:57): [True: 3.80k, False: 0]
  ------------------
  159|  57.8k|    {
  160|  57.8k|        UWORD16 u2_temp, u2_i;
  161|  57.8k|        u2_temp = *pu2_left_res_luma_csbp;
  162|   289k|        for(u2_i = 0; u2_i < 4; u2_i++)
  ------------------
  |  Branch (162:23): [True: 231k, False: 57.8k]
  ------------------
  163|   231k|        {
  164|   231k|            u1_left_mb_nnz |= ((u2_temp & 0x08) >> (3 - u2_i));
  165|   231k|            u2_temp >>= 4;
  166|   231k|        }
  167|  57.8k|        u2_curr_nnz = *pu2_curr_res_luma_csbp;
  168|  57.8k|        u2_top_horz_nnz = *pu2_top_res_luma_csbp >> 12;
  169|       |
  170|       |        /* top is intra and not ibl */
  171|  57.8k|        if(0 != u1_top_intra)
  ------------------
  |  Branch (171:12): [True: 2.23k, False: 55.6k]
  ------------------
  172|  2.23k|        {
  173|  2.23k|            pu4_bs_table[0] = 0x04040404;
  174|  2.23k|        }
  175|       |        /* left is intra and not ibl */
  176|  57.8k|        if(0 != u1_left_intra)
  ------------------
  |  Branch (176:12): [True: 1.80k, False: 56.0k]
  ------------------
  177|  1.80k|        {
  178|  1.80k|            pu4_bs_table[4] = 0x04040404;
  179|  1.80k|        }
  180|       |
  181|       |        /* assume neighbours are inter and update bs */
  182|       |
  183|       |        /* Edge = 0 means Vert Edges and Edge = 1 means Horz edges */
  184|   173k|        for(i4_edge = 0; i4_edge < 2; i4_edge++)
  ------------------
  |  Branch (184:26): [True: 115k, False: 57.8k]
  ------------------
  185|   115k|        {
  186|   115k|            UWORD8 u1_p_nnz, u1_q_nnz;
  187|   115k|            UWORD32 u4_bs_edge = 0;
  188|   115k|            WORD32 i4_bit_mask;
  189|   115k|            WORD32 i4_curr_intra_flag;
  190|   115k|            WORD32 i4_neibor_intra_flag;
  191|       |
  192|   115k|            i4_curr_intra_flag = (0 != u1_curr_mb_ibl);
  193|       |
  194|   115k|            if(0 != i4_edge)
  ------------------
  |  Branch (194:16): [True: 57.8k, False: 57.8k]
  ------------------
  195|  57.8k|            {
  196|       |                /* Initialize for the TOP edge */
  197|  57.8k|                u1_p_nnz = (UWORD8) u2_top_horz_nnz;
  198|  57.8k|                u1_q_nnz = (UWORD8) (u2_curr_nnz & g_au4_extract_set[0]);
  199|  57.8k|                i4_neibor_intra_flag = (u1_top_mb_ibl || u1_top_intra);
  ------------------
  |  Branch (199:41): [True: 37.6k, False: 20.2k]
  |  Branch (199:58): [True: 2.23k, False: 18.0k]
  ------------------
  200|  57.8k|            }
  201|  57.8k|            else
  202|  57.8k|            {
  203|  57.8k|                u1_p_nnz = u1_left_mb_nnz;
  204|  57.8k|                u1_q_nnz = (UWORD8) isvcd_deblk_extract_bit_flags(u2_curr_nnz, 0x01);
  205|  57.8k|                i4_neibor_intra_flag = (u1_left_mb_ibl || u1_left_intra);
  ------------------
  |  Branch (205:41): [True: 30.2k, False: 27.6k]
  |  Branch (205:59): [True: 1.80k, False: 25.8k]
  ------------------
  206|  57.8k|            }
  207|       |
  208|   115k|            i4_bit_mask = 1;
  209|       |            /* find bs of 4 edges */
  210|   578k|            for(i4_i = 0; i4_i < 4; i4_i++)
  ------------------
  |  Branch (210:27): [True: 462k, False: 115k]
  ------------------
  211|   462k|            {
  212|   462k|                UWORD8 u1_p_nnz_temp, u1_q_nnz_temp;
  213|       |
  214|   462k|                u1_p_nnz_temp = (u1_p_nnz & i4_bit_mask);
  215|   462k|                u1_q_nnz_temp = (u1_q_nnz & i4_bit_mask);
  216|       |
  217|   462k|                u1_cnd = ((u1_p_nnz_temp && (!i4_neibor_intra_flag)) ||
  ------------------
  |  Branch (217:28): [True: 8.09k, False: 454k]
  |  Branch (217:45): [True: 3.70k, False: 4.38k]
  ------------------
  218|   459k|                          (u1_q_nnz_temp && (!i4_curr_intra_flag)));
  ------------------
  |  Branch (218:28): [True: 12.4k, False: 446k]
  |  Branch (218:45): [True: 1.70k, False: 10.7k]
  ------------------
  219|       |
  220|   462k|                u1_bs = u1_cnd ? 2 : 1;
  ------------------
  |  Branch (220:25): [True: 5.40k, False: 457k]
  ------------------
  221|       |
  222|       |                /* update the bs of the edge */
  223|   462k|                u4_bs_edge = (u4_bs_edge << 8) + u1_bs;
  224|   462k|                i4_bit_mask <<= 1;
  225|       |
  226|   462k|            } /* end of loop over blk edges */
  227|       |
  228|       |            /* update the bs of edges */
  229|   115k|            if(i4_edge && !u1_top_intra)
  ------------------
  |  Branch (229:16): [True: 57.8k, False: 57.8k]
  |  Branch (229:27): [True: 55.6k, False: 2.23k]
  ------------------
  230|  55.6k|            {
  231|  55.6k|                pu4_bs_table[0] = u4_bs_edge;
  232|  55.6k|            }
  233|  60.1k|            else if(!i4_edge && !u1_left_intra)
  ------------------
  |  Branch (233:21): [True: 57.8k, False: 2.23k]
  |  Branch (233:33): [True: 56.0k, False: 1.80k]
  ------------------
  234|  56.0k|            {
  235|  56.0k|                pu4_bs_table[4] = u4_bs_edge;
  236|  56.0k|            }
  237|   115k|        } /* end of loop over v1 vetical and horizontal edge */
  238|  57.8k|    }
  239|       |    /* current MB is IBL */
  240|  57.8k|    if(0 != u1_curr_mb_ibl)
  ------------------
  |  Branch (240:8): [True: 45.5k, False: 12.3k]
  ------------------
  241|  45.5k|    {
  242|  45.5k|        UWORD16 u2_temp, u2_i;
  243|  45.5k|        WORD32 i4_bit_mask_edge = 1;
  244|       |
  245|  45.5k|        u1_left_mb_nnz = 0;
  246|  45.5k|        u2_temp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
  247|   227k|        for(u2_i = 0; u2_i < 4; u2_i++)
  ------------------
  |  Branch (247:23): [True: 182k, False: 45.5k]
  ------------------
  248|   182k|        {
  249|   182k|            u1_left_mb_nnz |= ((u2_temp & 0x08) >> (3 - u2_i));
  250|   182k|            u2_temp >>= 4;
  251|   182k|        }
  252|  45.5k|        u2_curr_nnz = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
  253|  45.5k|        u2_top_horz_nnz = ps_cur_mb_info->ps_top_mb->u2_luma_csbp >> 12;
  254|       |        /* all are IBL edges then use only t_coeff of current layer*/
  255|       |        /* loop over all edges */
  256|   227k|        for(i4_edge = 0; i4_edge < 4; i4_edge++)
  ------------------
  |  Branch (256:26): [True: 182k, False: 45.5k]
  ------------------
  257|   182k|        {
  258|   182k|            UWORD16 u2_curr_horz_nnz = 0;
  259|   182k|            WORD32 i4_bit_mask = 1;
  260|       |
  261|   182k|            u2_curr_horz_nnz = u2_curr_nnz & g_au4_extract_set[i4_edge];
  262|       |
  263|   182k|            u2_curr_horz_nnz = (u2_curr_horz_nnz >> (i4_edge * 4));
  264|       |
  265|   182k|            u1_left_nnz = (u1_left_mb_nnz & i4_bit_mask_edge);
  266|       |
  267|   910k|            for(i4_i = 0; i4_i < 4; i4_i++)
  ------------------
  |  Branch (267:27): [True: 728k, False: 182k]
  ------------------
  268|   728k|            {
  269|   728k|                UWORD8 u1_curr_nnz, u1_top_nnz;
  270|       |
  271|   728k|                u1_curr_nnz = (u2_curr_horz_nnz & i4_bit_mask);
  272|   728k|                u1_top_nnz = (u2_top_horz_nnz & i4_bit_mask);
  273|       |                /* update bs horizontal */
  274|       |
  275|   728k|                if(!((1 == i4_horz_start) && (0 == i4_edge)))
  ------------------
  |  Branch (275:22): [True: 263k, False: 464k]
  |  Branch (275:46): [True: 65.8k, False: 197k]
  ------------------
  276|   662k|                {
  277|   662k|                    u1_p_nnz = u1_top_nnz;
  278|   662k|                    u1_q_nnz = u1_curr_nnz;
  279|   662k|                    u1_cnd = !(u1_p_nnz || u1_q_nnz);
  ------------------
  |  Branch (279:32): [True: 19.3k, False: 642k]
  |  Branch (279:44): [True: 4.51k, False: 638k]
  ------------------
  280|   662k|                    u1_bs = u1_cnd ? 0 : 1;
  ------------------
  |  Branch (280:29): [True: 638k, False: 23.8k]
  ------------------
  281|   662k|                    pu4_bs_table[i4_edge] = (pu4_bs_table[i4_edge] << 8) + u1_bs;
  282|   662k|                }
  283|       |
  284|       |                /* update bs vertical */
  285|   728k|                if(!((1 == i4_vertical_start) && (0 == i4_i)))
  ------------------
  |  Branch (285:22): [True: 322k, False: 405k]
  |  Branch (285:50): [True: 80.5k, False: 241k]
  ------------------
  286|   647k|                {
  287|   647k|                    u1_p_nnz = u1_left_nnz;
  288|   647k|                    u1_q_nnz = u1_curr_nnz;
  289|   647k|                    u1_cnd = !(u1_p_nnz || u1_q_nnz);
  ------------------
  |  Branch (289:32): [True: 19.5k, False: 627k]
  |  Branch (289:44): [True: 3.33k, False: 624k]
  ------------------
  290|   647k|                    u1_bs = u1_cnd ? 0 : 1;
  ------------------
  |  Branch (290:29): [True: 624k, False: 22.9k]
  ------------------
  291|   647k|                    pu4_bs_table[i4_i + 4] = (pu4_bs_table[i4_i + 4] << 8) + u1_bs;
  292|   647k|                }
  293|       |                /* store the current nnz to left nnz */
  294|   728k|                u1_left_nnz = u1_curr_nnz;
  295|   728k|                i4_bit_mask <<= 1;
  296|   728k|            }
  297|       |            /* store the current row nnz to top row nnz */
  298|   182k|            u2_top_horz_nnz = u2_curr_horz_nnz;
  299|   182k|            i4_bit_mask_edge <<= 1;
  300|   182k|        }
  301|  45.5k|    }
  302|  57.8k|    return;
  303|  1.18M|}
isvcd_compute_bs_non_mbaff_target_lyr_no_inter_layer:
  327|  94.3k|{
  328|  94.3k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  329|       |    /* Mvpred and Nnz for top and Courrent */
  330|  94.3k|    mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred;
  331|       |    /* deblk_mb_t Params */
  332|  94.3k|    deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
  333|  94.3k|    deblkmb_neighbour_t *ps_deblk_top_mb;
  334|       |
  335|       |    /* Reference Index to POC mapping*/
  336|  94.3k|    void **apv_map_ref_idx_to_poc;
  337|  94.3k|    UWORD32 u4_leftmbtype;
  338|       |
  339|  94.3k|    UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp;
  340|       |
  341|       |    /* Set of flags */
  342|  94.3k|    UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld;
  343|  94.3k|    UWORD32 u1_cur_mb_type;
  344|  94.3k|    UWORD32 *pu4_bs_table;
  345|       |
  346|       |    /* Neighbour availability */
  347|       |    /* Initialization */
  348|  94.3k|    const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
  349|  94.3k|    const UWORD32 u2_mby = ps_cur_mb_info->u2_mby;
  350|  94.3k|    const UWORD32 u1_pingpong = u2_mbx & 0x01;
  351|       |
  352|  94.3k|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|  94.3k|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  353|       |
  354|  94.3k|    ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
  355|       |
  356|       |    /* Pointer assignment for Current DeblkMB, Current Mv Pred  */
  357|  94.3k|    ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb;
  358|  94.3k|    ps_cur_mv_pred = ps_dec->ps_mv_cur + (u2_mbxn_mb << 4);
  359|       |
  360|  94.3k|    apv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc + 1;
  361|  94.3k|    u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
  362|  94.3k|    u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type;
  363|  94.3k|    ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type;
  364|       |
  365|  94.3k|    ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
  366|  94.3k|    ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
  367|  94.3k|    ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
  368|  94.3k|    ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
  369|       |
  370|       |    /* if no deblocking required for current Mb then continue */
  371|       |    /* Check next Mbs   in Mb group                           */
  372|  94.3k|    if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
  ------------------
  |  |   70|  94.3k|#define MB_DISABLE_FILTERING          0x01
  ------------------
  |  Branch (372:8): [True: 1.56k, False: 92.7k]
  ------------------
  373|  1.56k|    {
  374|  1.56k|        void **pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  1.56k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  1.56k|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  375|  1.56k|        {
  376|       |            /* Store Parameter for Top MvPred refernce frame Address */
  377|       |
  378|  1.56k|            void **ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
  379|  1.56k|            WORD8 *p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
  380|  1.56k|            WORD8 *p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
  381|       |
  382|       |            /* Store Left addresses for Next Mb   */
  383|  1.56k|            void **ppv_left_mv_pred_addr = ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
  384|  1.56k|            WORD8 *p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
  385|       |
  386|  1.56k|            ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
  387|  1.56k|            ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
  388|       |
  389|  1.56k|            ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  390|  1.56k|            ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  391|  1.56k|            ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  392|  1.56k|            ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  393|       |
  394|  1.56k|            ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
  395|  1.56k|            ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
  396|       |            /* Storing the leftMbtype for next Mb */
  397|  1.56k|            ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
  398|  1.56k|        }
  399|       |
  400|  1.56k|        return;
  401|  1.56k|    }
  402|       |
  403|       |    /* Flag for extra left Edge */
  404|  92.7k|    ps_cur_mb_params->u1_single_call = 1;
  405|       |
  406|       |    /* Update the Left deblk_mb_t and Left MvPred Parameters           */
  407|  92.7k|    if(!u2_mbx)
  ------------------
  |  Branch (407:8): [True: 14.5k, False: 78.2k]
  ------------------
  408|  14.5k|    {
  409|  14.5k|        u4_leftmbtype = 0;
  410|       |
  411|       |        /* Initialize the ps_left_mv_pred with Junk but Valid Location */
  412|       |        /* to avoid invalid memory access                           */
  413|       |        /* this is read only pointer                                */
  414|  14.5k|        ps_left_mv_pred = ps_dec->ps_mv_cur + 3;
  415|  14.5k|    }
  416|  78.2k|    else
  417|  78.2k|    {
  418|  78.2k|        u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type;
  419|       |
  420|       |        /* Come to Left Most Edge of the MB */
  421|  78.2k|        ps_left_mv_pred =
  422|  78.2k|            (u2_mbxn_mb) ? ps_dec->ps_mv_cur + ((u2_mbxn_mb - 1) << 4) + 3 : ps_dec->ps_mv_left + 3;
  ------------------
  |  Branch (422:13): [True: 78.0k, False: 235]
  ------------------
  423|  78.2k|    }
  424|       |
  425|  92.7k|    if(!u2_mby) u1_top_mb_typ = 0;
  ------------------
  |  Branch (425:8): [True: 24.0k, False: 68.7k]
  ------------------
  426|       |
  427|       |    /* MvPred Pointer Calculation */
  428|  92.7k|    ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12;
  429|       |
  430|  92.7k|    u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB;
  ------------------
  |  |  382|  92.7k|#define D_INTRA_MB        1
  ------------------
  431|  92.7k|    u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB);
  ------------------
  |  |  386|  92.7k|#define D_FLD_MB          0x80
  ------------------
  432|       |    /* Compute BS function */
  433|  92.7k|    pu4_bs_table = ps_cur_mb_params->u4_bs_table;
  434|       |
  435|  92.7k|    u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
  436|  92.7k|    u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
  437|  92.7k|    u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
  438|       |    /* Compute BS function */
  439|  92.7k|    if((ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC) ||
  ------------------
  |  |  278|  92.7k|#define HIGH_PROFILE_IDC   100
  ------------------
  |  Branch (439:8): [True: 41.5k, False: 51.1k]
  ------------------
  440|  51.1k|       (ps_dec->ps_cur_sps->u1_profile_idc == SCALABLE_HIGH_PROFILE_IDC) ||
  ------------------
  |  |   60|  51.1k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (440:8): [True: 0, False: 51.1k]
  ------------------
  441|  51.1k|       (ps_dec->ps_cur_sps->u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC))
  ------------------
  |  |   59|  51.1k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (441:8): [True: 0, False: 51.1k]
  ------------------
  442|  41.5k|    {
  443|  41.5k|        if(ps_cur_mb_info->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (443:12): [True: 24.2k, False: 17.3k]
  ------------------
  444|  24.2k|        {
  445|  24.2k|            u2_cur_csbp = ih264d_update_csbp_8x8(ps_cur_mb_info->ps_curmb->u2_luma_csbp);
  446|  24.2k|        }
  447|       |
  448|  41.5k|        if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (448:12): [True: 21.8k, False: 19.7k]
  ------------------
  449|  21.8k|        {
  450|  21.8k|            u2_left_csbp = ih264d_update_csbp_8x8(ps_cur_mb_info->ps_left_mb->u2_luma_csbp);
  451|  21.8k|        }
  452|       |
  453|  41.5k|        if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (453:12): [True: 13.8k, False: 27.7k]
  ------------------
  454|  13.8k|        {
  455|  13.8k|            u2_top_csbp = ih264d_update_csbp_8x8(ps_cur_mb_info->ps_top_mb->u2_luma_csbp);
  456|  13.8k|        }
  457|  41.5k|    }
  458|  92.7k|    if(u4_cur_mb_intra)
  ------------------
  |  Branch (458:8): [True: 92.7k, False: 0]
  ------------------
  459|  92.7k|    {
  460|  92.7k|        pu4_bs_table[4] = 0x04040404;
  461|  92.7k|        pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  Branch (461:27): [True: 0, False: 92.7k]
  ------------------
  462|  92.7k|        pu4_bs_table[1] = 0x03030303;
  463|  92.7k|        pu4_bs_table[2] = 0x03030303;
  464|  92.7k|        pu4_bs_table[3] = 0x03030303;
  465|  92.7k|        pu4_bs_table[5] = 0x03030303;
  466|  92.7k|        pu4_bs_table[6] = 0x03030303;
  467|  92.7k|        pu4_bs_table[7] = 0x03030303;
  468|  92.7k|    }
  469|      0|    else
  470|      0|    {
  471|      0|        UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16);
  ------------------
  |  |  383|      0|#define D_PRED_NON_16x16  2
  ------------------
  472|      0|        UWORD32 u4_is_b = ps_dec->u1_B;
  473|       |
  474|      0|        ih264d_fill_bs2_horz_vert(pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp,
  475|      0|                                  (const UWORD32 *) (gau4_ih264d_packed_bs2),
  476|      0|                                  (const UWORD16 *) (gau2_ih264d_4x4_v2h_reorder));
  477|       |
  478|      0|        if(u4_leftmbtype & D_INTRA_MB) pu4_bs_table[4] = 0x04040404;
  ------------------
  |  |  382|      0|#define D_INTRA_MB        1
  ------------------
  |  Branch (478:12): [True: 0, False: 0]
  ------------------
  479|       |
  480|      0|        if(u1_top_mb_typ & D_INTRA_MB) pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  |  382|      0|#define D_INTRA_MB        1
  ------------------
  |  Branch (480:12): [True: 0, False: 0]
  |  Branch (480:58): [True: 0, False: 0]
  ------------------
  481|       |
  482|      0|        ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16](
  483|      0|            ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc, pu4_bs_table, ps_left_mv_pred,
  484|      0|            &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]),
  485|      0|            ps_cur_mb_info->ps_top_mb->u4_pic_addrress, (4 >> u4_cur_mb_fld));
  486|      0|    }
  487|       |
  488|  92.7k|    {
  489|  92.7k|        void **pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  92.7k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  92.7k|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  490|  92.7k|        {
  491|       |            /* Store Parameter for Top MvPred refernce frame Address */
  492|       |
  493|  92.7k|            void **ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
  494|  92.7k|            WORD8 *p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
  495|  92.7k|            WORD8 *p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
  496|       |
  497|       |            /* Store Left addresses for Next Mb   */
  498|  92.7k|            void **ppv_left_mv_pred_addr = ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
  499|  92.7k|            WORD8 *p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
  500|       |
  501|  92.7k|            ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
  502|  92.7k|            ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
  503|       |
  504|  92.7k|            ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  505|  92.7k|            ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  506|  92.7k|            ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  507|  92.7k|            ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  508|       |
  509|  92.7k|            ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
  510|  92.7k|            ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
  511|       |
  512|       |            /* Storing the leftMbtype for next Mb */
  513|  92.7k|            ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
  514|  92.7k|        }
  515|  92.7k|    }
  516|       |
  517|       |    /* For transform 8x8 disable deblocking of the intrernal edges of a 8x8 block */
  518|  92.7k|    if(ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (518:8): [True: 24.2k, False: 68.4k]
  ------------------
  519|  24.2k|    {
  520|  24.2k|        pu4_bs_table[1] = 0;
  521|  24.2k|        pu4_bs_table[3] = 0;
  522|  24.2k|        pu4_bs_table[5] = 0;
  523|  24.2k|        pu4_bs_table[7] = 0;
  524|  24.2k|    }
  525|  92.7k|}
isvcd_compute_bs_non_mbaff:
  547|  4.62k|{
  548|  4.62k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  549|  4.62k|    ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info, u2_mbxn_mb);
  550|  4.62k|}
isvcd_compute_bs_non_mbaff_target_lyr:
  572|   140k|{
  573|   140k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  574|       |    /* Mvpred and Nnz for top and Courrent */
  575|   140k|    mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred;
  576|       |    /* deblk_mb_t Params */
  577|   140k|    deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
  578|   140k|    deblkmb_neighbour_t *ps_deblk_top_mb;
  579|       |
  580|       |    /* Reference Index to POC mapping*/
  581|   140k|    void **apv_map_ref_idx_to_poc;
  582|   140k|    UWORD32 u4_leftmbtype;
  583|   140k|    UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp;
  584|       |
  585|       |    /* Set of flags */
  586|   140k|    UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld;
  587|   140k|    UWORD32 u4_cur_mb_ibl;
  588|   140k|    UWORD32 u1_cur_mb_type;
  589|   140k|    UWORD32 *pu4_bs_table;
  590|       |
  591|   140k|    UWORD16 *pu2_curr_res_luma_csbp;
  592|   140k|    UWORD16 *pu2_left_res_luma_csbp;
  593|   140k|    UWORD16 *pu2_top_res_luma_csbp;
  594|       |
  595|       |    /* Neighbour availability */
  596|       |    /* Initialization */
  597|   140k|    const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
  598|   140k|    const UWORD32 u2_mby = ps_cur_mb_info->u2_mby;
  599|   140k|    const UWORD32 u1_pingpong = u2_mbx & 0x01;
  600|       |
  601|   140k|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|   140k|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  602|       |
  603|   140k|    ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
  604|       |
  605|       |    /* Pointer assignment for Current DeblkMB, Current Mv Pred  */
  606|   140k|    ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb;
  607|   140k|    ps_cur_mv_pred = ps_dec->ps_mv_cur + (u2_mbxn_mb << 4);
  608|       |
  609|       |    /*Pointer assignment for Residual NNZ */
  610|   140k|    pu2_curr_res_luma_csbp = ps_svc_lyr_dec->pu2_frm_res_luma_csbp + ps_cur_mb_info->u2_mbx;
  611|   140k|    pu2_curr_res_luma_csbp += ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride;
  612|       |
  613|   140k|    pu2_left_res_luma_csbp = pu2_curr_res_luma_csbp - (ps_cur_mb_info->u2_mbx != 0);
  614|   140k|    pu2_top_res_luma_csbp = pu2_curr_res_luma_csbp - ((ps_cur_mb_info->u2_mby != 0) *
  615|   140k|                                                      ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride);
  616|       |
  617|   140k|    apv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc + 1;
  618|   140k|    u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
  619|   140k|    u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type;
  620|   140k|    ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type;
  621|       |
  622|   140k|    ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
  623|   140k|    ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
  624|   140k|    ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
  625|   140k|    ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
  626|       |
  627|       |    /* if no deblocking required for current Mb then continue */
  628|       |    /* Check next Mbs   in Mb group                           */
  629|   140k|    if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
  ------------------
  |  |   70|   140k|#define MB_DISABLE_FILTERING          0x01
  ------------------
  |  Branch (629:8): [True: 8.44k, False: 132k]
  ------------------
  630|  8.44k|    {
  631|  8.44k|        void **pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  8.44k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  8.44k|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  632|       |
  633|       |        /* Store Parameter for Top MvPred refernce frame Address */
  634|  8.44k|        void **ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
  635|  8.44k|        WORD8 *p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
  636|  8.44k|        WORD8 *p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
  637|       |
  638|       |        /* Store Left addresses for Next Mb   */
  639|  8.44k|        void **ppv_left_mv_pred_addr = ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
  640|  8.44k|        WORD8 *p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
  641|       |
  642|  8.44k|        ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
  643|  8.44k|        ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
  644|       |
  645|  8.44k|        ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  646|  8.44k|        ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  647|  8.44k|        ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  648|  8.44k|        ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  649|       |
  650|  8.44k|        ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
  651|  8.44k|        ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
  652|       |        /* Storing the leftMbtype for next Mb */
  653|  8.44k|        ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
  654|       |
  655|  8.44k|        return;
  656|  8.44k|    }
  657|       |
  658|       |    /* Flag for extra left Edge */
  659|   132k|    ps_cur_mb_params->u1_single_call = 1;
  660|       |
  661|       |    /* Update the Left deblk_mb_t and Left MvPred Parameters           */
  662|   132k|    if(!u2_mbx)
  ------------------
  |  Branch (662:8): [True: 38.1k, False: 94.3k]
  ------------------
  663|  38.1k|    {
  664|  38.1k|        u4_leftmbtype = 0;
  665|       |
  666|       |        /* Initialize the ps_left_mv_pred with Junk but Valid Location */
  667|       |        /* to avoid invalid memory access                           */
  668|       |        /* this is read only pointer                                */
  669|  38.1k|        ps_left_mv_pred = ps_dec->ps_mv_cur + 3;
  670|  38.1k|    }
  671|  94.3k|    else
  672|  94.3k|    {
  673|  94.3k|        u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type;
  674|       |
  675|       |        /* Come to Left Most Edge of the MB */
  676|  94.3k|        ps_left_mv_pred =
  677|  94.3k|            (u2_mbxn_mb) ? ps_dec->ps_mv_cur + ((u2_mbxn_mb - 1) << 4) + 3 : ps_dec->ps_mv_left + 3;
  ------------------
  |  Branch (677:13): [True: 94.0k, False: 274]
  ------------------
  678|  94.3k|    }
  679|       |
  680|   132k|    if(!u2_mby) u1_top_mb_typ = 0;
  ------------------
  |  Branch (680:8): [True: 20.6k, False: 111k]
  ------------------
  681|       |
  682|       |    /* MvPred Pointer Calculation */
  683|   132k|    ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12;
  684|   132k|    u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB;
  ------------------
  |  |  382|   132k|#define D_INTRA_MB        1
  ------------------
  685|   132k|    u4_cur_mb_ibl = u1_cur_mb_type & D_INTRA_IBL;
  ------------------
  |  |   72|   132k|#define D_INTRA_IBL 16
  ------------------
  686|   132k|    u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB);
  ------------------
  |  |  386|   132k|#define D_FLD_MB          0x80
  ------------------
  687|       |    /* Compute BS function */
  688|   132k|    pu4_bs_table = ps_cur_mb_params->u4_bs_table;
  689|       |
  690|   132k|    u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
  691|   132k|    u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
  692|   132k|    u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
  693|       |    /* Compute BS function */
  694|   132k|    if((ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC) ||
  ------------------
  |  |  278|   132k|#define HIGH_PROFILE_IDC   100
  ------------------
  |  Branch (694:8): [True: 3.34k, False: 129k]
  ------------------
  695|   129k|       (ps_dec->ps_cur_sps->u1_profile_idc == SCALABLE_HIGH_PROFILE_IDC) ||
  ------------------
  |  |   60|   129k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (695:8): [True: 17.9k, False: 111k]
  ------------------
  696|   111k|       (ps_dec->ps_cur_sps->u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC))
  ------------------
  |  |   59|   111k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (696:8): [True: 28.1k, False: 83.0k]
  ------------------
  697|  49.4k|    {
  698|  49.4k|        if(ps_cur_mb_info->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (698:12): [True: 5.38k, False: 44.0k]
  ------------------
  699|  5.38k|        {
  700|  5.38k|            u2_cur_csbp = ih264d_update_csbp_8x8(ps_cur_mb_info->ps_curmb->u2_luma_csbp);
  701|  5.38k|            ps_cur_mb_info->ps_curmb->u2_luma_csbp = u2_cur_csbp;
  702|  5.38k|        }
  703|  49.4k|    }
  704|   132k|    u2_cur_csbp |= *pu2_curr_res_luma_csbp;
  705|   132k|    u2_left_csbp |= *pu2_left_res_luma_csbp;
  706|   132k|    u2_top_csbp |= *pu2_top_res_luma_csbp;
  707|       |
  708|   132k|    if(u4_cur_mb_intra)
  ------------------
  |  Branch (708:8): [True: 7.60k, False: 124k]
  ------------------
  709|  7.60k|    {
  710|  7.60k|        pu4_bs_table[4] = 0x04040404;
  711|  7.60k|        pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  Branch (711:27): [True: 0, False: 7.60k]
  ------------------
  712|  7.60k|        pu4_bs_table[1] = 0x03030303;
  713|  7.60k|        pu4_bs_table[2] = 0x03030303;
  714|  7.60k|        pu4_bs_table[3] = 0x03030303;
  715|  7.60k|        pu4_bs_table[5] = 0x03030303;
  716|  7.60k|        pu4_bs_table[6] = 0x03030303;
  717|  7.60k|        pu4_bs_table[7] = 0x03030303;
  718|  7.60k|    }
  719|   124k|    else
  720|   124k|    {
  721|   124k|        isvcd_fill_bs_ibl(ps_cur_mb_params, u1_top_mb_typ, u4_leftmbtype, ps_cur_mb_info,
  722|   124k|                          pu2_curr_res_luma_csbp, pu2_left_res_luma_csbp, pu2_top_res_luma_csbp);
  723|       |
  724|   124k|        if(!u4_cur_mb_ibl)
  ------------------
  |  Branch (724:12): [True: 99.8k, False: 25.0k]
  ------------------
  725|  99.8k|        {
  726|  99.8k|            UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16);
  ------------------
  |  |  383|  99.8k|#define D_PRED_NON_16x16  2
  ------------------
  727|  99.8k|            UWORD32 u4_is_b = ps_dec->u1_B;
  728|  99.8k|            UWORD32 u4_bs_0, u4_bs_4;
  729|       |
  730|  99.8k|            u4_bs_0 = pu4_bs_table[0];
  731|  99.8k|            u4_bs_4 = pu4_bs_table[4];
  732|       |
  733|  99.8k|            ih264d_fill_bs2_horz_vert(pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp,
  734|  99.8k|                                      (const UWORD32 *) (gau4_ih264d_packed_bs2),
  735|  99.8k|                                      (const UWORD16 *) (gau2_ih264d_4x4_v2h_reorder));
  736|       |
  737|  99.8k|            if(u4_leftmbtype & D_INTRA_MB)
  ------------------
  |  |  382|  99.8k|#define D_INTRA_MB        1
  ------------------
  |  Branch (737:16): [True: 597, False: 99.2k]
  ------------------
  738|    597|            {
  739|    597|                pu4_bs_table[4] = 0x04040404;
  740|    597|            }
  741|  99.2k|            else if(u4_leftmbtype & D_INTRA_IBL)
  ------------------
  |  |   72|  99.2k|#define D_INTRA_IBL 16
  ------------------
  |  Branch (741:21): [True: 2.43k, False: 96.8k]
  ------------------
  742|  2.43k|            {
  743|  2.43k|                pu4_bs_table[4] = u4_bs_4;
  744|  2.43k|            }
  745|       |
  746|  99.8k|            if(u1_top_mb_typ & D_INTRA_MB)
  ------------------
  |  |  382|  99.8k|#define D_INTRA_MB        1
  ------------------
  |  Branch (746:16): [True: 791, False: 99.0k]
  ------------------
  747|    791|            {
  748|    791|                pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  Branch (748:35): [True: 0, False: 791]
  ------------------
  749|    791|            }
  750|  99.0k|            else if(u1_top_mb_typ & D_INTRA_IBL)
  ------------------
  |  |   72|  99.0k|#define D_INTRA_IBL 16
  ------------------
  |  Branch (750:21): [True: 3.52k, False: 95.5k]
  ------------------
  751|  3.52k|            {
  752|  3.52k|                pu4_bs_table[0] = u4_bs_0;
  753|  3.52k|            }
  754|       |
  755|  99.8k|            ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16](
  756|  99.8k|                ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc, pu4_bs_table,
  757|  99.8k|                ps_left_mv_pred, &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]),
  758|  99.8k|                ps_cur_mb_info->ps_top_mb->u4_pic_addrress, (4 >> u4_cur_mb_fld));
  759|  99.8k|        }
  760|   124k|    }
  761|       |
  762|   132k|    {
  763|   132k|        void **pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|   132k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|   132k|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  764|       |        /* Store Parameter for Top MvPred refernce frame Address */
  765|   132k|        void **ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
  766|   132k|        WORD8 *p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
  767|   132k|        WORD8 *p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
  768|       |
  769|       |        /* Store Left addresses for Next Mb   */
  770|   132k|        void **ppv_left_mv_pred_addr = ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
  771|   132k|        WORD8 *p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
  772|       |
  773|   132k|        ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
  774|   132k|        ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
  775|       |
  776|   132k|        ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  777|   132k|        ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  778|   132k|        ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  779|   132k|        ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  780|       |
  781|   132k|        ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
  782|   132k|        ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
  783|       |
  784|       |        /* Storing the leftMbtype for next Mb */
  785|   132k|        ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
  786|   132k|    }
  787|       |
  788|       |    /* For transform 8x8 disable deblocking of the intrernal edges of a 8x8 block */
  789|   132k|    if(ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (789:8): [True: 12.1k, False: 120k]
  ------------------
  790|  12.1k|    {
  791|  12.1k|        pu4_bs_table[1] = 0;
  792|  12.1k|        pu4_bs_table[3] = 0;
  793|  12.1k|        pu4_bs_table[5] = 0;
  794|  12.1k|        pu4_bs_table[7] = 0;
  795|  12.1k|    }
  796|   132k|}

isvcd_init_function_ptr_generic:
   85|   176k|{
   86|   176k|    residual_sampling_ctxt_t *ps_resd_samp_ctx;
   87|   176k|    intra_sampling_ctxt_t *ps_intra_samp_ctxt;
   88|   176k|    dec_struct_t *ps_codec = &ps_svc_lyr_dec->s_dec;
   89|       |
   90|       |    /* call the ih264 init ptr generic fn*/
   91|   176k|    ih264d_init_function_ptr_generic(ps_codec);
   92|       |
   93|   176k|    ps_resd_samp_ctx = (residual_sampling_ctxt_t *) ps_svc_lyr_dec->pv_residual_sample_ctxt;
   94|   176k|    ps_intra_samp_ctxt = (intra_sampling_ctxt_t *) ps_svc_lyr_dec->pv_intra_sample_ctxt;
   95|       |
   96|   176k|    ps_svc_lyr_dec->pf_pred_residual_recon_luma_4x4 = isvcd_pred_residual_recon_4x4;
   97|   176k|    ps_svc_lyr_dec->pf_pred_residual_recon_luma_8x8 = isvcd_pred_residual_recon_8x8;
   98|   176k|    ps_svc_lyr_dec->pf_pred_residual_recon_luma_16x16 = isvcd_pred_residual_recon_16x16;
   99|   176k|    ps_svc_lyr_dec->pf_pred_residual_recon_chroma_4x4 = isvcd_pred_residual_recon_chroma_4x4;
  100|   176k|    ps_svc_lyr_dec->pf_pred_residual_recon_chroma_8x8 = isvcd_pred_residual_recon_chroma_8x8;
  101|       |
  102|   176k|    ps_svc_lyr_dec->pf_residual_luma_4x4 = isvcd_residual_luma_4x4;
  103|   176k|    ps_svc_lyr_dec->pf_residual_luma_8x8 = isvcd_residual_luma_8x8;
  104|   176k|    ps_svc_lyr_dec->pf_residual_luma_16x16 = isvcd_residual_luma_16x16;
  105|   176k|    ps_svc_lyr_dec->pf_residual_chroma_cb_cr_8x8 = isvcd_residual_chroma_cb_cr_8x8;
  106|       |
  107|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_4x4 =
  108|   176k|        isvcd_iquant_itrans_residual_recon_4x4;
  109|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_4x4_dc =
  110|   176k|        isvcd_iquant_itrans_residual_recon_4x4_dc;
  111|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_8x8 =
  112|   176k|        isvcd_iquant_itrans_residual_recon_8x8;
  113|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_8x8_dc =
  114|   176k|        isvcd_iquant_itrans_residual_recon_8x8_dc;
  115|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4 =
  116|   176k|        isvcd_iquant_itrans_residual_recon_chroma_4x4;
  117|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4_dc =
  118|   176k|        isvcd_iquant_itrans_residual_recon_chroma_4x4_dc;
  119|       |
  120|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_4x4 = isvcd_iquant_itrans_residual_4x4;
  121|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_4x4_dc = isvcd_iquant_itrans_residual_4x4_dc;
  122|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_8x8 = isvcd_iquant_itrans_residual_8x8;
  123|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_8x8_dc = isvcd_iquant_itrans_residual_8x8_dc;
  124|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_chroma_4x4 = isvcd_iquant_itrans_residual_chroma_4x4;
  125|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_chroma_4x4_dc =
  126|   176k|        isvcd_iquant_itrans_residual_chroma_4x4_dc;
  127|       |
  128|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_4x4 = isvcd_iquant_itrans_4x4;
  129|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_4x4_dc = isvcd_iquant_itrans_4x4_dc;
  130|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_8x8 = isvcd_iquant_itrans_8x8;
  131|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_8x8_dc = isvcd_iquant_itrans_8x8_dc;
  132|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4 = isvcd_iquant_itrans_chroma_4x4;
  133|   176k|    ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4_dc = isvcd_iquant_itrans_chroma_4x4_dc;
  134|       |
  135|   176k|    ps_intra_samp_ctxt->pf_interpolate_base_luma_dyadic = isvcd_interpolate_base_luma_dyadic;
  136|   176k|    ps_intra_samp_ctxt->pf_interpolate_intra_base = isvcd_interpolate_intra_base;
  137|   176k|    ps_intra_samp_ctxt->pf_vert_chroma_interpol[0] = isvcd_vert_interpol_chroma_dyadic_1;
  138|   176k|    ps_intra_samp_ctxt->pf_vert_chroma_interpol[1] = isvcd_vert_interpol_chroma_dyadic_2;
  139|   176k|    ps_intra_samp_ctxt->pf_vert_chroma_interpol[2] = isvcd_vert_interpol_chroma_dyadic_3;
  140|       |
  141|   176k|    ps_intra_samp_ctxt->pf_horz_chroma_interpol[0] = isvcd_horz_interpol_chroma_dyadic_1;
  142|   176k|    ps_intra_samp_ctxt->pf_horz_chroma_interpol[1] = isvcd_horz_interpol_chroma_dyadic_2;
  143|       |
  144|       |    /*Dyadic Residual Resampling*/
  145|   176k|    ps_resd_samp_ctx->pf_residual_luma_dyadic = isvcd_residual_luma_dyadic;
  146|   176k|    ps_resd_samp_ctx->pf_residual_chroma_dyadic = isvcd_residual_chroma_dyadic;
  147|   176k|    ps_resd_samp_ctx->pf_residual_chroma_dyadic_alt = isvcd_residual_chroma_dyadic_alt;
  148|       |
  149|       |    /*Non-dyadic Residual Resampling*/
  150|   176k|    ps_resd_samp_ctx->pf_interpolate_residual = isvcd_interpolate_residual;
  151|       |
  152|   176k|    ps_resd_samp_ctx->pf_residual_reflayer_const_non_boundary_mb =
  153|   176k|        isvcd_residual_reflayer_const_non_boundary_mb;
  154|   176k|    ps_resd_samp_ctx->pf_residual_reflayer_const_boundary_mb =
  155|   176k|        isvcd_residual_reflayer_const_boundary_mb;
  156|       |
  157|   176k|    return;
  158|   176k|}

isvcd_ii_pred_res_init:
  100|   132k|{
  101|       |    /* local vaiables */
  102|   132k|    intra_inter_pred_ctxt_t *ps_ii_pred_ctxt;
  103|   132k|    mode_motion_ctxt_t *ps_ctxt;
  104|   132k|    mode_motion_lyr_ctxt *ps_lyr_mem;
  105|   132k|    WORD32 i4_base_res_flag;
  106|   132k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
  107|       |
  108|   132k|    res_prms_t *ps_res_prms = &ps_svc_lyr_dec->s_res_prms;
  109|   132k|    ps_ii_pred_ctxt = (intra_inter_pred_ctxt_t *) ps_svc_lyr_dec->pv_ii_pred_ctxt;
  110|   132k|    ps_ctxt = (mode_motion_ctxt_t *) ps_svc_lyr_dec->pv_mode_mv_sample_ctxt;
  111|   132k|    i4_base_res_flag = ps_svc_lyr_dec->u1_base_res_flag;
  112|       |
  113|   132k|    if((0 != ps_svc_lyr_dec->u1_layer_id) && (SVCD_FALSE == i4_base_res_flag))
  ------------------
  |  |   45|  34.0k|#define SVCD_FALSE 0
  ------------------
  |  Branch (113:8): [True: 34.0k, False: 98.8k]
  |  Branch (113:46): [True: 34.0k, False: 0]
  ------------------
  114|  34.0k|    {
  115|       |        /* if not first resolution layer */
  116|  34.0k|        ps_ii_pred_ctxt->i4_ref_res_lyr_wd = ps_ii_pred_ctxt->i4_cur_res_lyr_wd;
  117|  34.0k|        ps_ii_pred_ctxt->i4_ref_res_lyr_ht = ps_ii_pred_ctxt->i4_cur_res_lyr_ht;
  118|  34.0k|    }
  119|       |
  120|   132k|    if ((ps_ctxt->i4_res_id >= 0) && (ps_ctxt->i4_res_id <= 2))
  ------------------
  |  Branch (120:9): [True: 34.0k, False: 98.8k]
  |  Branch (120:38): [True: 34.0k, False: 0]
  ------------------
  121|  34.0k|    {
  122|  34.0k|        ps_lyr_mem = &ps_ctxt->as_res_lyr_mem[ps_ctxt->i4_res_id];
  123|       |
  124|  34.0k|        ps_ii_pred_ctxt->pi2_ref_loc_x = ps_lyr_mem->pi2_ref_loc_x;
  125|  34.0k|        ps_ii_pred_ctxt->pi2_ref_loc_y = ps_lyr_mem->pi2_ref_loc_y;
  126|  34.0k|    }
  127|       |    /* Store the dimensions */
  128|   132k|    ps_ii_pred_ctxt->i4_cur_res_lyr_wd = ps_res_prms->i4_res_width;
  129|   132k|    ps_ii_pred_ctxt->i4_cur_res_lyr_ht = ps_res_prms->i4_res_height;
  130|       |
  131|   132k|    return (OK);
  ------------------
  |  |  114|   132k|#define OK        0
  ------------------
  132|   132k|}
isvcd_ii_get_ref_mb_mode:
  161|   445k|{
  162|   445k|    WORD32 i4_mb_x, i4_mb_y;
  163|   445k|    inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
  164|   445k|    WORD8 i1_mb_mode;
  165|       |
  166|   445k|    i4_mb_x = (i4_x_ref >> MB_WIDTH_SHIFT);
  ------------------
  |  |   70|   445k|#define MB_WIDTH_SHIFT 4
  ------------------
  167|   445k|    i4_mb_y = (i4_y_ref >> MB_HEIGHT_SHIFT);
  ------------------
  |  |   71|   445k|#define MB_HEIGHT_SHIFT 4
  ------------------
  168|       |
  169|       |    /* get the location of the byte which has the current mb mode */
  170|   445k|    pi1_ref_mb_modes += (i4_mb_y * i4_ref_mode_stride * i4_ref_mode_size);
  171|   445k|    pi1_ref_mb_modes += (i4_mb_x * i4_ref_mode_size);
  172|   445k|    ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes;
  173|   445k|    i1_mb_mode = ps_inter_lyr_mb_prms->i1_mb_mode;
  174|       |
  175|   445k|    if(i1_mb_mode <= SVC_INTER_MB)
  ------------------
  |  |  114|   445k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (175:8): [True: 416k, False: 28.7k]
  ------------------
  176|   416k|    {
  177|       |        /* INTER */
  178|   416k|        return (SVCD_FALSE);
  ------------------
  |  |   45|   416k|#define SVCD_FALSE 0
  ------------------
  179|   416k|    }
  180|  28.7k|    else
  181|  28.7k|    {
  182|       |        /* INTRA */
  183|  28.7k|        return (SVCD_TRUE);
  ------------------
  |  |   46|  28.7k|#define SVCD_TRUE 1
  ------------------
  184|  28.7k|    }
  185|   445k|}
isvcd_ii_get_ref_projections:
  216|   111k|{
  217|   111k|    WORD16 *pi2_ref_loc_x;
  218|   111k|    WORD16 *pi2_ref_loc_y;
  219|   111k|    WORD8 *pi1_ref_mb_mode;
  220|   111k|    WORD32 i4_ref_mode_stride;
  221|   111k|    WORD32 i4_element_size;
  222|   111k|    WORD32 i4_ref_x, i4_ref_y;
  223|   111k|    WORD32 i4_frame_x, i4_frame_y;
  224|   111k|    WORD32 i4_flag;
  225|       |
  226|   111k|    pi2_ref_loc_x = ps_ctxt->pi2_ref_loc_x;
  227|   111k|    pi2_ref_loc_y = ps_ctxt->pi2_ref_loc_y;
  228|       |
  229|   111k|    pi1_ref_mb_mode = (WORD8 *) ps_ref_mb_mode->pv_buffer;
  230|   111k|    i4_ref_mode_stride = ps_ref_mb_mode->i4_num_element_stride;
  231|   111k|    i4_element_size = ps_ref_mb_mode->i4_element_size;
  232|       |
  233|       |    /* get the current MB frame positions */
  234|   111k|    i4_frame_x = i4_mb_x << 4;
  235|   111k|    i4_frame_y = i4_mb_y << 4;
  236|       |
  237|       |    /* reset the flag */
  238|   111k|    i4_flag = SVCD_FALSE;
  ------------------
  |  |   45|   111k|#define SVCD_FALSE 0
  ------------------
  239|       |
  240|       |    /* project the (0,0) of current MB and get the ref MB mode */
  241|   111k|    i4_ref_x = pi2_ref_loc_x[i4_frame_x];
  242|   111k|    i4_ref_y = pi2_ref_loc_y[i4_frame_y];
  243|       |
  244|   111k|    if((i4_ref_x < ps_ctxt->i4_ref_res_lyr_wd) && (i4_ref_y < ps_ctxt->i4_ref_res_lyr_ht))
  ------------------
  |  Branch (244:8): [True: 111k, False: 0]
  |  Branch (244:51): [True: 111k, False: 0]
  ------------------
  245|   111k|    {
  246|   111k|        ps_ii_mb_ctxt->u1_top_left_intra_flag = isvcd_ii_get_ref_mb_mode(
  247|   111k|            pi1_ref_mb_mode, i4_ref_mode_stride, i4_element_size, i4_ref_x, i4_ref_y);
  248|   111k|    }
  249|      0|    else
  250|      0|    {
  251|       |        /* If projection is outside the picture boundary */
  252|      0|        ps_ii_mb_ctxt->u1_top_left_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  253|      0|    }
  254|       |    /* project the (15,0) of current MB and get the ref MB mode */
  255|   111k|    i4_ref_x = pi2_ref_loc_x[i4_frame_x + 15];
  256|   111k|    i4_ref_y = pi2_ref_loc_y[i4_frame_y];
  257|       |
  258|   111k|    if((i4_ref_x < ps_ctxt->i4_ref_res_lyr_wd) && (i4_ref_y < ps_ctxt->i4_ref_res_lyr_ht))
  ------------------
  |  Branch (258:8): [True: 111k, False: 0]
  |  Branch (258:51): [True: 111k, False: 0]
  ------------------
  259|   111k|    {
  260|   111k|        ps_ii_mb_ctxt->u1_top_rt_intra_flag = isvcd_ii_get_ref_mb_mode(
  261|   111k|            pi1_ref_mb_mode, i4_ref_mode_stride, i4_element_size, i4_ref_x, i4_ref_y);
  262|   111k|    }
  263|      0|    else
  264|      0|    {
  265|      0|        ps_ii_mb_ctxt->u1_top_rt_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  266|      0|    }
  267|       |
  268|       |    /* project the (0,15) of current MB and get the ref MB mode */
  269|   111k|    i4_ref_x = pi2_ref_loc_x[i4_frame_x];
  270|   111k|    i4_ref_y = pi2_ref_loc_y[i4_frame_y + 15];
  271|       |
  272|   111k|    if((i4_ref_x < ps_ctxt->i4_ref_res_lyr_wd) && (i4_ref_y < ps_ctxt->i4_ref_res_lyr_ht))
  ------------------
  |  Branch (272:8): [True: 111k, False: 0]
  |  Branch (272:51): [True: 111k, False: 0]
  ------------------
  273|   111k|    {
  274|   111k|        ps_ii_mb_ctxt->u1_bot_left_intra_flag = isvcd_ii_get_ref_mb_mode(
  275|   111k|            pi1_ref_mb_mode, i4_ref_mode_stride, i4_element_size, i4_ref_x, i4_ref_y);
  276|   111k|    }
  277|      0|    else
  278|      0|    {
  279|      0|        ps_ii_mb_ctxt->u1_bot_left_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  280|      0|    }
  281|       |
  282|       |    /* project the (15,15) of current MB and get the ref MB mode */
  283|   111k|    i4_ref_x = pi2_ref_loc_x[i4_frame_x + 15];
  284|   111k|    i4_ref_y = pi2_ref_loc_y[i4_frame_y + 15];
  285|       |
  286|   111k|    if((i4_ref_x < ps_ctxt->i4_ref_res_lyr_wd) && (i4_ref_y < ps_ctxt->i4_ref_res_lyr_ht))
  ------------------
  |  Branch (286:8): [True: 111k, False: 0]
  |  Branch (286:51): [True: 111k, False: 0]
  ------------------
  287|   111k|    {
  288|   111k|        ps_ii_mb_ctxt->u1_bot_rt_intra_flag = isvcd_ii_get_ref_mb_mode(
  289|   111k|            pi1_ref_mb_mode, i4_ref_mode_stride, i4_element_size, i4_ref_x, i4_ref_y);
  290|   111k|    }
  291|      0|    else
  292|      0|    {
  293|      0|        ps_ii_mb_ctxt->u1_bot_rt_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  294|      0|    }
  295|       |
  296|       |    /* if any of the 4 cormers are falling into intra region
  297|       |      set the INTRA INTER Flag */
  298|   111k|    if((SVCD_TRUE == ps_ii_mb_ctxt->u1_top_left_intra_flag) ||
  ------------------
  |  |   46|   111k|#define SVCD_TRUE 1
  ------------------
  |  Branch (298:8): [True: 6.41k, False: 104k]
  ------------------
  299|   104k|       (SVCD_TRUE == ps_ii_mb_ctxt->u1_top_rt_intra_flag) ||
  ------------------
  |  |   46|   104k|#define SVCD_TRUE 1
  ------------------
  |  Branch (299:8): [True: 4.72k, False: 100k]
  ------------------
  300|   100k|       (SVCD_TRUE == ps_ii_mb_ctxt->u1_bot_left_intra_flag) ||
  ------------------
  |  |   46|   100k|#define SVCD_TRUE 1
  ------------------
  |  Branch (300:8): [True: 2.97k, False: 97.2k]
  ------------------
  301|  97.2k|       (SVCD_TRUE == ps_ii_mb_ctxt->u1_bot_rt_intra_flag))
  ------------------
  |  |   46|  97.2k|#define SVCD_TRUE 1
  ------------------
  |  Branch (301:8): [True: 568, False: 96.6k]
  ------------------
  302|  14.6k|    {
  303|  14.6k|        i4_flag = SVCD_TRUE;
  ------------------
  |  |   46|  14.6k|#define SVCD_TRUE 1
  ------------------
  304|  14.6k|    }
  305|       |
  306|       |    /* derive the intersection point of MB boundaries */
  307|   111k|    if(SVCD_TRUE == i4_flag)
  ------------------
  |  |   46|   111k|#define SVCD_TRUE 1
  ------------------
  |  Branch (307:8): [True: 14.6k, False: 96.6k]
  ------------------
  308|  14.6k|    {
  309|  14.6k|        WORD32 i4_intr_x, i4_intr_y;
  310|  14.6k|        WORD32 i4_ref_mb_init_x, i4_ref_mb_init_y;
  311|  14.6k|        WORD32 i4_ctr;
  312|       |
  313|       |        /* set the variables to initial values */
  314|  14.6k|        i4_intr_x = 0;
  315|  14.6k|        i4_intr_y = 0;
  316|  14.6k|        i4_ref_mb_init_x = pi2_ref_loc_x[i4_frame_x] >> MB_WIDTH_SHIFT;
  ------------------
  |  |   70|  14.6k|#define MB_WIDTH_SHIFT 4
  ------------------
  317|  14.6k|        i4_ref_mb_init_y = pi2_ref_loc_y[i4_frame_y] >> MB_HEIGHT_SHIFT;
  ------------------
  |  |   71|  14.6k|#define MB_HEIGHT_SHIFT 4
  ------------------
  318|       |
  319|       |        /* loop until an Mb boundary is found in horizontal direction */
  320|   175k|        for(i4_ctr = 0; i4_ctr < MB_WIDTH; i4_ctr++)
  ------------------
  |  |   67|   175k|#define MB_WIDTH 16
  ------------------
  |  Branch (320:25): [True: 170k, False: 5.44k]
  ------------------
  321|   170k|        {
  322|   170k|            i4_ref_x = pi2_ref_loc_x[i4_frame_x + i4_ctr];
  323|   170k|            i4_ref_x >>= MB_WIDTH_SHIFT;
  ------------------
  |  |   70|   170k|#define MB_WIDTH_SHIFT 4
  ------------------
  324|       |
  325|       |            /* check if the locations are falling into same MB */
  326|   170k|            if(i4_ref_x != i4_ref_mb_init_x)
  ------------------
  |  Branch (326:16): [True: 9.23k, False: 160k]
  ------------------
  327|  9.23k|            {
  328|  9.23k|                break;
  329|  9.23k|            }
  330|       |            /* increment the position */
  331|   160k|            i4_intr_x++;
  332|   160k|        }
  333|       |
  334|       |        /* loop until an Mb boundary is found in vertical direction */
  335|   173k|        for(i4_ctr = 0; i4_ctr < MB_HEIGHT; i4_ctr++)
  ------------------
  |  |   68|   173k|#define MB_HEIGHT 16
  ------------------
  |  Branch (335:25): [True: 168k, False: 5.14k]
  ------------------
  336|   168k|        {
  337|   168k|            i4_ref_y = pi2_ref_loc_y[i4_frame_y + i4_ctr];
  338|   168k|            i4_ref_y >>= MB_HEIGHT_SHIFT;
  ------------------
  |  |   71|   168k|#define MB_HEIGHT_SHIFT 4
  ------------------
  339|       |
  340|       |            /* check if the locations are falling into same MB */
  341|   168k|            if(i4_ref_y != i4_ref_mb_init_y)
  ------------------
  |  Branch (341:16): [True: 9.53k, False: 158k]
  ------------------
  342|  9.53k|            {
  343|  9.53k|                break;
  344|  9.53k|            }
  345|       |            /* increment the position */
  346|   158k|            i4_intr_y++;
  347|   158k|        }
  348|       |        /* store the intersection points */
  349|  14.6k|        ps_ii_mb_ctxt->u1_intersection_x = i4_intr_x;
  350|  14.6k|        ps_ii_mb_ctxt->u1_intersection_y = i4_intr_y;
  351|  14.6k|    }
  352|  96.6k|    else
  353|  96.6k|    {
  354|       |        /* set to default value */
  355|  96.6k|        ps_ii_mb_ctxt->u1_intersection_x = 0;
  356|  96.6k|        ps_ii_mb_ctxt->u1_intersection_y = 0;
  357|  96.6k|    }
  358|       |
  359|   111k|    return (i4_flag);
  360|   111k|}
isvcd_ii_pred_compute_flags_mb:
  386|   131k|{
  387|   131k|    intra_inter_pred_ctxt_t *ps_ctxt;
  388|   131k|    WORD32 i4_mb_x, i4_mb_y;
  389|   131k|    dec_svc_mb_info_t *ps_svc_mb_prms;
  390|   131k|    UNUSED(pv_mb_prms);
  ------------------
  |  |   45|   131k|#define UNUSED(x) ((void)(x))
  ------------------
  391|       |
  392|   131k|    if((NULL == pv_ii_pred_ctxt) || (NULL == ps_ref_mb_mode) || (NULL == ps_coord) ||
  ------------------
  |  Branch (392:8): [True: 0, False: 131k]
  |  Branch (392:37): [True: 0, False: 131k]
  |  Branch (392:65): [True: 0, False: 131k]
  ------------------
  393|   131k|       (NULL == pu1_ii_mb_mode))
  ------------------
  |  Branch (393:8): [True: 0, False: 131k]
  ------------------
  394|      0|    {
  395|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  396|      0|    }
  397|       |
  398|   131k|    ps_ctxt = (intra_inter_pred_ctxt_t *) pv_ii_pred_ctxt;
  399|   131k|    ps_svc_mb_prms = (dec_svc_mb_info_t *) pv_svc_mb_prms;
  400|       |
  401|       |    /* get mb co-ordinates */
  402|   131k|    i4_mb_x = ps_coord->u2_mb_x;
  403|   131k|    i4_mb_y = ps_coord->u2_mb_y;
  404|       |
  405|   131k|    {
  406|   131k|        intra_inter_mb_t *ps_ii_mb_ctxt;
  407|   131k|        WORD32 i4_ii_flag;
  408|       |
  409|       |        /* get the current MB strcuture pointer */
  410|   131k|        ps_ii_mb_ctxt = &ps_ctxt->s_intra_inter_mb_prms;
  411|       |
  412|       |        /* reset the Intra Inter qualified flag for current MB */
  413|   131k|        i4_ii_flag = SVCD_FALSE;
  ------------------
  |  |   45|   131k|#define SVCD_FALSE 0
  ------------------
  414|       |
  415|       |        /* check for base mode flag and Inter MB status */
  416|   131k|        if(1 == ps_svc_mb_prms->u1_base_mode_flag)
  ------------------
  |  Branch (416:12): [True: 111k, False: 20.4k]
  ------------------
  417|   111k|        {
  418|       |            /* call the function which calculates the projections
  419|       |               and returns whether current MB has to under go
  420|       |               Inter Intra Prediction */
  421|   111k|            i4_ii_flag = isvcd_ii_get_ref_projections(ps_ctxt, ps_ii_mb_ctxt, ps_ref_mb_mode,
  422|   111k|                                                      i4_mb_x, i4_mb_y);
  423|   111k|        }
  424|       |
  425|       |        /* If the current MB requires Intra Inter prediction */
  426|   131k|        if(SVCD_TRUE == i4_ii_flag)
  ------------------
  |  |   46|   131k|#define SVCD_TRUE 1
  ------------------
  |  Branch (426:12): [True: 14.6k, False: 117k]
  ------------------
  427|  14.6k|        {
  428|       |            /* set the mb mode */
  429|  14.6k|            *pu1_ii_mb_mode = SVC_INTRA_INTER_MB;
  ------------------
  |  |  118|  14.6k|#define SVC_INTRA_INTER_MB (1 << 4) /*!< Intra Inter MB */
  ------------------
  430|  14.6k|        }
  431|   117k|        else
  432|   117k|        {
  433|       |            /* set all MB params to default values */
  434|   117k|            ps_ii_mb_ctxt->u1_bot_left_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|   117k|#define SVCD_FALSE 0
  ------------------
  435|   117k|            ps_ii_mb_ctxt->u1_bot_rt_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|   117k|#define SVCD_FALSE 0
  ------------------
  436|   117k|            ps_ii_mb_ctxt->u1_top_left_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|   117k|#define SVCD_FALSE 0
  ------------------
  437|   117k|            ps_ii_mb_ctxt->u1_top_rt_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|   117k|#define SVCD_FALSE 0
  ------------------
  438|   117k|            ps_ii_mb_ctxt->u1_intersection_x = 0;
  439|   117k|            ps_ii_mb_ctxt->u1_intersection_y = 0;
  440|       |
  441|       |            /* set the mb mode to 0 (which has no interpretation) */
  442|   117k|            *pu1_ii_mb_mode = 0;
  443|   117k|        }
  444|   131k|    }
  445|   131k|    return (OK);
  ------------------
  |  |  114|   131k|#define OK        0
  ------------------
  446|   131k|}
isvcd_ii_pred_mb:
  472|  14.6k|{
  473|  14.6k|    intra_inter_mb_t *ps_mb_ctxt;
  474|  14.6k|    UWORD8 *pu1_rec_y, *pu1_rec_uv;
  475|  14.6k|    UWORD8 *pu1_recon_luma;
  476|  14.6k|    WORD32 i4_recon_luma_stride;
  477|  14.6k|    UWORD8 *pu1_recon_chroma;
  478|  14.6k|    WORD32 i4_recon_chroma_stride;
  479|  14.6k|    UWORD8 *pu1_pred_luma;
  480|  14.6k|    UWORD8 *pu1_pred_chroma;
  481|  14.6k|    WORD32 i4_pred_luma_stride;
  482|  14.6k|    WORD32 i4_pred_chroma_stride;
  483|  14.6k|    WORD32 i4_intr_x, i4_intr_y;
  484|  14.6k|    intra_inter_pred_ctxt_t *ps_ctxt;
  485|  14.6k|    pic_buffer_t *ps_frame_buf;
  486|  14.6k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
  487|  14.6k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  488|       |
  489|  14.6k|    ps_ctxt = (intra_inter_pred_ctxt_t *) ps_svc_lyr_dec->pv_ii_pred_ctxt;
  490|  14.6k|    ps_mb_ctxt = &ps_ctxt->s_intra_inter_mb_prms;
  491|  14.6k|    ps_frame_buf = ps_dec->ps_cur_pic;
  492|  14.6k|    i4_recon_luma_stride = ps_dec->u2_frm_wd_y;
  493|  14.6k|    i4_recon_chroma_stride = ps_dec->u2_frm_wd_uv;
  494|       |
  495|       |    /* derive the intersection point */
  496|  14.6k|    i4_intr_x = ps_mb_ctxt->u1_intersection_x;
  497|  14.6k|    i4_intr_y = ps_mb_ctxt->u1_intersection_y;
  498|       |
  499|  14.6k|    pu1_rec_y = ps_frame_buf->pu1_buf1 + (ps_cur_mb_info->u2_mbx << 4) +
  500|  14.6k|                (i4_recon_luma_stride * (ps_cur_mb_info->u2_mby << 4));
  501|       |
  502|  14.6k|    pu1_rec_uv = ps_frame_buf->pu1_buf2 + (ps_cur_mb_info->u2_mbx << 3) * YUV420SP_FACTOR +
  ------------------
  |  |  119|  14.6k|#define YUV420SP_FACTOR 2
  ------------------
  503|  14.6k|                 (i4_recon_chroma_stride * (ps_cur_mb_info->u2_mby << 3));
  504|       |
  505|  14.6k|    pu1_pred_luma = ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma;
  506|  14.6k|    pu1_pred_chroma = ps_svc_lyr_dec->pu1_ii_resamp_buffer_chroma;
  507|  14.6k|    i4_pred_luma_stride = MB_SIZE;
  ------------------
  |  |  554|  14.6k|#define MB_SIZE             16
  ------------------
  508|  14.6k|    i4_pred_chroma_stride = MB_SIZE;
  ------------------
  |  |  554|  14.6k|#define MB_SIZE             16
  ------------------
  509|       |
  510|       |    /* get the recon and residual buffer pointer */
  511|  14.6k|    pu1_recon_luma = pu1_rec_y;
  512|  14.6k|    pu1_recon_chroma = pu1_rec_uv;
  513|       |
  514|       |    /*-----------------------------------------------------------------------*/
  515|       |    /* Reconstruct TOP_LEFT Partition                                        */
  516|       |    /*-----------------------------------------------------------------------*/
  517|  14.6k|    {
  518|  14.6k|        WORD32 i4_width, i4_height;
  519|       |
  520|       |        /* assign the appropriate buffer params based on Intra status */
  521|  14.6k|        if(SVCD_TRUE == ps_mb_ctxt->u1_top_left_intra_flag)
  ------------------
  |  |   46|  14.6k|#define SVCD_TRUE 1
  ------------------
  |  Branch (521:12): [True: 6.41k, False: 8.26k]
  ------------------
  522|  6.41k|        {
  523|       |            /* Luma Processing */
  524|  6.41k|            isvcd_copy_data(pu1_pred_luma, i4_pred_luma_stride, pu1_recon_luma,
  525|  6.41k|                            i4_recon_luma_stride, i4_intr_x, i4_intr_y);
  526|       |
  527|       |            /* assign appropriate width and height for chroma */
  528|  6.41k|            i4_width = (((i4_intr_x + 1) >> 1) << 1);
  529|  6.41k|            i4_height = ((i4_intr_y + 1) & ~1);
  530|  6.41k|            i4_height >>= 1;
  531|       |            /* Chroma Processing (cb and cr interleaved) */
  532|  6.41k|            isvcd_copy_data(pu1_pred_chroma, i4_pred_chroma_stride, pu1_recon_chroma,
  533|  6.41k|                            i4_recon_chroma_stride, i4_width, i4_height);
  534|  6.41k|        }
  535|  14.6k|    }
  536|       |
  537|       |    /*-----------------------------------------------------------------------*/
  538|       |    /* Reconstruct TOP_RIGHT Partition                                       */
  539|       |    /*-----------------------------------------------------------------------*/
  540|  14.6k|    {
  541|  14.6k|        WORD32 i4_width, i4_height;
  542|       |
  543|       |        /* assign the appropriate buffer params based on Intra status */
  544|  14.6k|        if(SVCD_TRUE == ps_mb_ctxt->u1_top_rt_intra_flag)
  ------------------
  |  |   46|  14.6k|#define SVCD_TRUE 1
  ------------------
  |  Branch (544:12): [True: 7.88k, False: 6.79k]
  ------------------
  545|  7.88k|        {
  546|  7.88k|            pu1_pred_luma += i4_intr_x;
  547|  7.88k|            pu1_pred_chroma += (((i4_intr_x + 1) >> 1) << 1);
  548|       |
  549|       |            /* ----------------------- Luma ------------------------ */
  550|       |            /* get the recon and residual buffer pointer */
  551|  7.88k|            pu1_recon_luma = pu1_rec_y + i4_intr_x;
  552|       |
  553|       |            /* assign appropriate width and height for luma */
  554|  7.88k|            i4_width = MB_WIDTH - i4_intr_x;
  ------------------
  |  |   67|  7.88k|#define MB_WIDTH 16
  ------------------
  555|  7.88k|            i4_height = i4_intr_y;
  556|       |
  557|       |            /* Luma Processing */
  558|       |            /* Luma Processing */
  559|  7.88k|            isvcd_copy_data(pu1_pred_luma, i4_pred_luma_stride, pu1_recon_luma,
  560|  7.88k|                            i4_recon_luma_stride, i4_width, i4_height);
  561|       |
  562|       |            /* ----------------------- Chroma ----------------------- */
  563|       |            /* assign appropriate width and height for luma */
  564|  7.88k|            i4_width = (BLOCK_WIDTH - ((i4_intr_x + 1) >> 1)) << 1;
  ------------------
  |  |   64|  7.88k|#define BLOCK_WIDTH 8
  ------------------
  565|       |
  566|       |            /* Height includes for both Cb & Cr */
  567|  7.88k|            i4_height = ((i4_intr_y + 1) & ~1);
  568|  7.88k|            i4_height >>= 1;
  569|       |            /* get the recon and residual buffer pointer */
  570|  7.88k|            pu1_recon_chroma = pu1_rec_uv;
  571|  7.88k|            {
  572|  7.88k|                WORD32 i4_temp;
  573|  7.88k|                i4_temp = (((i4_intr_x + 1) >> 1) << 1);
  574|  7.88k|                pu1_recon_chroma += i4_temp;
  575|  7.88k|            }
  576|       |
  577|       |            /* Chroma Processing (cb and cr  interleaved) */
  578|  7.88k|            isvcd_copy_data(pu1_pred_chroma, i4_pred_chroma_stride, pu1_recon_chroma,
  579|  7.88k|                            i4_recon_chroma_stride, i4_width, i4_height);
  580|  7.88k|        }
  581|  14.6k|    }
  582|       |
  583|       |    /*-----------------------------------------------------------------------*/
  584|       |    /* Reconstruct BOTTOM_LEFT Partition                                     */
  585|       |    /*-----------------------------------------------------------------------*/
  586|  14.6k|    {
  587|  14.6k|        WORD32 i4_width, i4_height;
  588|       |
  589|       |        /* assign the appropriate buffer params based on Intra status */
  590|  14.6k|        if(SVCD_TRUE == ps_mb_ctxt->u1_bot_left_intra_flag)
  ------------------
  |  |   46|  14.6k|#define SVCD_TRUE 1
  ------------------
  |  Branch (590:12): [True: 6.59k, False: 8.08k]
  ------------------
  591|  6.59k|        {
  592|  6.59k|            pu1_pred_luma = ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma;
  593|  6.59k|            pu1_pred_chroma = ps_svc_lyr_dec->pu1_ii_resamp_buffer_chroma;
  594|       |
  595|       |            /* increment to current vertical offset */
  596|  6.59k|            pu1_pred_luma += i4_intr_y * i4_pred_luma_stride;
  597|  6.59k|            pu1_pred_chroma += (((i4_intr_y + 1) & ~1) >> 1) * i4_pred_chroma_stride;
  598|       |
  599|       |            /* ----------------------- Luma ----------------------- */
  600|       |            /* get the recon and residual buffer pointer */
  601|  6.59k|            pu1_recon_luma = pu1_rec_y;
  602|  6.59k|            pu1_recon_luma += i4_intr_y * i4_recon_luma_stride;
  603|       |
  604|       |            /* assign appropriate width and height */
  605|  6.59k|            i4_width = i4_intr_x;
  606|  6.59k|            i4_height = MB_HEIGHT - i4_intr_y;
  ------------------
  |  |   68|  6.59k|#define MB_HEIGHT 16
  ------------------
  607|       |
  608|       |            /* Luma Processing */
  609|  6.59k|            isvcd_copy_data(pu1_pred_luma, i4_pred_luma_stride, pu1_recon_luma,
  610|  6.59k|                            i4_recon_luma_stride, i4_width, i4_height);
  611|       |
  612|       |            /* ----------------------- Chroma ----------------------- */
  613|  6.59k|            pu1_recon_chroma = pu1_rec_uv;
  614|  6.59k|            {
  615|  6.59k|                WORD32 i4_temp;
  616|  6.59k|                i4_temp = ((i4_intr_y + 1) & ~1) >> 1;
  617|  6.59k|                pu1_recon_chroma += (i4_temp * i4_recon_chroma_stride);
  618|  6.59k|            }
  619|       |            /* assign appropriate width and height */
  620|  6.59k|            i4_width = ((i4_intr_x + 1) >> 1) << 1;
  621|  6.59k|            i4_height = MB_HEIGHT - (i4_intr_y & ~1);
  ------------------
  |  |   68|  6.59k|#define MB_HEIGHT 16
  ------------------
  622|  6.59k|            i4_height >>= 1;
  623|       |            /* Chroma Processing (cb and cr interleaved) */
  624|  6.59k|            isvcd_copy_data(pu1_pred_chroma, i4_pred_chroma_stride, pu1_recon_chroma,
  625|  6.59k|                            i4_recon_chroma_stride, i4_width, i4_height);
  626|  6.59k|        }
  627|  14.6k|    }
  628|       |
  629|       |    /*-----------------------------------------------------------------------*/
  630|       |    /* Reconstruct BOTTOM_RIGHT Partition                                    */
  631|       |    /*-----------------------------------------------------------------------*/
  632|  14.6k|    {
  633|  14.6k|        WORD32 i4_width, i4_height;
  634|       |
  635|       |        /* assign the appropriate buffer params based on Intra status */
  636|  14.6k|        if(SVCD_TRUE == ps_mb_ctxt->u1_bot_rt_intra_flag)
  ------------------
  |  |   46|  14.6k|#define SVCD_TRUE 1
  ------------------
  |  Branch (636:12): [True: 7.89k, False: 6.78k]
  ------------------
  637|  7.89k|        {
  638|  7.89k|            pu1_pred_luma = ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma;
  639|  7.89k|            pu1_pred_chroma = ps_svc_lyr_dec->pu1_ii_resamp_buffer_chroma;
  640|       |
  641|       |            /* increment to current vertical offset */
  642|  7.89k|            pu1_pred_luma += i4_intr_x;
  643|  7.89k|            pu1_pred_luma += i4_intr_y * i4_pred_luma_stride;
  644|  7.89k|            pu1_pred_chroma += (((i4_intr_y + 1) & ~1) >> 1) * i4_pred_chroma_stride;
  645|  7.89k|            pu1_pred_chroma += ((i4_intr_x + 1) >> 1) << 1;
  646|       |
  647|       |            /* ----------------------- Luma ----------------------- */
  648|       |            /* get the recon and residual buffer pointer horz */
  649|  7.89k|            pu1_recon_luma = pu1_rec_y + i4_intr_x;
  650|       |
  651|       |            /* get the recon and residual buffer pointer vertical */
  652|  7.89k|            pu1_recon_luma += (i4_intr_y * i4_recon_luma_stride);
  653|       |
  654|       |            /* assign appropriate width and height */
  655|  7.89k|            i4_width = MB_WIDTH - i4_intr_x;
  ------------------
  |  |   67|  7.89k|#define MB_WIDTH 16
  ------------------
  656|  7.89k|            i4_height = MB_HEIGHT - i4_intr_y;
  ------------------
  |  |   68|  7.89k|#define MB_HEIGHT 16
  ------------------
  657|       |
  658|       |            /* Luma Processing */
  659|  7.89k|            isvcd_copy_data(pu1_pred_luma, i4_pred_luma_stride, pu1_recon_luma,
  660|  7.89k|                            i4_recon_luma_stride, i4_width, i4_height);
  661|       |
  662|       |            /* ----------------------- Chroma ----------------------- */
  663|       |            /* get the recon and residual buffer pointer horz */
  664|  7.89k|            pu1_recon_chroma = pu1_rec_uv;
  665|  7.89k|            {
  666|  7.89k|                WORD32 i4_temp;
  667|  7.89k|                i4_temp = ((i4_intr_y + 1) & ~1) >> 1;
  668|  7.89k|                i4_temp *= i4_recon_chroma_stride;
  669|  7.89k|                i4_temp += (((i4_intr_x + 1) >> 1) << 1);
  670|  7.89k|                pu1_recon_chroma += i4_temp;
  671|  7.89k|            }
  672|       |
  673|       |            /* assign appropriate width and height */
  674|  7.89k|            i4_width = (BLOCK_WIDTH - ((i4_intr_x + 1) >> 1)) << 1;
  ------------------
  |  |   64|  7.89k|#define BLOCK_WIDTH 8
  ------------------
  675|  7.89k|            i4_height = MB_HEIGHT - (i4_intr_y & ~1);
  ------------------
  |  |   68|  7.89k|#define MB_HEIGHT 16
  ------------------
  676|  7.89k|            i4_height >>= 1;
  677|       |            /* Chroma Processing (cb and cr interleaved) */
  678|  7.89k|            isvcd_copy_data(pu1_pred_chroma, i4_pred_chroma_stride, pu1_recon_chroma,
  679|  7.89k|                            i4_recon_chroma_stride, i4_width, i4_height);
  680|  7.89k|        }
  681|  14.6k|    }
  682|  14.6k|    return;
  683|  14.6k|}

isvcd_get_ceil_log2:
  269|  25.7k|{
  270|  25.7k|    WORD32 i4_bits = 0;
  271|       |
  272|  25.7k|    i4_input--;
  273|   164k|    while(i4_input > 0)
  ------------------
  |  Branch (273:11): [True: 138k, False: 25.7k]
  ------------------
  274|   138k|    {
  275|   138k|        i4_bits++;
  276|   138k|        i4_input >>= 1;
  277|   138k|    }
  278|  25.7k|    return (i4_bits);
  279|  25.7k|}
isvcd_copy_data:
  350|   148k|{
  351|   148k|    WORD32 i4_vert_lines;
  352|       |
  353|       |    /* loop for copy all the lines requried */
  354|  2.31M|    for(i4_vert_lines = 0; i4_vert_lines < i4_num_lines; i4_vert_lines++)
  ------------------
  |  Branch (354:28): [True: 2.16M, False: 148k]
  ------------------
  355|  2.16M|    {
  356|  2.16M|        memcpy(pu1_dst, pu1_src, i4_num_bytes);
  357|  2.16M|        pu1_src += i4_src_stride;
  358|  2.16M|        pu1_dst += i4_dst_stride;
  359|  2.16M|    }
  360|   148k|    return;
  361|   148k|}
isvcd_copy_data_semiplanr:
  393|  90.8k|{
  394|  90.8k|    WORD32 i4_vert_lines, u4_i;
  395|       |
  396|       |    /* loop for copy all the lines requried */
  397|  1.20M|    for(i4_vert_lines = 0; i4_vert_lines < i4_num_lines; i4_vert_lines++)
  ------------------
  |  Branch (397:28): [True: 1.11M, False: 90.8k]
  ------------------
  398|  1.11M|    {
  399|  14.3M|        for(u4_i = 0; u4_i < i4_num_bytes; u4_i++)
  ------------------
  |  Branch (399:23): [True: 13.1M, False: 1.11M]
  ------------------
  400|  13.1M|        {
  401|  13.1M|            *(pu1_dst1 + u4_i) = *(pu1_src + (2 * u4_i));
  402|  13.1M|            *(pu1_dst2 + u4_i) = *(pu1_src + (2 * u4_i) + 1);
  403|  13.1M|        }
  404|       |
  405|  1.11M|        pu1_src += i4_src_stride;
  406|  1.11M|        pu1_dst1 += i4_dst_stride;
  407|  1.11M|        pu1_dst2 += i4_dst_stride;
  408|  1.11M|    }
  409|  90.8k|    return;
  410|  90.8k|}
isvcd_get_ref_layer_avlblty_dyadic:
  445|   278k|{
  446|   278k|    inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
  447|   278k|    WORD8 i1_mb_mode;
  448|       |
  449|       |    /* get the location of the byte which has the current mb mode */
  450|   278k|    pi1_ref_mb_modes += (i4_ref_mb_y * i4_ref_mode_stride * i4_element_size);
  451|   278k|    pi1_ref_mb_modes += (i4_ref_mb_x * i4_element_size);
  452|   278k|    ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes;
  453|   278k|    i1_mb_mode = ps_inter_lyr_mb_prms->i1_mb_mode;
  454|       |
  455|   278k|    if(i1_mb_mode <= SVC_INTER_MB)
  ------------------
  |  |  114|   278k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (455:8): [True: 230k, False: 47.1k]
  ------------------
  456|   230k|    {
  457|       |        /* INTER */
  458|   230k|        *pi4_avlblty = 0;
  459|   230k|    }
  460|  47.1k|    else
  461|  47.1k|    {
  462|       |        /* INTRA */
  463|  47.1k|        *pi4_avlblty = 1;
  464|  47.1k|    }
  465|       |
  466|       |    /* if constrained intra flag is 1 then check for same slice id */
  467|   278k|    if(1 == i1_cons_intr_samp_flag)
  ------------------
  |  Branch (467:8): [True: 153k, False: 124k]
  ------------------
  468|   153k|    {
  469|   153k|        if(1 == *pi4_avlblty)
  ------------------
  |  Branch (469:12): [True: 25.5k, False: 127k]
  ------------------
  470|  25.5k|        {
  471|       |            /* check for different slice idc */
  472|  25.5k|            if(ps_inter_lyr_mb_prms->i1_slice_id != i1_curr_slice_id)
  ------------------
  |  Branch (472:16): [True: 2.69k, False: 22.8k]
  ------------------
  473|  2.69k|            {
  474|       |                /* store the mode as not available for upsampling */
  475|  2.69k|                *pi4_avlblty = 0;
  476|  2.69k|            }
  477|  25.5k|        }
  478|   153k|    }
  479|   278k|}
isvcd_diagonal_construct_dyadic:
  509|   318k|{
  510|   318k|    WORD32 i4_diff_hor_ver, i4_sgn_xy;
  511|   318k|    WORD32 i4_xc, i4_yc;
  512|   318k|    WORD32 i4_samp1, i4_samp2, i4_samp3;
  513|   318k|    WORD32 i4_result;
  514|   318k|    UWORD8 *pu1_tmp;
  515|       |
  516|   318k|    i4_diff_hor_ver = ABS(i4_xd_index) - ABS(i4_yd_index);
  ------------------
  |  |  100|   318k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 160k, False: 158k]
  |  |  ------------------
  ------------------
                  i4_diff_hor_ver = ABS(i4_xd_index) - ABS(i4_yd_index);
  ------------------
  |  |  100|   318k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 161k, False: 157k]
  |  |  ------------------
  ------------------
  517|   318k|    i4_sgn_xy = SIGN(i4_xd_index * i4_yd_index);
  ------------------
  |  |  103|   318k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 162k, False: 155k]
  |  |  |  Branch (103:36): [True: 162k, False: 0]
  |  |  ------------------
  ------------------
  518|       |
  519|   318k|    if(i4_diff_hor_ver > 0)
  ------------------
  |  Branch (519:8): [True: 130k, False: 188k]
  ------------------
  520|   130k|    {
  521|   130k|        i4_xc = i4_x - (i4_sgn_xy * i4_yd_index);
  522|   130k|        i4_yc = i4_y - i4_yd_index;
  523|   130k|        pu1_tmp = pu1_refarray + (i4_yc * i4_refarray_wd);
  524|   130k|        i4_samp1 = pu1_tmp[i4_xc - 1];
  525|   130k|        i4_samp2 = pu1_tmp[i4_xc];
  526|   130k|        i4_samp3 = pu1_tmp[i4_xc + 1];
  527|   130k|    }
  528|   188k|    else if(i4_diff_hor_ver < 0)
  ------------------
  |  Branch (528:13): [True: 137k, False: 51.1k]
  ------------------
  529|   137k|    {
  530|   137k|        i4_xc = i4_x - i4_xd_index;
  531|   137k|        i4_yc = i4_y - (i4_sgn_xy * i4_xd_index);
  532|   137k|        pu1_tmp = pu1_refarray + ((i4_yc - 1) * i4_refarray_wd);
  533|   137k|        i4_samp1 = pu1_tmp[i4_xc];
  534|   137k|        pu1_tmp += i4_refarray_wd;
  535|   137k|        i4_samp2 = pu1_tmp[i4_xc];
  536|   137k|        pu1_tmp += i4_refarray_wd;
  537|   137k|        i4_samp3 = pu1_tmp[i4_xc];
  538|   137k|    }
  539|  51.1k|    else
  540|  51.1k|    {
  541|  51.1k|        WORD32 i4_ref_xd, i4_ref_yd;
  542|       |
  543|  51.1k|        i4_ref_xd = i4_x - i4_xd_index;
  544|  51.1k|        i4_ref_yd = i4_y - i4_yd_index;
  545|  51.1k|        i4_xc = i4_ref_xd + SIGN(i4_xd_index);
  ------------------
  |  |  103|  51.1k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 25.3k, False: 25.7k]
  |  |  |  Branch (103:36): [True: 25.3k, False: 0]
  |  |  ------------------
  ------------------
  546|  51.1k|        i4_yc = i4_ref_yd + SIGN(i4_yd_index);
  ------------------
  |  |  103|  51.1k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 25.4k, False: 25.7k]
  |  |  |  Branch (103:36): [True: 25.4k, False: 0]
  |  |  ------------------
  ------------------
  547|  51.1k|        pu1_tmp = pu1_refarray + (i4_ref_yd * i4_refarray_wd);
  548|  51.1k|        i4_samp1 = pu1_tmp[i4_xc];
  549|  51.1k|        i4_samp2 = pu1_tmp[i4_ref_xd];
  550|  51.1k|        pu1_tmp = pu1_refarray + (i4_yc * i4_refarray_wd);
  551|  51.1k|        i4_samp3 = pu1_tmp[i4_ref_xd];
  552|  51.1k|    }
  553|       |
  554|   318k|    i4_result = (i4_samp1 + (i4_samp2 << 1) + i4_samp3 + 2) >> 2;
  555|   318k|    pu1_tmp = pu1_refarray + (i4_y * i4_refarray_wd);
  556|       |    /* Store the filled sample */
  557|   318k|    pu1_tmp[i4_x] = i4_result;
  558|       |
  559|   318k|    return (i4_result);
  560|   318k|}
isvcd_left_right_padding:
  584|  29.9k|{
  585|  29.9k|    WORD32 i4_idx_i;
  586|  29.9k|    UWORD8 *pu1_src, *pu1_dst;
  587|       |
  588|  29.9k|    UNUSED(i1_yd_index);
  ------------------
  |  |   45|  29.9k|#define UNUSED(x) ((void)(x))
  ------------------
  589|  29.9k|    UNUSED(pu1_refarray_2);
  ------------------
  |  |   45|  29.9k|#define UNUSED(x) ((void)(x))
  ------------------
  590|  29.9k|    UNUSED(i4_mb_adjoin_x);
  ------------------
  |  |   45|  29.9k|#define UNUSED(x) ((void)(x))
  ------------------
  591|  29.9k|    UNUSED(i4_mb_adjoin_y);
  ------------------
  |  |   45|  29.9k|#define UNUSED(x) ((void)(x))
  ------------------
  592|  29.9k|    UNUSED(i4_corner_pixel_available);
  ------------------
  |  |   45|  29.9k|#define UNUSED(x) ((void)(x))
  ------------------
  593|       |
  594|  29.9k|    pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride);
  595|  29.9k|    pu1_src = pu1_dst + i1_xd_index;
  596|  29.9k|    i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  29.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 25.9k, False: 4.01k]
  |  |  ------------------
  ------------------
  597|  29.9k|    u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  29.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 24.2k, False: 5.65k]
  |  |  ------------------
  ------------------
  598|  29.9k|    pu1_dst = pu1_src - i1_xd_index;
  599|       |
  600|   227k|    for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++)
  ------------------
  |  Branch (600:23): [True: 197k, False: 29.9k]
  ------------------
  601|   197k|    {
  602|   197k|        memset(pu1_dst, *pu1_src, u1_seg_wd);
  603|   197k|        pu1_dst += i4_refarray_stride;
  604|   197k|        pu1_src += i4_refarray_stride;
  605|   197k|    }
  606|  29.9k|}
isvcd_left_right_padding_chroma:
  629|  30.0k|{
  630|  30.0k|    WORD32 i4_idx_i;
  631|  30.0k|    UWORD8 *pu1_src_cb, *pu1_dst_cb;
  632|  30.0k|    UWORD8 *pu1_src_cr, *pu1_dst_cr;
  633|  30.0k|    WORD32 i4_tmp;
  634|       |
  635|  30.0k|    UNUSED(i1_yd_index);
  ------------------
  |  |   45|  30.0k|#define UNUSED(x) ((void)(x))
  ------------------
  636|  30.0k|    UNUSED(i4_mb_adjoin_x);
  ------------------
  |  |   45|  30.0k|#define UNUSED(x) ((void)(x))
  ------------------
  637|  30.0k|    UNUSED(i4_mb_adjoin_y);
  ------------------
  |  |   45|  30.0k|#define UNUSED(x) ((void)(x))
  ------------------
  638|  30.0k|    UNUSED(i4_corner_pixel_available);
  ------------------
  |  |   45|  30.0k|#define UNUSED(x) ((void)(x))
  ------------------
  639|       |
  640|  30.0k|    i4_tmp = i4_x + (i4_y * i4_refarray_stride);
  641|  30.0k|    pu1_dst_cb = pu1_refarray_1 + i4_tmp;
  642|  30.0k|    pu1_src_cb = pu1_dst_cb + i1_xd_index;
  643|  30.0k|    pu1_dst_cr = pu1_refarray_2 + i4_tmp;
  644|  30.0k|    pu1_src_cr = pu1_dst_cr + i1_xd_index;
  645|       |
  646|  30.0k|    i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_CHROMA);
  ------------------
  |  |   61|  30.0k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 15.2k, False: 14.7k]
  |  |  ------------------
  ------------------
  647|  30.0k|    u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_CHROMA);
  ------------------
  |  |   61|  30.0k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 13.6k, False: 16.4k]
  |  |  ------------------
  ------------------
  648|  30.0k|    pu1_dst_cb = pu1_src_cb - i1_xd_index;
  649|  30.0k|    pu1_dst_cr = pu1_src_cr - i1_xd_index;
  650|       |
  651|   133k|    for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++)
  ------------------
  |  Branch (651:23): [True: 103k, False: 30.0k]
  ------------------
  652|   103k|    {
  653|   103k|        memset(pu1_dst_cb, *pu1_src_cb, u1_seg_wd);
  654|   103k|        pu1_dst_cb += i4_refarray_stride;
  655|   103k|        pu1_src_cb += i4_refarray_stride;
  656|   103k|        memset(pu1_dst_cr, *pu1_src_cr, u1_seg_wd);
  657|   103k|        pu1_dst_cr += i4_refarray_stride;
  658|   103k|        pu1_src_cr += i4_refarray_stride;
  659|   103k|    }
  660|  30.0k|}
isvcd_top_bot_padding:
  683|  26.1k|{
  684|  26.1k|    WORD32 i4_idx_i;
  685|  26.1k|    UWORD8 *pu1_src, *pu1_dst;
  686|       |
  687|  26.1k|    UNUSED(i1_xd_index);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  688|  26.1k|    UNUSED(pu1_refarray_2);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  689|  26.1k|    UNUSED(i4_mb_adjoin_x);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  690|  26.1k|    UNUSED(i4_mb_adjoin_y);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  691|  26.1k|    UNUSED(i4_corner_pixel_available);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  692|       |
  693|  26.1k|    pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride);
  694|  26.1k|    pu1_src = pu1_dst + (i1_yd_index * i4_refarray_stride);
  695|  26.1k|    i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  26.1k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 20.6k, False: 5.46k]
  |  |  ------------------
  ------------------
  696|  26.1k|    u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  26.1k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 15.3k, False: 10.7k]
  |  |  ------------------
  ------------------
  697|  26.1k|    pu1_dst = pu1_src - (i1_yd_index * i4_refarray_stride);
  698|       |
  699|   108k|    for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++)
  ------------------
  |  Branch (699:23): [True: 82.2k, False: 26.1k]
  ------------------
  700|  82.2k|    {
  701|  82.2k|        memcpy(pu1_dst, pu1_src, u1_seg_wd);
  702|  82.2k|        pu1_dst += i4_refarray_stride;
  703|  82.2k|    }
  704|  26.1k|}
isvcd_top_bot_padding_chroma:
  728|  26.1k|{
  729|  26.1k|    WORD32 i4_idx_i;
  730|  26.1k|    UWORD8 *pu1_src_cb, *pu1_dst_cb;
  731|  26.1k|    UWORD8 *pu1_src_cr, *pu1_dst_cr;
  732|  26.1k|    WORD32 i4_tmp;
  733|       |
  734|  26.1k|    UNUSED(i1_xd_index);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  735|  26.1k|    UNUSED(pu1_refarray_2);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  736|  26.1k|    UNUSED(i4_mb_adjoin_x);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  737|  26.1k|    UNUSED(i4_mb_adjoin_y);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  738|  26.1k|    UNUSED(i4_corner_pixel_available);
  ------------------
  |  |   45|  26.1k|#define UNUSED(x) ((void)(x))
  ------------------
  739|       |
  740|  26.1k|    i4_tmp = i4_x + (i4_y * i4_refarray_stride);
  741|  26.1k|    pu1_dst_cb = pu1_refarray_1 + i4_tmp;
  742|  26.1k|    pu1_dst_cr = pu1_refarray_2 + i4_tmp;
  743|       |
  744|  26.1k|    i4_tmp = (i1_yd_index * i4_refarray_stride);
  745|  26.1k|    pu1_src_cb = pu1_dst_cb + i4_tmp;
  746|  26.1k|    pu1_src_cr = pu1_dst_cr + i4_tmp;
  747|  26.1k|    i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_CHROMA);
  ------------------
  |  |   61|  26.1k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 12.6k, False: 13.4k]
  |  |  ------------------
  ------------------
  748|  26.1k|    u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_CHROMA);
  ------------------
  |  |   61|  26.1k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 7.35k, False: 18.7k]
  |  |  ------------------
  ------------------
  749|       |
  750|  26.1k|    i4_tmp = (i1_yd_index * i4_refarray_stride);
  751|  26.1k|    pu1_dst_cb = pu1_src_cb - i4_tmp;
  752|  26.1k|    pu1_dst_cr = pu1_src_cr - i4_tmp;
  753|       |
  754|  71.0k|    for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++)
  ------------------
  |  Branch (754:23): [True: 44.8k, False: 26.1k]
  ------------------
  755|  44.8k|    {
  756|  44.8k|        memcpy(pu1_dst_cb, pu1_src_cb, u1_seg_wd);
  757|  44.8k|        pu1_dst_cb += i4_refarray_stride;
  758|  44.8k|        memcpy(pu1_dst_cr, pu1_src_cr, u1_seg_wd);
  759|  44.8k|        pu1_dst_cr += i4_refarray_stride;
  760|  44.8k|    }
  761|  26.1k|}
isvcd_diag_reconstruction:
  785|  23.2k|{
  786|  23.2k|    WORD32 i4_i;
  787|  23.2k|    UWORD8 *pu1_src_1, *pu1_src_2, *pu1_dst;
  788|  23.2k|    UWORD8 u1_filter_delay_buf[18] = {0};
  789|  23.2k|    UWORD8 u1_out_buf[16] = {0};
  790|  23.2k|    WORD32 i4_width, i4_height;
  791|  23.2k|    WORD32 i4_x_off, i4_y_off;
  792|  23.2k|    WORD32 i4_block_size = BLOCK_WIDTH;
  ------------------
  |  |   64|  23.2k|#define BLOCK_WIDTH 8
  ------------------
  793|       |
  794|  23.2k|    UNUSED(pu1_refarray_2);
  ------------------
  |  |   45|  23.2k|#define UNUSED(x) ((void)(x))
  ------------------
  795|       |
  796|  23.2k|    pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride);
  797|  23.2k|    pu1_src_1 = pu1_dst + i1_xd_index;
  798|  23.2k|    pu1_src_2 = pu1_dst + (i1_yd_index * i4_refarray_stride);
  799|       |
  800|  23.2k|    i4_width = MAX(u1_seg_wd, (((i4_mb_adjoin_x >> 3) ^ 1) * i4_block_size));
  ------------------
  |  |   60|  23.2k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 15.7k, False: 7.52k]
  |  |  ------------------
  ------------------
  801|  23.2k|    i4_height = MAX(u1_seg_ht, (((i4_mb_adjoin_y >> 4) ^ 1) * i4_block_size));
  ------------------
  |  |   60|  23.2k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 18.6k, False: 4.62k]
  |  |  ------------------
  ------------------
  802|  23.2k|    i4_x_off = (i4_width - u1_seg_wd);
  803|  23.2k|    i4_y_off = (i4_height - u1_seg_ht);
  804|       |
  805|  23.2k|    if(i1_xd_index < 0 && i1_yd_index > 0)
  ------------------
  |  Branch (805:8): [True: 11.0k, False: 12.2k]
  |  Branch (805:27): [True: 5.56k, False: 5.48k]
  ------------------
  806|  5.56k|    {
  807|       |        /* Quadrant 1 Processing load the pixel in the filter delay buffer */
  808|  48.4k|        for(i4_i = 0; i4_i < (i4_height + 1); i4_i++)
  ------------------
  |  Branch (808:23): [True: 42.8k, False: 5.56k]
  ------------------
  809|  42.8k|        {
  810|  42.8k|            u1_filter_delay_buf[i4_i] = *pu1_src_1;
  811|  42.8k|            pu1_src_1 += i4_refarray_stride;
  812|  42.8k|        }
  813|       |
  814|  5.56k|        pu1_src_2 -= i4_x_off;
  815|  5.56k|        memcpy(&u1_filter_delay_buf[i4_i], pu1_src_2, i4_width);
  816|       |
  817|  5.56k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (817:12): [True: 5.16k, False: 395]
  ------------------
  818|  5.16k|        {
  819|       |            /* interpolate the unavailable corner pixel */
  820|  5.16k|            u1_filter_delay_buf[i4_i - 1] =
  821|  5.16k|                (u1_filter_delay_buf[i4_i] + u1_filter_delay_buf[i4_i - 2] + 1) >> 1;
  822|  5.16k|        }
  823|       |
  824|  77.7k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (824:23): [True: 72.2k, False: 5.56k]
  ------------------
  825|  72.2k|        {
  826|       |            /* get the filtered output */
  827|  72.2k|            u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) +
  828|  72.2k|                                (u1_filter_delay_buf[i4_i + 2]) + 2) >>
  829|  72.2k|                               2;
  830|  72.2k|        }
  831|       |
  832|       |        /* fill the segment with diagonal reconstructed output */
  833|  37.8k|        for(i4_i = 0; i4_i < u1_seg_ht; i4_i++)
  ------------------
  |  Branch (833:23): [True: 32.2k, False: 5.56k]
  ------------------
  834|  32.2k|        {
  835|  32.2k|            memcpy(pu1_dst, &u1_out_buf[i4_x_off + i4_i], u1_seg_wd);
  836|  32.2k|            pu1_dst += i4_refarray_stride;
  837|  32.2k|        }
  838|  5.56k|    }
  839|  17.6k|    else if(i1_xd_index > 0 && i1_yd_index > 0)
  ------------------
  |  Branch (839:13): [True: 12.2k, False: 5.48k]
  |  Branch (839:32): [True: 6.25k, False: 5.95k]
  ------------------
  840|  6.25k|    {
  841|       |        /* Quadrant 2 Processing */
  842|       |        /* load the pixel in the filter delay buffer  */
  843|  6.25k|        memcpy(&u1_filter_delay_buf[0], pu1_src_2, (i4_width + 1));
  844|  48.5k|        for(i4_i = i4_height; i4_i > 0; i4_i--)
  ------------------
  |  Branch (844:31): [True: 42.2k, False: 6.25k]
  ------------------
  845|  42.2k|        {
  846|  42.2k|            u1_filter_delay_buf[i4_width + i4_i] = *pu1_src_1;
  847|  42.2k|            pu1_src_1 += i4_refarray_stride;
  848|  42.2k|        }
  849|       |
  850|  6.25k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (850:12): [True: 5.50k, False: 757]
  ------------------
  851|  5.50k|        {
  852|       |            /* interpolate the unavailable corner pixel */
  853|  5.50k|            u1_filter_delay_buf[i4_width] =
  854|  5.50k|                (u1_filter_delay_buf[i4_width - 1] + u1_filter_delay_buf[i4_width + 1] + 1) >> 1;
  855|  5.50k|        }
  856|       |
  857|  92.3k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (857:23): [True: 86.1k, False: 6.25k]
  ------------------
  858|  86.1k|        {
  859|       |            /* get the filtered output */
  860|  86.1k|            u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) +
  861|  86.1k|                                (u1_filter_delay_buf[i4_i + 2]) + 2) >>
  862|  86.1k|                               2;
  863|  86.1k|        }
  864|       |
  865|       |        /* fill the segment with diagonal reconstructed output */
  866|  43.5k|        for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++)
  ------------------
  |  Branch (866:23): [True: 37.2k, False: 6.25k]
  ------------------
  867|  37.2k|        {
  868|  37.2k|            memcpy(pu1_dst, &u1_out_buf[i4_height - i4_i], u1_seg_wd);
  869|  37.2k|            pu1_dst += i4_refarray_stride;
  870|  37.2k|        }
  871|  6.25k|    }
  872|  11.4k|    else if(i1_xd_index > 0 && i1_yd_index < 0)
  ------------------
  |  Branch (872:13): [True: 5.95k, False: 5.48k]
  |  Branch (872:32): [True: 5.95k, False: 0]
  ------------------
  873|  5.95k|    {
  874|       |        /* Quadrant 3 Processing */
  875|       |        /* load the pixel in the filter delay buffer  */
  876|  5.95k|        memcpy(&u1_filter_delay_buf[0], pu1_src_2, (i4_width + 1));
  877|       |
  878|  5.95k|        pu1_src_1 -= (i4_y_off * i4_refarray_stride);
  879|  41.4k|        for(i4_i = 1; i4_i <= i4_height; i4_i++)
  ------------------
  |  Branch (879:23): [True: 35.5k, False: 5.95k]
  ------------------
  880|  35.5k|        {
  881|  35.5k|            u1_filter_delay_buf[i4_width + i4_i] = *pu1_src_1;
  882|  35.5k|            pu1_src_1 += i4_refarray_stride;
  883|  35.5k|        }
  884|       |
  885|  5.95k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (885:12): [True: 5.40k, False: 549]
  ------------------
  886|  5.40k|        {
  887|       |            /* interpolate the unavailable corner pixel */
  888|  5.40k|            u1_filter_delay_buf[i4_width] =
  889|  5.40k|                (u1_filter_delay_buf[i4_width - 1] + u1_filter_delay_buf[i4_width + 1] + 1) >> 1;
  890|  5.40k|        }
  891|       |
  892|  83.1k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (892:23): [True: 77.2k, False: 5.95k]
  ------------------
  893|  77.2k|        {
  894|       |            /* get the filtered output */
  895|  77.2k|            u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) +
  896|  77.2k|                                (u1_filter_delay_buf[i4_i + 2]) + 2) >>
  897|  77.2k|                               2;
  898|  77.2k|        }
  899|       |
  900|       |        /* fill the segment with diagonal reconstructed output */
  901|  37.9k|        for(i4_i = 0; i4_i < u1_seg_ht; i4_i++)
  ------------------
  |  Branch (901:23): [True: 32.0k, False: 5.95k]
  ------------------
  902|  32.0k|        {
  903|  32.0k|            memcpy(pu1_dst, &u1_out_buf[i4_y_off + i4_i], u1_seg_wd);
  904|  32.0k|            pu1_dst += i4_refarray_stride;
  905|  32.0k|        }
  906|  5.95k|    }
  907|  5.48k|    else
  908|  5.48k|    {
  909|       |        /* Quadrant 4 Processing */
  910|       |        /* load the pixel in the filter delay buffer  */
  911|  5.48k|        pu1_src_1 += ((u1_seg_ht - 1) * i4_refarray_stride);
  912|  44.3k|        for(i4_i = 0; i4_i <= i4_height; i4_i++)
  ------------------
  |  Branch (912:23): [True: 38.8k, False: 5.48k]
  ------------------
  913|  38.8k|        {
  914|  38.8k|            u1_filter_delay_buf[i4_i] = *pu1_src_1;
  915|  38.8k|            pu1_src_1 -= i4_refarray_stride;
  916|  38.8k|        }
  917|       |
  918|  5.48k|        pu1_src_2 -= i4_x_off;
  919|  5.48k|        memcpy(&u1_filter_delay_buf[i4_i], pu1_src_2, i4_width);
  920|       |
  921|  5.48k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (921:12): [True: 5.09k, False: 389]
  ------------------
  922|  5.09k|        {
  923|       |            /* interpolate the unavailable corner pixel */
  924|  5.09k|            u1_filter_delay_buf[i4_i - 1] =
  925|  5.09k|                (u1_filter_delay_buf[i4_i] + u1_filter_delay_buf[i4_i - 2] + 1) >> 1;
  926|  5.09k|        }
  927|       |
  928|  73.6k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (928:23): [True: 68.1k, False: 5.48k]
  ------------------
  929|  68.1k|        {
  930|       |            /* get the filtered output */
  931|  68.1k|            u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) +
  932|  68.1k|                                (u1_filter_delay_buf[i4_i + 2]) + 2) >>
  933|  68.1k|                               2;
  934|  68.1k|        }
  935|       |
  936|       |        /* fill the segment with diagonal reconstructed output */
  937|  36.0k|        for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++)
  ------------------
  |  Branch (937:23): [True: 30.5k, False: 5.48k]
  ------------------
  938|  30.5k|        {
  939|  30.5k|            memcpy(pu1_dst, &u1_out_buf[(u1_seg_ht + i4_x_off) - i4_i], u1_seg_wd);
  940|  30.5k|            pu1_dst += i4_refarray_stride;
  941|  30.5k|        }
  942|  5.48k|    }
  943|  23.2k|}
isvcd_diag_reconstruction_chroma:
  966|  23.2k|{
  967|  23.2k|    WORD32 i4_i;
  968|  23.2k|    UWORD8 u1_filter_delay_buf_cb[18] = {0};
  969|  23.2k|    UWORD8 u1_filter_delay_buf_cr[18] = {0};
  970|  23.2k|    UWORD8 u1_out_buf_cb[16] = {0};
  971|  23.2k|    UWORD8 u1_out_buf_cr[16] = {0};
  972|  23.2k|    WORD32 i4_width, i4_height;
  973|  23.2k|    WORD32 i4_x_off, i4_y_off;
  974|  23.2k|    WORD32 i4_block_size = BLOCK_WIDTH >> 1;
  ------------------
  |  |   64|  23.2k|#define BLOCK_WIDTH 8
  ------------------
  975|  23.2k|    UWORD8 *pu1_src_1_cb, *pu1_src_2_cb, *pu1_dst_cb;
  976|  23.2k|    UWORD8 *pu1_src_1_cr, *pu1_src_2_cr, *pu1_dst_cr;
  977|  23.2k|    WORD32 i4_tmp;
  978|       |
  979|  23.2k|    i4_tmp = i4_x + (i4_y * i4_refarray_stride);
  980|  23.2k|    pu1_dst_cb = pu1_refarray_1 + i4_tmp;
  981|  23.2k|    pu1_dst_cr = pu1_refarray_2 + i4_tmp;
  982|       |
  983|  23.2k|    pu1_src_1_cb = pu1_dst_cb + i1_xd_index;
  984|  23.2k|    pu1_src_1_cr = pu1_dst_cr + i1_xd_index;
  985|  23.2k|    i4_tmp = (i1_yd_index * i4_refarray_stride);
  986|  23.2k|    pu1_src_2_cb = pu1_dst_cb + i4_tmp;
  987|  23.2k|    pu1_src_2_cr = pu1_dst_cr + i4_tmp;
  988|       |
  989|  23.2k|    i4_width = MAX(u1_seg_wd, (((i4_mb_adjoin_x >> 3) ^ 1) * i4_block_size));
  ------------------
  |  |   60|  23.2k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 15.7k, False: 7.52k]
  |  |  ------------------
  ------------------
  990|  23.2k|    i4_height = MAX(u1_seg_ht, (((i4_mb_adjoin_y >> 4) ^ 1) * i4_block_size));
  ------------------
  |  |   60|  23.2k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 18.6k, False: 4.63k]
  |  |  ------------------
  ------------------
  991|  23.2k|    i4_x_off = (i4_width - u1_seg_wd);
  992|  23.2k|    i4_y_off = (i4_height - u1_seg_ht);
  993|       |
  994|  23.2k|    if(i1_xd_index < 0 && i1_yd_index > 0)
  ------------------
  |  Branch (994:8): [True: 11.0k, False: 12.2k]
  |  Branch (994:27): [True: 5.56k, False: 5.49k]
  ------------------
  995|  5.56k|    {
  996|       |        /* Quadrant 1 Processing load the pixel in the filter delay buffer */
  997|  30.4k|        for(i4_i = 0; i4_i < (i4_height + 1); i4_i++)
  ------------------
  |  Branch (997:23): [True: 24.9k, False: 5.56k]
  ------------------
  998|  24.9k|        {
  999|  24.9k|            u1_filter_delay_buf_cb[i4_i] = *pu1_src_1_cb;
 1000|  24.9k|            pu1_src_1_cb += i4_refarray_stride;
 1001|  24.9k|            u1_filter_delay_buf_cr[i4_i] = *pu1_src_1_cr;
 1002|  24.9k|            pu1_src_1_cr += i4_refarray_stride;
 1003|  24.9k|        }
 1004|       |
 1005|  5.56k|        pu1_src_2_cb -= i4_x_off;
 1006|  5.56k|        pu1_src_2_cr -= i4_x_off;
 1007|  5.56k|        memcpy(&u1_filter_delay_buf_cb[i4_i], pu1_src_2_cb, i4_width);
 1008|  5.56k|        memcpy(&u1_filter_delay_buf_cr[i4_i], pu1_src_2_cr, i4_width);
 1009|       |
 1010|  5.56k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (1010:12): [True: 5.16k, False: 395]
  ------------------
 1011|  5.16k|        {
 1012|       |            /* interpolate the unavailable corner pixel */
 1013|  5.16k|            u1_filter_delay_buf_cb[i4_i - 1] =
 1014|  5.16k|                (u1_filter_delay_buf_cb[i4_i] + u1_filter_delay_buf_cb[i4_i - 2] + 1) >> 1;
 1015|       |
 1016|  5.16k|            u1_filter_delay_buf_cr[i4_i - 1] =
 1017|  5.16k|                (u1_filter_delay_buf_cr[i4_i] + u1_filter_delay_buf_cr[i4_i - 2] + 1) >> 1;
 1018|  5.16k|        }
 1019|       |
 1020|  41.6k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (1020:23): [True: 36.0k, False: 5.56k]
  ------------------
 1021|  36.0k|        {
 1022|       |            /* get the filtered output */
 1023|  36.0k|            u1_out_buf_cb[i4_i] =
 1024|  36.0k|                ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) +
 1025|  36.0k|                 (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >>
 1026|  36.0k|                2;
 1027|       |
 1028|  36.0k|            u1_out_buf_cr[i4_i] =
 1029|  36.0k|                ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) +
 1030|  36.0k|                 (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >>
 1031|  36.0k|                2;
 1032|  36.0k|        }
 1033|       |
 1034|       |        /* fill the segment with diagonal reconstructed output */
 1035|  22.5k|        for(i4_i = 0; i4_i < u1_seg_ht; i4_i++)
  ------------------
  |  Branch (1035:23): [True: 17.0k, False: 5.56k]
  ------------------
 1036|  17.0k|        {
 1037|  17.0k|            memcpy(pu1_dst_cb, &u1_out_buf_cb[i4_x_off + i4_i], u1_seg_wd);
 1038|  17.0k|            pu1_dst_cb += i4_refarray_stride;
 1039|  17.0k|            memcpy(pu1_dst_cr, &u1_out_buf_cr[i4_x_off + i4_i], u1_seg_wd);
 1040|  17.0k|            pu1_dst_cr += i4_refarray_stride;
 1041|  17.0k|        }
 1042|  5.56k|    }
 1043|  17.7k|    else if(i1_xd_index > 0 && i1_yd_index > 0)
  ------------------
  |  Branch (1043:13): [True: 12.2k, False: 5.49k]
  |  Branch (1043:32): [True: 6.25k, False: 5.95k]
  ------------------
 1044|  6.25k|    {
 1045|       |        /* Quadrant 2 Processing load the pixel in the filter delay buffer  */
 1046|  6.25k|        memcpy(&u1_filter_delay_buf_cb[0], pu1_src_2_cb, (i4_width + 1));
 1047|  6.25k|        memcpy(&u1_filter_delay_buf_cr[0], pu1_src_2_cr, (i4_width + 1));
 1048|       |
 1049|  27.9k|        for(i4_i = i4_height; i4_i > 0; i4_i--)
  ------------------
  |  Branch (1049:31): [True: 21.6k, False: 6.25k]
  ------------------
 1050|  21.6k|        {
 1051|  21.6k|            u1_filter_delay_buf_cb[i4_width + i4_i] = *pu1_src_1_cb;
 1052|  21.6k|            pu1_src_1_cb += i4_refarray_stride;
 1053|       |
 1054|  21.6k|            u1_filter_delay_buf_cr[i4_width + i4_i] = *pu1_src_1_cr;
 1055|  21.6k|            pu1_src_1_cr += i4_refarray_stride;
 1056|  21.6k|        }
 1057|       |
 1058|  6.25k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (1058:12): [True: 5.50k, False: 757]
  ------------------
 1059|  5.50k|        {
 1060|       |            /* interpolate the unavailable corner pixel */
 1061|  5.50k|            u1_filter_delay_buf_cb[i4_width] =
 1062|  5.50k|                (u1_filter_delay_buf_cb[i4_width - 1] + u1_filter_delay_buf_cb[i4_width + 1] + 1) >>
 1063|  5.50k|                1;
 1064|       |
 1065|  5.50k|            u1_filter_delay_buf_cr[i4_width] =
 1066|  5.50k|                (u1_filter_delay_buf_cr[i4_width - 1] + u1_filter_delay_buf_cr[i4_width + 1] + 1) >>
 1067|  5.50k|                1;
 1068|  5.50k|        }
 1069|       |
 1070|  46.7k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (1070:23): [True: 40.4k, False: 6.25k]
  ------------------
 1071|  40.4k|        {
 1072|       |            /* get the filtered output */
 1073|  40.4k|            u1_out_buf_cb[i4_i] =
 1074|  40.4k|                ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) +
 1075|  40.4k|                 (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >>
 1076|  40.4k|                2;
 1077|       |
 1078|  40.4k|            u1_out_buf_cr[i4_i] =
 1079|  40.4k|                ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) +
 1080|  40.4k|                 (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >>
 1081|  40.4k|                2;
 1082|  40.4k|        }
 1083|       |
 1084|       |        /* fill the segment with diagonal reconstructed output */
 1085|  25.4k|        for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++)
  ------------------
  |  Branch (1085:23): [True: 19.1k, False: 6.25k]
  ------------------
 1086|  19.1k|        {
 1087|  19.1k|            memcpy(pu1_dst_cb, &u1_out_buf_cb[i4_height - i4_i], u1_seg_wd);
 1088|  19.1k|            pu1_dst_cb += i4_refarray_stride;
 1089|       |
 1090|  19.1k|            memcpy(pu1_dst_cr, &u1_out_buf_cr[i4_height - i4_i], u1_seg_wd);
 1091|  19.1k|            pu1_dst_cr += i4_refarray_stride;
 1092|  19.1k|        }
 1093|  6.25k|    }
 1094|  11.4k|    else if(i1_xd_index > 0 && i1_yd_index < 0)
  ------------------
  |  Branch (1094:13): [True: 5.95k, False: 5.49k]
  |  Branch (1094:32): [True: 5.95k, False: 0]
  ------------------
 1095|  5.95k|    {
 1096|       |        /* Quadrant 3 Processing load the pixel in the filter delay buffer  */
 1097|  5.95k|        memcpy(&u1_filter_delay_buf_cb[0], pu1_src_2_cb, (i4_width + 1));
 1098|  5.95k|        memcpy(&u1_filter_delay_buf_cr[0], pu1_src_2_cr, (i4_width + 1));
 1099|       |
 1100|  5.95k|        i4_tmp = (i4_y_off * i4_refarray_stride);
 1101|  5.95k|        pu1_src_1_cb -= i4_tmp;
 1102|  5.95k|        pu1_src_1_cr -= i4_tmp;
 1103|  24.8k|        for(i4_i = 1; i4_i <= i4_height; i4_i++)
  ------------------
  |  Branch (1103:23): [True: 18.8k, False: 5.95k]
  ------------------
 1104|  18.8k|        {
 1105|  18.8k|            u1_filter_delay_buf_cb[i4_width + i4_i] = *pu1_src_1_cb;
 1106|  18.8k|            pu1_src_1_cb += i4_refarray_stride;
 1107|       |
 1108|  18.8k|            u1_filter_delay_buf_cr[i4_width + i4_i] = *pu1_src_1_cr;
 1109|  18.8k|            pu1_src_1_cr += i4_refarray_stride;
 1110|  18.8k|        }
 1111|       |
 1112|  5.95k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (1112:12): [True: 5.40k, False: 549]
  ------------------
 1113|  5.40k|        {
 1114|       |            /* interpolate the unavailable corner pixel */
 1115|  5.40k|            u1_filter_delay_buf_cb[i4_width] =
 1116|  5.40k|                (u1_filter_delay_buf_cb[i4_width - 1] + u1_filter_delay_buf_cb[i4_width + 1] + 1) >>
 1117|  5.40k|                1;
 1118|       |
 1119|  5.40k|            u1_filter_delay_buf_cr[i4_width] =
 1120|  5.40k|                (u1_filter_delay_buf_cr[i4_width - 1] + u1_filter_delay_buf_cr[i4_width + 1] + 1) >>
 1121|  5.40k|                1;
 1122|  5.40k|        }
 1123|       |
 1124|  42.7k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (1124:23): [True: 36.7k, False: 5.95k]
  ------------------
 1125|  36.7k|        {
 1126|       |            /* get the filtered output */
 1127|  36.7k|            u1_out_buf_cb[i4_i] =
 1128|  36.7k|                ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) +
 1129|  36.7k|                 (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >>
 1130|  36.7k|                2;
 1131|       |
 1132|  36.7k|            u1_out_buf_cr[i4_i] =
 1133|  36.7k|                ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) +
 1134|  36.7k|                 (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >>
 1135|  36.7k|                2;
 1136|  36.7k|        }
 1137|       |
 1138|       |        /* fill the segment with diagonal reconstructed output */
 1139|  23.6k|        for(i4_i = 0; i4_i < u1_seg_ht; i4_i++)
  ------------------
  |  Branch (1139:23): [True: 17.7k, False: 5.95k]
  ------------------
 1140|  17.7k|        {
 1141|  17.7k|            memcpy(pu1_dst_cb, &u1_out_buf_cb[i4_y_off + i4_i], u1_seg_wd);
 1142|  17.7k|            pu1_dst_cb += i4_refarray_stride;
 1143|  17.7k|            memcpy(pu1_dst_cr, &u1_out_buf_cr[i4_y_off + i4_i], u1_seg_wd);
 1144|  17.7k|            pu1_dst_cr += i4_refarray_stride;
 1145|  17.7k|        }
 1146|  5.95k|    }
 1147|  5.49k|    else
 1148|  5.49k|    {
 1149|       |        /* Quadrant 4 Processing load the pixel in the filter delay buffer  */
 1150|  5.49k|        i4_tmp = ((u1_seg_ht - 1) * i4_refarray_stride);
 1151|  5.49k|        pu1_src_1_cb += i4_tmp;
 1152|  5.49k|        pu1_src_1_cr += i4_tmp;
 1153|       |
 1154|  28.8k|        for(i4_i = 0; i4_i <= i4_height; i4_i++)
  ------------------
  |  Branch (1154:23): [True: 23.3k, False: 5.49k]
  ------------------
 1155|  23.3k|        {
 1156|  23.3k|            u1_filter_delay_buf_cb[i4_i] = *pu1_src_1_cb;
 1157|  23.3k|            pu1_src_1_cb -= i4_refarray_stride;
 1158|       |
 1159|  23.3k|            u1_filter_delay_buf_cr[i4_i] = *pu1_src_1_cr;
 1160|  23.3k|            pu1_src_1_cr -= i4_refarray_stride;
 1161|  23.3k|        }
 1162|       |
 1163|  5.49k|        pu1_src_2_cb -= i4_x_off;
 1164|  5.49k|        pu1_src_2_cr -= i4_x_off;
 1165|  5.49k|        memcpy(&u1_filter_delay_buf_cb[i4_i], pu1_src_2_cb, i4_width);
 1166|  5.49k|        memcpy(&u1_filter_delay_buf_cr[i4_i], pu1_src_2_cr, i4_width);
 1167|       |
 1168|  5.49k|        if(0 == i4_corner_pixel_available)
  ------------------
  |  Branch (1168:12): [True: 5.10k, False: 389]
  ------------------
 1169|  5.10k|        {
 1170|       |            /* interpolate the unavailable corner pixel */
 1171|  5.10k|            u1_filter_delay_buf_cb[i4_i - 1] =
 1172|  5.10k|                (u1_filter_delay_buf_cb[i4_i] + u1_filter_delay_buf_cb[i4_i - 2] + 1) >> 1;
 1173|       |
 1174|  5.10k|            u1_filter_delay_buf_cr[i4_i - 1] =
 1175|  5.10k|                (u1_filter_delay_buf_cr[i4_i] + u1_filter_delay_buf_cr[i4_i - 2] + 1) >> 1;
 1176|  5.10k|        }
 1177|       |
 1178|  39.8k|        for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++)
  ------------------
  |  Branch (1178:23): [True: 34.3k, False: 5.49k]
  ------------------
 1179|  34.3k|        {
 1180|       |            /* get the filtered output */
 1181|  34.3k|            u1_out_buf_cb[i4_i] =
 1182|  34.3k|                ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) +
 1183|  34.3k|                 (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >>
 1184|  34.3k|                2;
 1185|       |
 1186|  34.3k|            u1_out_buf_cr[i4_i] =
 1187|  34.3k|                ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) +
 1188|  34.3k|                 (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >>
 1189|  34.3k|                2;
 1190|  34.3k|        }
 1191|       |
 1192|       |        /* fill the segment with diagonal reconstructed output */
 1193|  22.3k|        for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++)
  ------------------
  |  Branch (1193:23): [True: 16.9k, False: 5.49k]
  ------------------
 1194|  16.9k|        {
 1195|  16.9k|            memcpy(pu1_dst_cb, &u1_out_buf_cb[(u1_seg_ht + i4_x_off) - i4_i], u1_seg_wd);
 1196|  16.9k|            pu1_dst_cb += i4_refarray_stride;
 1197|  16.9k|            memcpy(pu1_dst_cr, &u1_out_buf_cr[(u1_seg_ht + i4_x_off) - i4_i], u1_seg_wd);
 1198|  16.9k|            pu1_dst_cr += i4_refarray_stride;
 1199|  16.9k|        }
 1200|  5.49k|    }
 1201|  23.2k|}
isvcd_diag_padding:
 1224|  17.9k|{
 1225|  17.9k|    WORD32 i4_idx_i;
 1226|  17.9k|    UWORD8 *pu1_src, *pu1_dst;
 1227|       |
 1228|  17.9k|    UNUSED(pu1_refarray_2);
  ------------------
  |  |   45|  17.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1229|  17.9k|    UNUSED(i4_mb_adjoin_x);
  ------------------
  |  |   45|  17.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1230|  17.9k|    UNUSED(i4_mb_adjoin_y);
  ------------------
  |  |   45|  17.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1231|  17.9k|    UNUSED(i4_corner_pixel_available);
  ------------------
  |  |   45|  17.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1232|       |
 1233|  17.9k|    pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride);
 1234|  17.9k|    pu1_src = pu1_dst + i1_xd_index + (i1_yd_index * i4_refarray_stride);
 1235|  17.9k|    i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 15.0k, False: 2.92k]
  |  |  ------------------
  ------------------
 1236|  17.9k|    u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 13.6k, False: 4.24k]
  |  |  ------------------
  ------------------
 1237|  17.9k|    i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 13.9k, False: 3.98k]
  |  |  ------------------
  ------------------
 1238|  17.9k|    u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 9.89k, False: 8.04k]
  |  |  ------------------
  ------------------
 1239|  17.9k|    pu1_dst = pu1_src - i1_xd_index - (i1_yd_index * i4_refarray_stride);
 1240|       |
 1241|  75.3k|    for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++)
  ------------------
  |  Branch (1241:23): [True: 57.3k, False: 17.9k]
  ------------------
 1242|  57.3k|    {
 1243|  57.3k|        memset(pu1_dst, *pu1_src, u1_seg_wd);
 1244|  57.3k|        pu1_dst += i4_refarray_stride;
 1245|  57.3k|    }
 1246|  17.9k|}
isvcd_diag_padding_chroma:
 1271|  17.9k|{
 1272|  17.9k|    WORD32 i4_idx_i;
 1273|  17.9k|    UWORD8 *pu1_src_cb, *pu1_dst_cb;
 1274|  17.9k|    UWORD8 *pu1_src_cr, *pu1_dst_cr;
 1275|  17.9k|    WORD32 i4_tmp;
 1276|       |
 1277|  17.9k|    UNUSED(i4_mb_adjoin_x);
  ------------------
  |  |   45|  17.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1278|  17.9k|    UNUSED(i4_mb_adjoin_y);
  ------------------
  |  |   45|  17.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1279|  17.9k|    UNUSED(i4_corner_pixel_available);
  ------------------
  |  |   45|  17.9k|#define UNUSED(x) ((void)(x))
  ------------------
 1280|       |
 1281|  17.9k|    i4_tmp = i4_x + (i4_y * i4_refarray_stride);
 1282|  17.9k|    pu1_dst_cb = pu1_refarray_1 + i4_tmp;
 1283|  17.9k|    pu1_dst_cr = pu1_refarray_2 + i4_tmp;
 1284|  17.9k|    i4_tmp = i1_xd_index + (i1_yd_index * i4_refarray_stride);
 1285|  17.9k|    pu1_src_cb = pu1_dst_cb + i4_tmp;
 1286|  17.9k|    pu1_src_cr = pu1_dst_cr + i4_tmp;
 1287|       |
 1288|  17.9k|    i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 15.0k, False: 2.92k]
  |  |  ------------------
  ------------------
 1289|  17.9k|    u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 13.6k, False: 4.24k]
  |  |  ------------------
  ------------------
 1290|  17.9k|    i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 13.9k, False: 3.98k]
  |  |  ------------------
  ------------------
 1291|  17.9k|    u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_LUMA);
  ------------------
  |  |   61|  17.9k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 9.96k, False: 7.96k]
  |  |  ------------------
  ------------------
 1292|       |
 1293|  17.9k|    i4_tmp = (i1_xd_index + (i1_yd_index * i4_refarray_stride));
 1294|  17.9k|    pu1_dst_cb = pu1_src_cb - i4_tmp;
 1295|  17.9k|    pu1_dst_cr = pu1_src_cr - i4_tmp;
 1296|       |
 1297|  65.0k|    for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++)
  ------------------
  |  Branch (1297:23): [True: 47.1k, False: 17.9k]
  ------------------
 1298|  47.1k|    {
 1299|  47.1k|        memset(pu1_dst_cb, *pu1_src_cb, u1_seg_wd);
 1300|  47.1k|        pu1_dst_cb += i4_refarray_stride;
 1301|  47.1k|        memset(pu1_dst_cr, *pu1_src_cr, u1_seg_wd);
 1302|  47.1k|        pu1_dst_cr += i4_refarray_stride;
 1303|  47.1k|    }
 1304|  17.9k|}
isvcd_corner_samp_dyadic:
 1335|  23.8k|{
 1336|  23.8k|    WORD32 i4_ref_xD, i4_ref_yD;
 1337|  23.8k|    WORD32 i4_c_ref_xD, i4_c_ref_yD;
 1338|  23.8k|    WORD32 i4_xc, i4_yc;
 1339|  23.8k|    WORD32 i4_c_xc, i4_c_yc;
 1340|  23.8k|    WORD32 i4_samp1, i4_samp2;
 1341|  23.8k|    UWORD8 *pu1_tmp_src, *pu1_tmp_dst;
 1342|       |
 1343|  23.8k|    i4_ref_xD = i4_x - i4_xD;
 1344|  23.8k|    i4_ref_yD = i4_y - i4_yD;
 1345|  23.8k|    i4_xc = i4_ref_xD + SIGN(i4_xD);
  ------------------
  |  |  103|  23.8k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 11.8k, False: 11.9k]
  |  |  |  Branch (103:36): [True: 11.8k, False: 0]
  |  |  ------------------
  ------------------
 1346|  23.8k|    i4_yc = i4_ref_yD + SIGN(i4_yD);
  ------------------
  |  |  103|  23.8k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 11.7k, False: 12.0k]
  |  |  |  Branch (103:36): [True: 11.7k, False: 0]
  |  |  ------------------
  ------------------
 1347|       |
 1348|       |    /* Luma */
 1349|  23.8k|    pu1_tmp_src = pu1_refarray_y + (i4_yc * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  23.8k|#define DYADIC_REF_W_Y 20
  ------------------
 1350|  23.8k|    i4_samp1 = pu1_tmp_src[i4_ref_xD];
 1351|  23.8k|    pu1_tmp_src = pu1_refarray_y + (i4_ref_yD * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  23.8k|#define DYADIC_REF_W_Y 20
  ------------------
 1352|  23.8k|    i4_samp2 = pu1_tmp_src[i4_xc];
 1353|  23.8k|    pu1_tmp_dst = pu1_tmp_src;
 1354|  23.8k|    pu1_tmp_dst[i4_ref_xD] = (i4_samp1 + i4_samp2 + 1) >> 1;
 1355|       |
 1356|       |    /* Chroma */
 1357|  23.8k|    i4_c_ref_xD = i4_ref_xD >> 1;
 1358|  23.8k|    i4_c_ref_yD = i4_ref_yD >> 1;
 1359|  23.8k|    i4_c_xc = i4_c_ref_xD + SIGN(i4_xD);
  ------------------
  |  |  103|  23.8k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 11.8k, False: 11.9k]
  |  |  |  Branch (103:36): [True: 11.8k, False: 0]
  |  |  ------------------
  ------------------
 1360|  23.8k|    i4_c_yc = i4_c_ref_yD + SIGN(i4_yD);
  ------------------
  |  |  103|  23.8k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 11.7k, False: 12.0k]
  |  |  |  Branch (103:36): [True: 11.7k, False: 0]
  |  |  ------------------
  ------------------
 1361|       |
 1362|       |    /* Cb */
 1363|  23.8k|    pu1_tmp_src = pu1_refarray_cb + (i4_c_yc * DYADIC_REF_W_C);
  ------------------
  |  |   58|  23.8k|#define DYADIC_REF_W_C 10
  ------------------
 1364|  23.8k|    i4_samp1 = pu1_tmp_src[i4_c_ref_xD];
 1365|  23.8k|    pu1_tmp_src = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  23.8k|#define DYADIC_REF_W_C 10
  ------------------
 1366|  23.8k|    i4_samp2 = pu1_tmp_src[i4_c_xc];
 1367|  23.8k|    pu1_tmp_dst = pu1_tmp_src;
 1368|  23.8k|    pu1_tmp_dst[i4_c_ref_xD] = (i4_samp1 + i4_samp2 + 1) >> 1;
 1369|       |
 1370|       |    /* Cr */
 1371|  23.8k|    pu1_tmp_src = pu1_refarray_cr + (i4_c_yc * DYADIC_REF_W_C);
  ------------------
  |  |   58|  23.8k|#define DYADIC_REF_W_C 10
  ------------------
 1372|  23.8k|    i4_samp1 = pu1_tmp_src[i4_c_ref_xD];
 1373|  23.8k|    pu1_tmp_src = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  23.8k|#define DYADIC_REF_W_C 10
  ------------------
 1374|  23.8k|    i4_samp2 = pu1_tmp_src[i4_c_xc];
 1375|  23.8k|    pu1_tmp_dst = pu1_tmp_src;
 1376|  23.8k|    pu1_tmp_dst[i4_c_ref_xD] = (i4_samp1 + i4_samp2 + 1) >> 1;
 1377|  23.8k|}
isvcd_fill_non_avail_pixel:
 1408|  75.3k|{
 1409|       |    /* --------------------------------------------------------------------- */
 1410|       |    /* Index Variables                                                         */
 1411|       |    /* --------------------------------------------------------------------- */
 1412|  75.3k|    intra_samp_map_ctxt_t *ps_map_ctxt;
 1413|  75.3k|    ref_mb_map_t *ps_x_off_len;
 1414|  75.3k|    ref_mb_map_t *ps_y_off_len;
 1415|  75.3k|    WORD32 i4_x, i4_y;
 1416|  75.3k|    WORD32 i4_corner_pixel_available;
 1417|       |
 1418|       |    /* --------------------------------------------------------------------- */
 1419|       |    /* Local Pointer Declaration for Segment lookup                             */
 1420|       |    /* --------------------------------------------------------------------- */
 1421|  75.3k|    seg_lookup_desc_t *ps_segments_x;
 1422|  75.3k|    seg_lookup_desc_t *ps_segments_y;
 1423|  75.3k|    seg_description_t *ps_seg_desc_x, *ps_seg_desc_y;
 1424|  75.3k|    seg_description_t *ps_seg_x_tmp, *ps_seg_y_tmp;
 1425|  75.3k|    UWORD8 u1_num_sgmts_x, u1_num_sgmts_y;
 1426|       |
 1427|       |    /* --------------------------------------------------------------------- */
 1428|       |    /* Temp Variables for Mapping context                                     */
 1429|       |    /* --------------------------------------------------------------------- */
 1430|  75.3k|    WORD32 i4_x_offset;
 1431|  75.3k|    WORD32 i4_y_offset;
 1432|  75.3k|    WORD32 i4_refmb_wd;
 1433|  75.3k|    WORD32 i4_refmb_ht;
 1434|  75.3k|    WORD32 i4_mbaddr_x;
 1435|  75.3k|    WORD32 i4_mbaddr_y;
 1436|  75.3k|    WORD32 i4_xr_index, i4_yr_index;
 1437|  75.3k|    WORD32 i4_j, i4_i;
 1438|  75.3k|    WORD32 i4_cur_x;
 1439|  75.3k|    UWORD32 u4_lookup_4bit, u4_lookup_5bit, u4_4thbit;
 1440|  75.3k|    WORD32 i4_pad_size;
 1441|  75.3k|    WORD32 i4_x_min;
 1442|  75.3k|    WORD32 i4_y_min;
 1443|  75.3k|    WORD32 i4_x_start_pos, i4_y_start_pos;
 1444|  75.3k|    ref_min_max_map_t *ps_x_min_max;
 1445|  75.3k|    ref_min_max_map_t *ps_y_min_max;
 1446|  75.3k|    UWORD8 *pu1_ref_idx_x, *pu1_ref_idx_y;
 1447|  75.3k|    ftype_intra_samp_padding *pf_intra_samp_padding;
 1448|  75.3k|    ftype_intra_samp_padding **pf_intra_samp_lookup;
 1449|       |
 1450|  75.3k|    ps_map_ctxt = (intra_samp_map_ctxt_t *) pv_map_ctxt;
 1451|  75.3k|    ps_x_min_max = ps_map_ctxt->ps_x_min_max;
 1452|  75.3k|    ps_y_min_max = ps_map_ctxt->ps_y_min_max;
 1453|  75.3k|    ps_x_off_len = ps_map_ctxt->ps_x_offset_length;
 1454|  75.3k|    ps_y_off_len = ps_map_ctxt->ps_y_offset_length;
 1455|  75.3k|    i4_mbaddr_y = ps_mb_coord->u2_mb_y;
 1456|  75.3k|    i4_mbaddr_x = ps_mb_coord->u2_mb_x;
 1457|  75.3k|    i4_x_offset = ps_x_off_len[i4_mbaddr_x].i2_offset;
 1458|  75.3k|    i4_y_offset = ps_y_off_len[i4_mbaddr_y].i2_offset;
 1459|  75.3k|    i4_refmb_wd = (MB_WIDTH >> i4_chroma_flag) - 1;
  ------------------
  |  |   67|  75.3k|#define MB_WIDTH 16
  ------------------
 1460|  75.3k|    i4_refmb_ht = (MB_HEIGHT >> i4_chroma_flag) - 1;
  ------------------
  |  |   68|  75.3k|#define MB_HEIGHT 16
  ------------------
 1461|       |
 1462|  75.3k|    if(0 == i4_chroma_flag)
  ------------------
  |  Branch (1462:8): [True: 37.5k, False: 37.8k]
  ------------------
 1463|  37.5k|    {
 1464|  37.5k|        pf_intra_samp_lookup = gpf_lookup_fxns_luma;
 1465|  37.5k|    }
 1466|  37.8k|    else
 1467|  37.8k|    {
 1468|  37.8k|        pf_intra_samp_lookup = gpf_lookup_fxns_chroma;
 1469|  37.8k|    }
 1470|       |
 1471|       |    /* get the min and max positions */
 1472|  75.3k|    i4_x_min = ps_x_min_max[i4_mbaddr_x].i2_min_pos;
 1473|  75.3k|    i4_y_min = ps_y_min_max[i4_mbaddr_y].i2_min_pos;
 1474|       |
 1475|       |    /* get the start position of the MB in reference layer */
 1476|  75.3k|    i4_pad_size = 2 >> i4_chroma_flag;
 1477|  75.3k|    i4_x_start_pos = (i4_x_min - i4_pad_size);
 1478|  75.3k|    i4_y_start_pos = (i4_y_min - i4_pad_size);
 1479|  75.3k|    i4_xr_index = (i4_x_start_pos + i4_x_offset) & i4_refmb_wd;
 1480|  75.3k|    i4_yr_index = (i4_y_start_pos + i4_y_offset) & i4_refmb_ht;
 1481|       |
 1482|       |    /* Find the number of segments in x and y direction */
 1483|  75.3k|    ps_segments_x = (ps_map_ctxt->ps_seg_lookup_horz + i4_xr_index);
 1484|  75.3k|    ps_segments_y = (ps_map_ctxt->ps_seg_lookup_vert + i4_yr_index);
 1485|  75.3k|    u1_num_sgmts_x = ps_segments_x->u1_num_segments;
 1486|  75.3k|    u1_num_sgmts_y = ps_segments_y->u1_num_segments;
 1487|  75.3k|    ps_seg_desc_x = ps_segments_x->s_segments;
 1488|  75.3k|    ps_seg_desc_y = ps_segments_y->s_segments;
 1489|  75.3k|    pu1_ref_idx_x = ps_map_ctxt->pu1_refarray_x_idx;
 1490|  75.3k|    pu1_ref_idx_y = ps_map_ctxt->pu1_refarray_y_idx;
 1491|  75.3k|    i4_cur_x = pu1_ref_idx_x[i4_x_start_pos];
 1492|  75.3k|    u4_4thbit = ps_segments_x->u4_start_pos;
 1493|       |
 1494|   270k|    for(i4_j = 0; i4_j < u1_num_sgmts_y; i4_j++)
  ------------------
  |  Branch (1494:19): [True: 195k, False: 75.3k]
  ------------------
 1495|   195k|    {
 1496|   195k|        UWORD8 i4_idx_a, i4_idx_b;
 1497|   195k|        UWORD8 u1_seg_ht, u1_seg_wd;
 1498|   195k|        UWORD8 u1_mb_adjoin_x, u1_mb_adjoin_y;
 1499|   195k|        WORD8 i1_nearst_mb_bdry_x, i1_nearst_mb_bdry_y;
 1500|   195k|        UWORD32 u4_num_valid_segs;
 1501|   195k|        WORD32 i4_idx_a_plus_ny, i4_idx_b_plus_nx, i4_index;
 1502|   195k|        WORD8 i1_yd_index, i1_xd_index;
 1503|       |
 1504|   195k|        ps_seg_y_tmp = &ps_seg_desc_y[i4_j];
 1505|   195k|        i4_y = i4_y_start_pos + ps_seg_y_tmp->u1_seg_off;
 1506|   195k|        u1_seg_ht = ps_seg_y_tmp->u1_seg_dim;
 1507|   195k|        i1_yd_index = ps_seg_y_tmp->i1_dist_idx;
 1508|   195k|        i1_nearst_mb_bdry_y = ps_seg_y_tmp->i1_nearst_mb_bdry;
 1509|   195k|        u1_mb_adjoin_y = ps_seg_y_tmp->u1_mb_adjoin;
 1510|   195k|        i4_idx_a = pu1_ref_idx_y[i4_y];
 1511|   195k|        i4_idx_a_plus_ny = (i4_idx_a + i1_nearst_mb_bdry_y);
 1512|       |
 1513|       |        /* Pack the availabilities of the next three horizontal MBs in 3bit
 1514|       |           format and 4th bit indicating if the start position is greater than the mb_width/2 */
 1515|   195k|        u4_lookup_4bit = u4_4thbit | u1_avail_map[i4_idx_a][i4_cur_x + 2] << 2 |
 1516|   195k|                         u1_avail_map[i4_idx_a][i4_cur_x + 1] << 1 |
 1517|   195k|                         u1_avail_map[i4_idx_a][i4_cur_x];
 1518|       |
 1519|   195k|        u4_num_valid_segs = gu4_valid_segs_lookup[u4_lookup_4bit];
 1520|   195k|        i4_i = isvcd_left_most_bit_detect(u4_num_valid_segs);
 1521|   195k|        u4_num_valid_segs <<= (i4_i + 1);
 1522|       |
 1523|   552k|        for(; i4_i < u1_num_sgmts_x; i4_i++)
  ------------------
  |  Branch (1523:15): [True: 356k, False: 195k]
  ------------------
 1524|   356k|        {
 1525|   356k|            ps_seg_x_tmp = &ps_seg_desc_x[i4_i];
 1526|   356k|            i4_x = i4_x_start_pos + ps_seg_x_tmp->u1_seg_off;
 1527|   356k|            i4_idx_b = pu1_ref_idx_x[i4_x];
 1528|   356k|            u1_seg_wd = ps_seg_x_tmp->u1_seg_dim;
 1529|   356k|            i1_xd_index = ps_seg_x_tmp->i1_dist_idx;
 1530|   356k|            i1_nearst_mb_bdry_x = ps_seg_x_tmp->i1_nearst_mb_bdry;
 1531|   356k|            u1_mb_adjoin_x = ps_seg_x_tmp->u1_mb_adjoin;
 1532|   356k|            i4_idx_b_plus_nx = (i4_idx_b + i1_nearst_mb_bdry_x);
 1533|       |
 1534|       |            /* Find the avalability of (x,y-Yd),(x-Xd,y),(x-Xd,y-Yd) and pack it to 3 bits */
 1535|   356k|            u4_lookup_5bit = u1_avail_map[i4_idx_a_plus_ny][i4_idx_b_plus_nx] << 2 |
 1536|   356k|                             u1_avail_map[i4_idx_a_plus_ny][i4_idx_b] << 1 |
 1537|   356k|                             u1_avail_map[i4_idx_a][i4_idx_b_plus_nx] | u1_mb_adjoin_x |
 1538|   356k|                             u1_mb_adjoin_y;
 1539|       |
 1540|   356k|            i4_corner_pixel_available = u1_avail_map[i4_idx_a_plus_ny][i4_idx_b_plus_nx];
 1541|       |
 1542|       |            /* Function pointer table from lookup to get Left,Top,Bottom,Right,Diagonal padding */
 1543|   356k|            if(u4_lookup_5bit > 31)
  ------------------
  |  Branch (1543:16): [True: 0, False: 356k]
  ------------------
 1544|      0|            {
 1545|      0|                u4_lookup_5bit = 0;
 1546|      0|            }
 1547|   356k|            pf_intra_samp_padding = pf_intra_samp_lookup[u4_lookup_5bit];
 1548|       |
 1549|   356k|            if(pf_intra_samp_padding != NULL)
  ------------------
  |  Branch (1549:16): [True: 194k, False: 161k]
  ------------------
 1550|   194k|            {
 1551|   194k|                pf_intra_samp_padding(i4_x, i4_y, i1_xd_index, i1_yd_index, u1_seg_wd, u1_seg_ht,
 1552|   194k|                                      pu1_refarray_1, pu1_refarray_2, i4_refarray_stride,
 1553|   194k|                                      u1_mb_adjoin_x, u1_mb_adjoin_y, i4_corner_pixel_available);
 1554|   194k|            }
 1555|       |
 1556|       |            /* increment to the next unavailable segment */
 1557|   356k|            i4_index = isvcd_left_most_bit_detect(u4_num_valid_segs);
 1558|   356k|            u4_num_valid_segs <<= (i4_index + 1);
 1559|   356k|            i4_i += i4_index;
 1560|       |
 1561|   356k|        } /* end of loop over ref array width */
 1562|       |
 1563|   195k|    }     /* end of loop over ref array height */
 1564|  75.3k|    return;
 1565|  75.3k|}
isvcd_get_ref_layer_mbtype:
 1597|   736k|{
 1598|   736k|    WORD8 i1_intra_slice_id;
 1599|   736k|    inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
 1600|   736k|    WORD8 i1_mb_mode;
 1601|       |
 1602|   736k|    ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes;
 1603|   736k|    i1_mb_mode = ps_inter_lyr_mb_prms->i1_mb_mode;
 1604|       |
 1605|   736k|    if(i1_mb_mode <= SVC_INTER_MB)
  ------------------
  |  |  114|   736k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (1605:8): [True: 587k, False: 148k]
  ------------------
 1606|   587k|    {
 1607|       |        /* INTER */
 1608|   587k|        *pi4_mb_type = SVC_INTER_MB;
  ------------------
  |  |  114|   587k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 1609|   587k|        i1_intra_slice_id = -1;
 1610|   587k|    }
 1611|   148k|    else
 1612|   148k|    {
 1613|       |        /* INTRA */
 1614|   148k|        *pi4_mb_type = SVC_INTRA_MB;
  ------------------
  |  |  115|   148k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
 1615|   148k|        i1_intra_slice_id = ps_inter_lyr_mb_prms->i1_slice_id;
 1616|       |
 1617|   148k|        if(1 == i1_cons_intr_samp_flag)
  ------------------
  |  Branch (1617:12): [True: 9.50k, False: 139k]
  ------------------
 1618|  9.50k|        {
 1619|       |            /* check for different slice idc */
 1620|  9.50k|            if(ps_inter_lyr_mb_prms->i1_slice_id != i1_curr_slice_id)
  ------------------
  |  Branch (1620:16): [True: 2.07k, False: 7.43k]
  ------------------
 1621|  2.07k|            {
 1622|       |                /* store the mode as INTER (not available for upsampling) */
 1623|  2.07k|                *pi4_mb_type = SVC_INTER_MB;
  ------------------
  |  |  114|  2.07k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 1624|  2.07k|            }
 1625|  9.50k|        }
 1626|   148k|    }
 1627|       |
 1628|       |    /* if contarained intra flag is 1 then check for same mb mode */
 1629|   736k|    return (i1_intra_slice_id);
 1630|   736k|}
isvcd_reflayer_construction:
 1666|  75.6k|{
 1667|  75.6k|    WORD32 i4_x, i4_y;
 1668|       |
 1669|       |    /* --------------------------------------------------------------------- */
 1670|       |    /* Context and reference layer realted varaibles                         */
 1671|       |    /* --------------------------------------------------------------------- */
 1672|  75.6k|    intra_sampling_ctxt_t *ps_ctxt;
 1673|  75.6k|    intra_samp_map_ctxt_t *ps_map_ctxt;
 1674|  75.6k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
 1675|  75.6k|    WORD8 *pi1_ref_mb_modes, *pi1_ref_mb_modes_bkp_1;
 1676|  75.6k|    WORD32 i4_ref_mode_stride;
 1677|  75.6k|    WORD32 i4_element_size;
 1678|  75.6k|    ref_mb_map_t *ps_x_off_len;
 1679|  75.6k|    ref_mb_map_t *ps_y_off_len;
 1680|  75.6k|    WORD32 i4_mbaddr_y;
 1681|  75.6k|    WORD32 i4_mbaddr_x;
 1682|  75.6k|    WORD32 i4_mb_ht, i4_mb_wd;
 1683|  75.6k|    UWORD8 u1_map_buf[4][4] = {0}; /*!< 4x4 mb grid buffer to store the mb availablity */
 1684|       |    /* --------------------------------------------------------------------- */
 1685|       |    /* Temp Variables for Mapping context                                     */
 1686|       |    /* --------------------------------------------------------------------- */
 1687|  75.6k|    WORD32 i4_ref_wd;
 1688|  75.6k|    WORD32 i4_ref_ht;
 1689|  75.6k|    WORD32 i4_x_offset;
 1690|  75.6k|    WORD32 i4_y_offset;
 1691|  75.6k|    WORD32 i4_refarray_wd;
 1692|  75.6k|    WORD32 i4_refarray_ht;
 1693|  75.6k|    WORD32 i4_mb_type;
 1694|  75.6k|    WORD8 i1_cons_intr_samp_flag;
 1695|  75.6k|    WORD8 i1_slice_id = 0;
 1696|  75.6k|    WORD32 i4_mb_wd_sft, i4_mb_ht_sft;
 1697|       |
 1698|       |    /* --------------------------------------------------------------------- */
 1699|       |    /* Local Pointer Declaration for arrays in Mapping context                 */
 1700|       |    /* --------------------------------------------------------------------- */
 1701|       |
 1702|  75.6k|    WORD32 i4_unfill_check;
 1703|  75.6k|    UWORD8 *pu1_refarray_1, *pu1_refarray_2;
 1704|       |
 1705|  75.6k|    UNUSED(pu1_inp_2);
  ------------------
  |  |   45|  75.6k|#define UNUSED(x) ((void)(x))
  ------------------
 1706|       |
 1707|  75.6k|    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
 1708|  75.6k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 1709|  75.6k|    pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer;
 1710|  75.6k|    i4_ref_mode_stride = ps_ref_mb_mode_map->i4_num_element_stride;
 1711|  75.6k|    i4_element_size = ps_ref_mb_mode_map->i4_element_size;
 1712|       |
 1713|       |    /* get the condtrained intra sampling flag */
 1714|  75.6k|    i1_cons_intr_samp_flag = ps_lyr_ctxt->i1_constrained_intra_rsmpl_flag;
 1715|       |
 1716|  75.6k|    if(NULL == pi1_ref_mb_modes)
  ------------------
  |  Branch (1716:8): [True: 0, False: 75.6k]
  ------------------
 1717|      0|    {
 1718|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1719|      0|    }
 1720|       |
 1721|       |    /* --------------------------------------------------------------------- */
 1722|       |    /* Based on Chroma and Luma, extracting the context information struct     */
 1723|       |    /* --------------------------------------------------------------------- */
 1724|  75.6k|    if(1 == i4_chroma_flag)
  ------------------
  |  Branch (1724:8): [True: 37.8k, False: 37.8k]
  ------------------
 1725|  37.8k|        ps_map_ctxt = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1726|  37.8k|    else
 1727|  37.8k|        ps_map_ctxt = &ps_lyr_ctxt->s_luma_map_ctxt;
 1728|       |
 1729|  75.6k|    ps_x_off_len = ps_map_ctxt->ps_x_offset_length;
 1730|  75.6k|    ps_y_off_len = ps_map_ctxt->ps_y_offset_length;
 1731|       |
 1732|       |    /* --------------------------------------------------------------------- */
 1733|       |    /* Deriving the parameters required for further processing                 */
 1734|       |    /* --------------------------------------------------------------------- */
 1735|  75.6k|    {
 1736|  75.6k|        WORD32 i4_base_width = ps_lyr_ctxt->i4_ref_width;
 1737|  75.6k|        WORD32 i4_base_height = ps_lyr_ctxt->i4_ref_height;
 1738|       |
 1739|  75.6k|        i4_ref_wd = i4_base_width >> i4_chroma_flag;
 1740|  75.6k|        i4_ref_ht = i4_base_height >> i4_chroma_flag;
 1741|  75.6k|        i4_mb_wd_sft = (MB_WIDTH_SHIFT - i4_chroma_flag);
  ------------------
  |  |   70|  75.6k|#define MB_WIDTH_SHIFT 4
  ------------------
 1742|  75.6k|        i4_mb_ht_sft = (MB_HEIGHT_SHIFT - i4_chroma_flag);
  ------------------
  |  |   71|  75.6k|#define MB_HEIGHT_SHIFT 4
  ------------------
 1743|  75.6k|    }
 1744|       |
 1745|       |    /* --------------------------------------------------------------------- */
 1746|       |    /* Local variables based on the MB address                                 */
 1747|       |    /* --------------------------------------------------------------------- */
 1748|  75.6k|    i4_mbaddr_y = ps_coord->u2_mb_y;
 1749|  75.6k|    i4_mbaddr_x = ps_coord->u2_mb_x;
 1750|  75.6k|    i4_x_offset = ps_x_off_len[i4_mbaddr_x].i2_offset;
 1751|  75.6k|    i4_y_offset = ps_y_off_len[i4_mbaddr_y].i2_offset;
 1752|  75.6k|    i4_refarray_wd = ps_x_off_len[i4_mbaddr_x].i2_length;
 1753|  75.6k|    i4_refarray_ht = ps_y_off_len[i4_mbaddr_y].i2_length;
 1754|  75.6k|    i4_mb_wd = (MB_WIDTH >> i4_chroma_flag);
  ------------------
  |  |   67|  75.6k|#define MB_WIDTH 16
  ------------------
 1755|  75.6k|    i4_mb_ht = (MB_HEIGHT >> i4_chroma_flag);
  ------------------
  |  |   68|  75.6k|#define MB_HEIGHT 16
  ------------------
 1756|       |
 1757|       |    /* --------------------------------------------------------------------- */
 1758|       |    /* Derivation of ref slice MB idc                                         */
 1759|       |    /* --------------------------------------------------------------------- */
 1760|  75.6k|    if(1 == i1_cons_intr_samp_flag)
  ------------------
  |  Branch (1760:8): [True: 24.5k, False: 51.1k]
  ------------------
 1761|  24.5k|    {
 1762|  24.5k|        WORD32 i4_x_min, i4_x_max;
 1763|  24.5k|        WORD32 i4_y_min, i4_y_max;
 1764|  24.5k|        ref_min_max_map_t *ps_x_min_max;
 1765|  24.5k|        ref_min_max_map_t *ps_y_min_max;
 1766|       |
 1767|  24.5k|        ps_x_min_max = ps_map_ctxt->ps_x_min_max;
 1768|  24.5k|        ps_y_min_max = ps_map_ctxt->ps_y_min_max;
 1769|       |
 1770|       |        /* get the min and max positions */
 1771|  24.5k|        i4_x_min = ps_x_min_max[i4_mbaddr_x].i2_min_pos;
 1772|  24.5k|        i4_x_max = ps_x_min_max[i4_mbaddr_x].i2_max_pos;
 1773|  24.5k|        i4_y_min = ps_y_min_max[i4_mbaddr_y].i2_min_pos;
 1774|  24.5k|        i4_y_max = ps_y_min_max[i4_mbaddr_y].i2_max_pos;
 1775|       |
 1776|       |        /* default initialization */
 1777|  24.5k|        i4_mb_type = SVC_INTER_MB;
  ------------------
  |  |  114|  24.5k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 1778|       |
 1779|  24.5k|        {
 1780|  24.5k|            WORD32 i4_x_ref;
 1781|  24.5k|            WORD32 i4_y_ref;
 1782|  24.5k|            WORD32 i4_mb_x, i4_mb_y;
 1783|       |
 1784|  24.5k|            i4_y_ref = (i4_y_min + 1) + i4_y_offset;
 1785|  24.5k|            i4_x_ref = (i4_x_min + 1) + i4_x_offset;
 1786|  24.5k|            i4_mb_x = (i4_x_ref >> i4_mb_wd_sft);
 1787|  24.5k|            i4_mb_y = (i4_y_ref >> i4_mb_ht_sft);
 1788|  24.5k|            pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer;
 1789|       |
 1790|       |            /* get the location of the byte which has the current mb mode */
 1791|  24.5k|            pi1_ref_mb_modes += (i4_mb_y * i4_ref_mode_stride * i4_element_size);
 1792|  24.5k|            pi1_ref_mb_modes += (i4_mb_x * i4_element_size);
 1793|  24.5k|        }
 1794|       |
 1795|  52.5k|        for(i4_y = (i4_y_min + 1); i4_y <= (i4_y_max - 1);)
  ------------------
  |  Branch (1795:36): [True: 32.8k, False: 19.6k]
  ------------------
 1796|  32.8k|        {
 1797|  32.8k|            WORD32 i4_x_ref;
 1798|  32.8k|            WORD32 i4_y_ref;
 1799|  32.8k|            WORD32 i4_distleftX, i4_rangeX;
 1800|  32.8k|            WORD32 i4_disttopY, i4_rangeY;
 1801|       |
 1802|  32.8k|            i4_y_ref = (i4_y + i4_y_offset);
 1803|  32.8k|            i4_disttopY = (i4_y_ref) & (i4_mb_ht - 1);
 1804|  32.8k|            i4_rangeY = (i4_mb_ht - i4_disttopY);
 1805|       |
 1806|  32.8k|            pi1_ref_mb_modes_bkp_1 = pi1_ref_mb_modes;
 1807|       |
 1808|  71.8k|            for(i4_x = (i4_x_min + 1); i4_x <= (i4_x_max - 1);)
  ------------------
  |  Branch (1808:40): [True: 43.9k, False: 27.9k]
  ------------------
 1809|  43.9k|            {
 1810|  43.9k|                i4_x_ref = (i4_x + i4_x_offset);
 1811|  43.9k|                i4_distleftX = (i4_x_ref) & (i4_mb_wd - 1);
 1812|  43.9k|                i4_rangeX = (i4_mb_wd - i4_distleftX);
 1813|       |
 1814|       |                /* get the referecne layer mb type */
 1815|  43.9k|                i1_slice_id =
 1816|  43.9k|                    isvcd_get_ref_layer_mbtype(pi1_ref_mb_modes_bkp_1, &i4_mb_type, i1_slice_id, 0);
 1817|  43.9k|                if(SVC_INTRA_MB == i4_mb_type)
  ------------------
  |  |  115|  43.9k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  |  Branch (1817:20): [True: 4.84k, False: 39.0k]
  ------------------
 1818|  4.84k|                {
 1819|       |                    /* if an Intra MB is returned then break the loop */
 1820|  4.84k|                    break;
 1821|  4.84k|                }
 1822|       |
 1823|  39.0k|                i4_x += i4_rangeX;
 1824|  39.0k|                pi1_ref_mb_modes_bkp_1 += i4_element_size;
 1825|  39.0k|            } /* end of loop in horizontal direction */
 1826|       |
 1827|  32.8k|            if(SVC_INTRA_MB == i4_mb_type)
  ------------------
  |  |  115|  32.8k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  |  Branch (1827:16): [True: 4.84k, False: 27.9k]
  ------------------
 1828|  4.84k|            {
 1829|       |                /* if an Intra MB is returned then break the loop */
 1830|  4.84k|                break;
 1831|  4.84k|            }
 1832|       |
 1833|  27.9k|            i4_y += i4_rangeY;
 1834|  27.9k|            pi1_ref_mb_modes += (i4_ref_mode_stride * i4_element_size);
 1835|       |
 1836|  27.9k|        } /* end of loop in vertical direction */
 1837|  24.5k|    }
 1838|  51.1k|    else
 1839|  51.1k|    {
 1840|       |        /* set to non valid value */
 1841|  51.1k|        i1_slice_id = -1;
 1842|  51.1k|    }
 1843|       |
 1844|  75.6k|    i4_unfill_check = 0;
 1845|       |
 1846|       |    /* --------------------------------------------------------------------- */
 1847|       |    /* Copying the data from recon buffer to refSample Array.                */
 1848|       |    /* NOTE: The copying of the data from recon buffer to refSample Array    */
 1849|       |    /*       can be optimized by bring in data at N-MB level,thus taking     */
 1850|       |    /*       advantage of the overlapping data which now gets copied every MB*/
 1851|       |    /* --------------------------------------------------------------------- */
 1852|  75.6k|    {
 1853|  75.6k|        WORD32 i4_x_ref_start, i4_x_ref_end;
 1854|  75.6k|        WORD32 i4_y_ref_start, i4_y_ref_end;
 1855|  75.6k|        WORD32 i4_rangeW, i4_rangeH;
 1856|  75.6k|        WORD32 i4_offset;
 1857|  75.6k|        UWORD8 *pu1_src, *pu1_dst;
 1858|  75.6k|        UWORD8 *pu1_dst1, *pu1_dst2;
 1859|       |
 1860|       |        /* Copy (refW x refH) dimension into reference sample array */
 1861|  75.6k|        i4_x_ref_start = MAX(0, MIN((i4_ref_wd - 1), i4_x_offset));
  ------------------
  |  |   60|   151k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 37.1k, False: 38.5k]
  |  |  |  Branch (60:24): [True: 0, False: 75.6k]
  |  |  |  Branch (60:32): [True: 0, False: 38.5k]
  |  |  ------------------
  ------------------
 1862|  75.6k|        i4_x_ref_end = MAX(0, MIN((i4_ref_wd - 1), (i4_refarray_wd - 1) + i4_x_offset));
  ------------------
  |  |   60|   151k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 75.6k]
  |  |  |  Branch (60:24): [True: 38.5k, False: 37.1k]
  |  |  |  Branch (60:32): [True: 38.5k, False: 37.1k]
  |  |  ------------------
  ------------------
 1863|  75.6k|        i4_y_ref_start = MAX(0, MIN((i4_ref_ht - 1), i4_y_offset));
  ------------------
  |  |   60|   151k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 15.1k, False: 60.5k]
  |  |  |  Branch (60:24): [True: 0, False: 75.6k]
  |  |  |  Branch (60:32): [True: 0, False: 60.5k]
  |  |  ------------------
  ------------------
 1864|  75.6k|        i4_y_ref_end = MAX(0, MIN((i4_ref_ht - 1), (i4_refarray_ht - 1) + i4_y_offset));
  ------------------
  |  |   60|   151k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 75.6k]
  |  |  |  Branch (60:24): [True: 10.1k, False: 65.5k]
  |  |  |  Branch (60:32): [True: 10.1k, False: 65.5k]
  |  |  ------------------
  ------------------
 1865|       |
 1866|       |        /* find the actual data to be copied */
 1867|  75.6k|        i4_rangeW = (i4_x_ref_end - i4_x_ref_start + 1);
 1868|  75.6k|        i4_rangeH = (i4_y_ref_end - i4_y_ref_start + 1);
 1869|       |
 1870|       |        /* get the reconbuffer pointer and ref sample array pointer */
 1871|  75.6k|        i4_offset =
 1872|  75.6k|            (i4_x_ref_start - i4_x_offset) + ((i4_y_ref_start - i4_y_offset) * i4_refarray_stride);
 1873|       |
 1874|  75.6k|        if(0 == i4_chroma_flag)
  ------------------
  |  Branch (1874:12): [True: 37.8k, False: 37.8k]
  ------------------
 1875|  37.8k|        {
 1876|  37.8k|            pu1_refarray_1 = ps_ctxt->pu1_refarray_buffer;
 1877|  37.8k|            pu1_refarray_2 = NULL;
 1878|  37.8k|            pu1_src = pu1_inp_1;
 1879|  37.8k|            pu1_dst = pu1_refarray_1 + i4_offset;
 1880|       |
 1881|       |            /* Copy luma data into refsample array */
 1882|  37.8k|            isvcd_copy_data(pu1_src, i4_inp_stride, pu1_dst, i4_refarray_stride, i4_rangeW,
 1883|  37.8k|                            i4_rangeH);
 1884|  37.8k|        }
 1885|  37.8k|        else
 1886|  37.8k|        {
 1887|  37.8k|            pu1_refarray_1 = ps_ctxt->pu1_refarray_buffer;
 1888|  37.8k|            pu1_refarray_2 = ps_ctxt->pu1_refarray_cb;
 1889|  37.8k|            pu1_src = pu1_inp_1;
 1890|  37.8k|            pu1_dst1 = pu1_refarray_1 + i4_offset;
 1891|  37.8k|            pu1_dst2 = pu1_refarray_2 + i4_offset;
 1892|  37.8k|            isvcd_copy_data_semiplanr(pu1_src, i4_inp_stride, pu1_dst1, pu1_dst2,
 1893|  37.8k|                                      i4_refarray_stride, i4_rangeW, i4_rangeH);
 1894|  37.8k|        }
 1895|  75.6k|    }
 1896|       |
 1897|       |    /* --------------------------------------------------------------------- */
 1898|       |    /* Loop to fill ref sample array and corresponding map for interpolation */
 1899|       |    /* --------------------------------------------------------------------- */
 1900|  75.6k|    {
 1901|  75.6k|        WORD32 i4_i, i4_j;
 1902|  75.6k|        UWORD8 *pu1_ref_idx_x, *pu1_ref_idx_y;
 1903|  75.6k|        WORD32 i4_x_ref;
 1904|  75.6k|        WORD32 i4_y_ref;
 1905|  75.6k|        WORD32 i4_mb_x, i4_mb_y;
 1906|       |
 1907|  75.6k|        i4_y_ref = i4_y_offset;
 1908|  75.6k|        i4_x_ref = i4_x_offset;
 1909|  75.6k|        i4_mb_x = (i4_x_ref >> i4_mb_wd_sft);
 1910|  75.6k|        i4_mb_y = (i4_y_ref >> i4_mb_ht_sft);
 1911|  75.6k|        pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer;
 1912|       |
 1913|       |        /* get the location of the byte which has the current mb mode */
 1914|  75.6k|        pi1_ref_mb_modes += (i4_mb_y * i4_ref_mode_stride * i4_element_size);
 1915|  75.6k|        pi1_ref_mb_modes += (i4_mb_x * i4_element_size);
 1916|  75.6k|        pu1_ref_idx_x = ps_map_ctxt->pu1_refarray_x_idx;
 1917|  75.6k|        pu1_ref_idx_y = ps_map_ctxt->pu1_refarray_y_idx;
 1918|       |
 1919|  75.6k|        i4_j = 0;
 1920|   302k|        for(i4_y = 0; i4_y < i4_refarray_ht;)
  ------------------
  |  Branch (1920:23): [True: 227k, False: 75.6k]
  ------------------
 1921|   227k|        {
 1922|   227k|            WORD32 i4_x_ref;
 1923|   227k|            WORD32 i4_y_ref;
 1924|   227k|            WORD32 i4_distleftX, i4_rangeX;
 1925|   227k|            WORD32 i4_disttopY, i4_rangeY;
 1926|       |
 1927|   227k|            i4_y_ref = i4_y + i4_y_offset;
 1928|   227k|            i4_disttopY = (i4_y_ref) & (i4_mb_ht - 1);
 1929|   227k|            i4_rangeY = (i4_mb_ht - i4_disttopY);
 1930|       |
 1931|       |            /* find the y-index lookup */
 1932|   227k|            memset(pu1_ref_idx_y, i4_j, i4_rangeY);
 1933|   227k|            pu1_ref_idx_y += i4_rangeY;
 1934|       |
 1935|   227k|            i4_i = 0;
 1936|   227k|            pi1_ref_mb_modes_bkp_1 = pi1_ref_mb_modes;
 1937|   919k|            for(i4_x = 0; i4_x < i4_refarray_wd;)
  ------------------
  |  Branch (1937:27): [True: 692k, False: 227k]
  ------------------
 1938|   692k|            {
 1939|   692k|                i4_x_ref = i4_x + i4_x_offset;
 1940|   692k|                i4_distleftX = (i4_x_ref) & (i4_mb_wd - 1);
 1941|   692k|                i4_rangeX = (i4_mb_wd - i4_distleftX);
 1942|       |
 1943|   692k|                if(0 == i4_j)
  ------------------
  |  Branch (1943:20): [True: 227k, False: 465k]
  ------------------
 1944|   227k|                {
 1945|       |                    /* find the x-index lookup */
 1946|   227k|                    memset(pu1_ref_idx_x, i4_i, i4_rangeX);
 1947|   227k|                    pu1_ref_idx_x += i4_rangeX;
 1948|   227k|                }
 1949|       |
 1950|       |                /* get the referecne layer mb type */
 1951|   692k|                isvcd_get_ref_layer_mbtype(pi1_ref_mb_modes_bkp_1, &i4_mb_type, i1_slice_id,
 1952|   692k|                                           i1_cons_intr_samp_flag);
 1953|       |
 1954|   692k|                if(SVC_INTRA_MB == i4_mb_type)
  ------------------
  |  |  115|   692k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  |  Branch (1954:20): [True: 141k, False: 550k]
  ------------------
 1955|   141k|                {
 1956|   141k|                    u1_map_buf[i4_j][i4_i] = 1;
 1957|   141k|                }
 1958|   550k|                else
 1959|   550k|                {
 1960|   550k|                    i4_unfill_check = 1;
 1961|   550k|                }
 1962|       |
 1963|   692k|                i4_x = i4_x + i4_rangeX;
 1964|   692k|                i4_i++;
 1965|   692k|                pi1_ref_mb_modes_bkp_1 += i4_element_size;
 1966|       |
 1967|   692k|            } /* end of loop over ref array width */
 1968|       |
 1969|   227k|            i4_j++;
 1970|   227k|            i4_y = i4_y + i4_rangeY;
 1971|   227k|            pi1_ref_mb_modes += (i4_ref_mode_stride * i4_element_size);
 1972|   227k|        } /* end of loop over ref array height */
 1973|  75.6k|    }
 1974|       |
 1975|       |    /* --------------------------------------------------------------------- */
 1976|       |    /* Calling boundary extension algorithm to fill unfilled pixels             */
 1977|       |    /* --------------------------------------------------------------------- */
 1978|  75.6k|    if(i4_unfill_check == 1)
  ------------------
  |  Branch (1978:8): [True: 75.3k, False: 338]
  ------------------
 1979|  75.3k|    {
 1980|  75.3k|        isvcd_fill_non_avail_pixel(ps_map_ctxt, pu1_refarray_1, pu1_refarray_2, i4_refarray_stride,
 1981|  75.3k|                                   ps_coord, i4_chroma_flag, u1_map_buf);
 1982|  75.3k|    }
 1983|  75.6k|    return OK;
  ------------------
  |  |  114|  75.6k|#define OK        0
  ------------------
 1984|  75.6k|}
isvcd_reflayer_construction_dyadic:
 2025|  52.9k|{
 2026|       |    /* Index variables */
 2027|  52.9k|    WORD32 i4_x, i4_y;
 2028|  52.9k|    WORD32 i4_x0, i4_y0;
 2029|  52.9k|    WORD32 i4_xc0, i4_yc0;
 2030|  52.9k|    WORD32 i4_ref_xD, i4_ref_yD;
 2031|  52.9k|    WORD32 i4_c_ref_xD, i4_c_ref_yD;
 2032|       |
 2033|       |    /* --------------------------------------------------------------------- */
 2034|       |    /* Context and reference layer related variables                         */
 2035|       |    /* --------------------------------------------------------------------- */
 2036|  52.9k|    intra_sampling_ctxt_t *ps_ctxt;
 2037|  52.9k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
 2038|  52.9k|    WORD8 *pi1_ref_mb_modes;
 2039|  52.9k|    WORD32 i4_ref_mode_stride;
 2040|  52.9k|    WORD32 i4_element_size;
 2041|  52.9k|    WORD32 i4_mbaddr_y;
 2042|  52.9k|    WORD32 i4_mbaddr_x;
 2043|       |
 2044|       |    /* --------------------------------------------------------------------- */
 2045|       |    /* Temp Variables for Mapping context                                     */
 2046|       |    /* --------------------------------------------------------------------- */
 2047|  52.9k|    WORD32 i4_refarray_wd_luma, i4_refarray_wd_chroma;
 2048|  52.9k|    WORD32 i4_refarray_ht_luma, i4_refarray_ht_chroma;
 2049|  52.9k|    WORD32 i4_avlblty;
 2050|  52.9k|    WORD8 i1_cons_intr_samp_flag;
 2051|  52.9k|    WORD8 i1_slice_id;
 2052|  52.9k|    WORD8 i1_corner_samp_avlbl_flag;
 2053|  52.9k|    UWORD8 u1_ny_avlblty;
 2054|       |
 2055|       |    /* --------------------------------------------------------------------- */
 2056|       |    /* Local Pointer Declaration for arrays in Mapping context                 */
 2057|       |    /* --------------------------------------------------------------------- */
 2058|  52.9k|    UWORD8 *pu1_refarray_luma;
 2059|  52.9k|    UWORD8 *pu1_refarray_cb, *pu1_refarray_cr;
 2060|       |
 2061|       |    /* --------------------------------------------------------------------- */
 2062|       |    /* Derivation of local variables                                         */
 2063|       |    /* --------------------------------------------------------------------- */
 2064|  52.9k|    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
 2065|  52.9k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 2066|  52.9k|    pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer;
 2067|  52.9k|    i4_ref_mode_stride = ps_ref_mb_mode_map->i4_num_element_stride;
 2068|  52.9k|    i4_element_size = ps_ref_mb_mode_map->i4_element_size;
 2069|       |
 2070|       |    /* --------------------------------------------------------------------- */
 2071|       |    /* get the constrained intra resampling flag                             */
 2072|       |    /* --------------------------------------------------------------------- */
 2073|  52.9k|    i1_cons_intr_samp_flag = ps_lyr_ctxt->i1_constrained_intra_rsmpl_flag;
 2074|  52.9k|    if(NULL == pi1_ref_mb_modes)
  ------------------
  |  Branch (2074:8): [True: 0, False: 52.9k]
  ------------------
 2075|      0|    {
 2076|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 2077|      0|    }
 2078|       |
 2079|  52.9k|    pu1_refarray_luma = ps_ctxt->pu1_refarray_buffer;
 2080|  52.9k|    pu1_refarray_cb = ps_ctxt->pu1_refarray_cb;
 2081|  52.9k|    pu1_refarray_cr = ps_ctxt->pu1_refarray_cr;
 2082|       |
 2083|       |    /* --------------------------------------------------------------------- */
 2084|       |    /* Get the coordinates of the reference layer MB                         */
 2085|       |    /* --------------------------------------------------------------------- */
 2086|  52.9k|    i4_mbaddr_x = u2_mb_x;
 2087|  52.9k|    i4_mbaddr_y = u2_mb_y;
 2088|       |
 2089|       |    /* --------------------------------------------------------------------- */
 2090|       |    /* Getting the size of the valid area of ref array to be brought in         */
 2091|       |    /* --------------------------------------------------------------------- */
 2092|  52.9k|    i4_refarray_wd_luma = 20;
 2093|  52.9k|    i4_refarray_ht_luma = 20;
 2094|  52.9k|    i4_refarray_wd_chroma = i4_refarray_wd_luma >> 1;
 2095|  52.9k|    i4_refarray_ht_chroma = i4_refarray_ht_luma >> 1;
 2096|       |
 2097|       |    /* --------------------------------------------------------------------- */
 2098|       |    /* Derivation of ref slice MB idc                                         */
 2099|       |    /* --------------------------------------------------------------------- */
 2100|  52.9k|    if(1 == i1_cons_intr_samp_flag)
  ------------------
  |  Branch (2100:8): [True: 29.2k, False: 23.7k]
  ------------------
 2101|  29.2k|    {
 2102|  29.2k|        inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
 2103|  29.2k|        WORD8 *pi1_ref_mb_mode_tmp;
 2104|  29.2k|        WORD8 i1_mb_mode;
 2105|       |
 2106|       |        /* get the location of the byte which has the current mb mode */
 2107|  29.2k|        pi1_ref_mb_mode_tmp = pi1_ref_mb_modes;
 2108|  29.2k|        pi1_ref_mb_mode_tmp += (i4_mbaddr_y * i4_ref_mode_stride * i4_element_size);
 2109|  29.2k|        pi1_ref_mb_mode_tmp += (i4_mbaddr_x * i4_element_size);
 2110|  29.2k|        ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_mode_tmp;
 2111|  29.2k|        i1_mb_mode = ps_inter_lyr_mb_prms->i1_mb_mode;
 2112|       |
 2113|       |        /* The reference layer MB should be intra */
 2114|  29.2k|        UNUSED(i1_mb_mode);
  ------------------
  |  |   45|  29.2k|#define UNUSED(x) ((void)(x))
  ------------------
 2115|       |
 2116|  29.2k|        i1_slice_id = ps_inter_lyr_mb_prms->i1_slice_id;
 2117|  29.2k|    }
 2118|  23.7k|    else
 2119|  23.7k|    {
 2120|       |        /* set to non valid value */
 2121|  23.7k|        i1_slice_id = -1;
 2122|  23.7k|    }
 2123|       |
 2124|       |    /* --------------------------------------------------------------------- */
 2125|       |    /* Bring in the reference array                                          */
 2126|       |    /* --------------------------------------------------------------------- */
 2127|  52.9k|    {
 2128|  52.9k|        UWORD8 *pu1_src, *pu1_dst;
 2129|  52.9k|        WORD32 i4_src_stride, i4_dst_stride;
 2130|       |
 2131|       |        /* Copy luma */
 2132|  52.9k|        i4_src_stride = i4_inp_luma_stride;
 2133|  52.9k|        i4_dst_stride = DYADIC_REF_W_Y;
  ------------------
  |  |   56|  52.9k|#define DYADIC_REF_W_Y 20
  ------------------
 2134|  52.9k|        pu1_src = pu1_inp_luma;
 2135|  52.9k|        pu1_dst = pu1_refarray_luma;
 2136|  52.9k|        isvcd_copy_data(pu1_src, i4_src_stride, pu1_dst, i4_dst_stride, i4_refarray_wd_luma,
 2137|  52.9k|                        i4_refarray_ht_luma);
 2138|       |        // Semi planar
 2139|  52.9k|        i4_src_stride = i4_inp_chroma_stride;
 2140|  52.9k|        i4_dst_stride = DYADIC_REF_W_C;
  ------------------
  |  |   58|  52.9k|#define DYADIC_REF_W_C 10
  ------------------
 2141|  52.9k|        pu1_src = pu1_inp_chroma;
 2142|  52.9k|        isvcd_copy_data_semiplanr(pu1_src, i4_src_stride, pu1_refarray_cb, pu1_refarray_cr,
 2143|  52.9k|                                  i4_dst_stride, i4_refarray_wd_chroma, i4_refarray_ht_chroma);
 2144|  52.9k|    }
 2145|       |
 2146|       |    /* --------------------------------------------------------------------- */
 2147|       |    /* Get the availability of 5 neighboring MBs                             */
 2148|       |    /* --------------------------------------------------------------------- */
 2149|  52.9k|    {
 2150|       |        /* mb_x + left, mb_y + top */
 2151|  52.9k|        isvcd_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size,
 2152|  52.9k|                                           i4_mbaddr_x + i4_left, i4_mbaddr_y + i4_top, &i4_avlblty,
 2153|  52.9k|                                           i1_slice_id, i1_cons_intr_samp_flag);
 2154|  52.9k|        u1_ny_avlblty = i4_avlblty;
 2155|       |
 2156|       |        /* mb_x + left, mb_y */
 2157|  52.9k|        isvcd_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size,
 2158|  52.9k|                                           i4_mbaddr_x + i4_left, i4_mbaddr_y, &i4_avlblty,
 2159|  52.9k|                                           i1_slice_id, i1_cons_intr_samp_flag);
 2160|  52.9k|        u1_ny_avlblty += (i4_avlblty << 1);
 2161|       |
 2162|       |        /* mb_x, mb_y + top */
 2163|  52.9k|        isvcd_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size,
 2164|  52.9k|                                           i4_mbaddr_x, i4_mbaddr_y + i4_top, &i4_avlblty,
 2165|  52.9k|                                           i1_slice_id, i1_cons_intr_samp_flag);
 2166|  52.9k|        u1_ny_avlblty += (i4_avlblty << 2);
 2167|       |
 2168|       |        /* mb_x - left, mb_y + top */
 2169|  52.9k|        isvcd_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size,
 2170|  52.9k|                                           i4_mbaddr_x - i4_left, i4_mbaddr_y + i4_top, &i4_avlblty,
 2171|  52.9k|                                           i1_slice_id, i1_cons_intr_samp_flag);
 2172|  52.9k|        u1_ny_avlblty += (i4_avlblty << 3);
 2173|       |
 2174|       |        /* mb_x + left, mb_y - top */
 2175|  52.9k|        isvcd_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size,
 2176|  52.9k|                                           i4_mbaddr_x + i4_left, i4_mbaddr_y - i4_top, &i4_avlblty,
 2177|  52.9k|                                           i1_slice_id, i1_cons_intr_samp_flag);
 2178|  52.9k|        u1_ny_avlblty += (i4_avlblty << 4);
 2179|  52.9k|    }
 2180|       |
 2181|       |    /* --------------------------------------------------------------------- */
 2182|       |    /* Filling the unavailable samples, if any                                 */
 2183|       |    /* --------------------------------------------------------------------- */
 2184|  52.9k|    if(0x7 == u1_ny_avlblty)
  ------------------
  |  Branch (2184:8): [True: 519, False: 52.4k]
  ------------------
 2185|    519|    {
 2186|       |        /* All are available, exit */
 2187|    519|        return OK;
  ------------------
  |  |  114|    519|#define OK        0
  ------------------
 2188|    519|    }
 2189|       |
 2190|  52.4k|    if(!(u1_ny_avlblty & 0x7))
  ------------------
  |  Branch (2190:8): [True: 33.5k, False: 18.9k]
  ------------------
 2191|  33.5k|    {
 2192|  33.5k|        UWORD8 *pu1_tmp_src, *pu1_tmp_dst1, *pu1_tmp_dst2;
 2193|  33.5k|        UWORD8 *pu1_tmp_src1, *pu1_tmp_src2;
 2194|       |
 2195|       |        /* Set the 4 corner samples to (x-xD,y-yD) */
 2196|  33.5k|        i4_x0 = 9 + (i4_left << 3) + i4_left;
 2197|  33.5k|        i4_y0 = 9 + (i4_top << 3) + i4_top;
 2198|       |
 2199|  33.5k|        i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1);
 2200|  33.5k|        i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1);
 2201|       |
 2202|  33.5k|        pu1_tmp_src = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  33.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2203|  33.5k|        pu1_tmp_dst1 = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  33.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2204|  33.5k|        pu1_tmp_dst2 = pu1_tmp_dst1 + DYADIC_REF_W_Y;
  ------------------
  |  |   56|  33.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2205|  33.5k|        pu1_tmp_dst1[i4_x0] = pu1_tmp_src[i4_ref_xD];
 2206|  33.5k|        pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD];
 2207|  33.5k|        pu1_tmp_dst2[i4_x0] = pu1_tmp_src[i4_ref_xD];
 2208|  33.5k|        pu1_tmp_dst2[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD];
 2209|       |
 2210|       |        /* Set the corner sample of Cb and Cr to (x-xD,y-yD) */
 2211|  33.5k|        i4_xc0 = i4_x0 >> 1;
 2212|  33.5k|        i4_yc0 = i4_y0 >> 1;
 2213|  33.5k|        i4_c_ref_yD = i4_ref_yD >> 1;
 2214|  33.5k|        i4_c_ref_xD = i4_ref_xD >> 1;
 2215|  33.5k|        pu1_tmp_src1 = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  33.5k|#define DYADIC_REF_W_C 10
  ------------------
 2216|  33.5k|        pu1_tmp_dst1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  33.5k|#define DYADIC_REF_W_C 10
  ------------------
 2217|  33.5k|        pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_c_ref_xD];
 2218|  33.5k|        pu1_tmp_src2 = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  33.5k|#define DYADIC_REF_W_C 10
  ------------------
 2219|  33.5k|        pu1_tmp_dst2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  33.5k|#define DYADIC_REF_W_C 10
  ------------------
 2220|  33.5k|        pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_c_ref_xD];
 2221|  33.5k|    }
 2222|       |
 2223|  52.4k|    if(!(u1_ny_avlblty & 0x5))
  ------------------
  |  Branch (2223:8): [True: 35.6k, False: 16.7k]
  ------------------
 2224|  35.6k|    {
 2225|  35.6k|        UWORD8 *pu1_tmp_src, *pu1_tmp_dst1, *pu1_tmp_dst2;
 2226|  35.6k|        UWORD8 *pu1_tmp_src1, *pu1_tmp_src2;
 2227|       |
 2228|       |        /* Copy (x0,ref_yD), (x0+1,ref_yD), ..., (x0+7,ref_yD) to */
 2229|       |        /* (x0,y0), (x0+1,y0), ..., (x0+7,y0) and   */
 2230|       |        /* (x0,y0+1), (x0+1,y0+1), ..., (x0+7,y0+1) */
 2231|  35.6k|        i4_x0 = 2;
 2232|  35.6k|        i4_y0 = 9 + (i4_top << 3) + i4_top;
 2233|  35.6k|        if(i4_left > 0)
  ------------------
  |  Branch (2233:12): [True: 17.4k, False: 18.2k]
  ------------------
 2234|  17.4k|        {
 2235|  17.4k|            i4_x0 += 8;
 2236|  17.4k|        }
 2237|  35.6k|        i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1);
 2238|       |
 2239|  35.6k|        pu1_tmp_src = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  35.6k|#define DYADIC_REF_W_Y 20
  ------------------
 2240|  35.6k|        pu1_tmp_dst1 = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  35.6k|#define DYADIC_REF_W_Y 20
  ------------------
 2241|  35.6k|        pu1_tmp_dst2 = pu1_tmp_dst1 + DYADIC_REF_W_Y;
  ------------------
  |  |   56|  35.6k|#define DYADIC_REF_W_Y 20
  ------------------
 2242|       |
 2243|   321k|        for(i4_x = i4_x0; i4_x < i4_x0 + 8; i4_x++)
  ------------------
  |  Branch (2243:27): [True: 285k, False: 35.6k]
  ------------------
 2244|   285k|        {
 2245|   285k|            pu1_tmp_dst1[i4_x] = pu1_tmp_src[i4_x];
 2246|   285k|            pu1_tmp_dst2[i4_x] = pu1_tmp_src[i4_x];
 2247|   285k|        }
 2248|       |
 2249|       |        /* Cb and Cr copy */
 2250|  35.6k|        i4_xc0 = i4_x0 >> 1;
 2251|  35.6k|        i4_yc0 = i4_y0 >> 1;
 2252|  35.6k|        i4_c_ref_yD = i4_ref_yD >> 1;
 2253|  35.6k|        pu1_tmp_src1 = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.6k|#define DYADIC_REF_W_C 10
  ------------------
 2254|  35.6k|        pu1_tmp_dst1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.6k|#define DYADIC_REF_W_C 10
  ------------------
 2255|  35.6k|        pu1_tmp_src2 = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.6k|#define DYADIC_REF_W_C 10
  ------------------
 2256|  35.6k|        pu1_tmp_dst2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.6k|#define DYADIC_REF_W_C 10
  ------------------
 2257|       |
 2258|   178k|        for(i4_x = i4_xc0; i4_x < i4_xc0 + 4; i4_x++)
  ------------------
  |  Branch (2258:28): [True: 142k, False: 35.6k]
  ------------------
 2259|   142k|        {
 2260|   142k|            pu1_tmp_dst1[i4_x] = pu1_tmp_src1[i4_x];
 2261|   142k|            pu1_tmp_dst2[i4_x] = pu1_tmp_src2[i4_x];
 2262|   142k|        }
 2263|  35.6k|    }
 2264|       |
 2265|  52.4k|    if(!(u1_ny_avlblty & 0x3))
  ------------------
  |  Branch (2265:8): [True: 39.3k, False: 13.1k]
  ------------------
 2266|  39.3k|    {
 2267|  39.3k|        UWORD8 *pu1_tmp_src, *pu1_tmp_dst1, *pu1_tmp_dst2;
 2268|  39.3k|        UWORD8 *pu1_tmp_src1, *pu1_tmp_src2;
 2269|       |
 2270|       |        /* Copy (ref_xD,y0) to (x0,y0) and (x0+1,y0); */
 2271|       |        /* copy (ref_xD,y0+1) to (x0,y0+1) and (x0+1,y0+1); ... ;*/
 2272|       |        /* copy (ref_xD,y0+7) to (x0,y0+7) and (x0+1,y0+7) */
 2273|  39.3k|        i4_x0 = 9 + (i4_left << 3) + i4_left;
 2274|  39.3k|        i4_y0 = 2;
 2275|  39.3k|        if(i4_top > 0)
  ------------------
  |  Branch (2275:12): [True: 18.8k, False: 20.4k]
  ------------------
 2276|  18.8k|        {
 2277|  18.8k|            i4_y0 += 8;
 2278|  18.8k|        }
 2279|  39.3k|        i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1);
 2280|       |
 2281|  39.3k|        pu1_tmp_src = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  39.3k|#define DYADIC_REF_W_Y 20
  ------------------
 2282|  39.3k|        pu1_tmp_dst1 = pu1_tmp_src;
 2283|       |
 2284|   353k|        for(i4_y = i4_y0; i4_y < i4_y0 + 8; i4_y++)
  ------------------
  |  Branch (2284:27): [True: 314k, False: 39.3k]
  ------------------
 2285|   314k|        {
 2286|   314k|            pu1_tmp_dst1[i4_x0] = pu1_tmp_src[i4_ref_xD];
 2287|   314k|            pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD];
 2288|   314k|            pu1_tmp_src += DYADIC_REF_W_Y;
  ------------------
  |  |   56|   314k|#define DYADIC_REF_W_Y 20
  ------------------
 2289|   314k|            pu1_tmp_dst1 += DYADIC_REF_W_Y;
  ------------------
  |  |   56|   314k|#define DYADIC_REF_W_Y 20
  ------------------
 2290|   314k|        }
 2291|       |
 2292|       |        /* Cb and Cr copy */
 2293|  39.3k|        i4_xc0 = i4_x0 >> 1;
 2294|  39.3k|        i4_yc0 = i4_y0 >> 1;
 2295|  39.3k|        i4_c_ref_xD = i4_ref_xD >> 1;
 2296|  39.3k|        pu1_tmp_src1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  39.3k|#define DYADIC_REF_W_C 10
  ------------------
 2297|  39.3k|        pu1_tmp_dst1 = pu1_tmp_src1;
 2298|  39.3k|        pu1_tmp_src2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  39.3k|#define DYADIC_REF_W_C 10
  ------------------
 2299|  39.3k|        pu1_tmp_dst2 = pu1_tmp_src2;
 2300|       |
 2301|   196k|        for(i4_y = i4_yc0; i4_y < i4_yc0 + 4; i4_y++)
  ------------------
  |  Branch (2301:28): [True: 157k, False: 39.3k]
  ------------------
 2302|   157k|        {
 2303|   157k|            pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_c_ref_xD];
 2304|   157k|            pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_c_ref_xD];
 2305|   157k|            pu1_tmp_src1 += DYADIC_REF_W_C;
  ------------------
  |  |   58|   157k|#define DYADIC_REF_W_C 10
  ------------------
 2306|   157k|            pu1_tmp_src2 += DYADIC_REF_W_C;
  ------------------
  |  |   58|   157k|#define DYADIC_REF_W_C 10
  ------------------
 2307|   157k|            pu1_tmp_dst1 += DYADIC_REF_W_C;
  ------------------
  |  |   58|   157k|#define DYADIC_REF_W_C 10
  ------------------
 2308|   157k|            pu1_tmp_dst2 += DYADIC_REF_W_C;
  ------------------
  |  |   58|   157k|#define DYADIC_REF_W_C 10
  ------------------
 2309|   157k|        }
 2310|  39.3k|    }
 2311|       |
 2312|  52.4k|    if(!(u1_ny_avlblty & 0x4))
  ------------------
  |  Branch (2312:8): [True: 41.9k, False: 10.4k]
  ------------------
 2313|  41.9k|    {
 2314|  41.9k|        if(!(u1_ny_avlblty & 0x8))
  ------------------
  |  Branch (2314:12): [True: 35.5k, False: 6.42k]
  ------------------
 2315|  35.5k|        {
 2316|       |            /* (mb_x-left,mb_y+top) not available */
 2317|  35.5k|            UWORD8 *pu1_tmp_src, *pu1_tmp_dst;
 2318|       |
 2319|  35.5k|            i4_x0 = 9 - i4_left;
 2320|  35.5k|            i4_y0 = 9 + (i4_top << 3) + i4_top;
 2321|       |
 2322|  35.5k|            i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1);
 2323|       |
 2324|       |            /* Copy (x0,ref_yD) and (x0+1,ref_yD) to (x0,y0) and (x0+1,y0), and */
 2325|       |            /* to (x0,y0+1) and (x0+1,y0+1) */
 2326|  35.5k|            pu1_tmp_src = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  35.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2327|  35.5k|            pu1_tmp_dst = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  35.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2328|  35.5k|            pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_x0];
 2329|  35.5k|            pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_x0 + 1];
 2330|  35.5k|            pu1_tmp_dst += DYADIC_REF_W_Y;
  ------------------
  |  |   56|  35.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2331|  35.5k|            pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_x0];
 2332|  35.5k|            pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_x0 + 1];
 2333|       |
 2334|       |            /* Cb copy */
 2335|  35.5k|            i4_xc0 = i4_x0 >> 1;
 2336|  35.5k|            i4_yc0 = i4_y0 >> 1;
 2337|  35.5k|            i4_c_ref_yD = i4_ref_yD >> 1;
 2338|  35.5k|            pu1_tmp_src = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.5k|#define DYADIC_REF_W_C 10
  ------------------
 2339|  35.5k|            pu1_tmp_dst = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.5k|#define DYADIC_REF_W_C 10
  ------------------
 2340|  35.5k|            pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_xc0];
 2341|       |
 2342|       |            /* Cr copy */
 2343|  35.5k|            pu1_tmp_src = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.5k|#define DYADIC_REF_W_C 10
  ------------------
 2344|  35.5k|            pu1_tmp_dst = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  35.5k|#define DYADIC_REF_W_C 10
  ------------------
 2345|  35.5k|            pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_xc0];
 2346|       |
 2347|  35.5k|        } /* if (mb_x-left,mb_y+top) not available */
 2348|  6.42k|        else
 2349|  6.42k|        {
 2350|  6.42k|            WORD32 i4_xD, i4_yD;
 2351|  6.42k|            WORD32 i4_c_xD, i4_c_yD;
 2352|       |
 2353|  6.42k|            isvcd_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride,
 2354|  6.42k|                                               i4_element_size, i4_mbaddr_x - i4_left, i4_mbaddr_y,
 2355|  6.42k|                                               &i4_avlblty, i1_slice_id, i1_cons_intr_samp_flag);
 2356|  6.42k|            i1_corner_samp_avlbl_flag = i4_avlblty;
 2357|       |
 2358|  6.42k|            i4_x0 = 9 - i4_left;
 2359|  6.42k|            i4_y0 = 9 + (i4_top << 3) + i4_top;
 2360|  6.42k|            i4_xc0 = i4_x0 >> 1;
 2361|  6.42k|            i4_yc0 = i4_y0 >> 1;
 2362|  6.42k|            i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1);
 2363|  6.42k|            i4_ref_xD = i4_x0 - (i4_left * 7) - (i4_left >> 1);
 2364|  6.42k|            i4_c_ref_xD = i4_ref_xD >> 1;
 2365|  6.42k|            i4_c_ref_yD = i4_ref_yD >> 1;
 2366|  6.42k|            i4_xD = i4_x0 - i4_ref_xD;
 2367|  6.42k|            i4_yD = i4_y0 - i4_ref_yD;
 2368|  6.42k|            i4_c_xD = i4_xc0 - i4_c_ref_xD;
 2369|  6.42k|            i4_c_yD = i4_yc0 - i4_c_ref_yD;
 2370|       |
 2371|       |            /* Fill corner sample if not available */
 2372|  6.42k|            if(!i1_corner_samp_avlbl_flag)
  ------------------
  |  Branch (2372:16): [True: 6.10k, False: 312]
  ------------------
 2373|  6.10k|            {
 2374|  6.10k|                isvcd_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma,
 2375|  6.10k|                                         pu1_refarray_cb, pu1_refarray_cr);
 2376|  6.10k|            }
 2377|       |
 2378|       |            /* Call diagonal construction for luma */
 2379|  19.2k|            for(i4_y = i4_y0; i4_y < i4_y0 + 2; i4_y++)
  ------------------
  |  Branch (2379:31): [True: 12.8k, False: 6.42k]
  ------------------
 2380|  12.8k|            {
 2381|  38.5k|                for(i4_x = i4_x0; i4_x < i4_x0 + 2; i4_x++)
  ------------------
  |  Branch (2381:35): [True: 25.6k, False: 12.8k]
  ------------------
 2382|  25.6k|                {
 2383|  25.6k|                    isvcd_diagonal_construct_dyadic(i4_x, i4_y, i4_xD, i4_yD, pu1_refarray_luma,
 2384|  25.6k|                                                    DYADIC_REF_W_Y);
  ------------------
  |  |   56|  25.6k|#define DYADIC_REF_W_Y 20
  ------------------
 2385|  25.6k|                    i4_xD++;
 2386|  25.6k|                }
 2387|  12.8k|                i4_yD++;
 2388|  12.8k|                i4_xD -= 2;
 2389|  12.8k|            }
 2390|       |
 2391|       |            /* Call diagonal construction for chroma */
 2392|  6.42k|            isvcd_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cb,
 2393|  6.42k|                                            DYADIC_REF_W_C);
  ------------------
  |  |   58|  6.42k|#define DYADIC_REF_W_C 10
  ------------------
 2394|       |
 2395|  6.42k|            isvcd_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cr,
 2396|  6.42k|                                            DYADIC_REF_W_C);
  ------------------
  |  |   58|  6.42k|#define DYADIC_REF_W_C 10
  ------------------
 2397|  6.42k|        }
 2398|  41.9k|    }
 2399|       |
 2400|  52.4k|    if(!(u1_ny_avlblty & 0x2))
  ------------------
  |  Branch (2400:8): [True: 46.1k, False: 6.35k]
  ------------------
 2401|  46.1k|    {
 2402|  46.1k|        if(!(u1_ny_avlblty & 0x10))
  ------------------
  |  Branch (2402:12): [True: 39.5k, False: 6.59k]
  ------------------
 2403|  39.5k|        {
 2404|  39.5k|            UWORD8 *pu1_tmp_src, *pu1_tmp_dst;
 2405|       |
 2406|  39.5k|            i4_x0 = 9 + (i4_left << 3) + i4_left;
 2407|  39.5k|            i4_y0 = 9 - i4_top;
 2408|  39.5k|            i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1);
 2409|       |
 2410|       |            /* Copy (ref_xD,y0) to (x0,y0), (x0+1,y0), and  */
 2411|       |            /* copy (ref_xD,y0+1) to (x0,y0+1), (x0+1,y0+1) */
 2412|  39.5k|            pu1_tmp_src = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  39.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2413|  39.5k|            pu1_tmp_dst = pu1_tmp_src;
 2414|  39.5k|            pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_ref_xD];
 2415|  39.5k|            pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD];
 2416|  39.5k|            pu1_tmp_src += DYADIC_REF_W_Y;
  ------------------
  |  |   56|  39.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2417|  39.5k|            pu1_tmp_dst += DYADIC_REF_W_Y;
  ------------------
  |  |   56|  39.5k|#define DYADIC_REF_W_Y 20
  ------------------
 2418|  39.5k|            pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_ref_xD];
 2419|  39.5k|            pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD];
 2420|       |
 2421|       |            /* Cb copy */
 2422|  39.5k|            i4_xc0 = i4_x0 >> 1;
 2423|  39.5k|            i4_yc0 = i4_y0 >> 1;
 2424|  39.5k|            i4_c_ref_xD = i4_ref_xD >> 1;
 2425|  39.5k|            pu1_tmp_src = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  39.5k|#define DYADIC_REF_W_C 10
  ------------------
 2426|  39.5k|            pu1_tmp_dst = pu1_tmp_src;
 2427|  39.5k|            pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_c_ref_xD];
 2428|       |
 2429|       |            /* Cr copy */
 2430|  39.5k|            pu1_tmp_src = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  39.5k|#define DYADIC_REF_W_C 10
  ------------------
 2431|  39.5k|            pu1_tmp_dst = pu1_tmp_src;
 2432|  39.5k|            pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_c_ref_xD];
 2433|       |
 2434|  39.5k|        } /* if (mb_x+left,mb_y-top) not available */
 2435|  6.59k|        else
 2436|  6.59k|        {
 2437|  6.59k|            WORD32 i4_xD, i4_yD;
 2438|  6.59k|            WORD32 i4_c_xD, i4_c_yD;
 2439|       |
 2440|  6.59k|            isvcd_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride,
 2441|  6.59k|                                               i4_element_size, i4_mbaddr_x, i4_mbaddr_y - i4_top,
 2442|  6.59k|                                               &i4_avlblty, i1_slice_id, i1_cons_intr_samp_flag);
 2443|  6.59k|            i1_corner_samp_avlbl_flag = i4_avlblty;
 2444|       |
 2445|  6.59k|            i4_x0 = 9 + (i4_left << 3) + i4_left;
 2446|  6.59k|            i4_y0 = 9 - i4_top;
 2447|  6.59k|            i4_xc0 = i4_x0 >> 1;
 2448|  6.59k|            i4_yc0 = i4_y0 >> 1;
 2449|  6.59k|            i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1);
 2450|  6.59k|            i4_ref_yD = i4_y0 - (i4_top * 7) - (i4_top >> 1);
 2451|  6.59k|            i4_c_ref_xD = i4_ref_xD >> 1;
 2452|  6.59k|            i4_c_ref_yD = i4_ref_yD >> 1;
 2453|  6.59k|            i4_xD = i4_x0 - i4_ref_xD;
 2454|  6.59k|            i4_yD = i4_y0 - i4_ref_yD;
 2455|  6.59k|            i4_c_xD = i4_xc0 - i4_c_ref_xD;
 2456|  6.59k|            i4_c_yD = i4_yc0 - i4_c_ref_yD;
 2457|       |
 2458|  6.59k|            if(!i1_corner_samp_avlbl_flag)
  ------------------
  |  Branch (2458:16): [True: 5.74k, False: 850]
  ------------------
 2459|  5.74k|            {
 2460|  5.74k|                isvcd_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma,
 2461|  5.74k|                                         pu1_refarray_cb, pu1_refarray_cr);
 2462|  5.74k|            }
 2463|       |
 2464|       |            /* Call diagonal construction for luma */
 2465|  19.7k|            for(i4_y = i4_y0; i4_y < i4_y0 + 2; i4_y++)
  ------------------
  |  Branch (2465:31): [True: 13.1k, False: 6.59k]
  ------------------
 2466|  13.1k|            {
 2467|  39.5k|                for(i4_x = i4_x0; i4_x < i4_x0 + 2; i4_x++)
  ------------------
  |  Branch (2467:35): [True: 26.3k, False: 13.1k]
  ------------------
 2468|  26.3k|                {
 2469|  26.3k|                    isvcd_diagonal_construct_dyadic(i4_x, i4_y, i4_xD, i4_yD, pu1_refarray_luma,
 2470|  26.3k|                                                    DYADIC_REF_W_Y);
  ------------------
  |  |   56|  26.3k|#define DYADIC_REF_W_Y 20
  ------------------
 2471|  26.3k|                    i4_xD++;
 2472|  26.3k|                }
 2473|  13.1k|                i4_yD++;
 2474|  13.1k|                i4_xD -= 2;
 2475|  13.1k|            }
 2476|       |
 2477|       |            /* Call diagonal construction for chroma */
 2478|  6.59k|            isvcd_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cb,
 2479|  6.59k|                                            DYADIC_REF_W_C);
  ------------------
  |  |   58|  6.59k|#define DYADIC_REF_W_C 10
  ------------------
 2480|       |
 2481|  6.59k|            isvcd_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cr,
 2482|  6.59k|                                            DYADIC_REF_W_C);
  ------------------
  |  |   58|  6.59k|#define DYADIC_REF_W_C 10
  ------------------
 2483|  6.59k|        }
 2484|  46.1k|    }
 2485|       |
 2486|  52.4k|    if(u1_ny_avlblty & 1)
  ------------------
  |  Branch (2486:8): [True: 8.08k, False: 44.3k]
  ------------------
 2487|  8.08k|    {
 2488|  8.08k|        if(!(u1_ny_avlblty & 2))
  ------------------
  |  Branch (2488:12): [True: 6.79k, False: 1.28k]
  ------------------
 2489|  6.79k|        {
 2490|       |            /* (mb_x+left,mb_y) is unavailable */
 2491|  6.79k|            WORD32 i4_xD, i4_yD;
 2492|  6.79k|            WORD32 i4_c_xD, i4_c_yD;
 2493|  6.79k|            UWORD8 *pu1_tmp_dst;
 2494|  6.79k|            UWORD8 u1_filled_samp;
 2495|       |
 2496|  6.79k|            i1_corner_samp_avlbl_flag = (u1_ny_avlblty & 4) >> 2;
 2497|       |
 2498|  6.79k|            i4_x0 = 9 + (i4_left << 3) + i4_left;
 2499|  6.79k|            i4_y0 = 2;
 2500|  6.79k|            i4_ref_yD = 1;
 2501|  6.79k|            if(i4_top > 0)
  ------------------
  |  Branch (2501:16): [True: 3.26k, False: 3.53k]
  ------------------
 2502|  3.26k|            {
 2503|  3.26k|                i4_y0 += 8;
 2504|  3.26k|                i4_ref_yD = 18;
 2505|  3.26k|            }
 2506|       |
 2507|  6.79k|            i4_ref_xD = i4_x0 - (i4_left) - (i4_left >> 1);
 2508|  6.79k|            i4_xD = i4_x0 - i4_ref_xD;
 2509|  6.79k|            i4_yD = i4_y0 - i4_ref_yD;
 2510|  6.79k|            i4_xc0 = i4_x0 >> 1;
 2511|  6.79k|            i4_yc0 = i4_y0 >> 1;
 2512|  6.79k|            i4_c_ref_xD = i4_ref_xD >> 1;
 2513|  6.79k|            i4_c_ref_yD = i4_ref_yD >> 1;
 2514|  6.79k|            i4_c_xD = i4_xc0 - i4_c_ref_xD;
 2515|  6.79k|            i4_c_yD = i4_yc0 - i4_c_ref_yD;
 2516|       |
 2517|       |            /* Fill corner sample if unavailable */
 2518|  6.79k|            if(!i1_corner_samp_avlbl_flag)
  ------------------
  |  Branch (2518:16): [True: 5.98k, False: 810]
  ------------------
 2519|  5.98k|            {
 2520|  5.98k|                isvcd_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma,
 2521|  5.98k|                                         pu1_refarray_cb, pu1_refarray_cr);
 2522|  5.98k|            }
 2523|       |
 2524|       |            /* Call the diagonal construction for the 8 rows */
 2525|  6.79k|            if(i4_top == i4_left)
  ------------------
  |  Branch (2525:16): [True: 3.35k, False: 3.44k]
  ------------------
 2526|  3.35k|            {
 2527|       |                /* if top * left = 1 (x0,y0) */
 2528|  3.35k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x0, i4_y0, i4_xD, i4_yD,
 2529|  3.35k|                                                                 pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.35k|#define DYADIC_REF_W_Y 20
  ------------------
 2530|       |
 2531|  3.35k|                pu1_tmp_dst = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.35k|#define DYADIC_REF_W_Y 20
  ------------------
 2532|       |
 2533|       |                /* (x0,y0+1), ..., (x0,y0+7) and */
 2534|       |                /* (x0+1,y0), ..., (x0+1,y0+6)   */
 2535|  26.8k|                for(i4_y = i4_y0 + 1; i4_y < i4_y0 + 8; i4_y++)
  ------------------
  |  Branch (2535:39): [True: 23.4k, False: 3.35k]
  ------------------
 2536|  23.4k|                {
 2537|  23.4k|                    i4_yD++;
 2538|  23.4k|                    u1_filled_samp = isvcd_diagonal_construct_dyadic(
 2539|  23.4k|                        i4_x0, i4_y, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  23.4k|#define DYADIC_REF_W_Y 20
  ------------------
 2540|  23.4k|                    pu1_tmp_dst[i4_x0 + 1] = u1_filled_samp;
 2541|  23.4k|                    pu1_tmp_dst += DYADIC_REF_W_Y;
  ------------------
  |  |   56|  23.4k|#define DYADIC_REF_W_Y 20
  ------------------
 2542|  23.4k|                }
 2543|       |
 2544|       |                /* (x0+1,y0+7) */
 2545|  3.35k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(
 2546|  3.35k|                    i4_x0 + 1, i4_y0 + 7, i4_xD + 1, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.35k|#define DYADIC_REF_W_Y 20
  ------------------
 2547|  3.35k|            }
 2548|  3.44k|            else
 2549|  3.44k|            {
 2550|       |                /* top * left = -1 (x0+1,y0) */
 2551|  3.44k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x0 + 1, i4_y0, i4_xD + 1, i4_yD,
 2552|  3.44k|                                                                 pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.44k|#define DYADIC_REF_W_Y 20
  ------------------
 2553|       |
 2554|  3.44k|                pu1_tmp_dst = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.44k|#define DYADIC_REF_W_Y 20
  ------------------
 2555|       |
 2556|       |                /* (x0,y0), ..., (x0,y0+6) and   */
 2557|       |                /* (x0+1,y0+1), ..., (x0+1,y0+7) */
 2558|  27.5k|                for(i4_y = i4_y0; i4_y < i4_y0 + 7; i4_y++)
  ------------------
  |  Branch (2558:35): [True: 24.0k, False: 3.44k]
  ------------------
 2559|  24.0k|                {
 2560|  24.0k|                    u1_filled_samp = isvcd_diagonal_construct_dyadic(
 2561|  24.0k|                        i4_x0, i4_y, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  24.0k|#define DYADIC_REF_W_Y 20
  ------------------
 2562|       |
 2563|  24.0k|                    pu1_tmp_dst += DYADIC_REF_W_Y;
  ------------------
  |  |   56|  24.0k|#define DYADIC_REF_W_Y 20
  ------------------
 2564|  24.0k|                    pu1_tmp_dst[i4_x0 + 1] = u1_filled_samp;
 2565|  24.0k|                    i4_yD++;
 2566|  24.0k|                }
 2567|       |
 2568|       |                /* (x0,y0+7) */
 2569|  3.44k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x0, i4_y0 + 7, i4_xD, i4_yD,
 2570|  3.44k|                                                                 pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.44k|#define DYADIC_REF_W_Y 20
  ------------------
 2571|  3.44k|            }
 2572|       |
 2573|       |            /* For Cb and Cr */
 2574|  33.9k|            for(i4_y = i4_yc0; i4_y < i4_yc0 + 4; i4_y++)
  ------------------
  |  Branch (2574:32): [True: 27.1k, False: 6.79k]
  ------------------
 2575|  27.1k|            {
 2576|  27.1k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_xc0, i4_y, i4_c_xD, i4_c_yD,
 2577|  27.1k|                                                                 pu1_refarray_cb, DYADIC_REF_W_C);
  ------------------
  |  |   58|  27.1k|#define DYADIC_REF_W_C 10
  ------------------
 2578|  27.1k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_xc0, i4_y, i4_c_xD, i4_c_yD,
 2579|  27.1k|                                                                 pu1_refarray_cr, DYADIC_REF_W_C);
  ------------------
  |  |   58|  27.1k|#define DYADIC_REF_W_C 10
  ------------------
 2580|  27.1k|                i4_c_yD++;
 2581|  27.1k|            }
 2582|       |
 2583|  6.79k|        } /* (mb_x+left,mb_y) is unavailable */
 2584|       |
 2585|  8.08k|        if(!(u1_ny_avlblty & 4))
  ------------------
  |  Branch (2585:12): [True: 6.29k, False: 1.78k]
  ------------------
 2586|  6.29k|        {
 2587|       |            /* (mb_x,mb_y+top) is unavailable */
 2588|  6.29k|            WORD32 i4_xD, i4_yD;
 2589|  6.29k|            WORD32 i4_c_xD, i4_c_yD;
 2590|  6.29k|            UWORD8 *pu1_tmp_dst;
 2591|  6.29k|            UWORD8 u1_filled_samp;
 2592|       |
 2593|  6.29k|            i1_corner_samp_avlbl_flag = (u1_ny_avlblty & 2) >> 1;
 2594|  6.29k|            i4_y0 = 9 + (i4_top << 3) + (i4_top);
 2595|  6.29k|            i4_x0 = 2;
 2596|  6.29k|            i4_ref_xD = 1;
 2597|  6.29k|            if(i4_left > 0)
  ------------------
  |  Branch (2597:16): [True: 3.34k, False: 2.95k]
  ------------------
 2598|  3.34k|            {
 2599|  3.34k|                i4_x0 += 8;
 2600|  3.34k|                i4_ref_xD = 18;
 2601|  3.34k|            }
 2602|       |
 2603|  6.29k|            i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1);
 2604|  6.29k|            i4_xD = i4_x0 - i4_ref_xD;
 2605|  6.29k|            i4_yD = i4_y0 - i4_ref_yD;
 2606|  6.29k|            i4_xc0 = i4_x0 >> 1;
 2607|  6.29k|            i4_yc0 = i4_y0 >> 1;
 2608|  6.29k|            i4_c_ref_xD = i4_ref_xD >> 1;
 2609|  6.29k|            i4_c_ref_yD = i4_ref_yD >> 1;
 2610|  6.29k|            i4_c_xD = i4_xc0 - i4_c_ref_xD;
 2611|  6.29k|            i4_c_yD = i4_yc0 - i4_c_ref_yD;
 2612|       |
 2613|  6.29k|            if(!i1_corner_samp_avlbl_flag)
  ------------------
  |  Branch (2613:16): [True: 5.98k, False: 311]
  ------------------
 2614|  5.98k|            {
 2615|  5.98k|                isvcd_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma,
 2616|  5.98k|                                         pu1_refarray_cb, pu1_refarray_cr);
 2617|  5.98k|            }
 2618|       |
 2619|       |            /* Call the diagonal construction for the 2 rows */
 2620|  6.29k|            if(i4_top == i4_left)
  ------------------
  |  Branch (2620:16): [True: 3.03k, False: 3.26k]
  ------------------
 2621|  3.03k|            {
 2622|       |                /* if top * left = 1 (x0,y0) */
 2623|  3.03k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x0, i4_y0, i4_xD, i4_yD,
 2624|  3.03k|                                                                 pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.03k|#define DYADIC_REF_W_Y 20
  ------------------
 2625|       |
 2626|  3.03k|                pu1_tmp_dst = pu1_refarray_luma + ((i4_y0 + 1) * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.03k|#define DYADIC_REF_W_Y 20
  ------------------
 2627|       |
 2628|       |                /* (x0+1,y0), ..., (x0+7,y0) and */
 2629|       |                /* (x0,y0+1), ..., (x0+6,y0+1)   */
 2630|  24.2k|                for(i4_x = i4_x0 + 1; i4_x < i4_x0 + 8; i4_x++)
  ------------------
  |  Branch (2630:39): [True: 21.2k, False: 3.03k]
  ------------------
 2631|  21.2k|                {
 2632|  21.2k|                    i4_xD++;
 2633|  21.2k|                    u1_filled_samp = isvcd_diagonal_construct_dyadic(
 2634|  21.2k|                        i4_x, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  21.2k|#define DYADIC_REF_W_Y 20
  ------------------
 2635|  21.2k|                    pu1_tmp_dst[i4_x - 1] = u1_filled_samp;
 2636|  21.2k|                }
 2637|       |
 2638|       |                /* (x0+7,y0+1) */
 2639|  3.03k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(
 2640|  3.03k|                    i4_x0 + 7, i4_y0 + 1, i4_xD, i4_yD + 1, pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.03k|#define DYADIC_REF_W_Y 20
  ------------------
 2641|  3.03k|            }
 2642|  3.26k|            else
 2643|  3.26k|            {
 2644|       |                /* top * left = -1 */
 2645|       |                /* (x0,y0+1) */
 2646|  3.26k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x0, i4_y0 + 1, i4_xD, i4_yD + 1,
 2647|  3.26k|                                                                 pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.26k|#define DYADIC_REF_W_Y 20
  ------------------
 2648|       |
 2649|  3.26k|                pu1_tmp_dst = pu1_refarray_luma + ((i4_y0 + 1) * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.26k|#define DYADIC_REF_W_Y 20
  ------------------
 2650|       |
 2651|       |                /* (x0,y0), ..., (x0,y0+6) and   */
 2652|       |                /* (x0+1,y0+1), ..., (x0+1,y0+7) */
 2653|  26.0k|                for(i4_x = i4_x0; i4_x < i4_x0 + 7; i4_x++)
  ------------------
  |  Branch (2653:35): [True: 22.8k, False: 3.26k]
  ------------------
 2654|  22.8k|                {
 2655|  22.8k|                    u1_filled_samp = isvcd_diagonal_construct_dyadic(
 2656|  22.8k|                        i4_x, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  22.8k|#define DYADIC_REF_W_Y 20
  ------------------
 2657|       |
 2658|  22.8k|                    pu1_tmp_dst[i4_x + 1] = u1_filled_samp;
 2659|  22.8k|                    i4_xD++;
 2660|  22.8k|                }
 2661|       |
 2662|       |                /* (x0+7,y0) */
 2663|  3.26k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x0 + 7, i4_y0, i4_xD, i4_yD,
 2664|  3.26k|                                                                 pu1_refarray_luma, DYADIC_REF_W_Y);
  ------------------
  |  |   56|  3.26k|#define DYADIC_REF_W_Y 20
  ------------------
 2665|  3.26k|            }
 2666|       |
 2667|       |            /* For Cb and Cr */
 2668|  31.4k|            for(i4_x = i4_xc0; i4_x < i4_xc0 + 4; i4_x++)
  ------------------
  |  Branch (2668:32): [True: 25.1k, False: 6.29k]
  ------------------
 2669|  25.1k|            {
 2670|  25.1k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x, i4_yc0, i4_c_xD, i4_c_yD,
 2671|  25.1k|                                                                 pu1_refarray_cb, DYADIC_REF_W_C);
  ------------------
  |  |   58|  25.1k|#define DYADIC_REF_W_C 10
  ------------------
 2672|  25.1k|                u1_filled_samp = isvcd_diagonal_construct_dyadic(i4_x, i4_yc0, i4_c_xD, i4_c_yD,
 2673|  25.1k|                                                                 pu1_refarray_cr, DYADIC_REF_W_C);
  ------------------
  |  |   58|  25.1k|#define DYADIC_REF_W_C 10
  ------------------
 2674|  25.1k|                i4_c_xD++;
 2675|  25.1k|            }
 2676|       |
 2677|  6.29k|        } /* (mb_x,mb_y+top) is unavailable */
 2678|  8.08k|    }     /* if (mb_x+left,mb_y+top) not available */
 2679|  44.3k|    else
 2680|  44.3k|    {
 2681|  44.3k|        UWORD8 *pu1_tmp_dst1, *pu1_tmp_dst2;
 2682|  44.3k|        UWORD8 *pu1_tmp_src1, *pu1_tmp_src2;
 2683|       |
 2684|  44.3k|        if(0x02 == (u1_ny_avlblty & 0x6))
  ------------------
  |  Branch (2684:12): [True: 2.10k, False: 42.2k]
  ------------------
 2685|  2.10k|        {
 2686|       |            /* (mb_x+left,mb_y) available, (mb_x,mb_y+top) unavailable */
 2687|  2.10k|            i4_x0 = 9 + (i4_left << 3) + i4_left;
 2688|  2.10k|            i4_y0 = 9 + (i4_top << 3) + i4_top;
 2689|  2.10k|            i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1);
 2690|       |
 2691|       |            /* Copy (x0,ref_yD), (x0+1,ref_yD) to  */
 2692|       |            /* (x0,y0), (x0+1,y0), and (x0,y0+1), (x0+1,y0+1) */
 2693|  2.10k|            pu1_tmp_src1 = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  2.10k|#define DYADIC_REF_W_Y 20
  ------------------
 2694|  2.10k|            pu1_tmp_dst1 = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  2.10k|#define DYADIC_REF_W_Y 20
  ------------------
 2695|  2.10k|            pu1_tmp_dst2 = pu1_tmp_dst1 + DYADIC_REF_W_Y;
  ------------------
  |  |   56|  2.10k|#define DYADIC_REF_W_Y 20
  ------------------
 2696|  2.10k|            pu1_tmp_dst1[i4_x0] = pu1_tmp_src1[i4_x0];
 2697|  2.10k|            pu1_tmp_dst2[i4_x0] = pu1_tmp_src1[i4_x0];
 2698|  2.10k|            pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src1[i4_x0 + 1];
 2699|  2.10k|            pu1_tmp_dst2[i4_x0 + 1] = pu1_tmp_src1[i4_x0 + 1];
 2700|       |
 2701|       |            /* Cb and Cr copy */
 2702|  2.10k|            i4_xc0 = i4_x0 >> 1;
 2703|  2.10k|            i4_yc0 = i4_y0 >> 1;
 2704|  2.10k|            i4_c_ref_yD = i4_ref_yD >> 1;
 2705|  2.10k|            pu1_tmp_src1 = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  2.10k|#define DYADIC_REF_W_C 10
  ------------------
 2706|  2.10k|            pu1_tmp_dst1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  2.10k|#define DYADIC_REF_W_C 10
  ------------------
 2707|  2.10k|            pu1_tmp_src2 = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C);
  ------------------
  |  |   58|  2.10k|#define DYADIC_REF_W_C 10
  ------------------
 2708|  2.10k|            pu1_tmp_dst2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  2.10k|#define DYADIC_REF_W_C 10
  ------------------
 2709|  2.10k|            pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_xc0];
 2710|  2.10k|            pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_xc0];
 2711|       |
 2712|  2.10k|        } /* if (mb_x+left,mb_y) available, (mb_x,mb_y+top) unavailable */
 2713|  42.2k|        else if(0x04 == (u1_ny_avlblty & 0x6))
  ------------------
  |  Branch (2713:17): [True: 5.74k, False: 36.5k]
  ------------------
 2714|  5.74k|        {
 2715|       |            /* (mb_x+left,mb_y) unavailable, (mb_x,mb_y+top) available */
 2716|  5.74k|            i4_x0 = 9 + (i4_left << 3) + i4_left;
 2717|  5.74k|            i4_y0 = 9 + (i4_top << 3) + i4_top;
 2718|  5.74k|            i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1);
 2719|       |
 2720|       |            /* Copy (ref_xD,y0) to (x0,y0) and (x0+1,y0) */
 2721|       |            /* copy (ref_xD,y0+1) to (x0,y0+1) and (x0+1,y0+1) */
 2722|  5.74k|            pu1_tmp_src1 = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y);
  ------------------
  |  |   56|  5.74k|#define DYADIC_REF_W_Y 20
  ------------------
 2723|  5.74k|            pu1_tmp_dst1 = pu1_tmp_src1;
 2724|  5.74k|            pu1_tmp_src2 = pu1_tmp_src1 + DYADIC_REF_W_Y;
  ------------------
  |  |   56|  5.74k|#define DYADIC_REF_W_Y 20
  ------------------
 2725|  5.74k|            pu1_tmp_dst2 = pu1_tmp_src2;
 2726|       |
 2727|  5.74k|            pu1_tmp_dst1[i4_x0] = pu1_tmp_src1[i4_ref_xD];
 2728|  5.74k|            pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src1[i4_ref_xD];
 2729|  5.74k|            pu1_tmp_dst2[i4_x0] = pu1_tmp_src2[i4_ref_xD];
 2730|  5.74k|            pu1_tmp_dst2[i4_x0 + 1] = pu1_tmp_src2[i4_ref_xD];
 2731|       |
 2732|       |            /* Copy Cb and Cr */
 2733|  5.74k|            i4_xc0 = i4_x0 >> 1;
 2734|  5.74k|            i4_yc0 = i4_y0 >> 1;
 2735|  5.74k|            i4_c_ref_xD = i4_ref_xD >> 1;
 2736|       |
 2737|  5.74k|            pu1_tmp_src1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  5.74k|#define DYADIC_REF_W_C 10
  ------------------
 2738|  5.74k|            pu1_tmp_dst1 = pu1_tmp_src1;
 2739|  5.74k|            pu1_tmp_src2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C);
  ------------------
  |  |   58|  5.74k|#define DYADIC_REF_W_C 10
  ------------------
 2740|  5.74k|            pu1_tmp_dst2 = pu1_tmp_src2;
 2741|       |
 2742|  5.74k|            pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_c_ref_xD];
 2743|  5.74k|            pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_c_ref_xD];
 2744|       |
 2745|  5.74k|        } /* if (mb_x+left,mb_y) unavailable, (mb_x,mb_y+top) available */
 2746|  36.5k|        else if(0x6 == (u1_ny_avlblty & 0x6))
  ------------------
  |  Branch (2746:17): [True: 2.96k, False: 33.5k]
  ------------------
 2747|  2.96k|        {
 2748|       |            /* (mb_x+left,mb_y) available, (mb_x,mb_y+top) available */
 2749|  2.96k|            WORD32 i4_xD, i4_yD;
 2750|  2.96k|            WORD32 i4_c_xD, i4_c_yD;
 2751|       |
 2752|  2.96k|            i4_y0 = 9 + (i4_top << 3) + i4_top;
 2753|  2.96k|            i4_x0 = 9 + (i4_left << 3) + i4_left;
 2754|  2.96k|            i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1);
 2755|  2.96k|            i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1);
 2756|  2.96k|            i4_xD = i4_x0 - i4_ref_xD;
 2757|  2.96k|            i4_yD = i4_y0 - i4_ref_yD;
 2758|  2.96k|            i4_xc0 = i4_x0 >> 1;
 2759|  2.96k|            i4_yc0 = i4_y0 >> 1;
 2760|  2.96k|            i4_c_ref_xD = i4_ref_xD >> 1;
 2761|  2.96k|            i4_c_ref_yD = i4_ref_yD >> 1;
 2762|  2.96k|            i4_c_xD = i4_xc0 - i4_c_ref_xD;
 2763|  2.96k|            i4_c_yD = i4_yc0 - i4_c_ref_yD;
 2764|       |
 2765|       |            /* Call diagonal construction for luma */
 2766|  8.89k|            for(i4_y = i4_y0; i4_y < i4_y0 + 2; i4_y++)
  ------------------
  |  Branch (2766:31): [True: 5.93k, False: 2.96k]
  ------------------
 2767|  5.93k|            {
 2768|  17.7k|                for(i4_x = i4_x0; i4_x < i4_x0 + 2; i4_x++)
  ------------------
  |  Branch (2768:35): [True: 11.8k, False: 5.93k]
  ------------------
 2769|  11.8k|                {
 2770|  11.8k|                    isvcd_diagonal_construct_dyadic(i4_x, i4_y, i4_xD, i4_yD, pu1_refarray_luma,
 2771|  11.8k|                                                    DYADIC_REF_W_Y);
  ------------------
  |  |   56|  11.8k|#define DYADIC_REF_W_Y 20
  ------------------
 2772|  11.8k|                    i4_xD++;
 2773|  11.8k|                }
 2774|  5.93k|                i4_yD++;
 2775|  5.93k|                i4_xD -= 2;
 2776|  5.93k|            }
 2777|       |
 2778|       |            /* Call diagonal construction for chroma */
 2779|  2.96k|            isvcd_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cb,
 2780|  2.96k|                                            DYADIC_REF_W_C);
  ------------------
  |  |   58|  2.96k|#define DYADIC_REF_W_C 10
  ------------------
 2781|       |
 2782|  2.96k|            isvcd_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cr,
 2783|  2.96k|                                            DYADIC_REF_W_C);
  ------------------
  |  |   58|  2.96k|#define DYADIC_REF_W_C 10
  ------------------
 2784|       |
 2785|  2.96k|        } /* if (mb_x+left,mb_y) available, (mb_x,mb_y+top) available */
 2786|  44.3k|    }     /* (mb_x+left,mb_y+top) available */
 2787|       |
 2788|  52.4k|    return OK;
  ------------------
  |  |  114|  52.4k|#define OK        0
  ------------------
 2789|  52.9k|}
isvcd_interpolate_base_luma_dyadic:
 2817|  27.7k|{
 2818|  27.7k|    WORD32 i4_x, i4_y;
 2819|  27.7k|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 2820|  27.7k|    WORD32 i4_samp_0, i4_samp_1, i4_samp_2, i4_samp_3;
 2821|  27.7k|    WORD32 i4_rslt_1, i4_rslt_2;
 2822|  27.7k|    WORD32 i4_filt_stride, i4_src_stride;
 2823|  27.7k|    UWORD8 *pu1_inp, *pu1_out;
 2824|  27.7k|    WORD16 *pi2_tmp;
 2825|       |
 2826|       |    /* Filter coefficient values for phase 4 */
 2827|  27.7k|    i4_coeff_0 = -3;
 2828|  27.7k|    i4_coeff_1 = 28;
 2829|  27.7k|    i4_coeff_2 = 8;
 2830|  27.7k|    i4_coeff_3 = -1;
 2831|  27.7k|    i4_filt_stride = 12;
 2832|  27.7k|    i4_src_stride = DYADIC_REF_W_Y;
  ------------------
  |  |   56|  27.7k|#define DYADIC_REF_W_Y 20
  ------------------
 2833|  27.7k|    pu1_inp = pu1_inp_buf;
 2834|  27.7k|    pi2_tmp = pi2_tmp_filt_buf;
 2835|  27.7k|    pu1_out = pu1_out_buf;
 2836|       |
 2837|       |    /* Vertical interpolation */
 2838|   360k|    for(i4_x = 0; i4_x < 12; i4_x++)
  ------------------
  |  Branch (2838:19): [True: 332k, False: 27.7k]
  ------------------
 2839|   332k|    {
 2840|       |        /* y = 0, y_phase = 12 */
 2841|   332k|        i4_samp_0 = pu1_inp[i4_x];
 2842|   332k|        pu1_inp += i4_src_stride;
 2843|   332k|        i4_samp_1 = pu1_inp[i4_x];
 2844|   332k|        pu1_inp += i4_src_stride;
 2845|   332k|        i4_samp_2 = pu1_inp[i4_x];
 2846|   332k|        pu1_inp += i4_src_stride;
 2847|   332k|        i4_samp_3 = pu1_inp[i4_x];
 2848|   332k|        pu1_inp += i4_src_stride;
 2849|       |
 2850|       |        /* since y_phase 12 for y = 0 */
 2851|   332k|        i4_rslt_1 = i4_samp_0 * i4_coeff_3;
 2852|   332k|        i4_rslt_1 += i4_samp_1 * i4_coeff_2;
 2853|   332k|        i4_rslt_1 += i4_samp_2 * i4_coeff_1;
 2854|   332k|        i4_rslt_1 += i4_samp_3 * i4_coeff_0;
 2855|       |
 2856|       |        /* Store the output */
 2857|   332k|        pi2_tmp[i4_x] = i4_rslt_1;
 2858|       |        /* Increment the output ptr */
 2859|   332k|        pi2_tmp += i4_filt_stride;
 2860|       |
 2861|  2.66M|        for(i4_y = 1; i4_y < 15; i4_y += 2)
  ------------------
  |  Branch (2861:23): [True: 2.32M, False: 332k]
  ------------------
 2862|  2.32M|        {
 2863|  2.32M|            i4_samp_0 = i4_samp_1;
 2864|  2.32M|            i4_samp_1 = i4_samp_2;
 2865|  2.32M|            i4_samp_2 = i4_samp_3;
 2866|  2.32M|            i4_samp_3 = pu1_inp[i4_x];
 2867|       |
 2868|       |            /* y_phase is 4 for odd values of y */
 2869|       |            /* and 12 for even values of y    */
 2870|  2.32M|            i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 2871|  2.32M|            i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 2872|  2.32M|            i4_rslt_1 += i4_samp_2 * i4_coeff_2;
 2873|  2.32M|            i4_rslt_1 += i4_samp_3 * i4_coeff_3;
 2874|  2.32M|            i4_rslt_2 = i4_samp_0 * i4_coeff_3;
 2875|  2.32M|            i4_rslt_2 += i4_samp_1 * i4_coeff_2;
 2876|  2.32M|            i4_rslt_2 += i4_samp_2 * i4_coeff_1;
 2877|  2.32M|            i4_rslt_2 += i4_samp_3 * i4_coeff_0;
 2878|       |
 2879|       |            /* Storing the results */
 2880|  2.32M|            pi2_tmp[i4_x] = i4_rslt_1;
 2881|  2.32M|            pi2_tmp += i4_filt_stride;
 2882|  2.32M|            pi2_tmp[i4_x] = i4_rslt_2;
 2883|       |
 2884|       |            /* Incrementing the pointers */
 2885|  2.32M|            pi2_tmp += i4_filt_stride;
 2886|  2.32M|            pu1_inp += i4_src_stride;
 2887|       |
 2888|  2.32M|        } /* End of loop over y */
 2889|       |
 2890|       |        /* y = 15, y_phase = 4 */
 2891|   332k|        i4_samp_0 = i4_samp_1;
 2892|   332k|        i4_samp_1 = i4_samp_2;
 2893|   332k|        i4_samp_2 = i4_samp_3;
 2894|   332k|        i4_samp_3 = pu1_inp[i4_x];
 2895|       |
 2896|   332k|        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 2897|   332k|        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 2898|   332k|        i4_rslt_1 += i4_samp_2 * i4_coeff_2;
 2899|   332k|        i4_rslt_1 += i4_samp_3 * i4_coeff_3;
 2900|       |
 2901|       |        /* Store the output */
 2902|   332k|        pi2_tmp[i4_x] = i4_rslt_1;
 2903|       |
 2904|       |        /* Reinitializing the ptrs */
 2905|   332k|        pu1_inp = pu1_inp_buf;
 2906|   332k|        pi2_tmp = pi2_tmp_filt_buf;
 2907|   332k|    }
 2908|       |
 2909|       |    /* Horizontal interpolation */
 2910|   471k|    for(i4_y = 0; i4_y < 16; i4_y++)
  ------------------
  |  Branch (2910:19): [True: 443k, False: 27.7k]
  ------------------
 2911|   443k|    {
 2912|       |        /* x = 0, x_phase = 12 */
 2913|   443k|        i4_samp_0 = *pi2_tmp++;
 2914|   443k|        i4_samp_1 = *pi2_tmp++;
 2915|   443k|        i4_samp_2 = *pi2_tmp++;
 2916|   443k|        i4_samp_3 = *pi2_tmp++;
 2917|       |
 2918|       |        /* since x_phase 12 for x = 0 */
 2919|   443k|        i4_rslt_1 = i4_samp_0 * i4_coeff_3;
 2920|   443k|        i4_rslt_1 += i4_samp_1 * i4_coeff_2;
 2921|   443k|        i4_rslt_1 += i4_samp_2 * i4_coeff_1;
 2922|   443k|        i4_rslt_1 += i4_samp_3 * i4_coeff_0;
 2923|   443k|        i4_rslt_1 += 512;
 2924|       |
 2925|   443k|        i4_rslt_1 >>= 10;
 2926|       |
 2927|       |        /* Store the output */
 2928|   443k|        pu1_out[0] = CLIPUCHAR(i4_rslt_1);
  ------------------
  |  |   69|   443k|#define CLIPUCHAR(x) CLIP3(0, 255, (x))
  |  |  ------------------
  |  |  |  |   77|   443k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.33k, False: 439k]
  |  |  |  |  |  Branch (77:54): [True: 300, False: 439k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 2929|       |
 2930|  3.54M|        for(i4_x = 1; i4_x < 15; i4_x += 2)
  ------------------
  |  Branch (2930:23): [True: 3.10M, False: 443k]
  ------------------
 2931|  3.10M|        {
 2932|  3.10M|            i4_samp_0 = i4_samp_1;
 2933|  3.10M|            i4_samp_1 = i4_samp_2;
 2934|  3.10M|            i4_samp_2 = i4_samp_3;
 2935|  3.10M|            i4_samp_3 = *pi2_tmp++;
 2936|       |
 2937|       |            /* x_phase is 4 for odd values of x */
 2938|       |            /* and 12 for even values of x    */
 2939|  3.10M|            i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 2940|  3.10M|            i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 2941|  3.10M|            i4_rslt_1 += i4_samp_2 * i4_coeff_2;
 2942|  3.10M|            i4_rslt_1 += i4_samp_3 * i4_coeff_3;
 2943|  3.10M|            i4_rslt_1 += 512;
 2944|       |
 2945|  3.10M|            i4_rslt_2 = i4_samp_0 * i4_coeff_3;
 2946|  3.10M|            i4_rslt_2 += i4_samp_1 * i4_coeff_2;
 2947|  3.10M|            i4_rslt_2 += i4_samp_2 * i4_coeff_1;
 2948|  3.10M|            i4_rslt_2 += i4_samp_3 * i4_coeff_0;
 2949|  3.10M|            i4_rslt_2 += 512;
 2950|       |
 2951|  3.10M|            i4_rslt_1 >>= 10;
 2952|  3.10M|            i4_rslt_2 >>= 10;
 2953|       |
 2954|       |            /* Store the output */
 2955|  3.10M|            pu1_out[i4_x] = CLIPUCHAR(i4_rslt_1);
  ------------------
  |  |   69|  3.10M|#define CLIPUCHAR(x) CLIP3(0, 255, (x))
  |  |  ------------------
  |  |  |  |   77|  3.10M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 40.3k, False: 3.06M]
  |  |  |  |  |  Branch (77:54): [True: 849, False: 3.06M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 2956|  3.10M|            pu1_out[i4_x + 1] = CLIPUCHAR(i4_rslt_2);
  ------------------
  |  |   69|  3.10M|#define CLIPUCHAR(x) CLIP3(0, 255, (x))
  |  |  ------------------
  |  |  |  |   77|  3.10M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 41.6k, False: 3.06M]
  |  |  |  |  |  Branch (77:54): [True: 989, False: 3.06M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 2957|  3.10M|        }
 2958|       |
 2959|       |        /* x = 15 */
 2960|   443k|        i4_samp_0 = i4_samp_1;
 2961|   443k|        i4_samp_1 = i4_samp_2;
 2962|   443k|        i4_samp_2 = i4_samp_3;
 2963|   443k|        i4_samp_3 = *pi2_tmp++;
 2964|       |
 2965|   443k|        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 2966|   443k|        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 2967|   443k|        i4_rslt_1 += i4_samp_2 * i4_coeff_2;
 2968|   443k|        i4_rslt_1 += i4_samp_3 * i4_coeff_3;
 2969|   443k|        i4_rslt_1 += 512;
 2970|       |
 2971|   443k|        i4_rslt_1 >>= 10;
 2972|       |
 2973|       |        /* Store the output */
 2974|   443k|        pu1_out[i4_x] = CLIPUCHAR(i4_rslt_1);
  ------------------
  |  |   69|   443k|#define CLIPUCHAR(x) CLIP3(0, 255, (x))
  |  |  ------------------
  |  |  |  |   77|   443k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.09k, False: 438k]
  |  |  |  |  |  Branch (77:54): [True: 426, False: 438k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 2975|       |
 2976|       |        /* Increment the output ptr */
 2977|   443k|        pu1_out += i4_out_stride;
 2978|       |
 2979|   443k|    } /* End of loop over y */
 2980|  27.7k|} /* isvcd_interpolate_base_luma_dyadic */
isvcd_vert_interpol_chroma_dyadic_1:
 3017|  52.0k|{
 3018|  52.0k|    WORD32 i4_x, i4_y;
 3019|  52.0k|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 3020|  52.0k|    WORD32 i4_samp_0, i4_samp_1;
 3021|  52.0k|    WORD32 i4_rslt_1, i4_rslt_2;
 3022|  52.0k|    WORD32 i4_filt_stride, i4_src_stride;
 3023|  52.0k|    UWORD8 *pu1_inp;
 3024|  52.0k|    WORD16 *pi2_tmp;
 3025|       |
 3026|  52.0k|    i4_coeff_0 = 8 - i4_phase_0;
 3027|  52.0k|    i4_coeff_1 = i4_phase_0;
 3028|  52.0k|    i4_coeff_2 = 8 - i4_phase_1;
 3029|  52.0k|    i4_coeff_3 = i4_phase_1;
 3030|       |
 3031|  52.0k|    pu1_inp = pu1_inp_buf;
 3032|  52.0k|    pi2_tmp = pi2_tmp_filt_buf;
 3033|  52.0k|    i4_filt_stride = 6;
 3034|  52.0k|    i4_src_stride = DYADIC_REF_W_C;
  ------------------
  |  |   58|  52.0k|#define DYADIC_REF_W_C 10
  ------------------
 3035|       |
 3036|       |    /* Vertical interpolation */
 3037|   364k|    for(i4_x = 0; i4_x < 6; i4_x++)
  ------------------
  |  Branch (3037:19): [True: 312k, False: 52.0k]
  ------------------
 3038|   312k|    {
 3039|       |        /* y = 0, y_phase = phase_0 */
 3040|   312k|        i4_samp_0 = pu1_inp[i4_x];
 3041|   312k|        pu1_inp += i4_src_stride;
 3042|   312k|        i4_samp_1 = pu1_inp[i4_x];
 3043|   312k|        pu1_inp += i4_src_stride;
 3044|       |
 3045|       |        /* since y_phase = phase_0 for y = 0 */
 3046|   312k|        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 3047|   312k|        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 3048|       |
 3049|       |        /* Store the output */
 3050|   312k|        pi2_tmp[i4_x] = i4_rslt_1;
 3051|       |
 3052|       |        /* Increment the output ptr */
 3053|   312k|        pi2_tmp += i4_filt_stride;
 3054|       |
 3055|  1.24M|        for(i4_y = 1; i4_y < 7; i4_y += 2)
  ------------------
  |  Branch (3055:23): [True: 936k, False: 312k]
  ------------------
 3056|   936k|        {
 3057|   936k|            i4_samp_0 = i4_samp_1;
 3058|   936k|            i4_samp_1 = pu1_inp[i4_x];
 3059|       |
 3060|       |            /* y_phase is phase_1 for odd values of y */
 3061|       |            /* and phase_0 for even values of y          */
 3062|   936k|            i4_rslt_1 = i4_samp_0 * i4_coeff_2;
 3063|   936k|            i4_rslt_1 += i4_samp_1 * i4_coeff_3;
 3064|   936k|            i4_rslt_2 = i4_samp_0 * i4_coeff_0;
 3065|   936k|            i4_rslt_2 += i4_samp_1 * i4_coeff_1;
 3066|       |
 3067|       |            /* Storing the results */
 3068|   936k|            pi2_tmp[i4_x] = i4_rslt_1;
 3069|   936k|            pi2_tmp += i4_filt_stride;
 3070|   936k|            pi2_tmp[i4_x] = i4_rslt_2;
 3071|       |
 3072|       |            /* Incrementing the pointers */
 3073|   936k|            pi2_tmp += i4_filt_stride;
 3074|   936k|            pu1_inp += i4_src_stride;
 3075|       |
 3076|   936k|        } /* End of loop over y */
 3077|       |
 3078|       |        /* y = 7, y_phase = phase_1 */
 3079|   312k|        i4_samp_0 = i4_samp_1;
 3080|   312k|        i4_samp_1 = pu1_inp[i4_x];
 3081|       |
 3082|   312k|        i4_rslt_1 = i4_samp_0 * i4_coeff_2;
 3083|   312k|        i4_rslt_1 += i4_samp_1 * i4_coeff_3;
 3084|       |
 3085|       |        /* Store the output */
 3086|   312k|        pi2_tmp[i4_x] = i4_rslt_1;
 3087|       |
 3088|       |        /* Reinitializing the ptrs */
 3089|   312k|        pu1_inp = pu1_inp_buf;
 3090|   312k|        pi2_tmp = pi2_tmp_filt_buf;
 3091|       |
 3092|   312k|    } /* End of loop over x */
 3093|  52.0k|} /* isvcd_vert_interpol_chroma_dyadic_1 */
isvcd_vert_interpol_chroma_dyadic_2:
 3126|  2.61k|{ /* Vertical chroma pass: 5 rows of a 6-column window (starting one row into pu1_inp_buf) -> 8 rows of WORD16 intermediates in pi2_tmp_filt_buf */
 3127|  2.61k|    WORD32 i4_x, i4_y;
 3128|  2.61k|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 3129|  2.61k|    WORD32 i4_samp_0, i4_samp_1;
 3130|  2.61k|    WORD32 i4_rslt_1, i4_rslt_2;
 3131|  2.61k|    WORD32 i4_filt_stride, i4_src_stride;
 3132|  2.61k|    UWORD8 *pu1_inp;
 3133|  2.61k|    WORD16 *pi2_tmp;
 3134|       |    /* Two-tap weights; each pair sums to 8 (coeff_0/1 from phase_0, coeff_2/3 from phase_1) */
 3135|  2.61k|    i4_coeff_0 = 8 - i4_phase_0; /* earlier row, first output of each pair */
 3136|  2.61k|    i4_coeff_1 = i4_phase_0;     /* later row, first output of each pair */
 3137|  2.61k|    i4_coeff_2 = 8 - i4_phase_1; /* earlier row, second output of each pair */
 3138|  2.61k|    i4_coeff_3 = i4_phase_1;     /* later row, second output of each pair */
 3139|       |
 3140|  2.61k|    pi2_tmp = pi2_tmp_filt_buf;
 3141|  2.61k|    i4_filt_stride = 6; /* intermediate rows are 6 entries apart */
 3142|  2.61k|    i4_src_stride = DYADIC_REF_W_C;
  ------------------
  |  |   58|  2.61k|#define DYADIC_REF_W_C 10
  ------------------
 3143|  2.61k|    pu1_inp = pu1_inp_buf + i4_src_stride; /* this variant starts from the second input row */
 3144|       |
 3145|       |    /* Vertical interpolation: column by column, two output rows per pair of adjacent input rows */
 3146|  18.2k|    for(i4_x = 0; i4_x < 6; i4_x++)
  ------------------
  |  Branch (3146:19): [True: 15.6k, False: 2.61k]
  ------------------
 3147|  15.6k|    {
 3148|  15.6k|        i4_samp_1 = pu1_inp[i4_x]; /* prime with the first row used for this column */
 3149|  15.6k|        pu1_inp += i4_src_stride;
 3150|       |
 3151|  78.3k|        for(i4_y = 0; i4_y < 8; i4_y += 2)
  ------------------
  |  Branch (3151:23): [True: 62.6k, False: 15.6k]
  ------------------
 3152|  62.6k|        {
 3153|  62.6k|            i4_samp_0 = i4_samp_1; /* slide the two-row window down by one input row */
 3154|  62.6k|            i4_samp_1 = pu1_inp[i4_x];
 3155|       |
 3156|       |            /* y_phase is phase_1 for odd values of y and phase_0 for even values of y */
 3157|  62.6k|            i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 3158|  62.6k|            i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 3159|       |
 3160|  62.6k|            i4_rslt_2 = i4_samp_0 * i4_coeff_2;
 3161|  62.6k|            i4_rslt_2 += i4_samp_1 * i4_coeff_3;
 3162|       |
 3163|       |            /* Storing the results unrounded: rslt_1 in row y, rslt_2 in row y + 1 */
 3164|  62.6k|            pi2_tmp[i4_x] = i4_rslt_1; /* NOTE(review): narrowing to WORD16 assumes phases lie in [0, 8] - confirm */
 3165|  62.6k|            pi2_tmp += i4_filt_stride;
 3166|  62.6k|            pi2_tmp[i4_x] = i4_rslt_2;
 3167|       |
 3168|       |            /* Incrementing the pointers */
 3169|  62.6k|            pi2_tmp += i4_filt_stride;
 3170|  62.6k|            pu1_inp += i4_src_stride;
 3171|       |
 3172|  62.6k|        } /* End of loop over y */
 3173|       |
 3174|       |        /* Rewind both pointers to their starting rows for the next column */
 3175|  15.6k|        pu1_inp = pu1_inp_buf + i4_src_stride;
 3176|  15.6k|        pi2_tmp = pi2_tmp_filt_buf;
 3177|       |
 3178|  15.6k|    } /* End of loop over x */
 3179|  2.61k|} /* isvcd_vert_interpol_chroma_dyadic_2 */
isvcd_vert_interpol_chroma_dyadic_3:
 3211|    794|{ /* Vertical chroma pass: first 5 rows of a 6-column window in pu1_inp_buf -> 8 rows of WORD16 intermediates in pi2_tmp_filt_buf */
 3212|    794|    WORD32 i4_x, i4_y;
 3213|    794|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 3214|    794|    WORD32 i4_samp_0, i4_samp_1;
 3215|    794|    WORD32 i4_rslt_1, i4_rslt_2;
 3216|    794|    WORD32 i4_filt_stride, i4_src_stride;
 3217|    794|    UWORD8 *pu1_inp;
 3218|    794|    WORD16 *pi2_tmp;
 3219|       |    /* Two-tap weights; each pair sums to 8 (coeff_0/1 from phase_0, coeff_2/3 from phase_1) */
 3220|    794|    i4_coeff_0 = 8 - i4_phase_0; /* earlier row, first output of each pair */
 3221|    794|    i4_coeff_1 = i4_phase_0;     /* later row, first output of each pair */
 3222|    794|    i4_coeff_2 = 8 - i4_phase_1; /* earlier row, second output of each pair */
 3223|    794|    i4_coeff_3 = i4_phase_1;     /* later row, second output of each pair */
 3224|       |
 3225|    794|    pi2_tmp = pi2_tmp_filt_buf;
 3226|    794|    i4_filt_stride = 6; /* intermediate rows are 6 entries apart */
 3227|    794|    i4_src_stride = DYADIC_REF_W_C;
  ------------------
  |  |   58|    794|#define DYADIC_REF_W_C 10
  ------------------
 3228|    794|    pu1_inp = pu1_inp_buf; /* this variant starts from the very first input row */
 3229|       |
 3230|       |    /* Vertical interpolation: column by column, two output rows per pair of adjacent input rows */
 3231|  5.55k|    for(i4_x = 0; i4_x < 6; i4_x++)
  ------------------
  |  Branch (3231:19): [True: 4.76k, False: 794]
  ------------------
 3232|  4.76k|    {
 3233|  4.76k|        i4_samp_1 = pu1_inp[i4_x]; /* prime with the first row used for this column */
 3234|  4.76k|        pu1_inp += i4_src_stride;
 3235|       |
 3236|  23.8k|        for(i4_y = 0; i4_y < 8; i4_y += 2)
  ------------------
  |  Branch (3236:23): [True: 19.0k, False: 4.76k]
  ------------------
 3237|  19.0k|        {
 3238|  19.0k|            i4_samp_0 = i4_samp_1; /* slide the two-row window down by one input row */
 3239|  19.0k|            i4_samp_1 = pu1_inp[i4_x];
 3240|       |
 3241|       |            /* y_phase is phase_1 for odd values of y */
 3242|       |            /* and phase_0 for even values of y       */
 3243|  19.0k|            i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 3244|  19.0k|            i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 3245|       |
 3246|  19.0k|            i4_rslt_2 = i4_samp_0 * i4_coeff_2;
 3247|  19.0k|            i4_rslt_2 += i4_samp_1 * i4_coeff_3;
 3248|       |
 3249|       |            /* Storing the results unrounded: rslt_1 in row y, rslt_2 in row y + 1 */
 3250|  19.0k|            pi2_tmp[i4_x] = i4_rslt_1; /* NOTE(review): narrowing to WORD16 assumes phases lie in [0, 8] - confirm */
 3251|  19.0k|            pi2_tmp += i4_filt_stride;
 3252|  19.0k|            pi2_tmp[i4_x] = i4_rslt_2;
 3253|       |
 3254|       |            /* Incrementing the pointers */
 3255|  19.0k|            pi2_tmp += i4_filt_stride;
 3256|  19.0k|            pu1_inp += i4_src_stride;
 3257|       |
 3258|  19.0k|        } /* End of loop over y */
 3259|       |
 3260|       |        /* Rewind both pointers to their starting rows for the next column */
 3261|  4.76k|        pu1_inp = pu1_inp_buf;
 3262|  4.76k|        pi2_tmp = pi2_tmp_filt_buf;
 3263|       |
 3264|  4.76k|    } /* End of loop over x */
 3265|    794|} /* isvcd_vert_interpol_chroma_dyadic_3 */
isvcd_horz_interpol_chroma_dyadic_1:
 3300|  36.4k|{ /* Horizontal chroma pass: 8 rows x 6 WORD16 intermediates -> 8 rows x 8 rounded samples, written to every second byte of pu1_out_buf */
 3301|  36.4k|    WORD32 i4_x, i4_y;
 3302|  36.4k|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 3303|  36.4k|    WORD32 i4_samp_0, i4_samp_1;
 3304|  36.4k|    WORD32 i4_rslt_1, i4_rslt_2;
 3305|  36.4k|    WORD32 i4_dst_stride;
 3306|  36.4k|    UWORD8 *pu1_out;
 3307|  36.4k|    WORD16 *pi2_tmp;
 3308|       |    /* Two-tap weights; each pair sums to 8 (coeff_0/1 from phase_0, coeff_2/3 from phase_1) */
 3309|  36.4k|    i4_coeff_0 = 8 - i4_phase_0;
 3310|  36.4k|    i4_coeff_1 = i4_phase_0;
 3311|  36.4k|    i4_coeff_2 = 8 - i4_phase_1;
 3312|  36.4k|    i4_coeff_3 = i4_phase_1;
 3313|       |
 3314|  36.4k|    pu1_out = pu1_out_buf;
 3315|  36.4k|    pi2_tmp = pi2_tmp_filt_buf; /* consumed strictly sequentially: 6 entries per output row */
 3316|  36.4k|    i4_dst_stride = i4_out_stride;
 3317|       |
 3318|       |    /* Horizontal interpolation */
 3319|   327k|    for(i4_y = 0; i4_y < 8; i4_y++)
  ------------------
  |  Branch (3319:19): [True: 291k, False: 36.4k]
  ------------------
 3320|   291k|    {
 3321|       |        /* x = 0, x_phase = phase_0 */
 3322|   291k|        i4_samp_0 = *pi2_tmp++;
 3323|   291k|        i4_samp_1 = *pi2_tmp++;
 3324|       |
 3325|       |        /* since x_phase = phase_0 for x = 0 */
 3326|   291k|        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 3327|   291k|        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 3328|       |
 3329|       |        /* Round to 8-bit value: add half of 64, then divide by 64 */
 3330|   291k|        i4_rslt_1 += 32;
 3331|   291k|        i4_rslt_1 >>= 6;
 3332|       |
 3333|       |        /* Store the output */
 3334|   291k|        pu1_out[0] = i4_rslt_1;
 3335|       |
 3336|  1.16M|        for(i4_x = 1; i4_x < 7; i4_x += 2)
  ------------------
  |  Branch (3336:23): [True: 874k, False: 291k]
  ------------------
 3337|   874k|        {
 3338|   874k|            i4_samp_0 = i4_samp_1; /* slide the two-sample window right by one intermediate */
 3339|   874k|            i4_samp_1 = *pi2_tmp++;
 3340|       |
 3341|       |            /* x_phase is phase_1 for odd values of x and phase_0 for even values of x */
 3342|   874k|            i4_rslt_1 = i4_samp_0 * i4_coeff_2;
 3343|   874k|            i4_rslt_1 += i4_samp_1 * i4_coeff_3;
 3344|   874k|            i4_rslt_2 = i4_samp_0 * i4_coeff_0;
 3345|   874k|            i4_rslt_2 += i4_samp_1 * i4_coeff_1;
 3346|       |
 3347|       |            /* Rounding to 8-bit values */
 3348|   874k|            i4_rslt_1 += 32;
 3349|   874k|            i4_rslt_1 >>= 6;
 3350|   874k|            i4_rslt_2 += 32;
 3351|   874k|            i4_rslt_2 >>= 6;
 3352|       |
 3353|       |            /* Storing the results for samples x and x + 1; output samples are 2 bytes apart */
 3354|   874k|            pu1_out[2 * i4_x] = i4_rslt_1;
 3355|   874k|            pu1_out[2 * (i4_x + 1)] = i4_rslt_2;
 3356|       |
 3357|   874k|        } /* End of loop over x */
 3358|       |
 3359|       |        /* x = 7, x_phase = phase_1 */
 3360|   291k|        i4_samp_0 = i4_samp_1;
 3361|   291k|        i4_samp_1 = *pi2_tmp++;
 3362|       |
 3363|       |        /* since x_phase = phase_1 for x = 7 */
 3364|   291k|        i4_rslt_1 = i4_samp_0 * i4_coeff_2;
 3365|   291k|        i4_rslt_1 += i4_samp_1 * i4_coeff_3;
 3366|       |
 3367|       |        /* Round to 8-bit value */
 3368|   291k|        i4_rslt_1 += 32;
 3369|   291k|        i4_rslt_1 >>= 6;
 3370|       |
 3371|       |        /* Store the output */
 3372|   291k|        pu1_out[2 * 7] = i4_rslt_1;
 3373|       |
 3374|       |        /* Incrementing the output ptr */
 3375|   291k|        pu1_out += i4_dst_stride;
 3376|       |
 3377|   291k|    } /* End of loop over y */
 3378|  36.4k|} /* isvcd_horz_interpol_chroma_dyadic_1 */
isvcd_horz_interpol_chroma_dyadic_2:
 3411|  19.0k|{ /* Horizontal chroma pass: entries 1..5 of each 6-entry intermediate row -> 8 rounded samples per row, written to every second byte of pu1_out_buf */
 3412|  19.0k|    WORD32 i4_x, i4_y;
 3413|  19.0k|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 3414|  19.0k|    WORD32 i4_samp_0, i4_samp_1;
 3415|  19.0k|    WORD32 i4_rslt_1, i4_rslt_2;
 3416|  19.0k|    WORD32 i4_dst_stride;
 3417|  19.0k|    UWORD8 *pu1_out;
 3418|  19.0k|    WORD16 *pi2_tmp;
 3419|       |    /* Two-tap weights; each pair sums to 8 (coeff_0/1 from phase_0, coeff_2/3 from phase_1) */
 3420|  19.0k|    i4_coeff_0 = 8 - i4_phase_0;
 3421|  19.0k|    i4_coeff_1 = i4_phase_0;
 3422|  19.0k|    i4_coeff_2 = 8 - i4_phase_1;
 3423|  19.0k|    i4_coeff_3 = i4_phase_1;
 3424|       |
 3425|  19.0k|    pu1_out = pu1_out_buf;
 3426|  19.0k|    pi2_tmp = pi2_tmp_filt_buf + 1; /* skip the first intermediate of the first row */
 3427|  19.0k|    i4_dst_stride = i4_out_stride;
 3428|       |
 3429|       |    /* Horizontal interpolation */
 3430|   171k|    for(i4_y = 0; i4_y < 8; i4_y++)
  ------------------
  |  Branch (3430:19): [True: 152k, False: 19.0k]
  ------------------
 3431|   152k|    {
 3432|       |        /* Prime the two-sample window for x = 0 (x_phase = phase_0) */
 3433|   152k|        i4_samp_1 = *pi2_tmp++;
 3434|       |
 3435|   760k|        for(i4_x = 0; i4_x < 8; i4_x += 2)
  ------------------
  |  Branch (3435:23): [True: 608k, False: 152k]
  ------------------
 3436|   608k|        {
 3437|   608k|            i4_samp_0 = i4_samp_1; /* slide the window right by one intermediate */
 3438|   608k|            i4_samp_1 = *pi2_tmp++;
 3439|       |
 3440|       |            /* x_phase is phase_1 for odd values of x */
 3441|       |            /* and phase_0 for even values of x       */
 3442|   608k|            i4_rslt_1 = i4_samp_0 * i4_coeff_0;
 3443|   608k|            i4_rslt_1 += i4_samp_1 * i4_coeff_1;
 3444|       |
 3445|   608k|            i4_rslt_2 = i4_samp_0 * i4_coeff_2;
 3446|   608k|            i4_rslt_2 += i4_samp_1 * i4_coeff_3;
 3447|       |
 3448|       |            /* Rounding to 8-bit values: add half of 64, then divide by 64 */
 3449|   608k|            i4_rslt_1 += 32;
 3450|   608k|            i4_rslt_1 >>= 6;
 3451|   608k|            i4_rslt_2 += 32;
 3452|   608k|            i4_rslt_2 >>= 6;
 3453|       |
 3454|       |            /* Storing the results for samples x and x + 1; output samples are 2 bytes apart */
 3455|   608k|            pu1_out[2 * i4_x] = i4_rslt_1;
 3456|   608k|            pu1_out[2 * (i4_x + 1)] = i4_rslt_2;
 3457|       |
 3458|   608k|        } /* End of loop over x */
 3459|       |
 3460|       |        /* Incrementing the ptrs: the extra +1 skips the first intermediate of the next row */
 3461|   152k|        pi2_tmp += 1;
 3462|   152k|        pu1_out += i4_dst_stride;
 3463|       |
 3464|   152k|    } /* End of loop over y */
 3465|  19.0k|} /* isvcd_horz_interpol_chroma_dyadic_2 */
isvcd_intra_resamp_mb_dyadic:
 3500|  52.9k|{ /* Intra resampling of one MB: builds the reference sample arrays, then interpolates luma, Cb and Cr into the current-layer buffers; returns OK, or NOT_OK if array construction fails */
 3501|       |    /* --------------------------------------------------------------------- */
 3502|       |    /* I/O buffer params                                                     */
 3503|       |    /* --------------------------------------------------------------------- */
 3504|  52.9k|    UWORD8 *pu1_inp_luma, *pu1_inp_chroma;
 3505|  52.9k|    UWORD8 *pu1_out_luma, *pu1_out_chroma;
 3506|  52.9k|    UWORD8 *pu1_out_cb, *pu1_out_cr;
 3507|  52.9k|    UWORD8 *pu1_refarray_luma, *pu1_refarray_cb, *pu1_refarray_cr;
 3508|  52.9k|    WORD16 *pi2_tmp_filt_buf;
 3509|  52.9k|    WORD32 i4_inp_luma_stride, i4_inp_chroma_stride;
 3510|  52.9k|    WORD32 i4_out_luma_stride, i4_out_chroma_stride;
 3511|  52.9k|    UWORD16 u2_mb_x_ref, u2_mb_y_ref;
 3512|  52.9k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
 3513|  52.9k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 3514|       |
 3515|       |    /* --------------------------------------------------------------------- */
 3516|       |    /* Intra resampling ctxt pointers                                        */
 3517|       |    /* --------------------------------------------------------------------- */
 3518|  52.9k|    intra_sampling_ctxt_t *ps_ctxt;
 3519|  52.9k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
 3520|       |
 3521|       |    /* --------------------------------------------------------------------- */
 3522|       |    /* reference and current layer MB coordinates                            */
 3523|       |    /* --------------------------------------------------------------------- */
 3524|  52.9k|    WORD32 i4_scaled_mb_x, i4_scaled_mb_y;
 3525|  52.9k|    WORD32 i4_top, i4_left;
 3526|  52.9k|    WORD32 ret;
 3527|       |
 3528|  52.9k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 3529|       |    /* --------------------------------------------------------------------- */
 3530|       |    /* Pointer derivation                                                    */
 3531|       |    /* --------------------------------------------------------------------- */
 3532|  52.9k|    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
 3533|  52.9k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 3534|       |
 3535|       |    /* --------------------------------------------------------------------- */
 3536|       |    /* MB coordinate derivation: MB position minus the scaled reference      */
 3537|       |    /* layer offset (the offset is shifted right by 4 before subtracting)    */
 3538|  52.9k|    i4_scaled_mb_x =
 3539|  52.9k|        ps_mb_coord->u2_mb_x - (ps_svc_slice_params->i4_scaled_ref_layer_left_offset >> 4);
 3540|  52.9k|    i4_scaled_mb_y =
 3541|  52.9k|        ps_mb_coord->u2_mb_y - (ps_svc_slice_params->i4_scaled_ref_layer_top_offset >> 4);
 3542|       |    /* i4_left / i4_top are +1 for odd scaled coordinates and -1 for even ones */
 3543|  52.9k|    if(i4_scaled_mb_x & 0x1)
  ------------------
  |  Branch (3543:8): [True: 26.2k, False: 26.7k]
  ------------------
 3544|  26.2k|    {
 3545|  26.2k|        i4_left = 1;
 3546|  26.2k|    }
 3547|  26.7k|    else
 3548|  26.7k|    {
 3549|  26.7k|        i4_left = -1;
 3550|  26.7k|    }
 3551|  52.9k|    if(i4_scaled_mb_y & 0x1)
  ------------------
  |  Branch (3551:8): [True: 25.4k, False: 27.5k]
  ------------------
 3552|  25.4k|    {
 3553|  25.4k|        i4_top = 1;
 3554|  25.4k|    }
 3555|  27.5k|    else
 3556|  27.5k|    {
 3557|  27.5k|        i4_top = -1;
 3558|  27.5k|    }
 3559|       |    /* Halve the coordinates. NOTE(review): a negative scaled coordinate would wrap in the UWORD16 - confirm callers rule this out */
 3560|  52.9k|    u2_mb_x_ref = (i4_scaled_mb_x >> 1);
 3561|  52.9k|    u2_mb_y_ref = (i4_scaled_mb_y >> 1);
 3562|       |
 3563|       |    /* --------------------------------------------------------------------- */
 3564|       |    /* Reference Array Construction - luma and chroma                        */
 3565|       |    /* --------------------------------------------------------------------- */
 3566|  52.9k|    pu1_inp_luma = (UWORD8 *) ps_ref_luma->pv_buffer;
 3567|  52.9k|    pu1_inp_chroma = (UWORD8 *) ps_ref_chroma->pv_buffer;
 3568|  52.9k|    i4_inp_luma_stride = ps_ref_luma->i4_num_element_stride;
 3569|  52.9k|    i4_inp_chroma_stride = ps_ref_chroma->i4_num_element_stride;
 3570|       |
 3571|       |    /* ------- Constructing refSampleArray ----------------------- */
 3572|  52.9k|    ret = isvcd_reflayer_construction_dyadic(
 3573|  52.9k|        pv_intra_samp_ctxt, ps_ref_mb_mode_map, pu1_inp_luma, pu1_inp_chroma, i4_inp_luma_stride,
 3574|  52.9k|        i4_inp_chroma_stride, i4_top, i4_left, u2_mb_x_ref, u2_mb_y_ref);
 3575|       |
 3576|  52.9k|    if(ret != OK)
  ------------------
  |  |  114|  52.9k|#define OK        0
  ------------------
  |  Branch (3576:8): [True: 0, False: 52.9k]
  ------------------
 3577|      0|    {
 3578|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3579|      0|    }
 3580|       |    /* --------------------------------------------------------------------- */
 3581|       |    /* LUMA INTERPOLATION                                                    */
 3582|       |    /* --------------------------------------------------------------------- */
 3583|  52.9k|    pu1_refarray_luma = ps_ctxt->pu1_refarray_buffer;
 3584|  52.9k|    if(1 == i4_top)
  ------------------
  |  Branch (3584:8): [True: 25.4k, False: 27.5k]
  ------------------
 3585|  25.4k|    {
 3586|  25.4k|        pu1_refarray_luma += (DYADIC_REF_W_Y << 3); /* advance by 8 * DYADIC_REF_W_Y bytes (presumably 8 rows) */
  ------------------
  |  |   56|  25.4k|#define DYADIC_REF_W_Y 20
  ------------------
 3587|  25.4k|    }
 3588|  52.9k|    if(1 == i4_left)
  ------------------
  |  Branch (3588:8): [True: 26.2k, False: 26.7k]
  ------------------
 3589|  26.2k|    {
 3590|  26.2k|        pu1_refarray_luma += 8; /* advance by 8 bytes within the row */
 3591|  26.2k|    }
 3592|  52.9k|    pu1_out_luma = (UWORD8 *) ps_curr_luma->pv_buffer;
 3593|  52.9k|    i4_out_luma_stride = ps_curr_luma->i4_num_element_stride;
 3594|  52.9k|    pi2_tmp_filt_buf = (WORD16 *) ps_ctxt->pi4_temp_interpolation_buffer; /* one scratch buffer, reused for luma, Cb and Cr */
 3595|       |
 3596|  52.9k|    ps_ctxt->pf_interpolate_base_luma_dyadic(pu1_refarray_luma, pi2_tmp_filt_buf, pu1_out_luma,
 3597|  52.9k|                                             i4_out_luma_stride);
 3598|       |
 3599|       |    /* --------------------------------------------------------------------- */
 3600|       |    /* CHROMA INTERPOLATION                                                  */
 3601|       |    /* --------------------------------------------------------------------- */
 3602|  52.9k|    pu1_out_chroma = (UWORD8 *) ps_curr_chroma->pv_buffer;
 3603|  52.9k|    i4_out_chroma_stride = ps_curr_chroma->i4_num_element_stride;
 3604|       |
 3605|       |    /* CB */
 3606|  52.9k|    pu1_out_cb = pu1_out_chroma;
 3607|  52.9k|    pu1_refarray_cb = ps_ctxt->pu1_refarray_cb;
 3608|       |
 3609|  52.9k|    if(1 == i4_top)
  ------------------
  |  Branch (3609:8): [True: 25.4k, False: 27.5k]
  ------------------
 3610|  25.4k|    {
 3611|  25.4k|        pu1_refarray_cb += (DYADIC_REF_W_C << 2); /* advance by 4 * DYADIC_REF_W_C bytes (presumably 4 rows) */
  ------------------
  |  |   58|  25.4k|#define DYADIC_REF_W_C 10
  ------------------
 3612|  25.4k|    }
 3613|  52.9k|    if(1 == i4_left)
  ------------------
  |  Branch (3613:8): [True: 26.2k, False: 26.7k]
  ------------------
 3614|  26.2k|    {
 3615|  26.2k|        pu1_refarray_cb += 4; /* advance by 4 bytes within the row */
 3616|  26.2k|    }
 3617|       |
 3618|       |    /* Vertical interpolation into the scratch buffer, using the layer's y phases */
 3619|  52.9k|    ps_lyr_ctxt->pf_vert_chroma_interpol(pu1_refarray_cb, pi2_tmp_filt_buf,
 3620|  52.9k|                                         ps_lyr_ctxt->i4_y_phase_0, ps_lyr_ctxt->i4_y_phase_1);
 3621|       |
 3622|       |    /* Horizontal interpolation from the scratch buffer, using the layer's x phases */
 3623|  52.9k|    ps_lyr_ctxt->pf_horz_chroma_interpol(pi2_tmp_filt_buf, pu1_out_cb, i4_out_chroma_stride,
 3624|  52.9k|                                         ps_lyr_ctxt->i4_x_phase_0, ps_lyr_ctxt->i4_x_phase_1);
 3625|       |
 3626|       |    /* CR */
 3627|  52.9k|    pu1_out_cr = pu1_out_chroma + 1; /* Cr output starts one byte after Cb in the same chroma buffer */
 3628|  52.9k|    pu1_refarray_cr = ps_ctxt->pu1_refarray_cr;
 3629|       |
 3630|  52.9k|    if(1 == i4_top)
  ------------------
  |  Branch (3630:8): [True: 25.4k, False: 27.5k]
  ------------------
 3631|  25.4k|    {
 3632|  25.4k|        pu1_refarray_cr += (DYADIC_REF_W_C << 2); /* same offsets as applied to the Cb array */
  ------------------
  |  |   58|  25.4k|#define DYADIC_REF_W_C 10
  ------------------
 3633|  25.4k|    }
 3634|  52.9k|    if(1 == i4_left)
  ------------------
  |  Branch (3634:8): [True: 26.2k, False: 26.7k]
  ------------------
 3635|  26.2k|    {
 3636|  26.2k|        pu1_refarray_cr += 4;
 3637|  26.2k|    }
 3638|       |
 3639|       |    /* Vertical interpolation */
 3640|  52.9k|    ps_lyr_ctxt->pf_vert_chroma_interpol(pu1_refarray_cr, pi2_tmp_filt_buf,
 3641|  52.9k|                                         ps_lyr_ctxt->i4_y_phase_0, ps_lyr_ctxt->i4_y_phase_1);
 3642|       |
 3643|       |    /* Horizontal interpolation */
 3644|  52.9k|    ps_lyr_ctxt->pf_horz_chroma_interpol(pi2_tmp_filt_buf, pu1_out_cr, i4_out_chroma_stride,
 3645|  52.9k|                                         ps_lyr_ctxt->i4_x_phase_0, ps_lyr_ctxt->i4_x_phase_1);
 3646|  52.9k|    return OK;
  ------------------
  |  |  114|  52.9k|#define OK        0
  ------------------
 3647|  52.9k|}
isvcd_interpolate_intra_base:
 3680|  71.8k|{ /* Two-pass interpolation of one MB: vertical pass from the reference array into the WORD32 scratch buffer, then horizontal pass with rounding and clipping into pu1_out */
 3681|       |    /* --------------------------------------------------------------------- */
 3682|       |    /* Index Parameters                                                      */
 3683|       |    /* --------------------------------------------------------------------- */
 3684|  71.8k|    intra_sampling_ctxt_t *ps_ctxt;
 3685|  71.8k|    intra_samp_map_ctxt_t *ps_map_ctxt;
 3686|  71.8k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
 3687|  71.8k|    WORD32 i4_x, i4_y;
 3688|  71.8k|    WORD32 i4_frm_mb_x, i4_frm_mb_y;
 3689|  71.8k|    ref_pixel_map_t *ps_x_pos_phase;
 3690|  71.8k|    ref_pixel_map_t *ps_y_pos_phase;
 3691|       |
 3692|  71.8k|    WORD32 i4_temp_array_ht;
 3693|  71.8k|    WORD32 *pi4_interp_buff;
 3694|  71.8k|    WORD32 *pi4_interp_buff_temp;
 3695|  71.8k|    WORD32 i4_mb_wd;
 3696|  71.8k|    WORD32 i4_mb_ht;
 3697|  71.8k|    WORD32 i4_x_min, i4_x_max;
 3698|  71.8k|    ref_min_max_map_t *ps_x_min_max;
 3699|  71.8k|    UWORD8 *pu1_refarray = NULL;
 3700|       |
 3701|       |    /* --------------------------------------------------------------------- */
 3702|       |    /* Extracting pointers from the  context                                 */
 3703|       |    /* --------------------------------------------------------------------- */
 3704|  71.8k|    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
 3705|  71.8k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 3706|       |    /* i4_refarray_flag selects the source array; any value other than 0 or 1 leaves pu1_refarray NULL */
 3707|  71.8k|    if(0 == i4_refarray_flag)
  ------------------
  |  Branch (3707:8): [True: 47.8k, False: 23.9k]
  ------------------
 3708|  47.8k|    {
 3709|  47.8k|        pu1_refarray = ps_ctxt->pu1_refarray_buffer;
 3710|  47.8k|    }
 3711|  23.9k|    else if(1 == i4_refarray_flag)
  ------------------
  |  Branch (3711:13): [True: 23.9k, False: 0]
  ------------------
 3712|  23.9k|    {
 3713|  23.9k|        pu1_refarray = ps_ctxt->pu1_refarray_cb;
 3714|  23.9k|    }
 3715|       |
 3716|       |    /* --------------------------------------------------------------------- */
 3717|       |    /* LUMA or CHROMA: pick the matching position/phase map context          */
 3718|       |    /* --------------------------------------------------------------------- */
 3719|  71.8k|    if(1 == i4_chroma_flag)
  ------------------
  |  Branch (3719:8): [True: 47.8k, False: 23.9k]
  ------------------
 3720|  47.8k|        ps_map_ctxt = &(ps_lyr_ctxt->s_chroma_map_ctxt);
 3721|  23.9k|    else
 3722|  23.9k|        ps_map_ctxt = &(ps_lyr_ctxt->s_luma_map_ctxt);
 3723|       |
 3724|  71.8k|    i4_mb_wd = MB_WIDTH >> i4_chroma_flag; /* 16 when the flag is 0, 8 when it is 1 */
  ------------------
  |  |   67|  71.8k|#define MB_WIDTH 16
  ------------------
 3725|  71.8k|    i4_mb_ht = MB_HEIGHT >> i4_chroma_flag;
  ------------------
  |  |   68|  71.8k|#define MB_HEIGHT 16
  ------------------
 3726|  71.8k|    ps_x_min_max = ps_map_ctxt->ps_x_min_max;
 3727|  71.8k|    i4_frm_mb_y = i4_mb_y * i4_mb_ht; /* first row index of this MB in the y map */
 3728|  71.8k|    i4_frm_mb_x = i4_mb_x * i4_mb_wd; /* first column index of this MB in the x map */
 3729|       |
 3730|       |    /* get the min and max positions */
 3731|  71.8k|    i4_x_min = ps_x_min_max[i4_mb_x].i2_min_pos;
 3732|  71.8k|    i4_x_max = ps_x_min_max[i4_mb_x].i2_max_pos;
 3733|       |
 3734|       |    /* --------------------------------------------------------------------- */
 3735|       |    /* Projected frame level pointers                                        */
 3736|       |    /* --------------------------------------------------------------------- */
 3737|  71.8k|    ps_x_pos_phase = ps_map_ctxt->ps_x_pos_phase;
 3738|  71.8k|    ps_y_pos_phase = ps_map_ctxt->ps_y_pos_phase;
 3739|       |
 3740|       |    /* --------------------------------------------------------------------- */
 3741|       |    /* Pointers and Dimension of the temporary buffer                        */
 3742|       |    /* --------------------------------------------------------------------- */
 3743|  71.8k|    i4_temp_array_ht = i4_mb_ht;
 3744|  71.8k|    pi4_interp_buff = ps_ctxt->pi4_temp_interpolation_buffer;
 3745|  71.8k|    pi4_interp_buff_temp = pi4_interp_buff; /* remember the start so it can be restored before the horizontal pass */
 3746|       |
 3747|       |    /* --------------------------------------------------------------------- */
 3748|       |    /* Loop for interpolation in vertical direction                          */
 3749|       |    /* --------------------------------------------------------------------- */
 3750|   838k|    for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (3750:19): [True: 766k, False: 71.8k]
  ------------------
 3751|   766k|    {
 3752|  10.1M|        for(i4_x = (i4_x_min - 1); i4_x <= (i4_x_max + 2); i4_x++) /* one extra column left and two right, as read by the 4-tap horizontal luma filter */
  ------------------
  |  Branch (3752:36): [True: 9.41M, False: 766k]
  ------------------
 3753|  9.41M|        {
 3754|  9.41M|            UWORD8 *pu1_refarray_temp;
 3755|  9.41M|            WORD32 i4_y_ref;
 3756|  9.41M|            WORD32 i4_y_phase;
 3757|       |            /* ------------------------------------------------------------ */
 3758|       |            /* Finding the offset (depends only on y, not on x)             */
 3759|       |            /* ------------------------------------------------------------ */
 3760|  9.41M|            i4_y_ref = ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_ref_pos;
 3761|  9.41M|            i4_y_phase = ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_phase;
 3762|  9.41M|            pu1_refarray_temp = pu1_refarray + i4_x + (i4_y_ref * i4_refarray_wd);
 3763|       |
 3764|       |            /* ------------------------------------------------------------ */
 3765|       |            /* Luma: 4 taps over rows -1..+2; chroma: 2 taps over rows 0..+1. */
 3766|       |            /* Tap k for a phase p is read from table index (16 * k) + p.   */
 3767|  9.41M|            if(0 == i4_chroma_flag)
  ------------------
  |  Branch (3767:16): [True: 5.74M, False: 3.66M]
  ------------------
 3768|  5.74M|            {
 3769|  5.74M|                *(pi4_interp_buff + i4_x) =
 3770|  5.74M|                    (g_ai1_interp_filter_luma[i4_y_phase]) *
 3771|  5.74M|                        (*(pu1_refarray_temp - i4_refarray_wd)) +
 3772|       |
 3773|  5.74M|                    (g_ai1_interp_filter_luma[16 + i4_y_phase]) * (*(pu1_refarray_temp)) +
 3774|       |
 3775|  5.74M|                    (g_ai1_interp_filter_luma[32 + i4_y_phase]) *
 3776|  5.74M|                        (*(pu1_refarray_temp + i4_refarray_wd)) +
 3777|       |
 3778|  5.74M|                    (g_ai1_interp_filter_luma[48 + i4_y_phase]) *
 3779|  5.74M|                        (*(pu1_refarray_temp + (2 * i4_refarray_wd)));
 3780|  5.74M|            }
 3781|  3.66M|            else
 3782|  3.66M|            {
 3783|  3.66M|                *(pi4_interp_buff + i4_x) =
 3784|  3.66M|                    (g_au1_interp_filter_chroma[i4_y_phase]) * (*(pu1_refarray_temp)) +
 3785|       |
 3786|  3.66M|                    (g_au1_interp_filter_chroma[16 + i4_y_phase]) *
 3787|  3.66M|                        (*(pu1_refarray_temp + i4_refarray_wd));
 3788|  3.66M|            }
 3789|       |
 3790|  9.41M|        } /* end of loop over array  width */
 3791|   766k|        pi4_interp_buff = pi4_interp_buff + i4_refarray_wd; /* scratch rows are i4_refarray_wd entries apart */
 3792|   766k|    }     /* end of loop over temp array height*/
 3793|       |
 3794|  71.8k|    pi4_interp_buff = pi4_interp_buff_temp; /* back to the first scratch row */
 3795|       |
 3796|       |    /* --------------------------------------------------------------------- */
 3797|       |    /* Loop for interpolation in horizontal direction                        */
 3798|       |    /* --------------------------------------------------------------------- */
 3799|   838k|    for(i4_y = 0; i4_y < i4_temp_array_ht; i4_y++)
  ------------------
  |  Branch (3799:19): [True: 766k, False: 71.8k]
  ------------------
 3800|   766k|    {
 3801|  9.96M|        for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
  ------------------
  |  Branch (3801:23): [True: 9.19M, False: 766k]
  ------------------
 3802|  9.19M|        {
 3803|  9.19M|            WORD32 i4_x_ref;
 3804|  9.19M|            WORD32 i4_x_phase;
 3805|       |            /* ------------------------------------------------------------- */
 3806|       |            /* Finding the offset                                            */
 3807|       |            /* ------------------------------------------------------------- */
 3808|  9.19M|            i4_x_ref = ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_ref_pos;
 3809|  9.19M|            i4_x_phase = ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_phase;
 3810|  9.19M|            pi4_interp_buff_temp = pi4_interp_buff + i4_x_ref;
 3811|       |
 3812|       |            /* ------------------------------------------------------------- */
 3813|       |            /* Luma writes consecutive bytes; chroma writes every second byte. */
 3814|       |            /* Both add 512 and shift right by 10 before clipping to 0..255. */
 3815|  9.19M|            if(0 == i4_chroma_flag)
  ------------------
  |  Branch (3815:16): [True: 6.13M, False: 3.06M]
  ------------------
 3816|  6.13M|            {
 3817|  6.13M|                *(pu1_out + i4_x + (i4_y * i4_out_stride)) =
 3818|       |
 3819|  6.13M|                    CLIPUCHAR(
  ------------------
  |  |   69|  6.13M|#define CLIPUCHAR(x) CLIP3(0, 255, (x))
  |  |  ------------------
  |  |  |  |   77|  6.13M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 55.9k, False: 6.07M]
  |  |  |  |  |  Branch (77:54): [True: 1.73k, False: 6.07M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 3820|  6.13M|                        ((g_ai1_interp_filter_luma[i4_x_phase]) * (*(pi4_interp_buff_temp - 1)) +
 3821|  6.13M|                         (g_ai1_interp_filter_luma[16 + i4_x_phase]) * (*(pi4_interp_buff_temp)) +
 3822|  6.13M|                         (g_ai1_interp_filter_luma[32 + i4_x_phase]) *
 3823|  6.13M|                             (*(pi4_interp_buff_temp + 1)) +
 3824|  6.13M|                         (g_ai1_interp_filter_luma[48 + i4_x_phase]) *
 3825|  6.13M|                             (*(pi4_interp_buff_temp + 2)) +
 3826|  6.13M|                         512) >>
 3827|  6.13M|                        10);
 3828|  6.13M|            }
 3829|  3.06M|            else
 3830|  3.06M|            {
 3831|  3.06M|                *(pu1_out + (2 * i4_x) + (i4_y * i4_out_stride)) = CLIPUCHAR(
  ------------------
  |  |   69|  3.06M|#define CLIPUCHAR(x) CLIP3(0, 255, (x))
  |  |  ------------------
  |  |  |  |   77|  3.06M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 0, False: 3.06M]
  |  |  |  |  |  Branch (77:54): [True: 0, False: 3.06M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 3832|  3.06M|                    ((g_au1_interp_filter_chroma[i4_x_phase]) * (*(pi4_interp_buff_temp)) +
 3833|  3.06M|                     (g_au1_interp_filter_chroma[16 + i4_x_phase]) * (*(pi4_interp_buff_temp + 1)) +
 3834|  3.06M|                     512) >>
 3835|  3.06M|                    10);
 3836|  3.06M|            }
 3837|       |
 3838|  9.19M|        } /* end of loop over MB width */
 3839|   766k|        pi4_interp_buff = pi4_interp_buff + i4_refarray_wd;
 3840|   766k|    }     /* end of loop over MB height */
 3841|       |
 3842|  71.8k|    return;
 3843|  71.8k|} /* End of Interpolation Function */
isvcd_intra_resamp_mb:
 3876|  37.8k|{
 3877|       |    /* Resamples one MB for Intra_Base prediction: the ref sample array is   */
 3878|       |    /* built and interpolated for luma, then chroma. I/O buffer params:      */
 3879|       |    /* --------------------------------------------------------------------- */
 3880|  37.8k|    intra_sampling_ctxt_t *ps_ctxt;
 3881|  37.8k|    UWORD8 *pu1_inp_luma, *pu1_inp_cb, *pu1_inp_cr;
 3882|  37.8k|    UWORD8 *pu1_out_luma, *pu1_out_cb, *pu1_out_cr;
 3883|  37.8k|    WORD32 i4_inp_stride;
 3884|  37.8k|    WORD32 i4_out_stride;
 3885|  37.8k|    WORD32 i4_refarray_stride;
 3886|       |
 3887|       |    /* --------------------------------------------------------------------- */
 3888|       |    /* ref sample array and corresponding parameters                         */
 3889|       |    /* --------------------------------------------------------------------- */
 3890|  37.8k|    WORD32 i4_chroma_flag, ret;
 3891|       |
 3892|  37.8k|    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
 3893|       |    /* stride is read from the context; it needs to be populated at the proper place */
 3894|  37.8k|    i4_refarray_stride = ps_ctxt->i4_refarray_stride;
 3895|       |
 3896|       |    /* --------------------------------------------------------------------- */
 3897|       |    /* LUMA PROCESSING                                                        */
 3898|       |    /* --------------------------------------------------------------------- */
 3899|  37.8k|    pu1_inp_luma = (UWORD8 *) ps_ref_luma->pv_buffer;
 3900|  37.8k|    pu1_out_luma = (UWORD8 *) ps_curr_luma->pv_buffer;
 3901|  37.8k|    i4_inp_stride = ps_ref_luma->i4_num_element_stride;
 3902|  37.8k|    i4_out_stride = ps_curr_luma->i4_num_element_stride;
 3903|  37.8k|    i4_chroma_flag = 0;
 3904|       |
 3905|       |    /* ------- Constructing refSampleArray (luma: second plane pointer is NULL) ------- */
 3906|  37.8k|    ret = isvcd_reflayer_construction(pv_intra_samp_ctxt, pu1_inp_luma, NULL, i4_inp_stride,
 3907|  37.8k|                                      i4_refarray_stride, ps_ref_mb_mode_map, ps_mb_coord,
 3908|  37.8k|                                      i4_chroma_flag);
 3909|       |    /* any non-OK result from the construction is reported to the caller as NOT_OK */
 3910|  37.8k|    if(ret != OK)
  ------------------
  |  |  114|  37.8k|#define OK        0
  ------------------
  |  Branch (3910:8): [True: 0, False: 37.8k]
  ------------------
 3911|      0|    {
 3912|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3913|      0|    }
 3914|       |    /* ---- Interpolation process for Intra_Base prediction (via the context's function pointer) ------ */
 3915|  37.8k|    ps_ctxt->pf_interpolate_intra_base(pv_intra_samp_ctxt, pu1_out_luma, i4_out_stride,
 3916|  37.8k|                                       i4_refarray_stride, ps_mb_coord->u2_mb_x,
 3917|  37.8k|                                       ps_mb_coord->u2_mb_y, i4_chroma_flag, 0);
 3918|       |
 3919|       |    /* --------------------------------------------------------------------- */
 3920|       |    /* CHROMA PROCESSING                                                       */
 3921|       |    /* --------------------------------------------------------------------- */
 3922|       |    /* CB; the Cr pointer is Cb + 1 (presumably interleaved CbCr samples - TODO confirm) */
 3923|  37.8k|    i4_inp_stride = ps_ref_chroma->i4_num_element_stride;
 3924|  37.8k|    pu1_inp_cb = (UWORD8 *) ps_ref_chroma->pv_buffer;
 3925|  37.8k|    pu1_inp_cr = pu1_inp_cb + 1;
 3926|  37.8k|    i4_chroma_flag = 1;
 3927|       |
 3928|       |    /* ------- Constructing refSampleArray (one call receives both the Cb and Cr pointers) ------- */
 3929|  37.8k|    ret = isvcd_reflayer_construction(pv_intra_samp_ctxt, pu1_inp_cb, pu1_inp_cr, i4_inp_stride,
 3930|  37.8k|                                      i4_refarray_stride, ps_ref_mb_mode_map, ps_mb_coord,
 3931|  37.8k|                                      i4_chroma_flag);
 3932|       |    /* any non-OK result from the construction is reported to the caller as NOT_OK */
 3933|  37.8k|    if(ret != OK)
  ------------------
  |  |  114|  37.8k|#define OK        0
  ------------------
  |  Branch (3933:8): [True: 0, False: 37.8k]
  ------------------
 3934|      0|    {
 3935|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3936|      0|    }
 3937|  37.8k|    i4_out_stride = ps_curr_chroma->i4_num_element_stride;
 3938|  37.8k|    pu1_out_cb = (UWORD8 *) ps_curr_chroma->pv_buffer;
 3939|  37.8k|    pu1_out_cr = pu1_out_cb + 1;
 3940|       |
 3941|       |    /* ---- Cb Interpolation process for Intra_Base prediction (last argument 0) ------ */
 3942|  37.8k|    ps_ctxt->pf_interpolate_intra_base(pv_intra_samp_ctxt, pu1_out_cb, i4_out_stride,
 3943|  37.8k|                                       i4_refarray_stride, ps_mb_coord->u2_mb_x,
 3944|  37.8k|                                       ps_mb_coord->u2_mb_y, i4_chroma_flag, 0);
 3945|       |
 3946|       |    /* ---- Cr Interpolation process for Intra_Base prediction (last argument 1) ------ */
 3947|  37.8k|    ps_ctxt->pf_interpolate_intra_base(pv_intra_samp_ctxt, pu1_out_cr, i4_out_stride,
 3948|  37.8k|                                       i4_refarray_stride, ps_mb_coord->u2_mb_x,
 3949|  37.8k|                                       ps_mb_coord->u2_mb_y, i4_chroma_flag, 1);
 3950|  37.8k|    return OK;
  ------------------
  |  |  114|  37.8k|#define OK        0
  ------------------
 3951|  37.8k|}
isvcd_intra_resamp_generate_segment_lookup:
 3981|  69.4k|{
 3982|  69.4k|    WORD32 i4_x;
 3983|  69.4k|    WORD32 i4_position, i4_dist_prev_mb, i4_dist_next_mb;
 3984|  69.4k|    UWORD8 u1_seg_dim;
 3985|  69.4k|    UWORD8 u1_num_sgmts;
 3986|  69.4k|    WORD32 i4_block_size = i4_mb_size >> 1;
 3987|  69.4k|    UWORD8 u1_offset = 0;
 3988|  69.4k|    seg_lookup_desc_t *ps_segments;
 3989|  69.4k|    seg_description_t *ps_seg_desc;
 3990|       |    /* Per start position i4_x, split [i4_x, i4_x + i4_dimension) into segments; table is cleared first */
 3991|  69.4k|    memset(ps_seg_lookup_table, 0, i4_mb_size * sizeof(seg_lookup_desc_t));
 3992|       |
 3993|   902k|    for(i4_x = 0; i4_x < i4_mb_size; i4_x++)
  ------------------
  |  Branch (3993:19): [True: 833k, False: 69.4k]
  ------------------
 3994|   833k|    {
 3995|   833k|        ps_segments = &ps_seg_lookup_table[i4_x];
 3996|   833k|        ps_seg_desc = ps_segments->s_segments;
 3997|   833k|        i4_position = i4_x;
 3998|       |
 3999|   833k|        if(i4_x >= i4_block_size)
  ------------------
  |  Branch (3999:12): [True: 416k, False: 416k]
  ------------------
 4000|   416k|        {
 4001|       |            /* set the fourth bit (value 8) so that later it can be directly OR-ed */
 4002|   416k|            ps_segments->u4_start_pos = 8;
 4003|   416k|        }
 4004|   416k|        else
 4005|   416k|        {
 4006|   416k|            ps_segments->u4_start_pos = 0;
 4007|   416k|        }
 4008|       |
 4009|   833k|        u1_num_sgmts = 0;
 4010|   833k|        u1_offset = 0;
 4011|       |
 4012|  3.12M|        while(i4_position < (i4_x + i4_dimension))
  ------------------
  |  Branch (4012:15): [True: 2.29M, False: 833k]
  ------------------
 4013|  2.29M|        {
 4014|       |            /* check and fill the nearest mb boundary flag: -1 if the masked position is below i4_block_size, else 1 */
 4015|  2.29M|            if((i4_position & (i4_mb_size - 1)) < i4_block_size)
  ------------------
  |  Branch (4015:16): [True: 1.14M, False: 1.14M]
  ------------------
 4016|  1.14M|            {
 4017|  1.14M|                ps_seg_desc->i1_nearst_mb_bdry = -1;
 4018|  1.14M|            }
 4019|  1.14M|            else
 4020|  1.14M|            {
 4021|  1.14M|                ps_seg_desc->i1_nearst_mb_bdry = 1;
 4022|  1.14M|            }
 4023|       |
 4024|       |            /* find the distance from the previous MB for start of segment (mask assumes power-of-two i4_mb_size - TODO confirm) */
 4025|  2.29M|            i4_dist_prev_mb = (i4_position & (i4_mb_size - 1));
 4026|       |            /* i1_dist_idx is (i4_mb_size - dist) when dist >= half the MB size, else -(dist + 1) */
 4027|  2.29M|            ps_seg_desc->i1_dist_idx =
 4028|  2.29M|                ((i4_dist_prev_mb >= i4_mb_size >> 1) ? (i4_mb_size - i4_dist_prev_mb)
  ------------------
  |  Branch (4028:18): [True: 1.14M, False: 1.14M]
  ------------------
 4029|  2.29M|                                                      : -(i4_dist_prev_mb + 1));
 4030|       |
 4031|       |            /* find the size of the segment: run to the next i4_block_size boundary, then clip to the end of the span */
 4032|  2.29M|            u1_seg_dim = (i4_block_size - (i4_position & (i4_block_size - 1)));
 4033|  2.29M|            i4_position += u1_seg_dim;
 4034|  2.29M|            if(i4_position > (i4_x + i4_dimension))
  ------------------
  |  Branch (4034:16): [True: 694k, False: 1.59M]
  ------------------
 4035|   694k|            {
 4036|   694k|                i4_position = (i4_x + i4_dimension);
 4037|   694k|                u1_seg_dim = (i4_position & (i4_block_size - 1));
 4038|   694k|            }
 4039|       |
 4040|       |            /* find the distance from the next MB for end of segment */
 4041|  2.29M|            i4_dist_next_mb = (i4_position & (i4_mb_size - 1));
 4042|  2.29M|            ps_seg_desc->u1_seg_dim = u1_seg_dim;
 4043|  2.29M|            ps_seg_desc->u1_seg_off = u1_offset;
 4044|       |
 4045|       |            /* check if the segment has an adjoining MB edge; an edge at the very start or end of the span gives 0 */
 4046|  2.29M|            if(i4_dist_prev_mb == 0)
  ------------------
  |  Branch (4046:16): [True: 798k, False: 1.49M]
  ------------------
 4047|   798k|            {
 4048|   798k|                if(0 == u1_num_sgmts)
  ------------------
  |  Branch (4048:20): [True: 69.4k, False: 729k]
  ------------------
 4049|  69.4k|                {
 4050|  69.4k|                    ps_seg_desc->u1_mb_adjoin = 0;
 4051|  69.4k|                }
 4052|   729k|                else
 4053|   729k|                {
 4054|   729k|                    ps_seg_desc->u1_mb_adjoin = 1 << i4_shift_val;
 4055|   729k|                }
 4056|   798k|            }
 4057|  1.49M|            else if(i4_dist_next_mb == 0)
  ------------------
  |  Branch (4057:21): [True: 798k, False: 694k]
  ------------------
 4058|   798k|            {
 4059|   798k|                if(i4_position == (i4_x + i4_dimension))
  ------------------
  |  Branch (4059:20): [True: 69.4k, False: 729k]
  ------------------
 4060|  69.4k|                {
 4061|  69.4k|                    ps_seg_desc->u1_mb_adjoin = 0;
 4062|  69.4k|                }
 4063|   729k|                else
 4064|   729k|                {
 4065|   729k|                    ps_seg_desc->u1_mb_adjoin = 1 << i4_shift_val;
 4066|   729k|                }
 4067|   798k|            }
 4068|   694k|            else
 4069|   694k|            {
 4070|   694k|                ps_seg_desc->u1_mb_adjoin = 0;
 4071|   694k|            }
 4072|       |
 4073|       |            /* Updates: advance the running offset, the segment count and the descriptor pointer */
 4074|  2.29M|            u1_offset += u1_seg_dim;
 4075|  2.29M|            u1_num_sgmts++;
 4076|  2.29M|            ps_seg_desc++;
 4077|  2.29M|        }
 4078|       |
 4079|       |        /* fill the number of segments for this position */
 4080|   833k|        ps_segments->u1_num_segments = u1_num_sgmts;
 4081|   833k|    }
 4082|  69.4k|}
isvcd_intra_resamp_populate_list:
 4109|  68.0k|{
 4110|       |    /* --------------------------------------------------------------------- */
 4111|       |    /* Local variables required for finding the mapping between the layers     */
 4112|       |    /* --------------------------------------------------------------------- */
 4113|  68.0k|    UWORD32 i4_shift_x, i4_shift_y, i4_scale_x, i4_scale_y;
 4114|  68.0k|    WORD32 i4_offset_x, i4_offset_y;
 4115|  68.0k|    WORD32 i4_add_x, i4_add_y, i4_delta_x, i4_delta_y, i4_refphase_x, i4_refphase_y;
 4116|  68.0k|    WORD32 i4_phase_x, i4_phase_y, i4_sub_wd, i4_sub_ht, i4_mb_wd, i4_mb_ht;
 4117|  68.0k|    WORD32 i4_horz_dim, i4_vert_dim, i4_tmp;
 4118|  68.0k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 4119|       |    /* --------------------------------------------------------------------- */
 4120|       |    /* Local Pointer Declaration for arrays in Mapping context                 */
 4121|       |    /* --------------------------------------------------------------------- */
 4122|  68.0k|    ref_mb_map_t *ps_x_off_len, *ps_y_off_len;
 4123|  68.0k|    UWORD32 i4_ref_wd, i4_ref_ht, i4_scaled_wd, i4_scaled_ht, i4_curr_lyr_width, i4_curr_lyr_height;
 4124|       |
 4125|       |    /* --------------------------------------------------------------------- */
 4126|       |    /* Local Flag Declaration                                                 */
 4127|       |    /* --------------------------------------------------------------------- */
 4128|  68.0k|    WORD32 i4_ref_layer_field_pic_flag, i4_field_pic_flag, i4_frame_mbs_only_flag;
 4129|  68.0k|    WORD32 i4_ref_layer_frame_Mbs_only_flag, i4_field_Mb_flag, i4_bot_field_flag;
 4130|       |
 4131|       |    /* --------------------------------------------------------------------- */
 4132|       |    /* Cropping Parameters Declaration                                         */
 4133|       |    /* --------------------------------------------------------------------- */
 4134|  68.0k|    WORD32 i4_scaled_ref_layer_left_offset, i4_scaled_ref_layer_top_offset;
 4135|  68.0k|    WORD32 i4_scaled_ref_layer_right_offset, i4_scaled_ref_layer_bottom_offset;
 4136|  68.0k|    dec_seq_params_t *ps_sps;
 4137|  68.0k|    dec_svc_seq_params_t *ps_subset_sps;
 4138|  68.0k|    ps_sps = ps_dec->ps_cur_sps;
 4139|  68.0k|    ps_subset_sps = ps_svc_lyr_dec->ps_cur_subset_sps;
 4140|       |
 4141|       |    /* --------------------------------------------------------------------- */
 4142|       |    /* Hardcoding flag information    (assuming no field support) */
 4143|       |    /* --------------------------------------------------------------------- */
 4144|  68.0k|    i4_ref_layer_field_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 4145|  68.0k|    i4_field_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 4146|  68.0k|    i4_frame_mbs_only_flag = SVCD_TRUE;
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
 4147|  68.0k|    i4_field_Mb_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 4148|  68.0k|    i4_bot_field_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 4149|  68.0k|    i4_ref_layer_frame_Mbs_only_flag = SVCD_TRUE;
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
 4150|  68.0k|    i4_horz_dim = 0;
 4151|  68.0k|    i4_vert_dim = 0;
 4152|       |
 4153|       |    /* --------------------------------------------------------------------- */
 4154|       |    /* Pointer and Paramater are intialized    - Chroma and Luma */
 4155|       |    /* --------------------------------------------------------------------- */
 4156|  68.0k|    {
 4157|  68.0k|        WORD32 i4_base_width;
 4158|  68.0k|        WORD32 i4_base_height;
 4159|  68.0k|        WORD32 i4_ref_layer_chroma_phase_x_plus1_flag;
 4160|  68.0k|        WORD32 i4_ref_layer_chroma_phase_y_plus1;
 4161|  68.0k|        WORD32 i4_chroma_phase_x_plus1_flag;
 4162|  68.0k|        WORD32 i4_chroma_phase_y_plus1;
 4163|       |
 4164|       |        /* ------------------------------------------------------------- */
 4165|       |        /* HARD CODED FOR 420                                             */
 4166|       |        /* ------------------------------------------------------------- */
 4167|  68.0k|        WORD32 i4_sub_wd_chroma = 2;
 4168|  68.0k|        WORD32 i4_sub_ht_chroma = 2;
 4169|       |
 4170|  68.0k|        i4_base_width = ps_ref_res_prms->i4_res_width;
 4171|  68.0k|        i4_base_height = ps_ref_res_prms->i4_res_height;
 4172|       |
 4173|  68.0k|        i4_ref_layer_chroma_phase_x_plus1_flag =
 4174|  68.0k|            ps_curr_res_prms->i1_ref_lyr_chroma_phase_x_plus1_flag;
 4175|       |
 4176|  68.0k|        i4_ref_layer_chroma_phase_y_plus1 = ps_curr_res_prms->i1_ref_lyr_chroma_phase_y_plus1;
 4177|  68.0k|        i4_chroma_phase_x_plus1_flag = ps_subset_sps->s_sps_svc_ext.u1_chroma_phase_x_plus1_flag;
 4178|  68.0k|        i4_chroma_phase_y_plus1 = ps_subset_sps->s_sps_svc_ext.u1_chroma_phase_y_plus1;
 4179|  68.0k|        i4_scaled_ref_layer_bottom_offset = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_bot;
 4180|  68.0k|        i4_scaled_ref_layer_left_offset = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_left;
 4181|  68.0k|        i4_scaled_ref_layer_top_offset = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_top;
 4182|  68.0k|        i4_scaled_ref_layer_right_offset = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_rt;
 4183|       |
 4184|       |        /* ----------------------------------------------------------------- */
 4185|       |        /* Computing Effective Frame Dimensions                                 */
 4186|       |        /* ------------------------------------------------------------------*/
 4187|  68.0k|        i4_ref_wd = (i4_base_width >> i4_chroma_flag);
 4188|  68.0k|        i4_ref_ht = (i4_base_height >> i4_chroma_flag) * (1 + i4_ref_layer_field_pic_flag);
 4189|       |
 4190|  68.0k|        i4_scaled_wd = ps_curr_res_prms->u2_scaled_ref_width;
 4191|  68.0k|        i4_scaled_ht = ps_curr_res_prms->u2_scaled_ref_height;
 4192|  68.0k|        i4_scaled_wd = (i4_scaled_wd >> i4_chroma_flag);
 4193|  68.0k|        i4_scaled_ht = (i4_scaled_ht >> i4_chroma_flag) * (1 + i4_field_pic_flag);
 4194|       |
 4195|  68.0k|        if(1 == i4_chroma_flag)
  ------------------
  |  Branch (4195:12): [True: 34.0k, False: 34.0k]
  ------------------
 4196|  34.0k|        {
 4197|  34.0k|            i4_refphase_x = i4_ref_layer_chroma_phase_x_plus1_flag - 1;
 4198|  34.0k|            i4_refphase_y = i4_ref_layer_chroma_phase_y_plus1 - 1;
 4199|  34.0k|            i4_phase_x = i4_chroma_phase_x_plus1_flag - 1;
 4200|  34.0k|            i4_phase_y = i4_chroma_phase_y_plus1 - 1;
 4201|  34.0k|            i4_sub_wd = i4_sub_wd_chroma;
 4202|  34.0k|            i4_sub_ht = i4_sub_ht_chroma;
 4203|  34.0k|            i4_mb_wd = MB_WIDTH >> 1;
  ------------------
  |  |   67|  34.0k|#define MB_WIDTH 16
  ------------------
 4204|  34.0k|            i4_mb_ht = MB_HEIGHT >> 1;
  ------------------
  |  |   68|  34.0k|#define MB_HEIGHT 16
  ------------------
 4205|  34.0k|        }
 4206|  34.0k|        else
 4207|  34.0k|        {
 4208|  34.0k|            i4_refphase_x = 0;
 4209|  34.0k|            i4_refphase_y = 0;
 4210|  34.0k|            i4_phase_x = 0;
 4211|  34.0k|            i4_phase_y = 0;
 4212|  34.0k|            i4_sub_wd = 1;
 4213|  34.0k|            i4_sub_ht = 1;
 4214|  34.0k|            i4_mb_wd = MB_WIDTH;
  ------------------
  |  |   67|  34.0k|#define MB_WIDTH 16
  ------------------
 4215|  34.0k|            i4_mb_ht = MB_HEIGHT;
  ------------------
  |  |   68|  34.0k|#define MB_HEIGHT 16
  ------------------
 4216|  34.0k|        }
 4217|  68.0k|    }
 4218|       |
 4219|       |    /* --------------------------------------------------------------------- */
 4220|       |    /* Derive shift x and y based on level idd                               */
 4221|       |    /* --------------------------------------------------------------------- */
 4222|  68.0k|    if(ps_sps->u1_level_idc <= 30)
  ------------------
  |  Branch (4222:8): [True: 62.8k, False: 5.14k]
  ------------------
 4223|  62.8k|    {
 4224|  62.8k|        i4_shift_x = 16;
 4225|  62.8k|        i4_shift_y = 16;
 4226|  62.8k|    }
 4227|  5.14k|    else
 4228|  5.14k|    {
 4229|  5.14k|        i4_shift_x = 31 - isvcd_get_ceil_log2(i4_ref_wd);
 4230|  5.14k|        i4_shift_y = 31 - isvcd_get_ceil_log2(i4_ref_ht);
 4231|  5.14k|    }
 4232|       |
 4233|       |    /* --------------------------------------------------------------------- */
 4234|       |    /* The following condition is not true in our case for time being         */
 4235|       |    /* --------------------------------------------------------------------- */
 4236|  68.0k|    if((SVCD_FALSE == i4_frame_mbs_only_flag) || (SVCD_FALSE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
                  if((SVCD_FALSE == i4_frame_mbs_only_flag) || (SVCD_FALSE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
  |  Branch (4236:8): [True: 0, False: 68.0k]
  |  Branch (4236:50): [True: 0, False: 68.0k]
  ------------------
 4237|      0|    {
 4238|      0|        i4_phase_y = i4_phase_y + 4 * i4_bot_field_flag;
 4239|       |
 4240|      0|        if(1 == i4_ref_layer_frame_Mbs_only_flag)
  ------------------
  |  Branch (4240:12): [True: 0, False: 0]
  ------------------
 4241|      0|            i4_refphase_y = (2 * i4_refphase_y) + 2;
 4242|      0|        else
 4243|      0|            i4_refphase_y = i4_refphase_y + (4 * i4_bot_field_flag);
 4244|      0|    }
 4245|       |
 4246|       |    /* --------------------------------------------------------------------- */
 4247|       |    /* Dx and Dy Computation - Ratio of the base and enhance layer width     */
 4248|       |    /* --------------------------------------------------------------------- */
 4249|  68.0k|    i4_scale_x = ((i4_ref_wd << i4_shift_x) + (i4_scaled_wd >> 1)) / (i4_scaled_wd);
 4250|  68.0k|    i4_scale_y = ((i4_ref_ht << i4_shift_y) + (i4_scaled_ht >> 1)) / (i4_scaled_ht);
 4251|       |
 4252|  68.0k|    i4_offset_x = i4_scaled_ref_layer_left_offset / i4_sub_wd;
 4253|  68.0k|    i4_add_x = (((i4_ref_wd * (2 + i4_phase_x)) << (i4_shift_x - 2)) + (i4_scaled_wd >> 1)) /
 4254|  68.0k|                   i4_scaled_wd +
 4255|  68.0k|               (1 << (i4_shift_x - 5));
 4256|  68.0k|    i4_delta_x = 4 * (2 + i4_refphase_x);
 4257|       |
 4258|  68.0k|    if((SVCD_TRUE == i4_frame_mbs_only_flag) && (SVCD_TRUE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
                  if((SVCD_TRUE == i4_frame_mbs_only_flag) && (SVCD_TRUE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4258:8): [True: 68.0k, False: 0]
  |  Branch (4258:49): [True: 68.0k, False: 0]
  ------------------
 4259|  68.0k|    {
 4260|  68.0k|        i4_offset_y = i4_scaled_ref_layer_top_offset / i4_sub_ht;
 4261|  68.0k|        i4_add_y = (((i4_ref_ht * (2 + i4_phase_y)) << (i4_shift_y - 2)) + (i4_scaled_ht >> 1)) /
 4262|  68.0k|                       i4_scaled_ht +
 4263|  68.0k|                   (1 << (i4_shift_y - 5));
 4264|  68.0k|        i4_delta_y = 4 * (2 + i4_refphase_y);
 4265|  68.0k|    }
 4266|      0|    else
 4267|      0|    {
 4268|      0|        i4_offset_y = i4_scaled_ref_layer_top_offset / (2 * i4_sub_ht);
 4269|      0|        i4_add_y = (((i4_ref_ht * (2 + i4_phase_y)) << (i4_shift_y - 3)) + (i4_scaled_ht >> 1)) /
 4270|      0|                       i4_scaled_ht +
 4271|      0|                   (1 << (i4_shift_y - 5));
 4272|      0|        i4_delta_y = 2 * (2 + i4_refphase_y);
 4273|      0|    }
 4274|       |
 4275|       |    /* --------------------------------------------------------------------- */
 4276|       |    /* Intializing Local Pointers    - Chroma and Luma                         */
 4277|       |    /* --------------------------------------------------------------------- */
 4278|  68.0k|    ps_x_off_len = ps_map_ctxt->ps_x_offset_length;
 4279|  68.0k|    ps_y_off_len = ps_map_ctxt->ps_y_offset_length;
 4280|  68.0k|    i4_curr_lyr_width = ps_curr_res_prms->i4_res_width >> i4_chroma_flag;
 4281|  68.0k|    i4_curr_lyr_height = ps_curr_res_prms->i4_res_height >> i4_chroma_flag;
 4282|       |
 4283|       |    /* --------------------------------------------------------------------- */
 4284|       |    /* Dyadic Scaling Ratios Handling                                         */
 4285|       |    /* --------------------------------------------------------------------- */
 4286|  68.0k|    if(1 == ps_curr_res_prms->u1_dyadic_flag)
  ------------------
  |  Branch (4286:8): [True: 33.3k, False: 34.7k]
  ------------------
 4287|  33.3k|    {
 4288|  33.3k|        WORD32 i4_refarray_wd, i4_x_offset;
 4289|  33.3k|        WORD32 i4_refarray_ht, i4_y_offset;
 4290|  33.3k|        WORD32 i4_crp_wd_lt, i4_crp_ht_top;
 4291|  33.3k|        WORD32 i4_crp_wd_rt, i4_crp_ht_bot;
 4292|  33.3k|        WORD32 i4_ref_lyr_wd, i4_ref_lyr_ht;
 4293|  33.3k|        WORD32 i4_ref_x, i4_ref_y;
 4294|  33.3k|        WORD32 i4_ofst;
 4295|  33.3k|        WORD32 i4_i, i4_j;
 4296|       |
 4297|       |        /* Hard coded for dyadic case */
 4298|  33.3k|        i4_refarray_wd = 20 >> i4_chroma_flag;
 4299|  33.3k|        i4_ofst = -2 >> i4_chroma_flag;
 4300|  33.3k|        i4_crp_wd_lt = i4_scaled_ref_layer_left_offset >> i4_chroma_flag;
 4301|  33.3k|        i4_crp_wd_rt = i4_scaled_ref_layer_right_offset >> i4_chroma_flag;
 4302|  33.3k|        i4_ref_lyr_wd = (i4_curr_lyr_width >> 1);
 4303|       |
 4304|  33.3k|        i4_ref_x = 0;
 4305|   100k|        for(i4_i = 0; i4_i < (WORD32) i4_curr_lyr_width; i4_i += (i4_mb_wd << 1))
  ------------------
  |  Branch (4305:23): [True: 66.7k, False: 33.3k]
  ------------------
 4306|  66.7k|        {
 4307|  66.7k|            i4_x_offset = MAX(i4_ofst, (i4_ref_x + i4_ofst));
  ------------------
  |  |   60|  66.7k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 66.7k]
  |  |  ------------------
  ------------------
 4308|  66.7k|            i4_x_offset = MIN(i4_x_offset, (i4_ref_lyr_wd - i4_ofst));
  ------------------
  |  |   61|  66.7k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 66.7k, False: 0]
  |  |  ------------------
  ------------------
 4309|  66.7k|            ps_x_off_len->i2_offset = i4_x_offset;
 4310|  66.7k|            ps_x_off_len->i2_length = i4_refarray_wd;
 4311|  66.7k|            ps_x_off_len++;
 4312|  66.7k|            ps_x_off_len->i2_offset = i4_x_offset;
 4313|  66.7k|            ps_x_off_len->i2_length = i4_refarray_wd;
 4314|  66.7k|            ps_x_off_len++;
 4315|  66.7k|            if(i4_i >= i4_crp_wd_lt)
  ------------------
  |  Branch (4315:16): [True: 66.7k, False: 0]
  ------------------
 4316|  66.7k|            {
 4317|  66.7k|                if(i4_i <= (WORD32) (i4_curr_lyr_width - i4_crp_wd_rt))
  ------------------
  |  Branch (4317:20): [True: 66.7k, False: 0]
  ------------------
 4318|  66.7k|                {
 4319|  66.7k|                    i4_ref_x += i4_mb_wd;
 4320|  66.7k|                }
 4321|  66.7k|            }
 4322|  66.7k|        }
 4323|       |
 4324|  33.3k|        i4_refarray_ht = 20 >> i4_chroma_flag;
 4325|  33.3k|        i4_crp_ht_top = i4_scaled_ref_layer_top_offset >> i4_chroma_flag;
 4326|  33.3k|        i4_crp_ht_bot = i4_scaled_ref_layer_bottom_offset >> i4_chroma_flag;
 4327|  33.3k|        i4_ref_lyr_ht = (i4_curr_lyr_height >> 1);
 4328|       |
 4329|  33.3k|        i4_ref_y = 0;
 4330|   190k|        for(i4_j = 0; i4_j < (WORD32) i4_curr_lyr_height; i4_j += (i4_mb_ht << 1))
  ------------------
  |  Branch (4330:23): [True: 157k, False: 33.3k]
  ------------------
 4331|   157k|        {
 4332|   157k|            i4_y_offset = MAX(i4_ofst, (i4_ref_y + i4_ofst));
  ------------------
  |  |   60|   157k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 157k]
  |  |  ------------------
  ------------------
 4333|   157k|            i4_y_offset = MIN(i4_y_offset, (i4_ref_lyr_ht - i4_ofst));
  ------------------
  |  |   61|   157k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 157k, False: 0]
  |  |  ------------------
  ------------------
 4334|   157k|            ps_y_off_len->i2_offset = i4_y_offset;
 4335|   157k|            ps_y_off_len->i2_length = i4_refarray_ht;
 4336|   157k|            ps_y_off_len++;
 4337|   157k|            ps_y_off_len->i2_offset = i4_y_offset;
 4338|   157k|            ps_y_off_len->i2_length = i4_refarray_ht;
 4339|   157k|            ps_y_off_len++;
 4340|   157k|            if(i4_j >= i4_crp_ht_top)
  ------------------
  |  Branch (4340:16): [True: 157k, False: 0]
  ------------------
 4341|   157k|            {
 4342|   157k|                if(i4_j <= (WORD32) (i4_curr_lyr_height - i4_crp_ht_bot))
  ------------------
  |  Branch (4342:20): [True: 156k, False: 802]
  ------------------
 4343|   156k|                {
 4344|   156k|                    i4_ref_y += i4_mb_ht;
 4345|   156k|                }
 4346|   157k|            }
 4347|   157k|        }
 4348|       |        /* No need to process further, return */
 4349|  33.3k|        return;
 4350|  33.3k|    } /* If dyadic path */
 4351|       |
 4352|       |    /* Proposed Algo for Optimization */
 4353|  34.7k|    {
 4354|  34.7k|        WORD32 i4_max, i4_min;
 4355|  34.7k|        ref_min_max_map_t *ps_x_min_max;
 4356|  34.7k|        ref_min_max_map_t *ps_y_min_max;
 4357|  34.7k|        WORD32 i4_i, i4_j;
 4358|       |
 4359|  34.7k|        ps_x_min_max = ps_map_ctxt->ps_x_min_max;
 4360|  34.7k|        ps_y_min_max = ps_map_ctxt->ps_y_min_max;
 4361|       |        /* ----------------------------------------------------------------- */
 4362|       |        /* Computation of offsetX refArrayW Xmin and Xmax Lists                 */
 4363|       |        /* ----------------------------------------------------------------- */
 4364|   138k|        for(i4_i = 0; i4_i < (WORD32) i4_curr_lyr_width; i4_i = i4_i + i4_mb_wd)
  ------------------
  |  Branch (4364:23): [True: 104k, False: 34.7k]
  ------------------
 4365|   104k|        {
 4366|   104k|            WORD32 i4_refarray_wd, i4_xr_index;
 4367|   104k|            WORD32 i4_x_refmin16;
 4368|   104k|            WORD32 i4_x_refmax16;
 4369|   104k|            WORD32 i4_x_offset;
 4370|       |
 4371|   104k|            i4_x_refmin16 = (WORD64) (((WORD64) (i4_i - i4_offset_x) * i4_scale_x + i4_add_x) >>
 4372|   104k|                                      ((WORD32) (i4_shift_x - 4))) -
 4373|   104k|                            i4_delta_x;
 4374|       |
 4375|   104k|            i4_x_refmax16 =
 4376|   104k|                (WORD64) (((WORD64) (i4_i + i4_mb_wd - 1 - i4_offset_x) * i4_scale_x + i4_add_x) >>
 4377|   104k|                          ((WORD32) (i4_shift_x - 4))) -
 4378|   104k|                i4_delta_x;
 4379|       |
 4380|       |            /* ------------------------------------------------------------- */
 4381|       |            /* Modified AC205                                                 */
 4382|       |            /* Minimum width required - So adding 2 pixels on each side         */
 4383|       |            /* ------------------------------------------------------------- */
 4384|   104k|            i4_refarray_wd = ((i4_x_refmax16 + 15) >> 4) - (i4_x_refmin16 >> 4) + 1 + 4;
 4385|       |
 4386|       |            /* ------------------------------------------------------------- */
 4387|       |            /* Setting the offset 2 pixels before                             */
 4388|       |            /* ------------------------------------------------------------- */
 4389|   104k|            i4_x_offset = (i4_x_refmin16 >> 4) - 2;
 4390|       |
 4391|       |            /* ------------------------------------------------------------- */
 4392|       |            /* Modifying the values based on the location                     */
 4393|       |            /* ------------------------------------------------------------- */
 4394|   104k|            i4_min = i4_x_offset;
 4395|   104k|            i4_xr_index = i4_min - ((i4_min / i4_mb_wd) * i4_mb_wd);
 4396|       |
 4397|   104k|            if(i4_xr_index < (i4_mb_wd >> 1))
  ------------------
  |  Branch (4397:16): [True: 86.7k, False: 17.3k]
  ------------------
 4398|  86.7k|            {
 4399|  86.7k|                i4_refarray_wd = i4_refarray_wd + (i4_mb_wd >> 1);
 4400|  86.7k|                i4_x_offset = i4_x_offset - (i4_mb_wd >> 1);
 4401|  86.7k|            }
 4402|       |
 4403|   104k|            i4_max = ((i4_x_refmax16 + 15) >> 4) + 2;
 4404|   104k|            i4_xr_index = i4_max - ((i4_max / i4_mb_wd) * i4_mb_wd);
 4405|       |
 4406|   104k|            if(i4_xr_index >= (i4_mb_wd >> 1)) i4_refarray_wd = i4_refarray_wd + (i4_mb_wd >> 1);
  ------------------
  |  Branch (4406:16): [True: 52.0k, False: 52.0k]
  ------------------
 4407|       |
 4408|       |            /* ------------------------------------------------------------- */
 4409|       |            /* Filling the arrays with offset, min, max and refArray dim     */
 4410|       |            /* ------------------------------------------------------------- */
 4411|   104k|            ps_x_off_len->i2_offset = i4_x_offset;
 4412|   104k|            ps_x_off_len->i2_length = i4_refarray_wd;
 4413|       |
 4414|   104k|            ps_x_min_max->i2_min_pos = (i4_x_refmin16 >> 4) - i4_x_offset;
 4415|   104k|            ps_x_min_max->i2_max_pos = ((i4_x_refmax16 + 15) >> 4) - i4_x_offset;
 4416|       |
 4417|   104k|            i4_tmp = (WORD32) (ps_x_min_max->i2_max_pos - ps_x_min_max->i2_min_pos) +
 4418|   104k|                     (4 >> i4_chroma_flag);
 4419|   104k|            if(i4_tmp > i4_horz_dim)
  ------------------
  |  Branch (4419:16): [True: 35.0k, False: 69.0k]
  ------------------
 4420|  35.0k|            {
 4421|  35.0k|                i4_horz_dim = i4_tmp;
 4422|  35.0k|            }
 4423|       |
 4424|       |            /* increment the pointer */
 4425|   104k|            ps_x_off_len++;
 4426|   104k|            ps_x_min_max++;
 4427|   104k|        } /* end of loop over scaled width */
 4428|       |
 4429|       |        /* ----------------------------------------------------------------- */
 4430|       |        /* Computation of offsetY refArrayH Ymin and Ymax Lists                 */
 4431|       |        /* ----------------------------------------------------------------- */
 4432|   346k|        for(i4_j = 0; i4_j < (WORD32) i4_curr_lyr_height; i4_j = i4_j + i4_mb_ht)
  ------------------
  |  Branch (4432:23): [True: 312k, False: 34.7k]
  ------------------
 4433|   312k|        {
 4434|   312k|            WORD32 i4_refarray_ht, i4_yr_index;
 4435|   312k|            WORD32 i4_y_refmin16;
 4436|   312k|            WORD32 i4_y_refmax16;
 4437|   312k|            WORD32 i4_y_offset;
 4438|       |
 4439|   312k|            i4_y_refmin16 = (WORD64) (((WORD64) (i4_j - i4_offset_y) * i4_scale_y + i4_add_y) >>
 4440|   312k|                                      ((WORD32) (i4_shift_y - 4))) -
 4441|   312k|                            i4_delta_y;
 4442|       |
 4443|   312k|            i4_y_refmax16 =
 4444|   312k|                (WORD64) (((WORD64) (i4_j + i4_mb_ht - 1 - i4_offset_y) * i4_scale_y + i4_add_y) >>
 4445|   312k|                          ((WORD32) (i4_shift_y - 4))) -
 4446|   312k|                i4_delta_y;
 4447|       |
 4448|       |            /* ------------------------------------------------------------- */
 4449|       |            /* Modified AC205                                                 */
 4450|       |            /* Minimum width required - So adding 2 pixels on each side            */
 4451|       |            /* ------------------------------------------------------------- */
 4452|   312k|            i4_refarray_ht = ((i4_y_refmax16 + 15) >> 4) - (i4_y_refmin16 >> 4) + 1 + 4;
 4453|       |
 4454|       |            /* ------------------------------------------------------------- */
 4455|       |            /* Setting the offset 2 pixels before                             */
 4456|       |            /* ------------------------------------------------------------- */
 4457|   312k|            i4_y_offset = (i4_y_refmin16 >> 4) - 2;
 4458|       |
 4459|       |            /* ------------------------------------------------------------- */
 4460|       |            /* Modifying the values based on the location                     */
 4461|       |            /* ------------------------------------------------------------- */
 4462|   312k|            i4_min = i4_y_offset;
 4463|   312k|            i4_yr_index = i4_min - ((i4_min / i4_mb_ht) * i4_mb_ht);
 4464|   312k|            if(i4_yr_index < (i4_mb_ht >> 1))
  ------------------
  |  Branch (4464:16): [True: 190k, False: 121k]
  ------------------
 4465|   190k|            {
 4466|   190k|                i4_refarray_ht = i4_refarray_ht + (i4_mb_ht >> 1);
 4467|   190k|                i4_y_offset = i4_y_offset - (i4_mb_ht >> 1);
 4468|   190k|            }
 4469|       |
 4470|   312k|            i4_max = ((i4_y_refmax16 + 15) >> 4) + 2;
 4471|   312k|            i4_yr_index = i4_max - ((i4_max / i4_mb_ht) * i4_mb_ht);
 4472|   312k|            if(i4_yr_index >= (i4_mb_ht >> 1)) i4_refarray_ht = i4_refarray_ht + (i4_mb_ht >> 1);
  ------------------
  |  Branch (4472:16): [True: 156k, False: 156k]
  ------------------
 4473|       |
 4474|       |            /* ------------------------------------------------------------- */
 4475|       |            /* Filling the arrays with offset, min, max and refArray dim     */
 4476|       |            /* ------------------------------------------------------------- */
 4477|   312k|            ps_y_off_len->i2_offset = i4_y_offset;
 4478|   312k|            ps_y_off_len->i2_length = i4_refarray_ht;
 4479|   312k|            ps_y_min_max->i2_min_pos = (i4_y_refmin16 >> 4) - i4_y_offset;
 4480|   312k|            ps_y_min_max->i2_max_pos = ((i4_y_refmax16 + 15) >> 4) - i4_y_offset;
 4481|       |
 4482|   312k|            i4_tmp = (WORD32) (ps_y_min_max->i2_max_pos - ps_y_min_max->i2_min_pos) +
 4483|   312k|                     (4 >> i4_chroma_flag);
 4484|   312k|            if(i4_tmp > i4_vert_dim)
  ------------------
  |  Branch (4484:16): [True: 35.0k, False: 277k]
  ------------------
 4485|  35.0k|            {
 4486|  35.0k|                i4_vert_dim = i4_tmp;
 4487|  35.0k|            }
 4488|       |
 4489|       |            /* increment the pointer */
 4490|   312k|            ps_y_off_len++;
 4491|   312k|            ps_y_min_max++;
 4492|   312k|        } /* end of loop over scaled height */
 4493|  34.7k|    }
 4494|       |
 4495|       |    /* --------------------------------------------------------------------- */
 4496|       |    /* Computation of Xref and Xphase List as per standard                     */
 4497|       |    /* --------------------------------------------------------------------- */
 4498|  34.7k|    ps_x_off_len = ps_map_ctxt->ps_x_offset_length;
 4499|  34.7k|    ps_y_off_len = ps_map_ctxt->ps_y_offset_length;
 4500|       |
 4501|  34.7k|    {
 4502|  34.7k|        ref_pixel_map_t *ps_x_pos_phase;
 4503|  34.7k|        WORD32 i4_xc;
 4504|  34.7k|        WORD32 i4_offset_x_index;
 4505|       |
 4506|  34.7k|        ps_x_pos_phase = ps_map_ctxt->ps_x_pos_phase;
 4507|       |
 4508|  1.28M|        for(i4_xc = 0; i4_xc < (WORD32) i4_curr_lyr_width; i4_xc++)
  ------------------
  |  Branch (4508:24): [True: 1.24M, False: 34.7k]
  ------------------
 4509|  1.24M|        {
 4510|  1.24M|            WORD32 i4_x_offset;
 4511|  1.24M|            WORD32 i4_x_ref16;
 4512|       |
 4513|  1.24M|            i4_offset_x_index = i4_xc / i4_mb_wd;
 4514|       |
 4515|  1.24M|            i4_x_offset = ps_x_off_len[i4_offset_x_index].i2_offset;
 4516|       |
 4517|  1.24M|            i4_x_ref16 = (WORD64) (((WORD64) (i4_xc - i4_offset_x) * i4_scale_x + i4_add_x) >>
 4518|  1.24M|                                   ((WORD32) (i4_shift_x - 4))) -
 4519|  1.24M|                         i4_delta_x;
 4520|       |
 4521|       |            /* store the values */
 4522|  1.24M|            ps_x_pos_phase->i2_ref_pos = (i4_x_ref16 >> 4) - i4_x_offset;
 4523|  1.24M|            ps_x_pos_phase->i2_phase = i4_x_ref16 & 15;
 4524|       |
 4525|       |            /* increment the pointer */
 4526|  1.24M|            ps_x_pos_phase++;
 4527|       |
 4528|  1.24M|        } /* end of loop over scaled width */
 4529|  34.7k|    }
 4530|       |
 4531|       |    /* --------------------------------------------------------------------- */
 4532|       |    /* Computation of Yref and Yphase List as per standard                     */
 4533|       |    /* --------------------------------------------------------------------- */
 4534|  34.7k|    {
 4535|  34.7k|        WORD32 i4_yc;
 4536|  34.7k|        ref_pixel_map_t *ps_y_pos_phase;
 4537|       |
 4538|  34.7k|        ps_y_pos_phase = ps_map_ctxt->ps_y_pos_phase;
 4539|       |
 4540|  3.78M|        for(i4_yc = 0; i4_yc < (WORD32) i4_curr_lyr_height; i4_yc++)
  ------------------
  |  Branch (4540:24): [True: 3.74M, False: 34.7k]
  ------------------
 4541|  3.74M|        {
 4542|  3.74M|            WORD32 i4_y_offset;
 4543|  3.74M|            WORD32 i4_y_ref16;
 4544|  3.74M|            WORD32 i4_offset_y_index;
 4545|       |
 4546|  3.74M|            i4_offset_y_index = i4_yc / i4_mb_ht;
 4547|       |
 4548|  3.74M|            i4_y_offset = ps_y_off_len[i4_offset_y_index].i2_offset;
 4549|       |
 4550|  3.74M|            if((SVCD_FALSE == i4_frame_mbs_only_flag) ||
  ------------------
  |  |   45|  3.74M|#define SVCD_FALSE 0
  ------------------
  |  Branch (4550:16): [True: 0, False: 3.74M]
  ------------------
 4551|  3.74M|               (SVCD_FALSE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   45|  3.74M|#define SVCD_FALSE 0
  ------------------
  |  Branch (4551:16): [True: 0, False: 3.74M]
  ------------------
 4552|      0|            {
 4553|      0|                i4_yc = i4_yc >> (1 - i4_field_Mb_flag);
 4554|      0|            }
 4555|       |
 4556|  3.74M|            i4_y_ref16 = (WORD64) (((WORD64) (i4_yc - i4_offset_y) * i4_scale_y + i4_add_y) >>
 4557|  3.74M|                                   ((WORD32) (i4_shift_y - 4))) -
 4558|  3.74M|                         i4_delta_y;
 4559|  3.74M|            ps_y_pos_phase->i2_ref_pos = (i4_y_ref16 >> 4) - i4_y_offset;
 4560|  3.74M|            ps_y_pos_phase->i2_phase = i4_y_ref16 & 15;
 4561|       |
 4562|       |            /* increment the pointer */
 4563|  3.74M|            ps_y_pos_phase++;
 4564|       |
 4565|  3.74M|        } /* end of loop over scaled height */
 4566|  34.7k|    }
 4567|       |
 4568|       |    /* --------------------------------------------------------------------- */
 4569|       |    /* Computation of Corresponding Diagonal Location                         */
 4570|       |    /* --------------------------------------------------------------------- */
 4571|  34.7k|    {
 4572|  34.7k|        WORD16 *pi2_xd_index;
 4573|  34.7k|        WORD16 *pi2_yd_index;
 4574|  34.7k|        WORD16 *pi2_ya_index;
 4575|  34.7k|        WORD32 i4_i, i4_j;
 4576|       |
 4577|  34.7k|        pi2_xd_index = ps_map_ctxt->pi2_xd_index;
 4578|  34.7k|        pi2_yd_index = ps_map_ctxt->pi2_yd_index;
 4579|  34.7k|        pi2_ya_index = ps_map_ctxt->pi2_ya_index;
 4580|       |
 4581|   451k|        for(i4_i = 0; i4_i < i4_mb_wd; i4_i++)
  ------------------
  |  Branch (4581:23): [True: 416k, False: 34.7k]
  ------------------
 4582|   416k|        {
 4583|   416k|            *(pi2_xd_index + i4_i) = ((i4_i >= i4_mb_wd >> 1) ? (i4_i - i4_mb_wd) : (i4_i + 1));
  ------------------
  |  Branch (4583:39): [True: 208k, False: 208k]
  ------------------
 4584|       |
 4585|   416k|        } /* end of loop over MB width */
 4586|       |
 4587|   451k|        for(i4_j = 0; i4_j < i4_mb_ht; i4_j++)
  ------------------
  |  Branch (4587:23): [True: 416k, False: 34.7k]
  ------------------
 4588|   416k|        {
 4589|   416k|            *(pi2_yd_index + i4_j) = ((i4_j >= i4_mb_ht >> 1) ? (i4_j - i4_mb_ht) : (i4_j + 1));
  ------------------
  |  Branch (4589:39): [True: 208k, False: 208k]
  ------------------
 4590|       |
 4591|   416k|            *(pi2_ya_index + i4_j) =
 4592|   416k|                *(pi2_yd_index + i4_j) - (((i4_mb_ht >> 1) + 1) * (SIGN(*(pi2_yd_index + i4_j))));
  ------------------
  |  |  103|   416k|#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
  |  |  ------------------
  |  |  |  Branch (103:22): [True: 208k, False: 208k]
  |  |  |  Branch (103:36): [True: 208k, False: 0]
  |  |  ------------------
  ------------------
 4593|       |
 4594|   416k|        } /* end of loop over MB height */
 4595|  34.7k|    }
 4596|       |
 4597|       |    /* generate the lookup to generate horizontal segments */
 4598|  34.7k|    isvcd_intra_resamp_generate_segment_lookup(ps_map_ctxt->ps_seg_lookup_horz, i4_horz_dim,
 4599|  34.7k|                                               i4_mb_wd, 3);
 4600|       |
 4601|       |    /* generate the lookup to generate vertical segments */
 4602|  34.7k|    isvcd_intra_resamp_generate_segment_lookup(ps_map_ctxt->ps_seg_lookup_vert, i4_vert_dim,
 4603|  34.7k|                                               i4_mb_ht, 4);
 4604|       |
 4605|  34.7k|    return;
 4606|  68.0k|} /* end of function "isvcd_intra_resamp_populate_list"*/
isvcd_populate_res_prms:
 4631|   133k|{
 4632|   133k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
 4633|   133k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 4634|   133k|    res_prms_t *ps_curr_lyr_res_prms;
 4635|   133k|    svc_dec_lyr_struct_t *ps_svc_ref_lyr_dec;
 4636|   133k|    ps_svc_ref_lyr_dec = ps_svc_lyr_dec->ps_dec_svc_ref_layer;
 4637|   133k|    ps_curr_lyr_res_prms = &ps_svc_lyr_dec->s_res_prms;
 4638|       |
 4639|   133k|    ps_curr_lyr_res_prms->i4_res_width = ps_dec->u2_pic_wd;
 4640|   133k|    ps_curr_lyr_res_prms->i4_res_height = ps_dec->u2_pic_ht;
 4641|   133k|    ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_left =
 4642|   133k|        ps_svc_lyr_dec->ps_cur_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_left_offset << 1;
 4643|   133k|    ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_top =
 4644|   133k|        ps_svc_lyr_dec->ps_cur_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_top_offset << 1;
 4645|   133k|    ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_rt =
 4646|   133k|        ps_svc_lyr_dec->ps_cur_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_right_offset << 1;
 4647|   133k|    ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_bot =
 4648|   133k|        ps_svc_lyr_dec->ps_cur_subset_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_bottom_offset << 1;
 4649|   133k|    ps_curr_lyr_res_prms->u2_scaled_ref_width =
 4650|   133k|        (ps_dec->u2_frm_wd_in_mbs << 4) - (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_left +
 4651|   133k|                                           ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_rt);
 4652|       |
 4653|   133k|    ps_curr_lyr_res_prms->u2_scaled_ref_height =
 4654|   133k|        (ps_dec->u2_frm_ht_in_mbs << 4) - (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_top +
 4655|   133k|                                           ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_bot);
 4656|       |
 4657|   133k|    ps_curr_lyr_res_prms->u1_cropping_change_flag = 0;
 4658|   133k|    if(2 == ps_svc_lyr_dec->ps_cur_subset_sps->s_sps_svc_ext.u1_extended_spatial_scalability_idc)
  ------------------
  |  Branch (4658:8): [True: 69, False: 133k]
  ------------------
 4659|     69|    {
 4660|     69|        ps_curr_lyr_res_prms->u1_cropping_change_flag = 1;
 4661|       |
 4662|     69|        ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_left =
 4663|     69|            ps_svc_lyr_dec->s_svc_slice_params.i4_scaled_ref_layer_left_offset << 1;
 4664|       |
 4665|     69|        ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_top =
 4666|     69|            ps_svc_lyr_dec->s_svc_slice_params.i4_scaled_ref_layer_top_offset << 1;
 4667|     69|        ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_rt =
 4668|     69|            ps_svc_lyr_dec->s_svc_slice_params.i4_scaled_ref_layer_right_offset << 1;
 4669|     69|        ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_bot =
 4670|     69|            ps_svc_lyr_dec->s_svc_slice_params.i4_scaled_ref_layer_bottom_offset << 1;
 4671|     69|        ps_curr_lyr_res_prms->u2_scaled_ref_width =
 4672|     69|            (ps_dec->u2_frm_wd_in_mbs << 4) -
 4673|     69|            (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_left +
 4674|     69|             ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_rt);
 4675|       |
 4676|     69|        ps_curr_lyr_res_prms->u2_scaled_ref_height =
 4677|     69|            (ps_dec->u2_frm_ht_in_mbs << 4) -
 4678|     69|            (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_top +
 4679|     69|             ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_bot);
 4680|       |
 4681|     69|        return NOT_OK;
  ------------------
  |  |  116|     69|#define NOT_OK    -1
  ------------------
 4682|     69|    }
 4683|       |
 4684|   133k|    ps_curr_lyr_res_prms->u1_rstrct_res_change_flag = SVCD_TRUE;
  ------------------
  |  |   46|   133k|#define SVCD_TRUE 1
  ------------------
 4685|       |
 4686|   133k|    ps_curr_lyr_res_prms->u1_disable_inter_lyr_dblk_filter_idc =
 4687|   133k|        ps_svc_lyr_dec->s_svc_slice_params.u4_disable_inter_layer_deblk_filter_idc;
 4688|   133k|    ps_curr_lyr_res_prms->i1_inter_lyr_alpha_c0_offset =
 4689|   133k|        ps_svc_lyr_dec->s_svc_slice_params.i4_inter_layer_slice_alpha_c0_offset_div2;
 4690|   133k|    ps_curr_lyr_res_prms->i1_inter_lyr_beta_offset =
 4691|   133k|        ps_svc_lyr_dec->s_svc_slice_params.i4_inter_layer_slice_beta_offset_div2;
 4692|   133k|    ps_curr_lyr_res_prms->i1_constrained_intra_rsmpl_flag =
 4693|   133k|        ps_svc_lyr_dec->s_svc_slice_params.u1_constrained_intra_resampling_flag;
 4694|   133k|    ps_curr_lyr_res_prms->i1_ref_lyr_chroma_phase_x_plus1_flag =
 4695|   133k|        ps_svc_lyr_dec->ps_cur_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_x_plus1_flag;
 4696|   133k|    ps_curr_lyr_res_prms->i1_ref_lyr_chroma_phase_y_plus1 =
 4697|   133k|        ps_svc_lyr_dec->ps_cur_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_y_plus1;
 4698|   133k|    ps_curr_lyr_res_prms->u1_direct_8x8_inference_flag =
 4699|   133k|        ps_dec->ps_cur_sps->u1_direct_8x8_inference_flag;
 4700|       |
 4701|   133k|    ps_curr_lyr_res_prms->u1_remap_req_flag = 1;
 4702|   133k|    ps_curr_lyr_res_prms->u1_dyadic_flag = ps_svc_lyr_dec->u1_dyadic_flag;
 4703|       |
 4704|       |    /* Derive the reference layer width and height */
 4705|       |
 4706|   133k|    if(SVCD_TRUE != ps_svc_lyr_dec->u1_base_res_flag)
  ------------------
  |  |   46|   133k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4706:8): [True: 34.4k, False: 98.8k]
  ------------------
 4707|  34.4k|    {
 4708|  34.4k|        WORD32 i4_ref_lyr_width;
 4709|  34.4k|        WORD32 i4_ref_lyr_ht;
 4710|  34.4k|        WORD32 i4_dyadic_flag = SVCD_FALSE;
  ------------------
  |  |   45|  34.4k|#define SVCD_FALSE 0
  ------------------
 4711|  34.4k|        i4_ref_lyr_width = ps_svc_ref_lyr_dec->s_res_prms.i4_res_width;
 4712|  34.4k|        i4_ref_lyr_ht = ps_svc_ref_lyr_dec->s_res_prms.i4_res_height;
 4713|       |
 4714|       |        /* set the Restricted Spatial Resolution change flag */
 4715|  34.4k|        ps_curr_lyr_res_prms->u1_rstrct_res_change_flag = SVCD_TRUE;
  ------------------
  |  |   46|  34.4k|#define SVCD_TRUE 1
  ------------------
 4716|       |
 4717|  34.4k|        if(0 == ((ps_curr_lyr_res_prms->u2_scaled_ref_width == i4_ref_lyr_width) ||
  ------------------
  |  Branch (4717:12): [True: 17.6k, False: 16.8k]
  |  Branch (4717:18): [True: 95, False: 34.3k]
  ------------------
 4718|  34.3k|                 (ps_curr_lyr_res_prms->u2_scaled_ref_width == (i4_ref_lyr_width << 1))))
  ------------------
  |  Branch (4718:18): [True: 16.7k, False: 17.6k]
  ------------------
 4719|  17.6k|        {
 4720|  17.6k|            ps_curr_lyr_res_prms->u1_rstrct_res_change_flag = SVCD_FALSE;
  ------------------
  |  |   45|  17.6k|#define SVCD_FALSE 0
  ------------------
 4721|  17.6k|        }
 4722|       |
 4723|  34.4k|        if(0 == ((ps_curr_lyr_res_prms->u2_scaled_ref_height == i4_ref_lyr_ht) ||
  ------------------
  |  Branch (4723:12): [True: 17.5k, False: 16.8k]
  |  Branch (4723:18): [True: 208, False: 34.2k]
  ------------------
 4724|  34.2k|                 (ps_curr_lyr_res_prms->u2_scaled_ref_height == (i4_ref_lyr_ht << 1))))
  ------------------
  |  Branch (4724:18): [True: 16.6k, False: 17.5k]
  ------------------
 4725|  17.5k|        {
 4726|  17.5k|            ps_curr_lyr_res_prms->u1_rstrct_res_change_flag = SVCD_FALSE;
  ------------------
  |  |   45|  17.5k|#define SVCD_FALSE 0
  ------------------
 4727|  17.5k|        }
 4728|       |
 4729|  34.4k|        if(0 != (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_left & 15))
  ------------------
  |  Branch (4729:12): [True: 0, False: 34.4k]
  ------------------
 4730|      0|        {
 4731|      0|            ps_curr_lyr_res_prms->u1_rstrct_res_change_flag = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
 4732|      0|        }
 4733|       |
 4734|  34.4k|        if(0 != (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_top & 15))
  ------------------
  |  Branch (4734:12): [True: 0, False: 34.4k]
  ------------------
 4735|      0|        {
 4736|      0|            ps_curr_lyr_res_prms->u1_rstrct_res_change_flag = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
 4737|      0|        }
 4738|       |
 4739|       |        /* populate the dyadic status */
 4740|       |
 4741|  34.4k|        if((ps_curr_lyr_res_prms->u2_scaled_ref_width == (i4_ref_lyr_width << 1)) &&
  ------------------
  |  Branch (4741:12): [True: 16.7k, False: 17.7k]
  ------------------
 4742|  16.7k|           (ps_curr_lyr_res_prms->u2_scaled_ref_height == (i4_ref_lyr_ht << 1)))
  ------------------
  |  Branch (4742:12): [True: 16.6k, False: 77]
  ------------------
 4743|  16.6k|        {
 4744|  16.6k|            i4_dyadic_flag = SVCD_TRUE;
  ------------------
  |  |   46|  16.6k|#define SVCD_TRUE 1
  ------------------
 4745|  16.6k|        }
 4746|  17.7k|        else if((ps_curr_lyr_res_prms->u2_scaled_ref_width != ((i4_ref_lyr_width * 3) >> 1)) ||
  ------------------
  |  Branch (4746:17): [True: 183, False: 17.6k]
  ------------------
 4747|  17.6k|                (ps_curr_lyr_res_prms->u2_scaled_ref_height != ((i4_ref_lyr_ht * 3) >> 1)))
  ------------------
  |  Branch (4747:17): [True: 256, False: 17.3k]
  ------------------
 4748|    439|        {
 4749|    439|            ps_curr_lyr_res_prms->u1_dyadic_flag = i4_dyadic_flag;
 4750|    439|            ps_svc_lyr_dec->u1_dyadic_flag = ps_curr_lyr_res_prms->u1_dyadic_flag;
 4751|    439|            return NOT_OK;
  ------------------
  |  |  116|    439|#define NOT_OK    -1
  ------------------
 4752|    439|        }
 4753|       |
 4754|       |        /* check if cropping is MB aligned */
 4755|  34.0k|        if(SVCD_TRUE == i4_dyadic_flag)
  ------------------
  |  |   46|  34.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (4755:12): [True: 16.6k, False: 17.3k]
  ------------------
 4756|  16.6k|        {
 4757|  16.6k|            if((0 != (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_left & 15)) ||
  ------------------
  |  Branch (4757:16): [True: 0, False: 16.6k]
  ------------------
 4758|  16.6k|               (0 != (ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_top & 15)))
  ------------------
  |  Branch (4758:16): [True: 0, False: 16.6k]
  ------------------
 4759|      0|            {
 4760|      0|                i4_dyadic_flag = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
 4761|      0|            }
 4762|  16.6k|        }
 4763|       |
 4764|  34.0k|        ps_curr_lyr_res_prms->u1_dyadic_flag = i4_dyadic_flag;
 4765|  34.0k|        ps_svc_lyr_dec->u1_dyadic_flag = ps_curr_lyr_res_prms->u1_dyadic_flag;
 4766|  34.0k|    }
 4767|       |
 4768|   132k|    {
 4769|   132k|        inter_lyr_mb_prms_t *ps_tmp_prms, *ps_tmp_prms_2;
 4770|   132k|        inter_lyr_mb_prms_t *ps_ref_mb_prms;
 4771|   132k|        WORD32 i4_stride;
 4772|   132k|        WORD32 i4_ht_in_mbs, i4_wd_in_mbs;
 4773|   132k|        WORD32 i4_i;
 4774|       |
 4775|       |        /* Derive the reference mb mode map */
 4776|       |
 4777|   132k|        ps_ref_mb_prms = ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start;
 4778|   132k|        i4_stride = ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride;
 4779|       |
 4780|   132k|        i4_ht_in_mbs = ps_dec->u2_frm_ht_in_mbs;
 4781|   132k|        i4_wd_in_mbs = ps_dec->u2_frm_wd_in_mbs;
 4782|       |
 4783|       |        /* Set the first border row to 0xFF */
 4784|   132k|        ps_tmp_prms = (ps_ref_mb_prms - 1 - i4_stride);
 4785|       |
 4786|   132k|        memset(ps_svc_lyr_dec->ps_inter_lyr_mb_prms_base, -1,
 4787|   132k|               ps_svc_lyr_dec->u4_inter_lyr_mb_prms_size);
 4788|   132k|        memset(ps_svc_lyr_dec->pu1_svc_base_mode_flag, 0,
 4789|   132k|               ps_svc_lyr_dec->i4_frm_svc_base_mode_cabac_size);
 4790|       |
 4791|  1.19M|        for(i4_i = 0; i4_i < (i4_wd_in_mbs + 2); i4_i++)
  ------------------
  |  Branch (4791:23): [True: 1.06M, False: 132k]
  ------------------
 4792|  1.06M|        {
 4793|  1.06M|            ps_tmp_prms->i1_mb_mode = (WORD8) 0xFF;
 4794|  1.06M|            ps_tmp_prms += 1;
 4795|  1.06M|        }
 4796|       |
 4797|       |        /* Set the left and right border pixels of each row to 0 */
 4798|   132k|        ps_tmp_prms = ps_ref_mb_prms - 1;
 4799|       |
 4800|  2.43M|        for(i4_i = 0; i4_i < i4_ht_in_mbs; i4_i++)
  ------------------
  |  Branch (4800:23): [True: 2.30M, False: 132k]
  ------------------
 4801|  2.30M|        {
 4802|  2.30M|            ps_tmp_prms->i1_mb_mode = (WORD8) 0xFF;
 4803|  2.30M|            ps_tmp_prms_2 = ps_tmp_prms + (i4_wd_in_mbs + 1);
 4804|  2.30M|            ps_tmp_prms_2->i1_mb_mode = (WORD8) 0xFF;
 4805|  2.30M|            ps_tmp_prms += i4_stride;
 4806|  2.30M|        }
 4807|       |
 4808|       |        /* Set the last border row to 0xFF */
 4809|  1.19M|        for(i4_i = 0; i4_i < (i4_wd_in_mbs + 2); i4_i++)
  ------------------
  |  Branch (4809:23): [True: 1.06M, False: 132k]
  ------------------
 4810|  1.06M|        {
 4811|  1.06M|            ps_tmp_prms->i1_mb_mode = (WORD8) 0xFF;
 4812|  1.06M|            ps_tmp_prms += 1;
 4813|  1.06M|        }
 4814|   132k|    }
 4815|       |
 4816|       |    /* reset residual luma, chroma buffer*/
 4817|   132k|    memset(ps_svc_lyr_dec->pi2_il_residual_resample_luma_base, 0,
 4818|   132k|           ps_svc_lyr_dec->u4_residual_resample_luma_size);
 4819|   132k|    memset(ps_svc_lyr_dec->pi2_il_residual_resample_chroma_base, 0,
 4820|   132k|           ps_svc_lyr_dec->u4_residual_resample_chroma_size);
 4821|       |
 4822|   132k|    return OK;
  ------------------
  |  |  114|   132k|#define OK        0
  ------------------
 4823|   133k|}
isvcd_crop_wnd_flag_res_int:
 4850|   132k|{
 4851|   132k|    UWORD8 *pu1_crop_wnd_flag;
 4852|   132k|    WORD32 i4_num_mbs;
 4853|   132k|    WORD32 i4_crop_mbs_x;
 4854|   132k|    WORD32 i4_crop_mbs_y;
 4855|   132k|    WORD32 i4_cnt;
 4856|   132k|    WORD32 i4_left_offset, i4_rt_offset;
 4857|   132k|    WORD32 i4_top_offset, i4_bot_offset;
 4858|   132k|    WORD32 i4_frm_wd_in_mbs;
 4859|   132k|    WORD32 i4_frm_ht_in_mbs;
 4860|   132k|    dec_struct_t *ps_dec;
 4861|   132k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
 4862|   132k|    res_prms_t *ps_res_prms;
 4863|       |
 4864|   132k|    ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
 4865|   132k|    ps_dec = &ps_svc_lyr_dec->s_dec;
 4866|   132k|    ps_res_prms = &ps_svc_lyr_dec->s_res_prms;
 4867|   132k|    i4_frm_wd_in_mbs = ps_dec->u2_frm_wd_in_mbs;
 4868|   132k|    i4_frm_ht_in_mbs = ps_dec->u2_frm_ht_in_mbs;
 4869|       |
 4870|       |    /* Initializations */
 4871|   132k|    pu1_crop_wnd_flag = ps_svc_lyr_dec->pu1_crop_wnd_flag;
 4872|   132k|    i4_num_mbs = i4_frm_wd_in_mbs * i4_frm_ht_in_mbs;
 4873|       |
 4874|       |    /* bottom most layer in a resolution */
 4875|   132k|    if(ps_res_prms->s_ref_lyr_scaled_offset.i2_left >= 0)
  ------------------
  |  Branch (4875:8): [True: 132k, False: 0]
  ------------------
 4876|   132k|    {
 4877|       |        /* check for offset greater than 0 */
 4878|   132k|        i4_left_offset = (ps_res_prms->s_ref_lyr_scaled_offset.i2_left + 15) >> 4;
 4879|   132k|    }
 4880|      0|    else
 4881|      0|    {
 4882|       |        /* if negative set it to 0*/
 4883|      0|        i4_left_offset = 0;
 4884|      0|    }
 4885|       |
 4886|   132k|    if(ps_res_prms->s_ref_lyr_scaled_offset.i2_rt >= 0)
  ------------------
  |  Branch (4886:8): [True: 132k, False: 0]
  ------------------
 4887|   132k|    {
 4888|       |        /* check for offset greater than 0 */
 4889|   132k|        i4_rt_offset =
 4890|   132k|            (ps_res_prms->i4_res_width - ps_res_prms->s_ref_lyr_scaled_offset.i2_rt) >> 4;
 4891|   132k|    }
 4892|      0|    else
 4893|      0|    {
 4894|       |        /* if negative set it to framewidth in MBs */
 4895|      0|        i4_rt_offset = (ps_res_prms->i4_res_width >> 4);
 4896|      0|    }
 4897|       |
 4898|   132k|    if(ps_res_prms->s_ref_lyr_scaled_offset.i2_top >= 0)
  ------------------
  |  Branch (4898:8): [True: 132k, False: 0]
  ------------------
 4899|   132k|    {
 4900|       |        /* check for offset greater than 0 */
 4901|   132k|        i4_top_offset = (ps_res_prms->s_ref_lyr_scaled_offset.i2_top + 15) >> 4;
 4902|   132k|    }
 4903|      0|    else
 4904|      0|    {
 4905|       |        /* if negative set it to 0 */
 4906|      0|        i4_top_offset = 0;
 4907|      0|    }
 4908|       |
 4909|   132k|    if(ps_res_prms->s_ref_lyr_scaled_offset.i2_bot >= 0)
  ------------------
  |  Branch (4909:8): [True: 132k, False: 67]
  ------------------
 4910|   132k|    {
 4911|       |        /* check for offset greater than 0 */
 4912|   132k|        i4_bot_offset =
 4913|   132k|            (ps_res_prms->i4_res_height - ps_res_prms->s_ref_lyr_scaled_offset.i2_bot) >> 4;
 4914|   132k|    }
 4915|     67|    else
 4916|     67|    {
 4917|       |        /* if negative set it to frameheight in MBs */
 4918|     67|        i4_bot_offset = (ps_res_prms->i4_res_height >> 4);
 4919|     67|    }
 4920|       |
 4921|   132k|    i4_crop_mbs_x = i4_rt_offset - i4_left_offset;
 4922|   132k|    i4_crop_mbs_y = i4_bot_offset - i4_top_offset;
 4923|       |
 4924|       |    /* Set crop window flag to 0 for all mbs */
 4925|   132k|    memset(pu1_crop_wnd_flag, 0, i4_num_mbs);
 4926|       |
 4927|   132k|    pu1_crop_wnd_flag += (i4_frm_wd_in_mbs * i4_top_offset);
 4928|   132k|    pu1_crop_wnd_flag += i4_left_offset;
 4929|       |    /* Loop over MBs in crop window */
 4930|  2.43M|    for(i4_cnt = 0; i4_cnt < i4_crop_mbs_y; i4_cnt++)
  ------------------
  |  Branch (4930:21): [True: 2.30M, False: 132k]
  ------------------
 4931|  2.30M|    {
 4932|  2.30M|        memset(pu1_crop_wnd_flag, 1, i4_crop_mbs_x);
 4933|  2.30M|        pu1_crop_wnd_flag += i4_frm_wd_in_mbs;
 4934|  2.30M|    }
 4935|   132k|}
isvcd_intra_resamp_res_init_update_flags:
 4963|  26.2k|{
 4964|  26.2k|    intra_sampling_ctxt_t *ps_ctxt;
 4965|  26.2k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
 4966|  26.2k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
 4967|       |
 4968|  26.2k|    ps_ctxt = (intra_sampling_ctxt_t *) ps_svc_lyr_dec->pv_intra_sample_ctxt;;
 4969|       |    /* get the current layer ctxt */
 4970|  26.2k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_svc_lyr_dec->u1_layer_id - 1];
 4971|       |
 4972|  26.2k|    ps_lyr_ctxt->i1_constrained_intra_rsmpl_flag =
 4973|  26.2k|        ps_svc_lyr_dec->s_svc_slice_params.u1_constrained_intra_resampling_flag;
 4974|  26.2k|}
isvcd_intra_resamp_res_init:
 5002|   132k|{
 5003|   132k|    intra_sampling_ctxt_t *ps_ctxt;
 5004|   132k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
 5005|   132k|    dec_svc_seq_params_t *ps_cur_subset_sps;
 5006|   132k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
 5007|   132k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 5008|   132k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 5009|       |
 5010|   132k|    void *pv_intra_samp_ctxt = ps_svc_lyr_dec->pv_intra_sample_ctxt;
 5011|   132k|    res_prms_t *ps_curr_lyr_res_prms = &ps_svc_lyr_dec->s_res_prms;
 5012|   132k|    ref_mb_map_t **pps_luma_map_horz = &ps_svc_lyr_dec->ps_intsam_luma_map_horz;
 5013|   132k|    ref_mb_map_t **pps_chroma_map_horz = &ps_svc_lyr_dec->ps_intsam_chroma_map_horz;
 5014|   132k|    ref_mb_map_t **pps_luma_map_vert = &ps_svc_lyr_dec->ps_intsam_luma_map_vert;
 5015|   132k|    ref_mb_map_t **pps_chroma_map_vert = &ps_svc_lyr_dec->ps_intsam_chroma_map_vert;
 5016|       |
 5017|   132k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 5018|   132k|    ps_cur_subset_sps = ps_svc_lyr_dec->ps_cur_subset_sps;
 5019|       |
 5020|   132k|    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
 5021|       |
 5022|       |    /* if called for base resolution store default values */
 5023|   132k|    if(SVCD_TRUE == ps_svc_lyr_dec->u1_base_res_flag)
  ------------------
  |  |   46|   132k|#define SVCD_TRUE 1
  ------------------
  |  Branch (5023:8): [True: 98.8k, False: 34.0k]
  ------------------
 5024|  98.8k|    {
 5025|  98.8k|        *pps_luma_map_horz = NULL;
 5026|  98.8k|        *pps_chroma_map_horz = NULL;
 5027|  98.8k|        *pps_luma_map_vert = NULL;
 5028|  98.8k|        *pps_chroma_map_vert = NULL;
 5029|  98.8k|        ps_ctxt->i4_res_lyr_id = -1;
 5030|  98.8k|        ps_ctxt->i4_ref_width = ps_dec->u2_pic_wd;
 5031|  98.8k|        ps_ctxt->i4_ref_height = ps_dec->u2_pic_ht;
 5032|       |
 5033|       |        /* Note: The stride option is provided for bringing in data at NMB */
 5034|       |        /* level. Hence to set a NMB level stride refSample array buffer   */
 5035|       |        /* have to be increased                                            */
 5036|  98.8k|        ps_ctxt->i4_refarray_stride = REF_ARRAY_WIDTH;
  ------------------
  |  |   73|  98.8k|#define REF_ARRAY_WIDTH 48
  ------------------
 5037|  98.8k|        return OK;
  ------------------
  |  |  114|  98.8k|#define OK        0
  ------------------
 5038|  98.8k|    }
 5039|       |
 5040|       |    /* derive the current sps */
 5041|       |    /* store the res id appropriately */
 5042|  34.0k|    ps_ctxt->i4_res_lyr_id = ps_svc_lyr_dec->u1_layer_id - 1;
 5043|       |
 5044|       |    /* store the resolution params */
 5045|  34.0k|    ps_ctxt->ps_res_prms = ps_curr_lyr_res_prms;
 5046|       |
 5047|       |    /* get the current layer ctxt */
 5048|  34.0k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_svc_lyr_dec->u1_layer_id - 1];
 5049|       |
 5050|  34.0k|    ps_ctxt->i4_res_lyr_id = ps_svc_lyr_dec->u1_layer_id - 1;
 5051|       |    /* get the width and heights */
 5052|  34.0k|    ps_lyr_ctxt->i4_curr_width = ps_dec->u2_pic_wd;
 5053|  34.0k|    ps_lyr_ctxt->i4_curr_height = ps_dec->u2_pic_ht;
 5054|  34.0k|    ps_lyr_ctxt->i4_ref_width = ps_ctxt->i4_ref_width;
 5055|  34.0k|    ps_lyr_ctxt->i4_ref_height = ps_ctxt->i4_ref_height;
 5056|  34.0k|    ps_lyr_ctxt->i1_constrained_intra_rsmpl_flag =
 5057|  34.0k|        ps_svc_slice_params->u1_constrained_intra_resampling_flag;
 5058|       |
 5059|       |    /* store the structure pointer containing projected locations */
 5060|  34.0k|    *pps_luma_map_horz = ps_lyr_ctxt->s_luma_map_ctxt.ps_x_offset_length;
 5061|  34.0k|    *pps_chroma_map_horz = ps_lyr_ctxt->s_chroma_map_ctxt.ps_x_offset_length;
 5062|  34.0k|    *pps_luma_map_vert = ps_lyr_ctxt->s_luma_map_ctxt.ps_y_offset_length;
 5063|  34.0k|    *pps_chroma_map_vert = ps_lyr_ctxt->s_chroma_map_ctxt.ps_y_offset_length;
 5064|       |
 5065|       |    /* check for recomputation of mapping required */
 5066|  34.0k|    if(SVCD_TRUE == ps_curr_lyr_res_prms->u1_remap_req_flag)
  ------------------
  |  |   46|  34.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (5066:8): [True: 34.0k, False: 0]
  ------------------
 5067|  34.0k|    {
 5068|  34.0k|        res_prms_t s_ref_res_prms = {0};
 5069|  34.0k|        WORD32 i4_chroma_x_phase, i4_chroma_y_phase;
 5070|  34.0k|        WORD32 i4_ref_chroma_x_phase, i4_ref_chroma_y_phase;
 5071|  34.0k|        WORD32 i4_x_phase_0, i4_x_phase_1;
 5072|  34.0k|        WORD32 i4_y_phase_0, i4_y_phase_1;
 5073|  34.0k|        WORD32 i4_vert_flag;
 5074|       |
 5075|       |        /* store the reference layer resolution width and height */
 5076|  34.0k|        s_ref_res_prms.i4_res_width = ps_ctxt->i4_ref_width;
 5077|  34.0k|        s_ref_res_prms.i4_res_height = ps_ctxt->i4_ref_height;
 5078|       |
 5079|       |        /* call the frame level projections calculation function */
 5080|  34.0k|        isvcd_intra_resamp_populate_list(&ps_lyr_ctxt->s_luma_map_ctxt, ps_curr_lyr_res_prms,
 5081|  34.0k|                                         &s_ref_res_prms, 0, ps_svc_lyr_dec);
 5082|       |
 5083|  34.0k|        isvcd_intra_resamp_populate_list(&ps_lyr_ctxt->s_chroma_map_ctxt, ps_curr_lyr_res_prms,
 5084|  34.0k|                                         &s_ref_res_prms, 1, ps_svc_lyr_dec);
 5085|       |
 5086|       |        /* Compute the chroma xPhase and yPhase values */
 5087|  34.0k|        if(1 == ps_curr_lyr_res_prms->u1_dyadic_flag)
  ------------------
  |  Branch (5087:12): [True: 16.6k, False: 17.3k]
  ------------------
 5088|  16.6k|        {
 5089|  16.6k|            i4_ref_chroma_x_phase = ps_curr_lyr_res_prms->i1_ref_lyr_chroma_phase_x_plus1_flag;
 5090|  16.6k|            i4_ref_chroma_y_phase = ps_curr_lyr_res_prms->i1_ref_lyr_chroma_phase_y_plus1;
 5091|  16.6k|            i4_chroma_x_phase = ps_cur_subset_sps->s_sps_svc_ext.u1_chroma_phase_x_plus1_flag;
 5092|  16.6k|            i4_chroma_y_phase = ps_cur_subset_sps->s_sps_svc_ext.u1_chroma_phase_y_plus1;
 5093|       |
 5094|  16.6k|            i4_x_phase_0 = i4_chroma_x_phase - (i4_ref_chroma_x_phase << 1);
 5095|  16.6k|            i4_x_phase_1 = (3 + i4_x_phase_0) & 0x7;
 5096|  16.6k|            i4_x_phase_0 += 7;
 5097|  16.6k|            i4_x_phase_0 &= 0x7;
 5098|  16.6k|            i4_y_phase_0 = i4_chroma_y_phase - (i4_ref_chroma_y_phase << 1);
 5099|  16.6k|            i4_y_phase_1 = (3 + i4_y_phase_0) & 0x7;
 5100|  16.6k|            i4_y_phase_0 += 7;
 5101|  16.6k|            i4_y_phase_0 &= 0x7;
 5102|       |
 5103|  16.6k|            ps_lyr_ctxt->i4_x_phase_0 = i4_x_phase_0;
 5104|  16.6k|            ps_lyr_ctxt->i4_x_phase_1 = i4_x_phase_1;
 5105|  16.6k|            ps_lyr_ctxt->i4_y_phase_0 = i4_y_phase_0;
 5106|  16.6k|            ps_lyr_ctxt->i4_y_phase_1 = i4_y_phase_1;
 5107|       |
 5108|       |            /* Choose the appropriate chroma interpolation functions */
 5109|  16.6k|            if((0 == i4_ref_chroma_x_phase) && (1 == i4_chroma_x_phase))
  ------------------
  |  Branch (5109:16): [True: 4.95k, False: 11.6k]
  |  Branch (5109:48): [True: 1.06k, False: 3.88k]
  ------------------
 5110|  1.06k|            {
 5111|  1.06k|                ps_lyr_ctxt->pf_horz_chroma_interpol = ps_ctxt->pf_horz_chroma_interpol[1];
 5112|  1.06k|            }
 5113|  15.5k|            else
 5114|  15.5k|            {
 5115|  15.5k|                ps_lyr_ctxt->pf_horz_chroma_interpol = ps_ctxt->pf_horz_chroma_interpol[0];
 5116|  15.5k|            }
 5117|       |
 5118|  16.6k|            i4_vert_flag = 0;
 5119|  16.6k|            if(0 == i4_ref_chroma_y_phase)
  ------------------
  |  Branch (5119:16): [True: 2.28k, False: 14.3k]
  ------------------
 5120|  2.28k|            {
 5121|  2.28k|                if((1 == i4_chroma_y_phase) || (2 == i4_chroma_y_phase))
  ------------------
  |  Branch (5121:20): [True: 73, False: 2.21k]
  |  Branch (5121:48): [True: 171, False: 2.03k]
  ------------------
 5122|    244|                {
 5123|    244|                    i4_vert_flag = 1;
 5124|    244|                }
 5125|  2.28k|            }
 5126|  14.3k|            else if((2 == i4_ref_chroma_y_phase) && (0 == i4_chroma_y_phase))
  ------------------
  |  Branch (5126:21): [True: 2.68k, False: 11.6k]
  |  Branch (5126:53): [True: 673, False: 2.00k]
  ------------------
 5127|    673|            {
 5128|    673|                i4_vert_flag = 2;
 5129|    673|            }
 5130|       |
 5131|  16.6k|            if(1 == i4_vert_flag)
  ------------------
  |  Branch (5131:16): [True: 244, False: 16.4k]
  ------------------
 5132|    244|            {
 5133|    244|                ps_lyr_ctxt->pf_vert_chroma_interpol = ps_ctxt->pf_vert_chroma_interpol[1];
 5134|    244|            }
 5135|  16.4k|            else if(2 == i4_vert_flag)
  ------------------
  |  Branch (5135:21): [True: 673, False: 15.7k]
  ------------------
 5136|    673|            {
 5137|    673|                ps_lyr_ctxt->pf_vert_chroma_interpol = ps_ctxt->pf_vert_chroma_interpol[2];
 5138|    673|            }
 5139|  15.7k|            else
 5140|  15.7k|            {
 5141|  15.7k|                ps_lyr_ctxt->pf_vert_chroma_interpol = ps_ctxt->pf_vert_chroma_interpol[0];
 5142|  15.7k|            }
 5143|  16.6k|        }
 5144|  34.0k|    }
 5145|      0|    else
 5146|      0|    {
 5147|       |        /* should take false value */
 5148|      0|        if(SVCD_FALSE != ps_curr_lyr_res_prms->u1_remap_req_flag)
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  |  Branch (5148:12): [True: 0, False: 0]
  ------------------
 5149|      0|        {
 5150|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 5151|      0|        }
 5152|      0|    }
 5153|       |
 5154|       |    /* store the current layer width and height to context */
 5155|  34.0k|    ps_ctxt->i4_ref_width = ps_curr_lyr_res_prms->i4_res_width;
 5156|  34.0k|    ps_ctxt->i4_ref_height = ps_curr_lyr_res_prms->i4_res_height;
 5157|       |
 5158|  34.0k|    return OK;
  ------------------
  |  |  114|  34.0k|#define OK        0
  ------------------
 5159|  34.0k|}

isvcd_intra_resamp.c:isvcd_left_most_bit_detect:
   98|   552k|{
   99|   552k|    WORD32 i4_number = 0;
  100|   552k|    if(0xff == u4_num)
  ------------------
  |  Branch (100:8): [True: 0, False: 552k]
  ------------------
  101|      0|    {
  102|      0|        return 32;
  103|      0|    }
  104|       |
  105|   552k|    do
  106|   819k|    {
  107|   819k|        if(0 == (u4_num & 0x80000000))
  ------------------
  |  Branch (107:12): [True: 552k, False: 267k]
  ------------------
  108|   552k|        {
  109|   552k|            return i4_number;
  110|   552k|        }
  111|   267k|        u4_num <<= 1;
  112|   267k|        i4_number++;
  113|   267k|    } while(1);
  ------------------
  |  Branch (113:13): [True: 267k, Folded]
  ------------------
  114|   552k|}

isvcd_iquant_itrans_4x4:
   87|  47.0k|{
   88|  47.0k|    WORD16 *pi2_src_ptr = pi2_src;
   89|  47.0k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
   90|  47.0k|    WORD16 *pi2_out_ptr = pi2_out;
   91|  47.0k|    WORD16 x0, x1, x2, x3, i;
   92|  47.0k|    WORD32 q0, q1, q2, q3;
   93|  47.0k|    WORD16 i_macro;
   94|  47.0k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (94:23): [True: 33.2k, False: 13.7k]
  ------------------
   95|       |
   96|       |    /* inverse quant */
   97|       |    /*horizontal inverse transform */
   98|   235k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   235k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (98:16): [True: 188k, False: 47.0k]
  ------------------
   99|   188k|    {
  100|   188k|        q0 = pi2_src_ptr[0];
  101|   188k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   188k|                {\
  |  |  104|   188k|                    i4_value *= quant_scale;\
  |  |  105|   188k|                    i4_value *= weight_scale;\
  |  |  106|   188k|                    i4_value += rndfactor;\
  |  |  107|   188k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   188k|                    i4_value >>= qbits;\
  |  |  109|   188k|                }
  ------------------
  102|   188k|        if(i == 0 && iq_start_idx == 1)
  ------------------
  |  Branch (102:12): [True: 47.0k, False: 141k]
  |  Branch (102:22): [True: 0, False: 47.0k]
  ------------------
  103|      0|            q0 = pi2_dc_ld_addr[0];  // Restoring dc value for intra case
  104|       |
  105|   188k|        q2 = pi2_src_ptr[2];
  106|   188k|        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   188k|                {\
  |  |  104|   188k|                    i4_value *= quant_scale;\
  |  |  105|   188k|                    i4_value *= weight_scale;\
  |  |  106|   188k|                    i4_value += rndfactor;\
  |  |  107|   188k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   188k|                    i4_value >>= qbits;\
  |  |  109|   188k|                }
  ------------------
  107|       |
  108|   188k|        x0 = q0 + q2;
  109|   188k|        x1 = q0 - q2;
  110|       |
  111|   188k|        q1 = pi2_src_ptr[1];
  112|   188k|        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   188k|                {\
  |  |  104|   188k|                    i4_value *= quant_scale;\
  |  |  105|   188k|                    i4_value *= weight_scale;\
  |  |  106|   188k|                    i4_value += rndfactor;\
  |  |  107|   188k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   188k|                    i4_value >>= qbits;\
  |  |  109|   188k|                }
  ------------------
  113|       |
  114|   188k|        q3 = pi2_src_ptr[3];
  115|   188k|        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   188k|                {\
  |  |  104|   188k|                    i4_value *= quant_scale;\
  |  |  105|   188k|                    i4_value *= weight_scale;\
  |  |  106|   188k|                    i4_value += rndfactor;\
  |  |  107|   188k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   188k|                    i4_value >>= qbits;\
  |  |  109|   188k|                }
  ------------------
  116|       |
  117|   188k|        x2 = (q1 >> 1) - q3;
  118|   188k|        x3 = q1 + (q3 >> 1);
  119|       |
  120|   188k|        pi2_tmp_ptr[0] = x0 + x3;
  121|   188k|        pi2_tmp_ptr[1] = x1 + x2;
  122|   188k|        pi2_tmp_ptr[2] = x1 - x2;
  123|   188k|        pi2_tmp_ptr[3] = x0 - x3;
  124|       |
  125|   188k|        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   188k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  126|   188k|        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   188k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  127|   188k|        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   188k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  128|   188k|        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   188k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  129|   188k|    }
  130|       |
  131|       |    /* vertical inverse transform */
  132|  47.0k|    pi2_tmp_ptr = pi2_tmp;
  133|   235k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   235k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (133:16): [True: 188k, False: 47.0k]
  ------------------
  134|   188k|    {
  135|   188k|        pi2_out = pi2_out_ptr;
  136|       |
  137|   188k|        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
  138|   188k|        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
  139|   188k|        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
  140|   188k|        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
  141|       |
  142|       |        /* inverse prediction */
  143|   188k|        i_macro = x0 + x3;
  144|   188k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|   188k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   188k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.91k, False: 183k]
  |  |  |  |  |  Branch (77:54): [True: 6.30k, False: 176k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  145|   188k|        pi2_out += out_strd;
  146|       |
  147|   188k|        i_macro = x1 + x2;
  148|   188k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|   188k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   188k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.85k, False: 183k]
  |  |  |  |  |  Branch (77:54): [True: 5.38k, False: 177k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  149|   188k|        pi2_out += out_strd;
  150|       |
  151|   188k|        i_macro = x1 - x2;
  152|   188k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|   188k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   188k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.81k, False: 183k]
  |  |  |  |  |  Branch (77:54): [True: 4.72k, False: 178k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  153|   188k|        pi2_out += out_strd;
  154|       |
  155|   188k|        i_macro = x0 - x3;
  156|   188k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|   188k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   188k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.75k, False: 183k]
  |  |  |  |  |  Branch (77:54): [True: 4.79k, False: 178k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  157|   188k|        pi2_tmp_ptr++;
  158|   188k|        pi2_out_ptr++;
  159|   188k|    }
  160|  47.0k|}
isvcd_iquant_itrans_4x4_dc:
  188|  4.98k|{
  189|  4.98k|    WORD16 *pi2_out_ptr = pi2_out;
  190|  4.98k|    WORD32 q0;
  191|  4.98k|    WORD16 i_macro, i;
  192|  4.98k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (192:23): [True: 582, False: 4.39k]
  ------------------
  193|  4.98k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  4.98k|#define UNUSED(x) ((void)(x))
  ------------------
  194|       |
  195|  4.98k|    if(iq_start_idx == 0)
  ------------------
  |  Branch (195:8): [True: 4.98k, False: 0]
  ------------------
  196|  4.98k|    {
  197|  4.98k|        q0 = pi2_src[0];
  198|  4.98k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  4.98k|                {\
  |  |  104|  4.98k|                    i4_value *= quant_scale;\
  |  |  105|  4.98k|                    i4_value *= weight_scale;\
  |  |  106|  4.98k|                    i4_value += rndfactor;\
  |  |  107|  4.98k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  4.98k|                    i4_value >>= qbits;\
  |  |  109|  4.98k|                }
  ------------------
  199|  4.98k|    }
  200|      0|    else
  201|      0|    {
  202|      0|        q0 = pi2_dc_ld_addr[0];  // Restoring dc value for intra case3
  203|      0|    }
  204|  4.98k|    i_macro = CLIP_RSD((q0 + 32) >> 6);
  ------------------
  |  |  774|  4.98k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  4.98k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 575, False: 4.40k]
  |  |  |  |  |  Branch (77:54): [True: 653, False: 3.75k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  205|  24.9k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  24.9k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (205:16): [True: 19.9k, False: 4.98k]
  ------------------
  206|  19.9k|    {
  207|  19.9k|        pi2_out = pi2_out_ptr;
  208|       |
  209|       |        /* inverse prediction */
  210|  19.9k|        *pi2_out = i_macro;
  211|  19.9k|        pi2_out += out_strd;
  212|  19.9k|        *pi2_out = i_macro;
  213|  19.9k|        pi2_out += out_strd;
  214|  19.9k|        *pi2_out = i_macro;
  215|  19.9k|        pi2_out += out_strd;
  216|  19.9k|        *pi2_out = i_macro;
  217|  19.9k|        pi2_out_ptr++;
  218|  19.9k|    }
  219|  4.98k|}
isvcd_iquant_itrans_chroma_4x4:
  246|  6.71k|{
  247|  6.71k|    WORD16 *pi2_src_ptr = pi2_src;
  248|  6.71k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
  249|  6.71k|    WORD16 *pi2_out_ptr = pi2_out;
  250|  6.71k|    WORD16 x0, x1, x2, x3, i;
  251|  6.71k|    WORD32 q0, q1, q2, q3;
  252|  6.71k|    WORD16 i_macro;
  253|  6.71k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (253:23): [True: 4.18k, False: 2.52k]
  ------------------
  254|       |
  255|       |    /* inverse quant */
  256|       |    /*horizontal inverse transform */
  257|  33.5k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  33.5k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (257:16): [True: 26.8k, False: 6.71k]
  ------------------
  258|  26.8k|    {
  259|  26.8k|        if(i == 0)
  ------------------
  |  Branch (259:12): [True: 6.71k, False: 20.1k]
  ------------------
  260|  6.71k|        {
  261|  6.71k|            q0 = pi2_dc_src[0];
  262|  6.71k|        }
  263|  20.1k|        else
  264|  20.1k|        {
  265|  20.1k|            q0 = pi2_src_ptr[0];
  266|  20.1k|            INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  20.1k|                {\
  |  |  104|  20.1k|                    i4_value *= quant_scale;\
  |  |  105|  20.1k|                    i4_value *= weight_scale;\
  |  |  106|  20.1k|                    i4_value += rndfactor;\
  |  |  107|  20.1k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  20.1k|                    i4_value >>= qbits;\
  |  |  109|  20.1k|                }
  ------------------
  267|  20.1k|        }
  268|       |
  269|  26.8k|        q2 = pi2_src_ptr[2];
  270|  26.8k|        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  26.8k|                {\
  |  |  104|  26.8k|                    i4_value *= quant_scale;\
  |  |  105|  26.8k|                    i4_value *= weight_scale;\
  |  |  106|  26.8k|                    i4_value += rndfactor;\
  |  |  107|  26.8k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  26.8k|                    i4_value >>= qbits;\
  |  |  109|  26.8k|                }
  ------------------
  271|       |
  272|  26.8k|        x0 = q0 + q2;
  273|  26.8k|        x1 = q0 - q2;
  274|       |
  275|  26.8k|        q1 = pi2_src_ptr[1];
  276|  26.8k|        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  26.8k|                {\
  |  |  104|  26.8k|                    i4_value *= quant_scale;\
  |  |  105|  26.8k|                    i4_value *= weight_scale;\
  |  |  106|  26.8k|                    i4_value += rndfactor;\
  |  |  107|  26.8k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  26.8k|                    i4_value >>= qbits;\
  |  |  109|  26.8k|                }
  ------------------
  277|       |
  278|  26.8k|        q3 = pi2_src_ptr[3];
  279|  26.8k|        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  26.8k|                {\
  |  |  104|  26.8k|                    i4_value *= quant_scale;\
  |  |  105|  26.8k|                    i4_value *= weight_scale;\
  |  |  106|  26.8k|                    i4_value += rndfactor;\
  |  |  107|  26.8k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  26.8k|                    i4_value >>= qbits;\
  |  |  109|  26.8k|                }
  ------------------
  280|       |
  281|  26.8k|        x2 = (q1 >> 1) - q3;
  282|  26.8k|        x3 = q1 + (q3 >> 1);
  283|       |
  284|  26.8k|        pi2_tmp_ptr[0] = x0 + x3;
  285|  26.8k|        pi2_tmp_ptr[1] = x1 + x2;
  286|  26.8k|        pi2_tmp_ptr[2] = x1 - x2;
  287|  26.8k|        pi2_tmp_ptr[3] = x0 - x3;
  288|       |
  289|  26.8k|        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  26.8k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  290|  26.8k|        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  26.8k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  291|  26.8k|        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  26.8k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  292|  26.8k|        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  26.8k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  293|  26.8k|    }
  294|       |
  295|       |    /* vertical inverse transform */
  296|  6.71k|    pi2_tmp_ptr = pi2_tmp;
  297|  33.5k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  33.5k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (297:16): [True: 26.8k, False: 6.71k]
  ------------------
  298|  26.8k|    {
  299|  26.8k|        pi2_out = pi2_out_ptr;
  300|       |
  301|  26.8k|        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
  302|  26.8k|        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
  303|  26.8k|        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
  304|  26.8k|        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
  305|       |
  306|       |        /* inverse prediction */
  307|  26.8k|        i_macro = x0 + x3;
  308|  26.8k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|  26.8k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  26.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 660, False: 26.1k]
  |  |  |  |  |  Branch (77:54): [True: 656, False: 25.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  309|  26.8k|        pi2_out += out_strd;
  310|       |
  311|  26.8k|        i_macro = x1 + x2;
  312|  26.8k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|  26.8k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  26.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 537, False: 26.3k]
  |  |  |  |  |  Branch (77:54): [True: 681, False: 25.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  313|  26.8k|        pi2_out += out_strd;
  314|       |
  315|  26.8k|        i_macro = x1 - x2;
  316|  26.8k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|  26.8k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  26.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 652, False: 26.2k]
  |  |  |  |  |  Branch (77:54): [True: 586, False: 25.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  317|  26.8k|        pi2_out += out_strd;
  318|       |
  319|  26.8k|        i_macro = x0 - x3;
  320|  26.8k|        *pi2_out = CLIP_RSD((i_macro + 32) >> 6);
  ------------------
  |  |  774|  26.8k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  26.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 605, False: 26.2k]
  |  |  |  |  |  Branch (77:54): [True: 732, False: 25.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  321|  26.8k|        pi2_tmp_ptr++;
  322|  26.8k|        pi2_out_ptr += 2;  // Interleaved store for output
  323|  26.8k|    }
  324|  6.71k|}
isvcd_iquant_itrans_chroma_4x4_dc:
  351|  20.7k|{
  352|  20.7k|    WORD16 *pi2_out_ptr = pi2_out;
  353|  20.7k|    WORD32 q0;
  354|  20.7k|    WORD16 i_macro, i;
  355|  20.7k|    UNUSED(pi2_src);
  ------------------
  |  |   45|  20.7k|#define UNUSED(x) ((void)(x))
  ------------------
  356|  20.7k|    UNUSED(pu2_iscal_mat);
  ------------------
  |  |   45|  20.7k|#define UNUSED(x) ((void)(x))
  ------------------
  357|  20.7k|    UNUSED(pu2_weigh_mat);
  ------------------
  |  |   45|  20.7k|#define UNUSED(x) ((void)(x))
  ------------------
  358|  20.7k|    UNUSED(u4_qp_div_6);
  ------------------
  |  |   45|  20.7k|#define UNUSED(x) ((void)(x))
  ------------------
  359|  20.7k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  20.7k|#define UNUSED(x) ((void)(x))
  ------------------
  360|       |
  361|  20.7k|    q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
  362|  20.7k|    i_macro = CLIP_RSD((q0 + 32) >> 6);
  ------------------
  |  |  774|  20.7k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  20.7k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 351, False: 20.3k]
  |  |  |  |  |  Branch (77:54): [True: 302, False: 20.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  363|       |
  364|   103k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   103k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (364:16): [True: 82.8k, False: 20.7k]
  ------------------
  365|  82.8k|    {
  366|  82.8k|        pi2_out = pi2_out_ptr;
  367|       |
  368|       |        /* inverse prediction */
  369|  82.8k|        *pi2_out = i_macro;
  370|  82.8k|        pi2_out += out_strd;
  371|       |
  372|  82.8k|        *pi2_out = i_macro;
  373|  82.8k|        pi2_out += out_strd;
  374|       |
  375|  82.8k|        *pi2_out = i_macro;
  376|  82.8k|        pi2_out += out_strd;
  377|       |
  378|  82.8k|        *pi2_out = i_macro;
  379|       |
  380|  82.8k|        pi2_out_ptr += 2;
  381|  82.8k|    }
  382|  20.7k|}
isvcd_iquant_itrans_8x8_dc:
  440|  3.20k|{
  441|  3.20k|    WORD16 *pi2_out_ptr = pi2_out;
  442|  3.20k|    WORD16 i, i_macro;
  443|  3.20k|    WORD32 q;
  444|  3.20k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (444:23): [True: 1.27k, False: 1.93k]
  ------------------
  445|  3.20k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  3.20k|#define UNUSED(x) ((void)(x))
  ------------------
  446|  3.20k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  3.20k|#define UNUSED(x) ((void)(x))
  ------------------
  447|  3.20k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  3.20k|#define UNUSED(x) ((void)(x))
  ------------------
  448|       |    /*************************************************************/
  449|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  450|       |    /* operations on platform. Note : DC coeff is not scaled     */
  451|       |    /*************************************************************/
  452|  3.20k|    q = pi2_src[0];
  453|  3.20k|    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|  3.20k|                {\
  |  |  104|  3.20k|                    i4_value *= quant_scale;\
  |  |  105|  3.20k|                    i4_value *= weight_scale;\
  |  |  106|  3.20k|                    i4_value += rndfactor;\
  |  |  107|  3.20k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  3.20k|                    i4_value >>= qbits;\
  |  |  109|  3.20k|                }
  ------------------
  454|  3.20k|    i_macro = CLIP_RSD((q + 32) >> 6);
  ------------------
  |  |  774|  3.20k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  3.20k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 320, False: 2.88k]
  |  |  |  |  |  Branch (77:54): [True: 439, False: 2.44k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  455|       |    /* Perform Inverse transform */
  456|       |    /*--------------------------------------------------------------------*/
  457|       |    /* IDCT [ Horizontal transformation ]                                 */
  458|       |    /*--------------------------------------------------------------------*/
  459|       |    /*--------------------------------------------------------------------*/
  460|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  461|       |    /*                                                                    */
  462|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  463|       |    /* [Prediction buffer itself in this case]                            */
  464|       |    /*--------------------------------------------------------------------*/
  465|  28.8k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|  28.8k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (465:16): [True: 25.6k, False: 3.20k]
  ------------------
  466|  25.6k|    {
  467|  25.6k|        pi2_out = pi2_out_ptr;
  468|       |
  469|  25.6k|        *pi2_out = i_macro;
  470|       |        /* Change uc_recBuffer to Point to next element in the same column*/
  471|  25.6k|        pi2_out += out_strd;
  472|       |
  473|  25.6k|        *pi2_out = i_macro;
  474|  25.6k|        pi2_out += out_strd;
  475|       |
  476|  25.6k|        *pi2_out = i_macro;
  477|  25.6k|        pi2_out += out_strd;
  478|       |
  479|  25.6k|        *pi2_out = i_macro;
  480|  25.6k|        pi2_out += out_strd;
  481|       |
  482|  25.6k|        *pi2_out = i_macro;
  483|  25.6k|        pi2_out += out_strd;
  484|       |
  485|  25.6k|        *pi2_out = i_macro;
  486|  25.6k|        pi2_out += out_strd;
  487|       |
  488|  25.6k|        *pi2_out = i_macro;
  489|  25.6k|        pi2_out += out_strd;
  490|       |
  491|  25.6k|        *pi2_out = i_macro;
  492|       |
  493|  25.6k|        pi2_out_ptr++;
  494|  25.6k|    }
  495|  3.20k|}
isvcd_iquant_itrans_8x8:
  523|  8.49k|{
  524|  8.49k|    WORD32 i;
  525|  8.49k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
  526|  8.49k|    WORD16 *pi2_out_ptr = pi2_out;
  527|  8.49k|    WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
  528|  8.49k|    WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
  529|  8.49k|    WORD32 q;
  530|  8.49k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (530:23): [True: 4.77k, False: 3.72k]
  ------------------
  531|  8.49k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  8.49k|#define UNUSED(x) ((void)(x))
  ------------------
  532|  8.49k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  8.49k|#define UNUSED(x) ((void)(x))
  ------------------
  533|       |    /*************************************************************/
  534|       |    /* De quantization of coefficients. Will be replaced by SIMD */
  535|       |    /* operations on platform. Note : DC coeff is not scaled     */
  536|       |    /*************************************************************/
  537|   552k|    for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
  ------------------
  |  |   53|   552k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
                  for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
  ------------------
  |  |   53|   552k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (537:16): [True: 543k, False: 8.49k]
  ------------------
  538|   543k|    {
  539|   543k|        q = pi2_src[i];
  540|   543k|        INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|   543k|                {\
  |  |  104|   543k|                    i4_value *= quant_scale;\
  |  |  105|   543k|                    i4_value *= weight_scale;\
  |  |  106|   543k|                    i4_value += rndfactor;\
  |  |  107|   543k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   543k|                    i4_value >>= qbits;\
  |  |  109|   543k|                }
  ------------------
  541|   543k|        pi2_tmp_ptr[i] = q;
  542|   543k|    }
  543|       |    /* Perform Inverse transform */
  544|       |    /*--------------------------------------------------------------------*/
  545|       |    /* IDCT [ Horizontal transformation ]                                 */
  546|       |    /*--------------------------------------------------------------------*/
  547|  76.4k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|  76.4k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (547:16): [True: 67.9k, False: 8.49k]
  ------------------
  548|  67.9k|    {
  549|       |        /*------------------------------------------------------------------*/
  550|       |        /* y0 = w0 + w4                                                     */
  551|       |        /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
  552|       |        /* y2 = w0 - w4                                                     */
  553|       |        /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
  554|       |        /* y4 = (w2 >> 1) - w6                                              */
  555|       |        /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
  556|       |        /* y6 = w2 + (w6 >> 1)                                              */
  557|       |        /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
  558|       |        /*------------------------------------------------------------------*/
  559|  67.9k|        i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4]);
  560|       |
  561|  67.9k|        i_y1 =
  562|  67.9k|            ((WORD32) (-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] - (pi2_tmp_ptr[7] >> 1));
  563|       |
  564|  67.9k|        i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4]);
  565|       |
  566|  67.9k|        i_y3 = ((WORD32) pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] - (pi2_tmp_ptr[3] >> 1));
  567|       |
  568|  67.9k|        i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6]);
  569|       |
  570|  67.9k|        i_y5 =
  571|  67.9k|            ((WORD32) (-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + (pi2_tmp_ptr[5] >> 1));
  572|       |
  573|  67.9k|        i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
  574|       |
  575|  67.9k|        i_y7 = ((WORD32) pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + (pi2_tmp_ptr[1] >> 1));
  576|       |
  577|       |        /*------------------------------------------------------------------*/
  578|       |        /* z0 = y0 + y6                                                     */
  579|       |        /* z1 = y1 + (y7 >> 2)                                              */
  580|       |        /* z2 = y2 + y4                                                     */
  581|       |        /* z3 = y3 + (y5 >> 2)                                              */
  582|       |        /* z4 = y2 - y4                                                     */
  583|       |        /* z5 = (y3 >> 2) - y5                                              */
  584|       |        /* z6 = y0 - y6                                                     */
  585|       |        /* z7 = y7 - (y1 >> 2)                                              */
  586|       |        /*------------------------------------------------------------------*/
  587|  67.9k|        i_z0 = i_y0 + i_y6;
  588|  67.9k|        i_z1 = i_y1 + (i_y7 >> 2);
  589|  67.9k|        i_z2 = i_y2 + i_y4;
  590|  67.9k|        i_z3 = i_y3 + (i_y5 >> 2);
  591|  67.9k|        i_z4 = i_y2 - i_y4;
  592|  67.9k|        i_z5 = (i_y3 >> 2) - i_y5;
  593|  67.9k|        i_z6 = i_y0 - i_y6;
  594|  67.9k|        i_z7 = i_y7 - (i_y1 >> 2);
  595|       |
  596|       |        /*------------------------------------------------------------------*/
  597|       |        /* x0 = z0 + z7                                                     */
  598|       |        /* x1 = z2 + z5                                                     */
  599|       |        /* x2 = z4 + z3                                                     */
  600|       |        /* x3 = z6 + z1                                                     */
  601|       |        /* x4 = z6 - z1                                                     */
  602|       |        /* x5 = z4 - z3                                                     */
  603|       |        /* x6 = z2 - z5                                                     */
  604|       |        /* x7 = z0 - z7                                                     */
  605|       |        /*------------------------------------------------------------------*/
  606|  67.9k|        pi2_tmp_ptr[0] = i_z0 + i_z7;
  607|  67.9k|        pi2_tmp_ptr[1] = i_z2 + i_z5;
  608|  67.9k|        pi2_tmp_ptr[2] = i_z4 + i_z3;
  609|  67.9k|        pi2_tmp_ptr[3] = i_z6 + i_z1;
  610|  67.9k|        pi2_tmp_ptr[4] = i_z6 - i_z1;
  611|  67.9k|        pi2_tmp_ptr[5] = i_z4 - i_z3;
  612|  67.9k|        pi2_tmp_ptr[6] = i_z2 - i_z5;
  613|  67.9k|        pi2_tmp_ptr[7] = i_z0 - i_z7;
  614|       |
  615|       |        /* move to the next row */
  616|       |        // pi2_src_ptr += SUB_BLK_WIDTH_8x8;
  617|  67.9k|        pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
  ------------------
  |  |   53|  67.9k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  618|  67.9k|    }
  619|       |    /*--------------------------------------------------------------------*/
  620|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  621|       |    /*                                                                    */
  622|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  623|       |    /* [Prediction buffer itself in this case]                            */
  624|       |    /*--------------------------------------------------------------------*/
  625|       |
  626|  8.49k|    pi2_tmp_ptr = pi2_tmp;
  627|  76.4k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|  76.4k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (627:16): [True: 67.9k, False: 8.49k]
  ------------------
  628|  67.9k|    {
  629|  67.9k|        pi2_out = pi2_out_ptr;
  630|       |        /*------------------------------------------------------------------*/
  631|       |        /* y0j = w0j + w4j                                                  */
  632|       |        /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
  633|       |        /* y2j = w0j -w4j                                                   */
  634|       |        /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
  635|       |        /* y4j = ( w2j >> 1 ) -w6j                                          */
  636|       |        /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
  637|       |        /* y6j = w2j + ( w6j >> 1 )                                         */
  638|       |        /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
  639|       |        /*------------------------------------------------------------------*/
  640|  67.9k|        i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
  641|       |
  642|  67.9k|        i_y1 = (WORD32) (-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] -
  643|  67.9k|               (pi2_tmp_ptr[56] >> 1);
  644|       |
  645|  67.9k|        i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
  646|       |
  647|  67.9k|        i_y3 = (WORD32) pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] - (pi2_tmp_ptr[24] >> 1);
  648|       |
  649|  67.9k|        i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
  650|       |
  651|  67.9k|        i_y5 =
  652|  67.9k|            (WORD32) (-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + (pi2_tmp_ptr[40] >> 1);
  653|       |
  654|  67.9k|        i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
  655|       |
  656|  67.9k|        i_y7 = (WORD32) pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + (pi2_tmp_ptr[8] >> 1);
  657|       |
  658|       |        /*------------------------------------------------------------------*/
  659|       |        /* z0j = y0j + y6j                                                  */
  660|       |        /* z1j = y1j + (y7j >> 2)                                           */
  661|       |        /* z2j = y2j + y4j                                                  */
  662|       |        /* z3j = y3j + (y5j >> 2)                                           */
  663|       |        /* z4j = y2j -y4j                                                   */
  664|       |        /* z5j = (y3j >> 2) -y5j                                            */
  665|       |        /* z6j = y0j -y6j                                                   */
  666|       |        /* z7j = y7j -(y1j >> 2)                                            */
  667|       |        /*------------------------------------------------------------------*/
  668|  67.9k|        i_z0 = i_y0 + i_y6;
  669|  67.9k|        i_z1 = i_y1 + (i_y7 >> 2);
  670|  67.9k|        i_z2 = i_y2 + i_y4;
  671|  67.9k|        i_z3 = i_y3 + (i_y5 >> 2);
  672|  67.9k|        i_z4 = i_y2 - i_y4;
  673|  67.9k|        i_z5 = (i_y3 >> 2) - i_y5;
  674|  67.9k|        i_z6 = i_y0 - i_y6;
  675|  67.9k|        i_z7 = i_y7 - (i_y1 >> 2);
  676|       |
  677|       |        /*------------------------------------------------------------------*/
  678|       |        /* x0j = z0j + z7j                                                  */
  679|       |        /* x1j = z2j + z5j                                                  */
  680|       |        /* x2j = z4j + z3j                                                  */
  681|       |        /* x3j = z6j + z1j                                                  */
  682|       |        /* x4j = z6j -z1j                                                   */
  683|       |        /* x5j = z4j -z3j                                                   */
  684|       |        /* x6j = z2j -z5j                                                   */
  685|       |        /* x7j = z0j -z7j                                                   */
  686|       |        /*------------------------------------------------------------------*/
  687|  67.9k|        *pi2_out = CLIP_RSD((i_z0 + i_z7 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.00k, False: 62.9k]
  |  |  |  |  |  Branch (77:54): [True: 5.82k, False: 57.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  688|       |        /* Change uc_recBuffer to Point to next element in the same column*/
  689|  67.9k|        pi2_out += out_strd;
  690|       |
  691|  67.9k|        *pi2_out = CLIP_RSD((i_z2 + i_z5 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.66k, False: 63.3k]
  |  |  |  |  |  Branch (77:54): [True: 4.29k, False: 59.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  692|  67.9k|        pi2_out += out_strd;
  693|       |
  694|  67.9k|        *pi2_out = CLIP_RSD((i_z4 + i_z3 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.05k, False: 63.9k]
  |  |  |  |  |  Branch (77:54): [True: 4.53k, False: 59.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  695|  67.9k|        pi2_out += out_strd;
  696|       |
  697|  67.9k|        *pi2_out = CLIP_RSD((i_z6 + i_z1 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.05k, False: 63.9k]
  |  |  |  |  |  Branch (77:54): [True: 4.25k, False: 59.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  698|  67.9k|        pi2_out += out_strd;
  699|       |
  700|  67.9k|        *pi2_out = CLIP_RSD((i_z6 - i_z1 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.28k, False: 63.7k]
  |  |  |  |  |  Branch (77:54): [True: 4.32k, False: 59.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  701|  67.9k|        pi2_out += out_strd;
  702|       |
  703|  67.9k|        *pi2_out = CLIP_RSD((i_z4 - i_z3 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.03k, False: 63.9k]
  |  |  |  |  |  Branch (77:54): [True: 4.74k, False: 59.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  704|  67.9k|        pi2_out += out_strd;
  705|       |
  706|  67.9k|        *pi2_out = CLIP_RSD((i_z2 - i_z5 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.27k, False: 63.7k]
  |  |  |  |  |  Branch (77:54): [True: 4.76k, False: 58.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  707|  67.9k|        pi2_out += out_strd;
  708|       |
  709|  67.9k|        *pi2_out = CLIP_RSD((i_z0 - i_z7 + 32) >> 6);
  ------------------
  |  |  774|  67.9k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  67.9k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.09k, False: 62.8k]
  |  |  |  |  |  Branch (77:54): [True: 4.80k, False: 58.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  710|       |
  711|  67.9k|        pi2_tmp_ptr++;
  712|  67.9k|        pi2_out_ptr++;
  713|  67.9k|    }
  714|  8.49k|}

isvcd_iquant_itrans_residual_recon_4x4:
   86|  28.4k|{
   87|  28.4k|    WORD32 i4_nnz = 0;
   88|  28.4k|    WORD16 *pi2_src_ptr = pi2_src;
   89|  28.4k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
   90|  28.4k|    UWORD8 *pu1_pred_ptr = pu1_pred;
   91|  28.4k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
   92|  28.4k|    UWORD8 *pu1_out_ptr = pu1_out;
   93|  28.4k|    WORD16 x0, x1, x2, x3, i;
   94|  28.4k|    WORD32 q0, q1, q2, q3;
   95|  28.4k|    WORD16 i_macro;
   96|  28.4k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (96:23): [True: 17.7k, False: 10.6k]
  ------------------
   97|       |
   98|       |    /* inverse quant */
   99|       |    /*horizontal inverse transform */
  100|   142k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   142k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (100:16): [True: 113k, False: 28.4k]
  ------------------
  101|   113k|    {
  102|   113k|        q0 = pi2_src_ptr[0];
  103|   113k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   113k|                {\
  |  |  104|   113k|                    i4_value *= quant_scale;\
  |  |  105|   113k|                    i4_value *= weight_scale;\
  |  |  106|   113k|                    i4_value += rndfactor;\
  |  |  107|   113k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   113k|                    i4_value >>= qbits;\
  |  |  109|   113k|                }
  ------------------
  104|   113k|        if(i == 0 && iq_start_idx == 1)
  ------------------
  |  Branch (104:12): [True: 28.4k, False: 85.3k]
  |  Branch (104:22): [True: 0, False: 28.4k]
  ------------------
  105|      0|            q0 = pi2_dc_ld_addr[0];  // Restoring dc value for intra case
  106|       |
  107|   113k|        q2 = pi2_src_ptr[2];
  108|   113k|        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   113k|                {\
  |  |  104|   113k|                    i4_value *= quant_scale;\
  |  |  105|   113k|                    i4_value *= weight_scale;\
  |  |  106|   113k|                    i4_value += rndfactor;\
  |  |  107|   113k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   113k|                    i4_value >>= qbits;\
  |  |  109|   113k|                }
  ------------------
  109|       |
  110|   113k|        x0 = q0 + q2;
  111|   113k|        x1 = q0 - q2;
  112|       |
  113|   113k|        q1 = pi2_src_ptr[1];
  114|   113k|        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   113k|                {\
  |  |  104|   113k|                    i4_value *= quant_scale;\
  |  |  105|   113k|                    i4_value *= weight_scale;\
  |  |  106|   113k|                    i4_value += rndfactor;\
  |  |  107|   113k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   113k|                    i4_value >>= qbits;\
  |  |  109|   113k|                }
  ------------------
  115|       |
  116|   113k|        q3 = pi2_src_ptr[3];
  117|   113k|        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|   113k|                {\
  |  |  104|   113k|                    i4_value *= quant_scale;\
  |  |  105|   113k|                    i4_value *= weight_scale;\
  |  |  106|   113k|                    i4_value += rndfactor;\
  |  |  107|   113k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   113k|                    i4_value >>= qbits;\
  |  |  109|   113k|                }
  ------------------
  118|       |
  119|   113k|        x2 = (q1 >> 1) - q3;
  120|   113k|        x3 = q1 + (q3 >> 1);
  121|       |
  122|   113k|        pi2_tmp_ptr[0] = x0 + x3;
  123|   113k|        pi2_tmp_ptr[1] = x1 + x2;
  124|   113k|        pi2_tmp_ptr[2] = x1 - x2;
  125|   113k|        pi2_tmp_ptr[3] = x0 - x3;
  126|       |
  127|   113k|        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   113k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  128|   113k|        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   113k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  129|   113k|        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   113k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  130|   113k|        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|   113k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  131|   113k|    }
  132|       |
  133|       |    /* vertical inverse transform */
  134|  28.4k|    pi2_tmp_ptr = pi2_tmp;
  135|   142k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   142k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (135:16): [True: 113k, False: 28.4k]
  ------------------
  136|   113k|    {
  137|   113k|        pu1_pred_ptr = pu1_pred;
  138|   113k|        pi2_rsd_ptr = pi2_rsd;
  139|   113k|        pu1_out = pu1_out_ptr;
  140|       |
  141|   113k|        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
  142|   113k|        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
  143|   113k|        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
  144|   113k|        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
  145|       |
  146|       |        /* inverse prediction */
  147|   113k|        i_macro = x0 + x3;
  148|   113k|        i_macro = ((i_macro + 32) >> 6);
  149|   113k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   113k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 849, False: 112k]
  |  |  |  |  |  Branch (77:54): [True: 667, False: 112k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  150|   113k|        i4_nnz |= !!i_macro;
  151|   113k|        i_macro += *pu1_pred_ptr;
  152|   113k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   113k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 47.3k, False: 66.4k]
  |  |  |  |  |  Branch (77:54): [True: 1.90k, False: 64.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  153|   113k|        pu1_pred_ptr += pred_strd;
  154|   113k|        pi2_rsd_ptr += rsd_strd;
  155|   113k|        pu1_out += out_strd;
  156|       |
  157|   113k|        i_macro = x1 + x2;
  158|   113k|        i_macro = ((i_macro + 32) >> 6);
  159|   113k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   113k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 469, False: 113k]
  |  |  |  |  |  Branch (77:54): [True: 716, False: 112k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  160|   113k|        i4_nnz |= !!i_macro;
  161|   113k|        i_macro += *pu1_pred_ptr;
  162|   113k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   113k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 49.7k, False: 64.0k]
  |  |  |  |  |  Branch (77:54): [True: 1.83k, False: 62.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  163|   113k|        pu1_pred_ptr += pred_strd;
  164|   113k|        pi2_rsd_ptr += rsd_strd;
  165|   113k|        pu1_out += out_strd;
  166|       |
  167|   113k|        i_macro = x1 - x2;
  168|   113k|        i_macro = ((i_macro + 32) >> 6);
  169|   113k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   113k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 502, False: 113k]
  |  |  |  |  |  Branch (77:54): [True: 689, False: 112k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  170|   113k|        i4_nnz |= !!i_macro;
  171|   113k|        i_macro += *pu1_pred_ptr;
  172|   113k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   113k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 39.9k, False: 73.7k]
  |  |  |  |  |  Branch (77:54): [True: 2.42k, False: 71.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  173|   113k|        pu1_pred_ptr += pred_strd;
  174|   113k|        pi2_rsd_ptr += rsd_strd;
  175|   113k|        pu1_out += out_strd;
  176|       |
  177|   113k|        i_macro = x0 - x3;
  178|   113k|        i_macro = ((i_macro + 32) >> 6);
  179|   113k|        i4_nnz |= !!i_macro;
  180|   113k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   113k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 542, False: 113k]
  |  |  |  |  |  Branch (77:54): [True: 722, False: 112k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  181|   113k|        i_macro += *pu1_pred_ptr;
  182|   113k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   113k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   113k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 46.9k, False: 66.7k]
  |  |  |  |  |  Branch (77:54): [True: 2.57k, False: 64.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  183|       |
  184|   113k|        pi2_tmp_ptr++;
  185|   113k|        pu1_out_ptr++;
  186|   113k|        pi2_rsd++;
  187|   113k|        pu1_pred++;
  188|   113k|    }
  189|  28.4k|    return i4_nnz;
  190|  28.4k|}
isvcd_iquant_itrans_residual_recon_4x4_dc:
  221|  3.60k|{
  222|  3.60k|    WORD32 i4_nnz = 0;
  223|  3.60k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  224|  3.60k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  225|  3.60k|    UWORD8 *pu1_out_ptr = pu1_out;
  226|  3.60k|    WORD32 q0;
  227|  3.60k|    WORD16 x, i_macro, i;
  228|  3.60k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (228:23): [True: 812, False: 2.79k]
  ------------------
  229|  3.60k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  3.60k|#define UNUSED(x) ((void)(x))
  ------------------
  230|       |
  231|  3.60k|    if(iq_start_idx == 0)
  ------------------
  |  Branch (231:8): [True: 3.60k, False: 0]
  ------------------
  232|  3.60k|    {
  233|  3.60k|        q0 = pi2_src[0];
  234|  3.60k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  3.60k|                {\
  |  |  104|  3.60k|                    i4_value *= quant_scale;\
  |  |  105|  3.60k|                    i4_value *= weight_scale;\
  |  |  106|  3.60k|                    i4_value += rndfactor;\
  |  |  107|  3.60k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  3.60k|                    i4_value >>= qbits;\
  |  |  109|  3.60k|                }
  ------------------
  235|  3.60k|    }
  236|      0|    else
  237|      0|    {
  238|      0|        q0 = pi2_dc_ld_addr[0];  // Restoring dc value for intra case3
  239|      0|    }
  240|  3.60k|    i_macro = ((q0 + 32) >> 6);
  241|  18.0k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  18.0k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (241:16): [True: 14.4k, False: 3.60k]
  ------------------
  242|  14.4k|    {
  243|  14.4k|        pu1_pred_ptr = pu1_pred;
  244|  14.4k|        pi2_rsd_ptr = pi2_rsd;
  245|  14.4k|        pu1_out = pu1_out_ptr;
  246|       |
  247|       |        /* inverse prediction */
  248|  14.4k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.4k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 292, False: 14.1k]
  |  |  |  |  |  Branch (77:54): [True: 1.55k, False: 12.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  249|  14.4k|        i4_nnz |= !!x;
  250|  14.4k|        x += *pu1_pred_ptr;
  251|  14.4k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.4k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.44k, False: 8.96k]
  |  |  |  |  |  Branch (77:54): [True: 1.11k, False: 7.84k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  252|  14.4k|        pu1_pred_ptr += pred_strd;
  253|  14.4k|        pi2_rsd_ptr += rsd_strd;
  254|  14.4k|        pu1_out += out_strd;
  255|       |
  256|  14.4k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.4k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 289, False: 14.1k]
  |  |  |  |  |  Branch (77:54): [True: 1.55k, False: 12.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  257|  14.4k|        i4_nnz |= !!x;
  258|  14.4k|        x += *pu1_pred_ptr;
  259|  14.4k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.4k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.45k, False: 8.95k]
  |  |  |  |  |  Branch (77:54): [True: 1.14k, False: 7.80k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  260|  14.4k|        pu1_pred_ptr += pred_strd;
  261|  14.4k|        pi2_rsd_ptr += rsd_strd;
  262|  14.4k|        pu1_out += out_strd;
  263|       |
  264|  14.4k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.4k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 280, False: 14.1k]
  |  |  |  |  |  Branch (77:54): [True: 1.55k, False: 12.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  265|  14.4k|        i4_nnz |= !!x;
  266|  14.4k|        x += *pu1_pred_ptr;
  267|  14.4k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.4k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.38k, False: 9.02k]
  |  |  |  |  |  Branch (77:54): [True: 1.09k, False: 7.92k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  268|  14.4k|        pu1_pred_ptr += pred_strd;
  269|  14.4k|        pi2_rsd_ptr += rsd_strd;
  270|  14.4k|        pu1_out += out_strd;
  271|       |
  272|  14.4k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.4k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 286, False: 14.1k]
  |  |  |  |  |  Branch (77:54): [True: 1.54k, False: 12.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  273|  14.4k|        i4_nnz |= !!x;
  274|  14.4k|        x += *pu1_pred_ptr;
  275|  14.4k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.4k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.4k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 5.25k, False: 9.15k]
  |  |  |  |  |  Branch (77:54): [True: 1.05k, False: 8.09k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  276|       |
  277|  14.4k|        pu1_out_ptr++;
  278|  14.4k|        pu1_pred++;
  279|  14.4k|        pi2_rsd++;
  280|  14.4k|    }
  281|  3.60k|    return i4_nnz;
  282|  3.60k|}
isvcd_iquant_itrans_residual_recon_chroma_4x4:
  311|  8.02k|{
  312|  8.02k|    WORD16 *pi2_src_ptr = pi2_src;
  313|  8.02k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
  314|  8.02k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  315|  8.02k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  316|  8.02k|    UWORD8 *pu1_out_ptr = pu1_out;
  317|  8.02k|    WORD16 x0, x1, x2, x3, i;
  318|  8.02k|    WORD32 q0, q1, q2, q3;
  319|  8.02k|    WORD16 i_macro;
  320|  8.02k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (320:23): [True: 2.01k, False: 6.00k]
  ------------------
  321|       |
  322|       |    /* inverse quant */
  323|       |    /*horizontal inverse transform */
  324|  40.1k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  40.1k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (324:16): [True: 32.0k, False: 8.02k]
  ------------------
  325|  32.0k|    {
  326|  32.0k|        if(i == 0)
  ------------------
  |  Branch (326:12): [True: 8.02k, False: 24.0k]
  ------------------
  327|  8.02k|        {
  328|  8.02k|            q0 = pi2_dc_src[0];
  329|  8.02k|        }
  330|  24.0k|        else
  331|  24.0k|        {
  332|  24.0k|            q0 = pi2_src_ptr[0];
  333|  24.0k|            INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  24.0k|                {\
  |  |  104|  24.0k|                    i4_value *= quant_scale;\
  |  |  105|  24.0k|                    i4_value *= weight_scale;\
  |  |  106|  24.0k|                    i4_value += rndfactor;\
  |  |  107|  24.0k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  24.0k|                    i4_value >>= qbits;\
  |  |  109|  24.0k|                }
  ------------------
  334|  24.0k|        }
  335|       |
  336|  32.0k|        q2 = pi2_src_ptr[2];
  337|  32.0k|        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  32.0k|                {\
  |  |  104|  32.0k|                    i4_value *= quant_scale;\
  |  |  105|  32.0k|                    i4_value *= weight_scale;\
  |  |  106|  32.0k|                    i4_value += rndfactor;\
  |  |  107|  32.0k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  32.0k|                    i4_value >>= qbits;\
  |  |  109|  32.0k|                }
  ------------------
  338|       |
  339|  32.0k|        x0 = q0 + q2;
  340|  32.0k|        x1 = q0 - q2;
  341|       |
  342|  32.0k|        q1 = pi2_src_ptr[1];
  343|  32.0k|        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  32.0k|                {\
  |  |  104|  32.0k|                    i4_value *= quant_scale;\
  |  |  105|  32.0k|                    i4_value *= weight_scale;\
  |  |  106|  32.0k|                    i4_value += rndfactor;\
  |  |  107|  32.0k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  32.0k|                    i4_value >>= qbits;\
  |  |  109|  32.0k|                }
  ------------------
  344|       |
  345|  32.0k|        q3 = pi2_src_ptr[3];
  346|  32.0k|        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  32.0k|                {\
  |  |  104|  32.0k|                    i4_value *= quant_scale;\
  |  |  105|  32.0k|                    i4_value *= weight_scale;\
  |  |  106|  32.0k|                    i4_value += rndfactor;\
  |  |  107|  32.0k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  32.0k|                    i4_value >>= qbits;\
  |  |  109|  32.0k|                }
  ------------------
  347|       |
  348|  32.0k|        x2 = (q1 >> 1) - q3;
  349|  32.0k|        x3 = q1 + (q3 >> 1);
  350|       |
  351|  32.0k|        pi2_tmp_ptr[0] = x0 + x3;
  352|  32.0k|        pi2_tmp_ptr[1] = x1 + x2;
  353|  32.0k|        pi2_tmp_ptr[2] = x1 - x2;
  354|  32.0k|        pi2_tmp_ptr[3] = x0 - x3;
  355|       |
  356|  32.0k|        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  32.0k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  357|  32.0k|        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  32.0k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  358|  32.0k|        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  32.0k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  359|  32.0k|        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
  ------------------
  |  |   48|  32.0k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  360|  32.0k|    }
  361|       |
  362|       |    /* vertical inverse transform */
  363|  8.02k|    pi2_tmp_ptr = pi2_tmp;
  364|  40.1k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|  40.1k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (364:16): [True: 32.0k, False: 8.02k]
  ------------------
  365|  32.0k|    {
  366|  32.0k|        pu1_pred_ptr = pu1_pred;
  367|  32.0k|        pi2_rsd_ptr = pi2_rsd;
  368|  32.0k|        pu1_out = pu1_out_ptr;
  369|       |
  370|  32.0k|        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
  371|  32.0k|        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
  372|  32.0k|        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
  373|  32.0k|        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
  374|       |
  375|       |        /* inverse prediction */
  376|  32.0k|        i_macro = x0 + x3;
  377|  32.0k|        i_macro = ((i_macro + 32) >> 6);
  378|  32.0k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  32.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.53k, False: 30.5k]
  |  |  |  |  |  Branch (77:54): [True: 1.92k, False: 28.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  379|  32.0k|        i_macro += *pu1_pred_ptr;
  380|  32.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  32.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 10.3k, False: 21.7k]
  |  |  |  |  |  Branch (77:54): [True: 2.44k, False: 19.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  381|  32.0k|        pu1_pred_ptr += pred_strd;
  382|  32.0k|        pi2_rsd_ptr += rsd_strd;
  383|  32.0k|        pu1_out += out_strd;
  384|       |
  385|  32.0k|        i_macro = x1 + x2;
  386|  32.0k|        i_macro = ((i_macro + 32) >> 6);
  387|  32.0k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  32.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.93k, False: 30.1k]
  |  |  |  |  |  Branch (77:54): [True: 1.63k, False: 28.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  388|  32.0k|        i_macro += *pu1_pred_ptr;
  389|  32.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  32.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 10.8k, False: 21.1k]
  |  |  |  |  |  Branch (77:54): [True: 2.22k, False: 18.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  390|  32.0k|        pu1_pred_ptr += pred_strd;
  391|  32.0k|        pi2_rsd_ptr += rsd_strd;
  392|  32.0k|        pu1_out += out_strd;
  393|       |
  394|  32.0k|        i_macro = x1 - x2;
  395|  32.0k|        i_macro = ((i_macro + 32) >> 6);
  396|  32.0k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  32.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.52k, False: 30.5k]
  |  |  |  |  |  Branch (77:54): [True: 1.78k, False: 28.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  397|  32.0k|        i_macro += *pu1_pred_ptr;
  398|  32.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  32.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 9.22k, False: 22.8k]
  |  |  |  |  |  Branch (77:54): [True: 2.99k, False: 19.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  399|  32.0k|        pu1_pred_ptr += pred_strd;
  400|  32.0k|        pi2_rsd_ptr += rsd_strd;
  401|  32.0k|        pu1_out += out_strd;
  402|       |
  403|  32.0k|        i_macro = x0 - x3;
  404|  32.0k|        i_macro = ((i_macro + 32) >> 6);
  405|  32.0k|        i_macro = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  32.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 1.53k, False: 30.5k]
  |  |  |  |  |  Branch (77:54): [True: 2.10k, False: 28.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  406|  32.0k|        i_macro += *pu1_pred_ptr;
  407|  32.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  32.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 8.71k, False: 23.3k]
  |  |  |  |  |  Branch (77:54): [True: 3.34k, False: 20.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  408|       |
  409|  32.0k|        pi2_tmp_ptr++;
  410|  32.0k|        pu1_out_ptr += 2;  // Interleaved store for output
  411|  32.0k|        pu1_pred += 2;     // Interleaved load for pred buffer
  412|  32.0k|        pi2_rsd += 2;
  413|  32.0k|    }
  414|  8.02k|}
isvcd_iquant_itrans_residual_recon_chroma_4x4_dc:
  443|  25.9k|{
  444|  25.9k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  445|  25.9k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  446|  25.9k|    UWORD8 *pu1_out_ptr = pu1_out;
  447|  25.9k|    WORD32 q0;
  448|  25.9k|    WORD16 x, i_macro, i;
  449|  25.9k|    UNUSED(pi2_src);
  ------------------
  |  |   45|  25.9k|#define UNUSED(x) ((void)(x))
  ------------------
  450|  25.9k|    UNUSED(pu2_iscal_mat);
  ------------------
  |  |   45|  25.9k|#define UNUSED(x) ((void)(x))
  ------------------
  451|  25.9k|    UNUSED(pu2_weigh_mat);
  ------------------
  |  |   45|  25.9k|#define UNUSED(x) ((void)(x))
  ------------------
  452|  25.9k|    UNUSED(u4_qp_div_6);
  ------------------
  |  |   45|  25.9k|#define UNUSED(x) ((void)(x))
  ------------------
  453|  25.9k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  25.9k|#define UNUSED(x) ((void)(x))
  ------------------
  454|       |
  455|  25.9k|    q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
  456|  25.9k|    i_macro = ((q0 + 32) >> 6);
  457|       |
  458|   129k|    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
  ------------------
  |  |   48|   129k|#define SUB_BLK_WIDTH_4x4                   4
  ------------------
  |  Branch (458:16): [True: 103k, False: 25.9k]
  ------------------
  459|   103k|    {
  460|   103k|        pu1_pred_ptr = pu1_pred;
  461|   103k|        pi2_rsd_ptr = pi2_rsd;
  462|   103k|        pu1_out = pu1_out_ptr;
  463|       |
  464|       |        /* inverse prediction */
  465|   103k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   103k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 662, False: 103k]
  |  |  |  |  |  Branch (77:54): [True: 1.90k, False: 101k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  466|   103k|        x += *pu1_pred_ptr;
  467|   103k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|   103k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 26.9k, False: 77.0k]
  |  |  |  |  |  Branch (77:54): [True: 3.59k, False: 73.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  468|   103k|        pu1_pred_ptr += pred_strd;
  469|   103k|        pi2_rsd_ptr += rsd_strd;
  470|   103k|        pu1_out += out_strd;
  471|       |
  472|   103k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   103k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 651, False: 103k]
  |  |  |  |  |  Branch (77:54): [True: 1.90k, False: 101k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  473|   103k|        x += *pu1_pred_ptr;
  474|   103k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|   103k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 27.1k, False: 76.8k]
  |  |  |  |  |  Branch (77:54): [True: 3.58k, False: 73.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  475|   103k|        pu1_pred_ptr += pred_strd;
  476|   103k|        pi2_rsd_ptr += rsd_strd;
  477|   103k|        pu1_out += out_strd;
  478|       |
  479|   103k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   103k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 652, False: 103k]
  |  |  |  |  |  Branch (77:54): [True: 1.89k, False: 101k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  480|   103k|        x += *pu1_pred_ptr;
  481|   103k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|   103k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 27.2k, False: 76.7k]
  |  |  |  |  |  Branch (77:54): [True: 3.59k, False: 73.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  482|   103k|        pu1_pred_ptr += pred_strd;
  483|   103k|        pi2_rsd_ptr += rsd_strd;
  484|   103k|        pu1_out += out_strd;
  485|       |
  486|   103k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|   103k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 651, False: 103k]
  |  |  |  |  |  Branch (77:54): [True: 1.89k, False: 101k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  487|   103k|        x += *pu1_pred_ptr;
  488|   103k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|   103k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   103k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 27.0k, False: 76.9k]
  |  |  |  |  |  Branch (77:54): [True: 3.54k, False: 73.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  489|       |
  490|   103k|        pu1_out_ptr += 2;
  491|   103k|        pu1_pred += 2;
  492|   103k|        pi2_rsd += 2;
  493|   103k|    }
  494|  25.9k|}
isvcd_iquant_itrans_residual_recon_8x8:
  525|  10.5k|{
  526|  10.5k|    WORD32 i4_nnz = 0, i4_nnz_H = 0, i4_nnz_L = 0;
  527|  10.5k|    WORD32 i;
  528|  10.5k|    WORD16 *pi2_tmp_ptr = pi2_tmp;
  529|  10.5k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  530|  10.5k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  531|  10.5k|    UWORD8 *pu1_out_ptr = pu1_out;
  532|  10.5k|    WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
  533|  10.5k|    WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
  534|  10.5k|    WORD16 i_macro;
  535|  10.5k|    WORD32 q;
  536|  10.5k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (536:23): [True: 9.45k, False: 1.04k]
  ------------------
  537|  10.5k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  10.5k|#define UNUSED(x) ((void)(x))
  ------------------
  538|  10.5k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  10.5k|#define UNUSED(x) ((void)(x))
  ------------------
  539|       |    /*************************************************************/
  540|       |    /* De quantization of coefficients. Will be replaced by SIMD */
  541|       |    /* operations on platform. Note : DC coeff is not scaled     */
  542|       |    /*************************************************************/
  543|   682k|    for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
  ------------------
  |  |   53|   682k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
                  for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
  ------------------
  |  |   53|   682k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (543:16): [True: 672k, False: 10.5k]
  ------------------
  544|   672k|    {
  545|   672k|        q = pi2_src[i];
  546|   672k|        INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|   672k|                {\
  |  |  104|   672k|                    i4_value *= quant_scale;\
  |  |  105|   672k|                    i4_value *= weight_scale;\
  |  |  106|   672k|                    i4_value += rndfactor;\
  |  |  107|   672k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|   672k|                    i4_value >>= qbits;\
  |  |  109|   672k|                }
  ------------------
  547|   672k|        pi2_tmp_ptr[i] = q;
  548|   672k|    }
  549|       |    /* Perform Inverse transform */
  550|       |    /*--------------------------------------------------------------------*/
  551|       |    /* IDCT [ Horizontal transformation ]                                 */
  552|       |    /*--------------------------------------------------------------------*/
  553|  94.5k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|  94.5k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (553:16): [True: 84.0k, False: 10.5k]
  ------------------
  554|  84.0k|    {
  555|       |        /*------------------------------------------------------------------*/
  556|       |        /* y0 = w0 + w4                                                     */
  557|       |        /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
  558|       |        /* y2 = w0 - w4                                                     */
  559|       |        /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
  560|       |        /* y4 = (w2 >> 1) - w6                                              */
  561|       |        /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
  562|       |        /* y6 = w2 + (w6 >> 1)                                              */
  563|       |        /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
  564|       |        /*------------------------------------------------------------------*/
  565|  84.0k|        i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4]);
  566|       |
  567|  84.0k|        i_y1 =
  568|  84.0k|            ((WORD32) (-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] - (pi2_tmp_ptr[7] >> 1));
  569|       |
  570|  84.0k|        i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4]);
  571|       |
  572|  84.0k|        i_y3 = ((WORD32) pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] - (pi2_tmp_ptr[3] >> 1));
  573|       |
  574|  84.0k|        i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6]);
  575|       |
  576|  84.0k|        i_y5 =
  577|  84.0k|            ((WORD32) (-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + (pi2_tmp_ptr[5] >> 1));
  578|       |
  579|  84.0k|        i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
  580|       |
  581|  84.0k|        i_y7 = ((WORD32) pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + (pi2_tmp_ptr[1] >> 1));
  582|       |
  583|       |        /*------------------------------------------------------------------*/
  584|       |        /* z0 = y0 + y6                                                     */
  585|       |        /* z1 = y1 + (y7 >> 2)                                              */
  586|       |        /* z2 = y2 + y4                                                     */
  587|       |        /* z3 = y3 + (y5 >> 2)                                              */
  588|       |        /* z4 = y2 - y4                                                     */
  589|       |        /* z5 = (y3 >> 2) - y5                                              */
  590|       |        /* z6 = y0 - y6                                                     */
  591|       |        /* z7 = y7 - (y1 >> 2)                                              */
  592|       |        /*------------------------------------------------------------------*/
  593|  84.0k|        i_z0 = i_y0 + i_y6;
  594|  84.0k|        i_z1 = i_y1 + (i_y7 >> 2);
  595|  84.0k|        i_z2 = i_y2 + i_y4;
  596|  84.0k|        i_z3 = i_y3 + (i_y5 >> 2);
  597|  84.0k|        i_z4 = i_y2 - i_y4;
  598|  84.0k|        i_z5 = (i_y3 >> 2) - i_y5;
  599|  84.0k|        i_z6 = i_y0 - i_y6;
  600|  84.0k|        i_z7 = i_y7 - (i_y1 >> 2);
  601|       |
  602|       |        /*------------------------------------------------------------------*/
  603|       |        /* x0 = z0 + z7                                                     */
  604|       |        /* x1 = z2 + z5                                                     */
  605|       |        /* x2 = z4 + z3                                                     */
  606|       |        /* x3 = z6 + z1                                                     */
  607|       |        /* x4 = z6 - z1                                                     */
  608|       |        /* x5 = z4 - z3                                                     */
  609|       |        /* x6 = z2 - z5                                                     */
  610|       |        /* x7 = z0 - z7                                                     */
  611|       |        /*------------------------------------------------------------------*/
  612|  84.0k|        pi2_tmp_ptr[0] = i_z0 + i_z7;
  613|  84.0k|        pi2_tmp_ptr[1] = i_z2 + i_z5;
  614|  84.0k|        pi2_tmp_ptr[2] = i_z4 + i_z3;
  615|  84.0k|        pi2_tmp_ptr[3] = i_z6 + i_z1;
  616|  84.0k|        pi2_tmp_ptr[4] = i_z6 - i_z1;
  617|  84.0k|        pi2_tmp_ptr[5] = i_z4 - i_z3;
  618|  84.0k|        pi2_tmp_ptr[6] = i_z2 - i_z5;
  619|  84.0k|        pi2_tmp_ptr[7] = i_z0 - i_z7;
  620|       |
  621|       |        /* move to the next row */
  622|  84.0k|        pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
  ------------------
  |  |   53|  84.0k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  623|  84.0k|    }
  624|       |    /*--------------------------------------------------------------------*/
  625|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  626|       |    /*                                                                    */
  627|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  628|       |    /* [Prediction buffer itself in this case]                            */
  629|       |    /*--------------------------------------------------------------------*/
  630|       |
  631|  10.5k|    pi2_tmp_ptr = pi2_tmp;
  632|  94.5k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|  94.5k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (632:16): [True: 84.0k, False: 10.5k]
  ------------------
  633|  84.0k|    {
  634|  84.0k|        pu1_pred_ptr = pu1_pred;
  635|  84.0k|        pi2_rsd_ptr = pi2_rsd;
  636|  84.0k|        pu1_out = pu1_out_ptr;
  637|       |        /*------------------------------------------------------------------*/
  638|       |        /* y0j = w0j + w4j                                                  */
  639|       |        /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
  640|       |        /* y2j = w0j -w4j                                                   */
  641|       |        /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
  642|       |        /* y4j = ( w2j >> 1 ) -w6j                                          */
  643|       |        /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
  644|       |        /* y6j = w2j + ( w6j >> 1 )                                         */
  645|       |        /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
  646|       |        /*------------------------------------------------------------------*/
  647|  84.0k|        i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
  648|       |
  649|  84.0k|        i_y1 = (WORD32) (-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] -
  650|  84.0k|               (pi2_tmp_ptr[56] >> 1);
  651|       |
  652|  84.0k|        i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
  653|       |
  654|  84.0k|        i_y3 = (WORD32) pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] - (pi2_tmp_ptr[24] >> 1);
  655|       |
  656|  84.0k|        i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
  657|       |
  658|  84.0k|        i_y5 =
  659|  84.0k|            (WORD32) (-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + (pi2_tmp_ptr[40] >> 1);
  660|       |
  661|  84.0k|        i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
  662|       |
  663|  84.0k|        i_y7 = (WORD32) pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + (pi2_tmp_ptr[8] >> 1);
  664|       |
  665|       |        /*------------------------------------------------------------------*/
  666|       |        /* z0j = y0j + y6j                                                  */
  667|       |        /* z1j = y1j + (y7j >> 2)                                           */
  668|       |        /* z2j = y2j + y4j                                                  */
  669|       |        /* z3j = y3j + (y5j >> 2)                                           */
  670|       |        /* z4j = y2j -y4j                                                   */
  671|       |        /* z5j = (y3j >> 2) -y5j                                            */
  672|       |        /* z6j = y0j -y6j                                                   */
  673|       |        /* z7j = y7j -(y1j >> 2)                                            */
  674|       |        /*------------------------------------------------------------------*/
  675|  84.0k|        i_z0 = i_y0 + i_y6;
  676|  84.0k|        i_z1 = i_y1 + (i_y7 >> 2);
  677|  84.0k|        i_z2 = i_y2 + i_y4;
  678|  84.0k|        i_z3 = i_y3 + (i_y5 >> 2);
  679|  84.0k|        i_z4 = i_y2 - i_y4;
  680|  84.0k|        i_z5 = (i_y3 >> 2) - i_y5;
  681|  84.0k|        i_z6 = i_y0 - i_y6;
  682|  84.0k|        i_z7 = i_y7 - (i_y1 >> 2);
  683|       |
  684|       |        /*------------------------------------------------------------------*/
  685|       |        /* x0j = z0j + z7j                                                  */
  686|       |        /* x1j = z2j + z5j                                                  */
  687|       |        /* x2j = z4j + z3j                                                  */
  688|       |        /* x3j = z6j + z1j                                                  */
  689|       |        /* x4j = z6j -z1j                                                   */
  690|       |        /* x5j = z4j -z3j                                                   */
  691|       |        /* x6j = z2j -z5j                                                   */
  692|       |        /* x7j = z0j -z7j                                                   */
  693|       |        /*------------------------------------------------------------------*/
  694|  84.0k|        i_macro = CLIP_RSD(((i_z0 + i_z7 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 897, False: 83.1k]
  |  |  |  |  |  Branch (77:54): [True: 1.09k, False: 82.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  695|  84.0k|        i4_nnz_H |= !!i_macro;
  696|  84.0k|        i_macro += *pu1_pred_ptr;
  697|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 19.1k, False: 64.8k]
  |  |  |  |  |  Branch (77:54): [True: 2.21k, False: 62.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  698|       |        /* Change uc_recBuffer to Point to next element in the same column*/
  699|  84.0k|        pu1_pred_ptr += pred_strd;
  700|  84.0k|        pi2_rsd_ptr += rsd_strd;
  701|  84.0k|        pu1_out += out_strd;
  702|       |
  703|  84.0k|        i_macro = CLIP_RSD(((i_z2 + i_z5 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 487, False: 83.5k]
  |  |  |  |  |  Branch (77:54): [True: 513, False: 83.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  704|  84.0k|        i4_nnz_H |= !!i_macro;
  705|  84.0k|        i_macro += *pu1_pred_ptr;
  706|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 21.7k, False: 62.2k]
  |  |  |  |  |  Branch (77:54): [True: 1.78k, False: 60.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  707|  84.0k|        pu1_pred_ptr += pred_strd;
  708|  84.0k|        pi2_rsd_ptr += rsd_strd;
  709|  84.0k|        pu1_out += out_strd;
  710|       |
  711|  84.0k|        i_macro = CLIP_RSD(((i_z4 + i_z3 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 504, False: 83.5k]
  |  |  |  |  |  Branch (77:54): [True: 369, False: 83.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  712|  84.0k|        i4_nnz_H |= !!i_macro;
  713|  84.0k|        i_macro += *pu1_pred_ptr;
  714|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 22.9k, False: 61.0k]
  |  |  |  |  |  Branch (77:54): [True: 2.62k, False: 58.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  715|  84.0k|        pu1_pred_ptr += pred_strd;
  716|  84.0k|        pi2_rsd_ptr += rsd_strd;
  717|  84.0k|        pu1_out += out_strd;
  718|       |
  719|  84.0k|        i_macro = CLIP_RSD(((i_z6 + i_z1 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 597, False: 83.4k]
  |  |  |  |  |  Branch (77:54): [True: 903, False: 82.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  720|  84.0k|        i4_nnz_H |= !!i_macro;
  721|  84.0k|        i_macro += *pu1_pred_ptr;
  722|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 27.4k, False: 56.6k]
  |  |  |  |  |  Branch (77:54): [True: 1.48k, False: 55.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  723|  84.0k|        pu1_pred_ptr += pred_strd;
  724|  84.0k|        pi2_rsd_ptr += rsd_strd;
  725|  84.0k|        pu1_out += out_strd;
  726|       |
  727|  84.0k|        i_macro = CLIP_RSD(((i_z6 - i_z1 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 594, False: 83.4k]
  |  |  |  |  |  Branch (77:54): [True: 904, False: 82.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  728|  84.0k|        i4_nnz_L |= !!i_macro;
  729|  84.0k|        i_macro += *pu1_pred_ptr;
  730|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 28.1k, False: 55.8k]
  |  |  |  |  |  Branch (77:54): [True: 1.55k, False: 54.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  731|  84.0k|        pu1_pred_ptr += pred_strd;
  732|  84.0k|        pi2_rsd_ptr += rsd_strd;
  733|  84.0k|        pu1_out += out_strd;
  734|       |
  735|  84.0k|        i_macro = CLIP_RSD(((i_z4 - i_z3 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 499, False: 83.5k]
  |  |  |  |  |  Branch (77:54): [True: 474, False: 83.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  736|  84.0k|        i4_nnz_L |= !!i_macro;
  737|  84.0k|        i_macro += *pu1_pred_ptr;
  738|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 22.3k, False: 61.6k]
  |  |  |  |  |  Branch (77:54): [True: 2.74k, False: 58.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  739|  84.0k|        pu1_pred_ptr += pred_strd;
  740|  84.0k|        pi2_rsd_ptr += rsd_strd;
  741|  84.0k|        pu1_out += out_strd;
  742|       |
  743|  84.0k|        i_macro = CLIP_RSD(((i_z2 - i_z5 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 400, False: 83.6k]
  |  |  |  |  |  Branch (77:54): [True: 587, False: 83.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  744|  84.0k|        i4_nnz_L |= !!i_macro;
  745|  84.0k|        i_macro += *pu1_pred_ptr;
  746|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 20.9k, False: 63.0k]
  |  |  |  |  |  Branch (77:54): [True: 2.26k, False: 60.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  747|  84.0k|        pu1_pred_ptr += pred_strd;
  748|  84.0k|        pi2_rsd_ptr += rsd_strd;
  749|  84.0k|        pu1_out += out_strd;
  750|       |
  751|  84.0k|        i_macro = CLIP_RSD(((i_z0 - i_z7 + 32) >> 6) + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  84.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 815, False: 83.2k]
  |  |  |  |  |  Branch (77:54): [True: 1.03k, False: 82.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  752|  84.0k|        i4_nnz_L |= !!i_macro;
  753|  84.0k|        i_macro += *pu1_pred_ptr;
  754|  84.0k|        *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  84.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  84.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 19.1k, False: 64.8k]
  |  |  |  |  |  Branch (77:54): [True: 2.44k, False: 62.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  755|       |
  756|  84.0k|        pi2_tmp_ptr++;
  757|  84.0k|        pu1_out_ptr++;
  758|  84.0k|        pi2_rsd++;
  759|  84.0k|        pu1_pred++;
  760|  84.0k|        if(i == 3)
  ------------------
  |  Branch (760:12): [True: 10.5k, False: 73.5k]
  ------------------
  761|  10.5k|        {
  762|  10.5k|            i4_nnz = i4_nnz_H | (i4_nnz_L << 4);
  763|  10.5k|            i4_nnz_L = 0;
  764|  10.5k|            i4_nnz_H = 0;
  765|  10.5k|        }
  766|  84.0k|    }
  767|  10.5k|    i4_nnz |= (i4_nnz_H << 1) | (i4_nnz_L << 5);
  768|  10.5k|    return i4_nnz;
  769|  10.5k|}
isvcd_iquant_itrans_residual_recon_8x8_dc:
  800|  1.75k|{
  801|  1.75k|    WORD32 i4_nnz = 0, i4_nnz_H = 0, i4_nnz_L = 0;
  802|  1.75k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  803|  1.75k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  804|  1.75k|    UWORD8 *pu1_out_ptr = pu1_out;
  805|  1.75k|    WORD16 x, i, i_macro;
  806|  1.75k|    WORD32 q;
  807|  1.75k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (807:23): [True: 1.08k, False: 670]
  ------------------
  808|  1.75k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  1.75k|#define UNUSED(x) ((void)(x))
  ------------------
  809|  1.75k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  1.75k|#define UNUSED(x) ((void)(x))
  ------------------
  810|  1.75k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  1.75k|#define UNUSED(x) ((void)(x))
  ------------------
  811|       |    /*************************************************************/
  812|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  813|       |    /* operations on platform. Note : DC coeff is not scaled     */
  814|       |    /*************************************************************/
  815|  1.75k|    q = pi2_src[0];
  816|  1.75k|    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|  1.75k|                {\
  |  |  104|  1.75k|                    i4_value *= quant_scale;\
  |  |  105|  1.75k|                    i4_value *= weight_scale;\
  |  |  106|  1.75k|                    i4_value += rndfactor;\
  |  |  107|  1.75k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  1.75k|                    i4_value >>= qbits;\
  |  |  109|  1.75k|                }
  ------------------
  817|  1.75k|    i_macro = (q + 32) >> 6;
  818|       |    /* Perform Inverse transform */
  819|       |    /*--------------------------------------------------------------------*/
  820|       |    /* IDCT [ Horizontal transformation ]                                 */
  821|       |    /*--------------------------------------------------------------------*/
  822|       |    /*--------------------------------------------------------------------*/
  823|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  824|       |    /*                                                                    */
  825|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  826|       |    /* [Prediction buffer itself in this case]                            */
  827|       |    /*--------------------------------------------------------------------*/
  828|  15.8k|    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
  ------------------
  |  |   53|  15.8k|#define SUB_BLK_WIDTH_8x8                   8
  ------------------
  |  Branch (828:16): [True: 14.0k, False: 1.75k]
  ------------------
  829|  14.0k|    {
  830|  14.0k|        pu1_pred_ptr = pu1_pred;
  831|  14.0k|        pi2_rsd_ptr = pi2_rsd;
  832|  14.0k|        pu1_out = pu1_out_ptr;
  833|       |
  834|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 339, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 423, False: 13.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  835|  14.0k|        i4_nnz_H |= !!x;
  836|  14.0k|        x += *pu1_pred_ptr;
  837|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.67k, False: 10.3k]
  |  |  |  |  |  Branch (77:54): [True: 727, False: 9.66k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  838|       |        /* Change uc_recBuffer to Point to next element in the same column*/
  839|  14.0k|        pu1_pred_ptr += pred_strd;
  840|  14.0k|        pi2_rsd_ptr += rsd_strd;
  841|  14.0k|        pu1_out += out_strd;
  842|       |
  843|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 323, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 403, False: 13.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  844|  14.0k|        i4_nnz_H |= !!x;
  845|  14.0k|        x += *pu1_pred_ptr;
  846|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.64k, False: 10.4k]
  |  |  |  |  |  Branch (77:54): [True: 830, False: 9.59k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  847|  14.0k|        pu1_pred_ptr += pred_strd;
  848|  14.0k|        pi2_rsd_ptr += rsd_strd;
  849|  14.0k|        pu1_out += out_strd;
  850|       |
  851|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 327, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 423, False: 13.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  852|  14.0k|        i4_nnz_H |= !!x;
  853|  14.0k|        x += *pu1_pred_ptr;
  854|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.63k, False: 10.4k]
  |  |  |  |  |  Branch (77:54): [True: 929, False: 9.50k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  855|  14.0k|        pu1_pred_ptr += pred_strd;
  856|  14.0k|        pi2_rsd_ptr += rsd_strd;
  857|  14.0k|        pu1_out += out_strd;
  858|       |
  859|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 326, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 456, False: 13.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  860|  14.0k|        i4_nnz_H |= !!x;
  861|  14.0k|        x += *pu1_pred_ptr;
  862|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.65k, False: 10.4k]
  |  |  |  |  |  Branch (77:54): [True: 909, False: 9.49k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  863|  14.0k|        pu1_pred_ptr += pred_strd;
  864|  14.0k|        pi2_rsd_ptr += rsd_strd;
  865|  14.0k|        pu1_out += out_strd;
  866|       |
  867|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 334, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 416, False: 13.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  868|  14.0k|        i4_nnz_L |= !!x;
  869|  14.0k|        x += *pu1_pred_ptr;
  870|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.66k, False: 10.4k]
  |  |  |  |  |  Branch (77:54): [True: 781, False: 9.62k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  871|  14.0k|        pu1_pred_ptr += pred_strd;
  872|  14.0k|        pi2_rsd_ptr += rsd_strd;
  873|  14.0k|        pu1_out += out_strd;
  874|       |
  875|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 357, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 427, False: 13.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  876|  14.0k|        i4_nnz_L |= !!x;
  877|  14.0k|        x += *pu1_pred_ptr;
  878|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.77k, False: 10.2k]
  |  |  |  |  |  Branch (77:54): [True: 659, False: 9.62k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  879|  14.0k|        pu1_pred_ptr += pred_strd;
  880|  14.0k|        pi2_rsd_ptr += rsd_strd;
  881|  14.0k|        pu1_out += out_strd;
  882|       |
  883|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 346, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 455, False: 13.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  884|  14.0k|        i4_nnz_L |= !!x;
  885|  14.0k|        x += *pu1_pred_ptr;
  886|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.75k, False: 10.3k]
  |  |  |  |  |  Branch (77:54): [True: 723, False: 9.58k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  887|  14.0k|        pu1_pred_ptr += pred_strd;
  888|  14.0k|        pi2_rsd_ptr += rsd_strd;
  889|  14.0k|        pu1_out += out_strd;
  890|       |
  891|  14.0k|        x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
  ------------------
  |  |  774|  14.0k|#define CLIP_RSD(x) CLIP3(RSD_MIN, RSD_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 334, False: 13.7k]
  |  |  |  |  |  Branch (77:54): [True: 381, False: 13.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  892|  14.0k|        i4_nnz_L |= !!x;
  893|  14.0k|        x += *pu1_pred_ptr;
  894|  14.0k|        *pu1_out = CLIP_U8(x);
  ------------------
  |  |   58|  14.0k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  14.0k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 3.67k, False: 10.3k]
  |  |  |  |  |  Branch (77:54): [True: 715, False: 9.67k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  895|       |
  896|  14.0k|        pu1_out_ptr++;
  897|  14.0k|        pu1_pred++;
  898|  14.0k|        pi2_rsd++;
  899|  14.0k|        if(i == 3)
  ------------------
  |  Branch (899:12): [True: 1.75k, False: 12.3k]
  ------------------
  900|  1.75k|        {
  901|  1.75k|            i4_nnz = i4_nnz_H | (i4_nnz_L << 4);
  902|  1.75k|            i4_nnz_L = 0;
  903|  1.75k|            i4_nnz_H = 0;
  904|  1.75k|        }
  905|  14.0k|    }
  906|  1.75k|    i4_nnz |= (i4_nnz_H << 1) | (i4_nnz_L << 5);
  907|  1.75k|    return i4_nnz;
  908|  1.75k|}

isvcd_get_mb_info_cabac_nonmbaff:
   91|  1.84M|{
   92|  1.84M|    WORD32 mb_x;
   93|  1.84M|    WORD32 mb_y;
   94|  1.84M|    UWORD32 u1_mb_ngbr_avail = 0;
   95|  1.84M|    UWORD32 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs;
   96|  1.84M|    UWORD32 u1_top_mb = 1;
   97|  1.84M|    WORD32 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx;
   98|  1.84M|    UWORD32 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE;
  ------------------
  |  |   60|  1.84M|#define TOP_RIGHT_DEFAULT_AVAILABLE            0x5750
  ------------------
   99|  1.84M|    UWORD32 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE;
  ------------------
  |  |   64|  1.84M|#define TOP_LEFT_DEFAULT_AVAILABLE            0xEEE0
  ------------------
  100|  1.84M|    ctxt_inc_mb_info_t *const p_ctx_inc_mb_map = ps_dec->p_ctxt_inc_mb_map;
  101|       |
  102|       |    /*--------------------------------------------------------------------*/
  103|       |    /* Calculate values of mb_x and mb_y                                  */
  104|       |    /*--------------------------------------------------------------------*/
  105|  1.84M|    mb_x = (WORD16) ps_dec->u2_mbx;
  106|  1.84M|    mb_y = (WORD16) ps_dec->u2_mby;
  107|  1.84M|    ps_dec->u4_cur_mb_addr = u2_cur_mb_address;
  108|       |
  109|  1.84M|    mb_x++;
  110|  1.84M|    if((UWORD32) mb_x == u2_frm_width_in_mb)
  ------------------
  |  Branch (110:8): [True: 137k, False: 1.70M]
  ------------------
  111|   137k|    {
  112|   137k|        mb_x = 0;
  113|   137k|        mb_y++;
  114|   137k|        if(mb_y >= ps_dec->u2_frm_ht_in_mbs)
  ------------------
  |  Branch (114:12): [True: 0, False: 137k]
  ------------------
  115|      0|        {
  116|      0|            mb_y = ps_dec->u2_frm_ht_in_mbs - 1;
  117|      0|        }
  118|   137k|    }
  119|       |    /*********************************************************************/
  120|       |    /* Cabac Context Initialisations                                     */
  121|       |    /*********************************************************************/
  122|  1.84M|    ps_dec->ps_curr_ctxt_mb_info = p_ctx_inc_mb_map + mb_x;
  123|  1.84M|    ps_dec->p_left_ctxt_mb_info = p_ctx_inc_mb_map - 1;
  124|  1.84M|    ps_dec->p_top_ctxt_mb_info = p_ctx_inc_mb_map - 1;
  125|       |
  126|       |    /********************************************************************/
  127|       |    /* neighbour availablility                                          */
  128|       |    /********************************************************************/
  129|  1.84M|    if(mb_y > ps_dec->i2_prev_slice_mby)
  ------------------
  |  Branch (129:8): [True: 1.66M, False: 177k]
  ------------------
  130|  1.66M|    {
  131|       |        /* if not in the immemdiate row of prev slice end then top will be available */
  132|  1.66M|        if(mb_y > (ps_dec->i2_prev_slice_mby + 1)) i2_prev_slice_mbx = -1;
  ------------------
  |  Branch (132:12): [True: 1.50M, False: 163k]
  ------------------
  133|       |
  134|  1.66M|        if(mb_x > i2_prev_slice_mbx)
  ------------------
  |  Branch (134:12): [True: 1.65M, False: 9.57k]
  ------------------
  135|  1.65M|        {
  136|  1.65M|            u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
  ------------------
  |  |   55|  1.65M|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  137|  1.65M|            u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
  ------------------
  |  |   62|  1.65M|#define TOP_RIGHT_TOP_AVAILABLE                0x0007
  ------------------
  138|  1.65M|            u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
  ------------------
  |  |   66|  1.65M|#define TOP_LEFT_TOP_AVAILABLE                0x000E
  ------------------
  139|  1.65M|            ps_dec->p_top_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info;
  140|  1.65M|        }
  141|  1.66M|        if((mb_x > (i2_prev_slice_mbx - 1)) && ((UWORD32) mb_x != (u2_frm_width_in_mb - 1)))
  ------------------
  |  Branch (141:12): [True: 1.65M, False: 7.33k]
  |  Branch (141:48): [True: 1.53M, False: 129k]
  ------------------
  142|  1.53M|        {
  143|  1.53M|            u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK;
  ------------------
  |  |   56|  1.53M|#define TOP_RIGHT_MB_AVAILABLE_MASK 0x08
  ------------------
  144|  1.53M|            u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE;
  ------------------
  |  |   61|  1.53M|#define TOP_RIGHT_TOPR_AVAILABLE               0x0008
  ------------------
  145|  1.53M|        }
  146|       |
  147|  1.66M|        if(mb_x > (i2_prev_slice_mbx + 1))
  ------------------
  |  Branch (147:12): [True: 1.52M, False: 146k]
  ------------------
  148|  1.52M|        {
  149|  1.52M|            u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
  ------------------
  |  |   54|  1.52M|#define TOP_LEFT_MB_AVAILABLE_MASK  0x02
  ------------------
  150|  1.52M|            u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
  ------------------
  |  |   65|  1.52M|#define TOP_LEFT_TOPL_AVAILABLE               0x0001
  ------------------
  151|  1.52M|        }
  152|       |        /* Next row */
  153|  1.66M|        i2_prev_slice_mbx = -1;
  154|  1.66M|    }
  155|       |    /* Same row */
  156|  1.84M|    if(mb_x > (i2_prev_slice_mbx + 1))
  ------------------
  |  Branch (156:8): [True: 1.68M, False: 163k]
  ------------------
  157|  1.68M|    {
  158|  1.68M|        u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK;
  ------------------
  |  |   53|  1.68M|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  159|  1.68M|        u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
  ------------------
  |  |   67|  1.68M|#define TOP_LEFT_LEFT_AVAILABLE               0x1110
  ------------------
  160|  1.68M|        ps_dec->p_left_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info - 1;
  161|  1.68M|    }
  162|  1.84M|    {
  163|  1.84M|        mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row;
  164|  1.84M|        mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row;
  165|       |        /* copy the parameters of topleft Mb */
  166|  1.84M|        ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype;
  167|       |        /* Neighbour pointer assignments*/
  168|  1.84M|        ps_cur_mb_info->ps_curmb = ps_cur_mb_row + mb_x;
  169|  1.84M|        ps_cur_mb_info->ps_left_mb = ps_cur_mb_row + mb_x - 1;
  170|  1.84M|        ps_cur_mb_info->ps_top_mb = ps_top_mb_row + mb_x;
  171|  1.84M|        ps_cur_mb_info->ps_top_right_mb = ps_top_mb_row + mb_x + 1;
  172|       |
  173|       |        /* Update the parameters of topleftmb*/
  174|  1.84M|        ps_dec->u1_topleft_mbtype = ps_cur_mb_info->ps_top_mb->u1_mb_type;
  175|  1.84M|    }
  176|       |
  177|  1.84M|    ps_dec->u2_mby = mb_y;
  178|  1.84M|    ps_dec->u2_mbx = mb_x;
  179|  1.84M|    ps_cur_mb_info->u2_mbx = mb_x;
  180|  1.84M|    ps_cur_mb_info->u2_mby = mb_y;
  181|  1.84M|    ps_cur_mb_info->u1_topmb = u1_top_mb;
  182|  1.84M|    ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
  ------------------
  |  |  562|  1.84M|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  1.84M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  1.84M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
  183|  1.84M|    ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
  184|  1.84M|    ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
  185|  1.84M|    ps_cur_mb_info->ps_curmb->u1_mb_fld = ps_dec->u1_cur_mb_fld_dec_flag;
  186|  1.84M|    ps_cur_mb_info->u1_mb_field_decodingflag = ps_dec->u1_cur_mb_fld_dec_flag;
  187|  1.84M|    ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask;
  188|  1.84M|    ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask;
  189|       |
  190|       |    /*********************************************************************/
  191|       |    /*                  Assign the neigbours                             */
  192|       |    /*********************************************************************/
  193|  1.84M|    if(u4_mbskip)
  ------------------
  |  Branch (193:8): [True: 1.75M, False: 92.1k]
  ------------------
  194|  1.75M|    {
  195|  1.75M|        UWORD8 u1_a, u1_b;
  196|  1.75M|        UWORD32 u4_ctx_inc;
  197|       |
  198|  1.75M|        u1_a = (ps_dec->p_top_ctxt_mb_info->u1_mb_type != CAB_INFERRED)
  ------------------
  |  |   74|  1.75M|#define CAB_INFERRED 0xFF
  ------------------
  |  Branch (198:16): [True: 1.73M, False: 19.8k]
  ------------------
  199|  1.75M|                   ? (!!(ps_dec->p_top_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK))
  ------------------
  |  |  401|  1.73M|#define CAB_SKIP_MASK     0x10 /* 0001 0000 */
  ------------------
  200|  1.75M|                   : 0;
  201|  1.75M|        u1_b = (ps_dec->p_left_ctxt_mb_info->u1_mb_type != CAB_INFERRED)
  ------------------
  |  |   74|  1.75M|#define CAB_INFERRED 0xFF
  ------------------
  |  Branch (201:16): [True: 1.73M, False: 17.2k]
  ------------------
  202|  1.75M|                   ? (!!(ps_dec->p_left_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK))
  ------------------
  |  |  401|  1.73M|#define CAB_SKIP_MASK     0x10 /* 0001 0000 */
  ------------------
  203|  1.75M|                   : 0;
  204|  1.75M|        u4_ctx_inc = 2 - (u1_a + u1_b);
  205|       |
  206|  1.75M|        u4_mbskip = ih264d_decode_bin(u4_ctx_inc, ps_dec->p_mb_skip_flag_t, ps_dec->ps_bitstrm,
  207|  1.75M|                                      &ps_dec->s_cab_dec_env);
  208|       |
  209|  1.75M|        if(!u4_mbskip)
  ------------------
  |  Branch (209:12): [True: 283k, False: 1.46M]
  ------------------
  210|   283k|        {
  211|   283k|            if(!(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK))
  ------------------
  |  |   53|   283k|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  |  Branch (211:16): [True: 57.9k, False: 225k]
  ------------------
  212|  57.9k|            {
  213|  57.9k|                UWORD32 *pu4_buf;
  214|  57.9k|                UWORD8 *pu1_buf;
  215|       |
  216|  57.9k|                pu1_buf = ps_dec->pu1_left_nnz_y;
  217|  57.9k|                pu4_buf = (UWORD32 *) pu1_buf;
  218|  57.9k|                *pu4_buf = 0;
  219|  57.9k|                pu1_buf = ps_dec->pu1_left_nnz_uv;
  220|  57.9k|                pu4_buf = (UWORD32 *) pu1_buf;
  221|  57.9k|                *pu4_buf = 0;
  222|       |
  223|  57.9k|                *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
  224|  57.9k|                MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|  57.9k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  57.9k|{                                                               \
  |  |  654|  57.9k|    memset(pu4_start,value,16);                                 \
  |  |  655|  57.9k|}
  ------------------
  225|  57.9k|                *(UWORD32 *) ps_dec->pi1_left_ref_idx_ctxt_inc = 0;
  226|  57.9k|            }
  227|   283k|            if(!(u1_mb_ngbr_avail & TOP_MB_AVAILABLE_MASK))
  ------------------
  |  |   55|   283k|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  |  Branch (227:16): [True: 36.4k, False: 247k]
  ------------------
  228|  36.4k|            {
  229|  36.4k|                MEMSET_16BYTES(ps_dec->ps_curr_ctxt_mb_info->u1_mv, 0);
  ------------------
  |  |  652|  36.4k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  36.4k|{                                                               \
  |  |  654|  36.4k|    memset(pu4_start,value,16);                                 \
  |  |  655|  36.4k|}
  ------------------
  230|  36.4k|                memset(ps_dec->ps_curr_ctxt_mb_info->i1_ref_idx, 0, 4);
  231|  36.4k|            }
  232|   283k|        }
  233|  1.75M|    }
  234|  1.84M|    return (u4_mbskip);
  235|  1.84M|}
isvcd_get_mb_info_cavlc_nonmbaff:
  271|  11.8M|{
  272|  11.8M|    WORD32 mb_x;
  273|  11.8M|    WORD32 mb_y;
  274|  11.8M|    UWORD8 u1_mb_ngbr_avail = 0;
  275|  11.8M|    UWORD16 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs;
  276|  11.8M|    WORD16 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx;
  277|  11.8M|    UWORD16 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE;
  ------------------
  |  |   60|  11.8M|#define TOP_RIGHT_DEFAULT_AVAILABLE            0x5750
  ------------------
  278|  11.8M|    UWORD16 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE;
  ------------------
  |  |   64|  11.8M|#define TOP_LEFT_DEFAULT_AVAILABLE            0xEEE0
  ------------------
  279|  11.8M|    UNUSED(u4_mbskip_run);
  ------------------
  |  |   45|  11.8M|#define UNUSED(x) ((void)(x))
  ------------------
  280|       |    /*--------------------------------------------------------------------*/
  281|       |    /* Calculate values of mb_x and mb_y                                  */
  282|       |    /*--------------------------------------------------------------------*/
  283|  11.8M|    mb_x = (WORD16) ps_dec->u2_mbx;
  284|  11.8M|    mb_y = (WORD16) ps_dec->u2_mby;
  285|       |
  286|  11.8M|    ps_dec->u4_cur_mb_addr = u2_cur_mb_address;
  287|       |
  288|  11.8M|    mb_x++;
  289|       |
  290|  11.8M|    if(mb_x == u2_frm_width_in_mb)
  ------------------
  |  Branch (290:8): [True: 1.98M, False: 9.87M]
  ------------------
  291|  1.98M|    {
  292|  1.98M|        mb_x = 0;
  293|  1.98M|        mb_y++;
  294|  1.98M|        if(mb_y >= ps_dec->u2_frm_ht_in_mbs)
  ------------------
  |  Branch (294:12): [True: 0, False: 1.98M]
  ------------------
  295|      0|        {
  296|      0|            mb_y = ps_dec->u2_frm_ht_in_mbs - 1;
  297|      0|        }
  298|  1.98M|    }
  299|  11.8M|    if(mb_y > ps_dec->i2_prev_slice_mby)
  ------------------
  |  Branch (299:8): [True: 11.1M, False: 713k]
  ------------------
  300|  11.1M|    {
  301|       |        /* if not in the immemdiate row of prev slice end then top
  302|       |         will be available */
  303|  11.1M|        if(mb_y > (ps_dec->i2_prev_slice_mby + 1)) i2_prev_slice_mbx = -1;
  ------------------
  |  Branch (303:12): [True: 10.3M, False: 743k]
  ------------------
  304|       |
  305|  11.1M|        if(mb_x > i2_prev_slice_mbx)
  ------------------
  |  Branch (305:12): [True: 11.0M, False: 109k]
  ------------------
  306|  11.0M|        {
  307|  11.0M|            u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
  ------------------
  |  |   55|  11.0M|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  308|  11.0M|            u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
  ------------------
  |  |   62|  11.0M|#define TOP_RIGHT_TOP_AVAILABLE                0x0007
  ------------------
  309|  11.0M|            u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
  ------------------
  |  |   66|  11.0M|#define TOP_LEFT_TOP_AVAILABLE                0x000E
  ------------------
  310|  11.0M|        }
  311|       |
  312|  11.1M|        if((mb_x > (i2_prev_slice_mbx - 1)) && (mb_x != (u2_frm_width_in_mb - 1)))
  ------------------
  |  Branch (312:12): [True: 11.0M, False: 68.4k]
  |  Branch (312:48): [True: 9.10M, False: 1.96M]
  ------------------
  313|  9.10M|        {
  314|  9.10M|            u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK;
  ------------------
  |  |   56|  9.10M|#define TOP_RIGHT_MB_AVAILABLE_MASK 0x08
  ------------------
  315|  9.10M|            u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE;
  ------------------
  |  |   61|  9.10M|#define TOP_RIGHT_TOPR_AVAILABLE               0x0008
  ------------------
  316|  9.10M|        }
  317|       |
  318|  11.1M|        if(mb_x > (i2_prev_slice_mbx + 1))
  ------------------
  |  Branch (318:12): [True: 9.06M, False: 2.08M]
  ------------------
  319|  9.06M|        {
  320|  9.06M|            u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
  ------------------
  |  |   54|  9.06M|#define TOP_LEFT_MB_AVAILABLE_MASK  0x02
  ------------------
  321|  9.06M|            u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
  ------------------
  |  |   65|  9.06M|#define TOP_LEFT_TOPL_AVAILABLE               0x0001
  ------------------
  322|  9.06M|        }
  323|       |
  324|       |        /* Next row  Left will be available*/
  325|  11.1M|        i2_prev_slice_mbx = -1;
  326|  11.1M|    }
  327|       |
  328|       |    /* Same row */
  329|  11.8M|    if(mb_x > (i2_prev_slice_mbx + 1))
  ------------------
  |  Branch (329:8): [True: 9.70M, False: 2.14M]
  ------------------
  330|  9.70M|    {
  331|  9.70M|        u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK;
  ------------------
  |  |   53|  9.70M|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  332|  9.70M|        u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
  ------------------
  |  |   67|  9.70M|#define TOP_LEFT_LEFT_AVAILABLE               0x1110
  ------------------
  333|  9.70M|    }
  334|       |
  335|  11.8M|    {
  336|  11.8M|        mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row;
  337|  11.8M|        mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row;
  338|       |
  339|       |        /* copy the parameters of topleft Mb */
  340|  11.8M|        ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype;
  341|       |        /* Neighbour pointer assignments*/
  342|  11.8M|        ps_cur_mb_info->ps_curmb = ps_cur_mb_row + mb_x;
  343|  11.8M|        ps_cur_mb_info->ps_left_mb = ps_cur_mb_row + mb_x - 1;
  344|  11.8M|        ps_cur_mb_info->ps_top_mb = ps_top_mb_row + mb_x;
  345|  11.8M|        ps_cur_mb_info->ps_top_right_mb = ps_top_mb_row + mb_x + 1;
  346|       |
  347|       |        /* Update the parameters of topleftmb*/
  348|  11.8M|        ps_dec->u1_topleft_mbtype = ps_cur_mb_info->ps_top_mb->u1_mb_type;
  349|  11.8M|    }
  350|       |
  351|  11.8M|    ps_dec->u2_mby = mb_y;
  352|  11.8M|    ps_dec->u2_mbx = mb_x;
  353|  11.8M|    ps_cur_mb_info->u2_mbx = mb_x;
  354|  11.8M|    ps_cur_mb_info->u2_mby = mb_y;
  355|  11.8M|    ps_cur_mb_info->u1_topmb = 1;
  356|  11.8M|    ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
  ------------------
  |  |  562|  11.8M|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  11.8M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  11.8M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
  357|  11.8M|    ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
  358|  11.8M|    ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
  359|  11.8M|    ps_cur_mb_info->ps_curmb->u1_mb_fld = ps_dec->u1_cur_mb_fld_dec_flag;
  360|  11.8M|    ps_cur_mb_info->u1_mb_field_decodingflag = ps_dec->u1_cur_mb_fld_dec_flag;
  361|  11.8M|    ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask;
  362|  11.8M|    ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask;
  363|  11.8M|    return (OK);
  ------------------
  |  |  114|  11.8M|#define OK        0
  ------------------
  364|  11.8M|}

isvcd_ref_lyr_part_idc:
  783|   130k|{
  784|       |    /*! Flow of the module is as follows                                   */
  785|       |    /*! 1. runs loops over the 16 4x4 blocks and gets teh reference layer
  786|       |    patition information by projecting the 1,1 locations of
  787|       |    each block                                                      */
  788|       |    /*! 2. if the projected partition is in INTRA MB then its stores -1
  789|       |    to the partition idc array                                      */
  790|       |    /*! 3. if projected partition is in INTER MB then it packs and stores
  791|       |    the offsets form the starting pointer in the part_idc array     */
  792|       |    /*! 4. IN non dyaydic cases. the part idc having -1 are replaced by
  793|       |    neighbours if the current MB projected is not INTRA             */
  794|       |    /*! 5. the -1 values are replaced first on a 4x4 inside an 8x8 basis   */
  795|       |    /*! 6. in second iteration -1 are replaced at an 8x8 basis             */
  796|       |    /*! 7. stores the intra MB status in the location given                */
  797|       |
  798|   130k|    mode_motion_ctxt_t *ps_ctxt;
  799|   130k|    mode_motion_lyr_ctxt *ps_lyr_mem;
  800|   130k|    inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
  801|   130k|    WORD32 i4_blk_y, i4_blk_x;
  802|   130k|    WORD32 i4_mb_x, i4_mb_y;
  803|   130k|    WORD32 i4_intra_mb_flag;
  804|   130k|    WORD32 i4_inter_lyr_mb_prms_stride;
  805|   130k|    dec_mb_info_t *ps_mb_params;
  806|       |
  807|   130k|    ps_mb_params = (dec_mb_info_t *) pv_mb_params;
  808|   130k|    ps_ctxt = (mode_motion_ctxt_t *) pv_comp_mode_mv_ctxt;
  809|       |
  810|       |    /* get the current layer ctxt */
  811|   130k|    ps_lyr_mem = &ps_ctxt->as_res_lyr_mem[ps_ctxt->i4_res_id];
  812|       |
  813|       |    /* ref layer mb mode */
  814|   130k|    ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) ps_lyr_mem->s_ref_mb_mode.pv_buffer;
  815|   130k|    i4_inter_lyr_mb_prms_stride = ps_lyr_mem->s_ref_mb_mode.i4_num_element_stride;
  816|       |
  817|       |    /* derive the MB_X and MB_Y for the current MB */
  818|   130k|    i4_mb_x = ps_mb_params->u2_mbx;
  819|   130k|    i4_mb_y = ps_mb_params->u2_mby;
  820|       |
  821|       |    /* set the intra MB flag to default TRUE */
  822|   130k|    i4_intra_mb_flag = SVCD_TRUE;
  ------------------
  |  |   46|   130k|#define SVCD_TRUE 1
  ------------------
  823|       |
  824|       |    /*-----------------------------------------------------------------------*/
  825|       |    /* derive the reference layer part idc for all 16 partitions             */
  826|       |    /*-----------------------------------------------------------------------*/
  827|   653k|    for(i4_blk_y = 0; i4_blk_y < NUM_SUB_MB_PARTS; i4_blk_y++)
  ------------------
  |  |   60|   653k|#define NUM_SUB_MB_PARTS 4
  ------------------
  |  Branch (827:23): [True: 522k, False: 130k]
  ------------------
  828|   522k|    {
  829|  2.61M|        for(i4_blk_x = 0; i4_blk_x < NUM_SUB_MB_PARTS; i4_blk_x++)
  ------------------
  |  |   60|  2.61M|#define NUM_SUB_MB_PARTS 4
  ------------------
  |  Branch (829:27): [True: 2.09M, False: 522k]
  ------------------
  830|  2.09M|        {
  831|  2.09M|            WORD32 i4_curr_x, i4_curr_y;
  832|  2.09M|            WORD32 i4_ref_x, i4_ref_y;
  833|  2.09M|            WORD32 i4_ref_mb_x, i4_ref_mb_y;
  834|  2.09M|            WORD8 i1_ref_mb_mode;
  835|  2.09M|            inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms_temp;
  836|       |
  837|  2.09M|            i4_curr_x = (i4_mb_x << 4) + (i4_blk_x << 2) + 1;
  838|  2.09M|            i4_curr_y = (i4_mb_y << 4) + (i4_blk_y << 2) + 1;
  839|       |
  840|       |            /* get the colocated position in the refernce layer */
  841|  2.09M|            i4_ref_x = ps_lyr_mem->pi2_ref_loc_x[i4_curr_x];
  842|  2.09M|            i4_ref_y = ps_lyr_mem->pi2_ref_loc_y[i4_curr_y];
  843|       |
  844|  2.09M|            i4_ref_x = CLIP3(0, ((ps_lyr_mem->i4_ref_width) - 1), i4_ref_x);
  ------------------
  |  |   77|  2.09M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 2.09M]
  |  |  |  Branch (77:54): [True: 0, False: 2.09M]
  |  |  ------------------
  ------------------
  845|       |
  846|  2.09M|            i4_ref_y = CLIP3(0, ((ps_lyr_mem->i4_ref_height) - 1), i4_ref_y);
  ------------------
  |  |   77|  2.09M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 2.09M]
  |  |  |  Branch (77:54): [True: 0, False: 2.09M]
  |  |  ------------------
  ------------------
  847|       |
  848|       |            /* get the reference mb x and y */
  849|  2.09M|            i4_ref_mb_x = (i4_ref_x >> 4);
  850|  2.09M|            i4_ref_mb_y = (i4_ref_y >> 4);
  851|       |
  852|       |            /* get the appropriate mb params in reference layer */
  853|  2.09M|            ps_inter_lyr_mb_prms_temp = ps_inter_lyr_mb_prms + i4_ref_mb_x;
  854|  2.09M|            ps_inter_lyr_mb_prms_temp += i4_ref_mb_y * i4_inter_lyr_mb_prms_stride;
  855|       |
  856|  2.09M|            i1_ref_mb_mode = ps_inter_lyr_mb_prms_temp->i1_mb_mode;
  857|       |
  858|       |            /* check if the MB mode of the refernce MB is Intra*/
  859|  2.09M|            if(i1_ref_mb_mode > SVC_INTER_MB)
  ------------------
  |  |  114|  2.09M|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (859:16): [True: 265k, False: 1.82M]
  ------------------
  860|   265k|            {
  861|       |                /* store the -1 value */
  862|   265k|                ai4_ref_part_idc[i4_blk_y][i4_blk_x] = -1;
  863|   265k|            }
  864|  1.82M|            else
  865|  1.82M|            {
  866|       |                /* pack and store the reference x and y */
  867|  1.82M|                ai4_ref_part_idc[i4_blk_y][i4_blk_x] = (i4_ref_y << 16) + i4_ref_x;
  868|  1.82M|                i4_intra_mb_flag = SVCD_FALSE;
  ------------------
  |  |   45|  1.82M|#define SVCD_FALSE 0
  ------------------
  869|  1.82M|            }
  870|       |
  871|  2.09M|        } /* end of block x loop */
  872|       |
  873|   522k|    }     /* end of block y loop */
  874|       |
  875|       |    /*************************************************************************/
  876|       |    /* if the restricted spatial resolution change flag is 0                 */
  877|       |    /* modify the part_idc for all the partitions                            */
  878|       |    /*************************************************************************/
  879|   130k|    if(SVCD_FALSE == (ps_lyr_mem->ps_curr_lyr_res_prms->u1_rstrct_res_change_flag) &&
  ------------------
  |  |   45|   130k|#define SVCD_FALSE 0
  ------------------
  |  Branch (879:8): [True: 130k, False: 0]
  ------------------
  880|   130k|       (SVCD_FALSE == i4_intra_mb_flag))
  ------------------
  |  |   45|   130k|#define SVCD_FALSE 0
  ------------------
  |  Branch (880:8): [True: 121k, False: 9.11k]
  ------------------
  881|   121k|    {
  882|       |        /* replace values of "-1" on a 4x4 block basis */
  883|   121k|        WORD32 i4_xp, i4_yp;
  884|   121k|        WORD32 i4_indx_x, i4_indx_y;
  885|   121k|        WORD32 ai4_flag_8x8[2][2] = {SVCD_FALSE};
  ------------------
  |  |   45|   121k|#define SVCD_FALSE 0
  ------------------
  886|       |
  887|       |        /* loop over (4) 8x8 partitions */
  888|   364k|        for(i4_yp = 0; i4_yp < 2; i4_yp++)
  ------------------
  |  Branch (888:24): [True: 243k, False: 121k]
  ------------------
  889|   243k|        {
  890|   729k|            for(i4_xp = 0; i4_xp < 2; i4_xp++)
  ------------------
  |  Branch (890:28): [True: 486k, False: 243k]
  ------------------
  891|   486k|            {
  892|   486k|                WORD32 i4_xs, i4_ys;
  893|   486k|                WORD32 ai4_flag_4x4[2][2] = {SVCD_FALSE};
  ------------------
  |  |   45|   486k|#define SVCD_FALSE 0
  ------------------
  894|       |
  895|       |                /* loop over (4) 4x4 partitions */
  896|  1.45M|                for(i4_ys = 0; i4_ys < 2; i4_ys++)
  ------------------
  |  Branch (896:32): [True: 972k, False: 486k]
  ------------------
  897|   972k|                {
  898|  2.91M|                    for(i4_xs = 0; i4_xs < 2; i4_xs++)
  ------------------
  |  Branch (898:36): [True: 1.94M, False: 972k]
  ------------------
  899|  1.94M|                    {
  900|       |                        /* index to the exact 4x4 block */
  901|  1.94M|                        i4_indx_y = (i4_yp << 1) + i4_ys;
  902|  1.94M|                        i4_indx_x = (i4_xp << 1) + i4_xs;
  903|       |
  904|       |                        /* check if the current part idc is -1*/
  905|  1.94M|                        if(ai4_ref_part_idc[i4_indx_y][i4_indx_x] == -1)
  ------------------
  |  Branch (905:28): [True: 119k, False: 1.82M]
  ------------------
  906|   119k|                        {
  907|   119k|                            WORD32 i4_temp_x = 1 - i4_xs;
  908|   119k|                            WORD32 i4_temp_y = 1 - i4_ys;
  909|   119k|                            WORD32 i4_temp_part_y = (i4_yp << 1) + i4_temp_y;
  910|       |
  911|   119k|                            WORD32 i4_temp_part_x = (i4_xp << 1) + i4_temp_x;
  912|       |
  913|   119k|                            ai4_flag_4x4[i4_ys][i4_xs] = SVCD_TRUE;
  ------------------
  |  |   46|   119k|#define SVCD_TRUE 1
  ------------------
  914|       |
  915|       |                            /* replace with appropriate values */
  916|   119k|                            if((SVCD_FALSE == ai4_flag_4x4[i4_ys][i4_temp_x]) &&
  ------------------
  |  |   45|   119k|#define SVCD_FALSE 0
  ------------------
  |  Branch (916:32): [True: 59.7k, False: 59.7k]
  ------------------
  917|  59.7k|                               (ai4_ref_part_idc[i4_indx_y][i4_temp_part_x] != -1))
  ------------------
  |  Branch (917:32): [True: 0, False: 59.7k]
  ------------------
  918|      0|                            {
  919|      0|                                ai4_ref_part_idc[i4_indx_y][i4_indx_x] =
  920|      0|                                    ai4_ref_part_idc[i4_indx_y][i4_temp_part_x];
  921|      0|                            }
  922|   119k|                            else if((SVCD_FALSE == ai4_flag_4x4[i4_temp_y][i4_xs]) &&
  ------------------
  |  |   45|   119k|#define SVCD_FALSE 0
  ------------------
  |  Branch (922:37): [True: 59.7k, False: 59.7k]
  ------------------
  923|  59.7k|                                    (ai4_ref_part_idc[i4_temp_part_y][i4_indx_x] != -1))
  ------------------
  |  Branch (923:37): [True: 0, False: 59.7k]
  ------------------
  924|      0|                            {
  925|      0|                                ai4_ref_part_idc[i4_indx_y][i4_indx_x] =
  926|      0|                                    ai4_ref_part_idc[i4_temp_part_y][i4_indx_x];
  927|      0|                            }
  928|   119k|                            else if((SVCD_FALSE == ai4_flag_4x4[i4_temp_y][i4_temp_x]) &&
  ------------------
  |  |   45|   119k|#define SVCD_FALSE 0
  ------------------
  |  Branch (928:37): [True: 59.7k, False: 59.7k]
  ------------------
  929|  59.7k|                                    (ai4_ref_part_idc[i4_temp_part_y][i4_temp_part_x] != -1))
  ------------------
  |  Branch (929:37): [True: 0, False: 59.7k]
  ------------------
  930|      0|                            {
  931|      0|                                ai4_ref_part_idc[i4_indx_y][i4_indx_x] =
  932|      0|                                    ai4_ref_part_idc[i4_temp_part_y][i4_temp_part_x];
  933|      0|                            }
  934|   119k|                        } /* end of part idc equal to -1 check */
  935|       |
  936|  1.94M|                    }     /* end of sub partition xs loop */
  937|       |
  938|   972k|                }         /* end of sub partition ys loop */
  939|       |
  940|   486k|            }             /* end of partition xp loop */
  941|       |
  942|   243k|        }                 /* end of partition yp loop */
  943|       |
  944|       |        /* replace values of "-1" on an 8x8 block basis */
  945|       |
  946|       |        /* loop over (4) 8x8 partitions */
  947|   364k|        for(i4_yp = 0; i4_yp < 2; i4_yp++)
  ------------------
  |  Branch (947:24): [True: 243k, False: 121k]
  ------------------
  948|   243k|        {
  949|   729k|            for(i4_xp = 0; i4_xp < 2; i4_xp++)
  ------------------
  |  Branch (949:28): [True: 486k, False: 243k]
  ------------------
  950|   486k|            {
  951|   486k|                WORD32 i4_yp_inv = 1 - i4_yp;
  952|   486k|                WORD32 i4_xp_inv = 1 - i4_xp;
  953|   486k|                WORD32 i4_xo_inv = (2 - i4_xp);
  954|   486k|                WORD32 i4_yo_inv = (2 - i4_yp);
  955|   486k|                i4_indx_x = (i4_xp << 1);
  956|   486k|                i4_indx_y = (i4_yp << 1);
  957|       |
  958|       |                /* check if the current part idc is -1*/
  959|   486k|                if(ai4_ref_part_idc[i4_indx_y][i4_indx_x] == -1)
  ------------------
  |  Branch (959:20): [True: 29.8k, False: 456k]
  ------------------
  960|  29.8k|                {
  961|  29.8k|                    ai4_flag_8x8[i4_yp][i4_xp] = SVCD_TRUE;
  ------------------
  |  |   46|  29.8k|#define SVCD_TRUE 1
  ------------------
  962|       |
  963|       |                    /* replace the -1 with appropriate values */
  964|  29.8k|                    if(SVCD_FALSE == ai4_flag_8x8[i4_yp][i4_xp_inv] &&
  ------------------
  |  |   45|  29.8k|#define SVCD_FALSE 0
  ------------------
  |  Branch (964:24): [True: 22.9k, False: 6.91k]
  ------------------
  965|  22.9k|                       ai4_ref_part_idc[i4_indx_y][i4_xo_inv] != -1)
  ------------------
  |  Branch (965:24): [True: 16.0k, False: 6.91k]
  ------------------
  966|  16.0k|                    {
  967|  16.0k|                        ai4_ref_part_idc[i4_indx_y][i4_indx_x] =
  968|  16.0k|                            ai4_ref_part_idc[i4_indx_y][i4_xo_inv];
  969|       |
  970|  16.0k|                        ai4_ref_part_idc[i4_indx_y + 1][i4_indx_x] =
  971|  16.0k|                            ai4_ref_part_idc[i4_indx_y + 1][i4_xo_inv];
  972|       |
  973|  16.0k|                        ai4_ref_part_idc[i4_indx_y][i4_indx_x + 1] =
  974|  16.0k|                            ai4_ref_part_idc[i4_indx_y][i4_xo_inv];
  975|       |
  976|  16.0k|                        ai4_ref_part_idc[i4_indx_y + 1][i4_indx_x + 1] =
  977|  16.0k|                            ai4_ref_part_idc[i4_indx_y + 1][i4_xo_inv];
  978|  16.0k|                    }
  979|  13.8k|                    else if(SVCD_FALSE == ai4_flag_8x8[i4_yp_inv][i4_xp] &&
  ------------------
  |  |   45|  13.8k|#define SVCD_FALSE 0
  ------------------
  |  Branch (979:29): [True: 13.4k, False: 346]
  ------------------
  980|  13.4k|                            ai4_ref_part_idc[i4_yo_inv][i4_indx_x] != -1)
  ------------------
  |  Branch (980:29): [True: 13.1k, False: 288]
  ------------------
  981|  13.1k|                    {
  982|  13.1k|                        ai4_ref_part_idc[i4_indx_y][i4_indx_x] =
  983|  13.1k|                            ai4_ref_part_idc[i4_yo_inv][i4_indx_x];
  984|       |
  985|  13.1k|                        ai4_ref_part_idc[i4_indx_y + 1][i4_indx_x] =
  986|  13.1k|                            ai4_ref_part_idc[i4_yo_inv][i4_indx_x];
  987|       |
  988|  13.1k|                        ai4_ref_part_idc[i4_indx_y][i4_indx_x + 1] =
  989|  13.1k|                            ai4_ref_part_idc[i4_yo_inv][i4_indx_x + 1];
  990|       |
  991|  13.1k|                        ai4_ref_part_idc[i4_indx_y + 1][i4_indx_x + 1] =
  992|  13.1k|                            ai4_ref_part_idc[i4_yo_inv][i4_indx_x + 1];
  993|  13.1k|                    }
  994|    634|                    else if(SVCD_FALSE == ai4_flag_8x8[i4_yp_inv][i4_xp_inv] &&
  ------------------
  |  |   45|    634|#define SVCD_FALSE 0
  ------------------
  |  Branch (994:29): [True: 634, False: 0]
  ------------------
  995|    634|                            ai4_ref_part_idc[i4_yo_inv][i4_xo_inv] != -1)
  ------------------
  |  Branch (995:29): [True: 634, False: 0]
  ------------------
  996|    634|                    {
  997|    634|                        ai4_ref_part_idc[i4_indx_y][i4_indx_x] =
  998|    634|                            ai4_ref_part_idc[i4_yo_inv][i4_xo_inv];
  999|       |
 1000|    634|                        ai4_ref_part_idc[i4_indx_y + 1][i4_indx_x] =
 1001|    634|                            ai4_ref_part_idc[i4_yo_inv][i4_xo_inv];
 1002|       |
 1003|    634|                        ai4_ref_part_idc[i4_indx_y][i4_indx_x + 1] =
 1004|    634|                            ai4_ref_part_idc[i4_yo_inv][i4_xo_inv];
 1005|       |
 1006|    634|                        ai4_ref_part_idc[i4_indx_y + 1][i4_indx_x + 1] =
 1007|    634|                            ai4_ref_part_idc[i4_yo_inv][i4_xo_inv];
 1008|    634|                    }
 1009|  29.8k|                } /* end of part idc equal to -1 check */
 1010|       |
 1011|   486k|            }     /* end of partition xp loop */
 1012|       |
 1013|   243k|        }         /* end of partition yp loop */
 1014|       |
 1015|   121k|    }             /* end of refinement of part idc for non dyadic case*/
 1016|       |
 1017|       |    /* store the intra flag in the location provided */
 1018|   130k|    *pi4_intra_flag = i4_intra_mb_flag;
 1019|       |
 1020|   130k|    return;
 1021|   130k|}
isvcd_check_motion:
 1049|   557k|{
 1050|   557k|    mv_pred_t *ps_part_a;
 1051|   557k|    mv_pred_t *ps_part_b;
 1052|   557k|    WORD32 i4_cntr;
 1053|   557k|    WORD32 i4_mv_treshold;
 1054|   557k|    WORD32 i4_flag = 0;
 1055|       |
 1056|   557k|    ps_part_a = (mv_pred_t *) pv_motion_prm_mb_part_a;
 1057|   557k|    ps_part_b = (mv_pred_t *) pv_motion_prm_mb_part_b;
 1058|       |
 1059|  1.17M|    for(i4_cntr = 0; i4_cntr < i4_listx; i4_cntr++)
  ------------------
  |  Branch (1059:22): [True: 668k, False: 503k]
  ------------------
 1060|   668k|    {
 1061|       |        /* calculate the absolute diff of both components */
 1062|   668k|        i4_mv_treshold = ABS((ps_part_a->i2_mv[2 * i4_cntr]) - (ps_part_b->i2_mv[2 * i4_cntr]));
  ------------------
  |  |  100|   668k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 24.7k, False: 644k]
  |  |  ------------------
  ------------------
 1063|   668k|        i4_mv_treshold +=
 1064|   668k|            ABS((ps_part_a->i2_mv[1 + 2 * i4_cntr]) - (ps_part_b->i2_mv[1 + 2 * i4_cntr]));
  ------------------
  |  |  100|   668k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 22.6k, False: 646k]
  |  |  ------------------
  ------------------
 1065|       |
 1066|   668k|        if((0 == i4_mv_treshold) &&
  ------------------
  |  Branch (1066:12): [True: 621k, False: 47.1k]
  ------------------
 1067|   621k|           (ps_part_a->i1_ref_frame[i4_cntr] == ps_part_b->i1_ref_frame[i4_cntr]))
  ------------------
  |  Branch (1067:12): [True: 614k, False: 7.31k]
  ------------------
 1068|   614k|        {
 1069|   614k|            i4_flag = 1;
 1070|   614k|        }
 1071|  54.4k|        else
 1072|  54.4k|        {
 1073|  54.4k|            i4_flag = 0;
 1074|  54.4k|            return (i4_flag);
 1075|  54.4k|        }
 1076|       |
 1077|   668k|    } /* end of loop over lists */
 1078|       |
 1079|   503k|    return (i4_flag);
 1080|   557k|}
isvcd_get_min_positive:
 1104|  2.34M|{
 1105|  2.34M|    UWORD32 u4_x, u4_y;
 1106|  2.34M|    WORD32 i4_min_positive;
 1107|       |
 1108|       |    /* get positive values */
 1109|  2.34M|    u4_x = (UWORD32) i4_input_1;
 1110|  2.34M|    u4_y = (UWORD32) i4_input_2;
 1111|       |
 1112|       |    /* logic and desired output
 1113|       |
 1114|       |    u4_x     magnitude compare    u4_y           o/p
 1115|       |    +              >              +             u4_y
 1116|       |    +              <              +             u4_x
 1117|       |    +              =              +             u4_x
 1118|       |    -              >              -             u4_y
 1119|       |    -              <              -             u4_x
 1120|       |    -              =              -             u4_x
 1121|       |    0              =              0             u4_x
 1122|       |    -              n/a            +             u4_y
 1123|       |    +              n/a            -             u4_x
 1124|       |
 1125|       |    */
 1126|       |
 1127|  2.34M|    if((u4_y < u4_x) && (0 <= i4_input_2))
  ------------------
  |  Branch (1127:8): [True: 3.74k, False: 2.34M]
  |  Branch (1127:25): [True: 3.74k, False: 0]
  ------------------
 1128|  3.74k|    {
 1129|  3.74k|        i4_min_positive = i4_input_2;
 1130|  3.74k|    }
 1131|  2.34M|    else
 1132|  2.34M|    {
 1133|  2.34M|        i4_min_positive = i4_input_1;
 1134|  2.34M|    }
 1135|  2.34M|    return (i4_min_positive);
 1136|  2.34M|}
isvcd_interlyr_motion_scale:
 1286|  2.15M|{
 1287|       |    /*! Flow of the module is as follows                                   */
 1288|       |    /*! 1. derive the offsets form part idc                                */
 1289|       |    /*! 2. takes the motion vector and scales it based on scale factor     */
 1290|       |    /*! 3. adds the correction factors for crop window change cases        */
 1291|       |    /*! 4. store the default motion params for intra projected blocks      */
 1292|       |
 1293|  2.15M|    mode_motion_ctxt_t *ps_ctxt;
 1294|  2.15M|    mode_motion_lyr_ctxt *ps_lyr_mem;
 1295|  2.15M|    mv_pred_t *ps_motion_pred;
 1296|  2.15M|    mv_pred_t *ps_ref_mv;
 1297|  2.15M|    WORD32 i4_lists;
 1298|  2.15M|    WORD32 i4_ref_16x16_flag = 0;
 1299|  2.15M|    WORD32 i4_scale_x, i4_scale_y;
 1300|  2.15M|    WORD16 i2_max_mv_x, i2_max_mv_y;
 1301|       |
 1302|  2.15M|    ps_ctxt = (mode_motion_ctxt_t *) pv_comp_mode_mv_ctxt;
 1303|       |
 1304|       |    /* get the current layer ctxt */
 1305|  2.15M|    ps_lyr_mem = &ps_ctxt->as_res_lyr_mem[ps_ctxt->i4_res_id];
 1306|       |
 1307|       |    /* ----------- Get the reference layer MV structure ---------- */
 1308|  2.15M|    {
 1309|  2.15M|        mv_pred_t *ps_ref_lyr_motion_prms;
 1310|  2.15M|        WORD32 i4_ref_x, i4_ref_y;
 1311|  2.15M|        WORD32 i4_ref_mb_x, i4_ref_mb_y;
 1312|  2.15M|        WORD32 i4_ref_width;
 1313|       |
 1314|  2.15M|        ps_ref_lyr_motion_prms = (mv_pred_t *) ps_lyr_mem->pv_ref_mv_bank_l0;
 1315|  2.15M|        i4_ref_width = ps_lyr_mem->i4_ref_width;
 1316|  2.15M|        i2_max_mv_x = i4_ref_width << 2;
 1317|  2.15M|        i2_max_mv_y = ps_lyr_mem->i4_ref_height << 2;
 1318|       |
 1319|       |        /* extract the reference x and y positions */
 1320|  2.15M|        i4_ref_x = (*pi4_ref_part_idc) & 0xFFFF;
 1321|  2.15M|        i4_ref_y = (*pi4_ref_part_idc) >> 16;
 1322|       |
 1323|       |        /* get the reference mb x and y */
 1324|  2.15M|        i4_ref_mb_x = (i4_ref_x >> 4);
 1325|  2.15M|        i4_ref_mb_y = (i4_ref_y >> 4);
 1326|       |
 1327|       |        /* get the reference layer motion struct pointing  */
 1328|       |        /* to first 4x4 partition of the refernce layer MB */
 1329|  2.15M|        ps_ref_mv = ps_ref_lyr_motion_prms + (i4_ref_mb_x << 4);
 1330|  2.15M|        ps_ref_mv += (i4_ref_mb_y * i4_ref_width);
 1331|       |
 1332|       |        /* if reference layer mb type is non 16x16 */
 1333|  2.15M|        if(0 == i4_ref_16x16_flag)
  ------------------
  |  Branch (1333:12): [True: 2.15M, False: 0]
  ------------------
 1334|  2.15M|        {
 1335|       |            /* increment the pointer to appropaite 4x4 */
 1336|  2.15M|            ps_ref_mv += ((i4_ref_x >> 2) & 0x03);
 1337|  2.15M|            ps_ref_mv += (((i4_ref_y >> 2) & 0x03) << 2);
 1338|  2.15M|        }
 1339|  2.15M|    }
 1340|       |
 1341|       |    /* motion pred structure */
 1342|  2.15M|    ps_motion_pred = pv_motion_pred;
 1343|       |
 1344|       |    /* retrive the scale factors */
 1345|  2.15M|    i4_scale_x = ps_lyr_mem->i4_scale_mv_x;
 1346|  2.15M|    i4_scale_y = ps_lyr_mem->i4_scale_mv_y;
 1347|       |
 1348|       |    /* loop on the lists given as input */
 1349|  4.81M|    for(i4_lists = 0; i4_lists < i4_listx; i4_lists++)
  ------------------
  |  Branch (1349:23): [True: 2.65M, False: 2.15M]
  ------------------
 1350|  2.65M|    {
 1351|  2.65M|        WORD32 i4_mv_x, i4_mv_y;
 1352|  2.65M|        WORD16 i2_mv_x, i2_mv_y;
 1353|       |
 1354|       |        /* if the refernce index is -1 set the default values */
 1355|  2.65M|        if(-1 == ps_ref_mv->i1_ref_frame[i4_lists])
  ------------------
  |  Branch (1355:12): [True: 322k, False: 2.33M]
  ------------------
 1356|   322k|        {
 1357|   322k|            ps_motion_pred->i1_ref_frame[i4_lists] = -1;
 1358|   322k|            ps_motion_pred->i2_mv[2 * i4_lists] = 0;
 1359|   322k|            ps_motion_pred->i2_mv[1 + 2 * i4_lists] = 0;
 1360|   322k|        }
 1361|  2.33M|        else
 1362|  2.33M|        {
 1363|       |            /* field MB and field pictures modification are present */
 1364|       |            /* currently not implemented */
 1365|  2.33M|            ps_motion_pred->i1_ref_frame[i4_lists] = ps_ref_mv->i1_ref_frame[i4_lists];
 1366|       |
 1367|  2.33M|            i2_mv_x = ps_ref_mv->i2_mv[2 * i4_lists];
 1368|  2.33M|            i2_mv_y = ps_ref_mv->i2_mv[1 + 2 * i4_lists];
 1369|  2.33M|            i2_mv_x = CLIP3(-i2_max_mv_x, i2_max_mv_x, i2_mv_x);
  ------------------
  |  |   77|  2.33M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 1.64k, False: 2.33M]
  |  |  |  Branch (77:54): [True: 1.67k, False: 2.33M]
  |  |  ------------------
  ------------------
 1370|  2.33M|            i2_mv_y = CLIP3(-i2_max_mv_y, i2_max_mv_y, i2_mv_y);
  ------------------
  |  |   77|  2.33M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 1.53k, False: 2.33M]
  |  |  |  Branch (77:54): [True: 776, False: 2.33M]
  |  |  ------------------
  ------------------
 1371|       |            /* scale the motion vectors */
 1372|  2.33M|            i4_mv_x = (i2_mv_x * i4_scale_x + 32768) >> 16;
 1373|  2.33M|            i4_mv_y = (i2_mv_y * i4_scale_y + 32768) >> 16;
 1374|       |
 1375|       |            /* store the final motion vectors */
 1376|  2.33M|            ps_motion_pred->i2_mv[2 * i4_lists] = i4_mv_x;
 1377|  2.33M|            ps_motion_pred->i2_mv[1 + 2 * i4_lists] = i4_mv_y;
 1378|       |
 1379|       |            /* if cropping change flag is present */
 1380|  2.33M|            if(SVCD_TRUE == ps_lyr_mem->ps_curr_lyr_res_prms->u1_cropping_change_flag)
  ------------------
  |  |   46|  2.33M|#define SVCD_TRUE 1
  ------------------
  |  Branch (1380:16): [True: 0, False: 2.33M]
  ------------------
 1381|      0|            {
 1382|       |                /* over write the motion vectors x and y */
 1383|      0|                isvcd_motion_scale_crop_wdw_change(ps_ctxt, ps_lyr_mem, ps_mb_params, ps_ref_mv,
 1384|      0|                                                   ps_motion_pred, i4_listx, i4_part_frm_x,
 1385|      0|                                                   i4_part_frm_y, ppv_map_ref_idx_to_poc, i4_lists);
 1386|      0|            }
 1387|  2.33M|        }
 1388|  2.65M|    } /* end of lists loop */
 1389|       |
 1390|  2.15M|    return (i4_ref_16x16_flag);
 1391|  2.15M|}
isvcd_store_motion_map:
 1425|   214k|{
 1426|       |    /*! Flow of the module is as follows                                   */
 1427|       |    /*! 1. loops over part_width and part_height                           */
 1428|       |    /*! 2. copies the src params toi destination                           */
 1429|       |    /*! 3. updates the source pointer if src_update flag is set to 1       */
 1430|       |
 1431|   214k|    WORD32 i4_i, i4_j;
 1432|   214k|    mv_pred_t *ps_mv_pred_src;
 1433|   214k|    mv_pred_t *ps_mv_map_dst;
 1434|       |
 1435|   214k|    ps_mv_pred_src = (mv_pred_t *) pv_motion_pred;
 1436|   214k|    ps_mv_map_dst = (mv_pred_t *) pv_curr_lyr_motion_map;
 1437|       |
 1438|       |    /* store the current motion pred to all the motion map structures */
 1439|   643k|    for(i4_i = 0; i4_i < i4_part_height; i4_i++)
  ------------------
  |  Branch (1439:19): [True: 428k, False: 214k]
  ------------------
 1440|   428k|    {
 1441|  1.28M|        for(i4_j = 0; i4_j < i4_part_width; i4_j++)
  ------------------
  |  Branch (1441:23): [True: 857k, False: 428k]
  ------------------
 1442|   857k|        {
 1443|       |            /* copy form source to destination */
 1444|   857k|            *(ps_mv_map_dst + i4_j) = *(ps_mv_pred_src + (i4_src_update_flag * i4_j));
 1445|       |
 1446|   857k|        } /* end of loop over partition width */
 1447|       |
 1448|   428k|        ps_mv_map_dst += i4_dst_stride;
 1449|   428k|        ps_mv_pred_src += (i4_src_stride * i4_src_update_flag);
 1450|       |
 1451|   428k|    } /* end of loop over partition height */
 1452|   214k|    return;
 1453|   214k|}
isvcd_check_mv_diff:
 1482|  2.43M|{
 1483|  2.43M|    mv_pred_t *ps_part_a;
 1484|  2.43M|    mv_pred_t *ps_part_b;
 1485|  2.43M|    WORD32 i4_cntr;
 1486|  2.43M|    WORD32 i4_mv_treshold;
 1487|  2.43M|    WORD32 i4_flag;
 1488|       |
 1489|  2.43M|    ps_part_a = (mv_pred_t *) pv_motion_prm_a;
 1490|  2.43M|    ps_part_b = (mv_pred_t *) pv_motion_prm_b;
 1491|       |
 1492|  2.43M|    i4_flag = 1;
 1493|  5.31M|    for(i4_cntr = 0; i4_cntr < i4_listx; i4_cntr++)
  ------------------
  |  Branch (1493:22): [True: 2.92M, False: 2.38M]
  ------------------
 1494|  2.92M|    {
 1495|       |        /* calculate the absolute diff of both components */
 1496|  2.92M|        i4_mv_treshold = ABS((ps_part_a->i2_mv[2 * i4_cntr]) - (ps_part_b->i2_mv[2 * i4_cntr]));
  ------------------
  |  |  100|  2.92M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 22.7k, False: 2.90M]
  |  |  ------------------
  ------------------
 1497|  2.92M|        i4_mv_treshold +=
 1498|  2.92M|            ABS((ps_part_a->i2_mv[1 + (2 * i4_cntr)]) - (ps_part_b->i2_mv[1 + (2 * i4_cntr)]));
  ------------------
  |  |  100|  2.92M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 28.2k, False: 2.90M]
  |  |  ------------------
  ------------------
 1499|       |
 1500|  2.92M|        if(i4_actual_threshold < i4_mv_treshold)
  ------------------
  |  Branch (1500:12): [True: 45.5k, False: 2.88M]
  ------------------
 1501|  45.5k|        {
 1502|  45.5k|            i4_flag = 0;
 1503|  45.5k|            break;
 1504|  45.5k|        }
 1505|       |
 1506|  2.92M|    } /* end of loop over lists */
 1507|  2.43M|    return (i4_flag);
 1508|  2.43M|}
isvcd_interlyr_motion_submbmode_pred:
 1541|   121k|{
 1542|       |    /*! Flow of the module is as follows                                   */
 1543|       |    /*! 1. if dyadic case it calculates the motion vectors based on dyadic
 1544|       |           scale factor and loop counts calculated at layer level          */
 1545|       |    /*! 2. if non dyadic then it calculates the motion vectors based on
 1546|       |            reference layer part idc                                        */
 1547|       |    /*! 3. does the motion vector modification for non dyayic cases, by
 1548|       |           calculating the minimum positive of reference indices of 4 4x4
 1549|       |           blocks and getiing a single reference index for 8x8             */
 1550|       |    /*! 4. if direct 8x8 inference is present and current slice is
 1551|       |           B OR EB, then it stores the corner motion vectors for each 8x8  */
 1552|       |    /*! 5. does the sub mb mode prediction and merging of motion vectors
 1553|       |           which are closely related by setting appropriate thresholds
 1554|       |           for MVs                                                         */
 1555|       |    /*! 6. stores the sub mb modes in the array given as input             */
 1556|       |
 1557|   121k|    mode_motion_ctxt_t *ps_ctxt;
 1558|   121k|    mode_motion_lyr_ctxt *ps_lyr_mem;
 1559|   121k|    mv_pred_t *ps_motion_pred;
 1560|   121k|    dec_mb_info_t *ps_mb_params;
 1561|   121k|    dec_svc_mb_info_t *ps_svc_mb_params;
 1562|   121k|    WORD32 i4_blk_y, i4_blk_x;
 1563|   121k|    WORD32 i4_i;
 1564|   121k|    WORD32 i4_listx;
 1565|   121k|    WORD32 i4_mv_treshold;
 1566|   121k|    WORD32 ai4_temp_ref_indx[NUM_REF_LISTS][NUM_MB_PARTS] = {0};
 1567|   121k|    WORD32 i4_mb_x, i4_mb_y;
 1568|   121k|    WORD32 i4_mb_pic_x, i4_mb_pic_y;
 1569|   121k|    dec_struct_t *ps_dec;
 1570|       |
 1571|   121k|    ps_dec = (dec_struct_t *) pv_dec;
 1572|   121k|    ps_ctxt = (mode_motion_ctxt_t *) pv_comp_mode_mv_ctxt;
 1573|       |
 1574|       |    /* get the current layer ctxt */
 1575|   121k|    ps_lyr_mem = &ps_ctxt->as_res_lyr_mem[ps_ctxt->i4_res_id];
 1576|       |
 1577|   121k|    ps_mb_params = (dec_mb_info_t *) pv_mb_params;
 1578|   121k|    ps_svc_mb_params = (dec_svc_mb_info_t *) pv_svc_mb_params;
 1579|   121k|    ps_motion_pred = ps_ctxt->ps_motion_pred_struct;
 1580|       |
 1581|   121k|    i4_listx = ps_ctxt->i4_listx;
 1582|       |
 1583|       |    /* derive the MB_X and MB_Y for the current MB */
 1584|   121k|    i4_mb_x = ps_mb_params->u2_mbx;
 1585|   121k|    i4_mb_y = ps_mb_params->u2_mby;
 1586|       |
 1587|       |    /* convert into picture units */
 1588|   121k|    i4_mb_pic_x = i4_mb_x << 4;
 1589|   121k|    i4_mb_pic_y = i4_mb_y << 4;
 1590|       |
 1591|       |    /* compute the motion vectors and reference indices of all part */
 1592|   607k|    for(i4_blk_y = 0; i4_blk_y < NUM_SUB_MB_PARTS; i4_blk_y++)
  ------------------
  |  |   60|   607k|#define NUM_SUB_MB_PARTS 4
  ------------------
  |  Branch (1592:23): [True: 486k, False: 121k]
  ------------------
 1593|   486k|    {
 1594|  2.43M|        for(i4_blk_x = 0; i4_blk_x < NUM_SUB_MB_PARTS; i4_blk_x++)
  ------------------
  |  |   60|  2.43M|#define NUM_SUB_MB_PARTS 4
  ------------------
  |  Branch (1594:27): [True: 1.94M, False: 486k]
  ------------------
 1595|  1.94M|        {
 1596|  1.94M|            isvcd_interlyr_motion_scale(pv_comp_mode_mv_ctxt, &ai4_ref_part_idc[i4_blk_y][i4_blk_x],
 1597|  1.94M|                                        ps_mb_params, (ps_motion_pred + (4 * i4_blk_y) + i4_blk_x),
 1598|  1.94M|                                        i4_listx, (i4_mb_pic_x + (i4_blk_x << 2) + 1),
 1599|  1.94M|                                        (i4_mb_pic_y + (i4_blk_y << 2) + 1),
 1600|  1.94M|                                        ps_dec->ppv_map_ref_idx_to_poc);
 1601|       |
 1602|  1.94M|        } /* end of blk x loop */
 1603|   486k|    }     /* end of blk y loop */
 1604|       |
 1605|       |    /********************************************************/
 1606|       |    /* get the final reference index into a temparory array */
 1607|       |    /********************************************************/
 1608|       |
 1609|       |    /* set reference indices */
 1610|   268k|    for(i4_i = 0; i4_i < i4_listx; i4_i++)
  ------------------
  |  Branch (1610:19): [True: 146k, False: 121k]
  ------------------
 1611|   146k|    {
 1612|   146k|        ai4_temp_ref_indx[i4_i][0] = ps_motion_pred[0].i1_ref_frame[i4_i];
 1613|   146k|        ai4_temp_ref_indx[i4_i][1] = ps_motion_pred[2].i1_ref_frame[i4_i];
 1614|   146k|        ai4_temp_ref_indx[i4_i][2] = ps_motion_pred[8].i1_ref_frame[i4_i];
 1615|   146k|        ai4_temp_ref_indx[i4_i][3] = ps_motion_pred[10].i1_ref_frame[i4_i];
 1616|       |
 1617|   146k|    } /* end of loop over lists */
 1618|       |
 1619|       |    /* if restricted spatial resolution change is not set */
 1620|   121k|    if(SVCD_FALSE == ps_lyr_mem->ps_curr_lyr_res_prms->u1_rstrct_res_change_flag)
  ------------------
  |  |   45|   121k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1620:8): [True: 121k, False: 0]
  ------------------
 1621|   121k|    {
 1622|   121k|        WORD32 i4_xp, i4_yp;
 1623|   121k|        WORD32 i4_xs, i4_ys;
 1624|       |
 1625|       |        /* merge reference indices and modify the motion vectors */
 1626|   268k|        for(i4_i = 0; i4_i < i4_listx; i4_i++)
  ------------------
  |  Branch (1626:23): [True: 146k, False: 121k]
  ------------------
 1627|   146k|        {
 1628|   439k|            for(i4_yp = 0; i4_yp < 2; i4_yp++)
  ------------------
  |  Branch (1628:28): [True: 293k, False: 146k]
  ------------------
 1629|   293k|            {
 1630|   879k|                for(i4_xp = 0; i4_xp < 2; i4_xp++)
  ------------------
  |  Branch (1630:32): [True: 586k, False: 293k]
  ------------------
 1631|   586k|                {
 1632|       |                    /* get the minimum positive of the refernce index */
 1633|  1.75M|                    for(i4_ys = 0; i4_ys < 2; i4_ys++)
  ------------------
  |  Branch (1633:36): [True: 1.17M, False: 586k]
  ------------------
 1634|  1.17M|                    {
 1635|  3.51M|                        for(i4_xs = 0; i4_xs < 2; i4_xs++)
  ------------------
  |  Branch (1635:40): [True: 2.34M, False: 1.17M]
  ------------------
 1636|  2.34M|                        {
 1637|  2.34M|                            mv_pred_t *ps_temp;
 1638|  2.34M|                            ps_temp = ps_motion_pred + (i4_xp << 1) + i4_xs;
 1639|  2.34M|                            ps_temp += 4 * ((i4_yp << 1) + i4_ys);
 1640|       |
 1641|       |                            /* get the minimum positive */
 1642|  2.34M|                            ai4_temp_ref_indx[i4_i][2 * i4_yp + i4_xp] =
 1643|  2.34M|                                isvcd_get_min_positive(ai4_temp_ref_indx[i4_i][2 * i4_yp + i4_xp],
 1644|  2.34M|                                                       ps_temp->i1_ref_frame[i4_i]);
 1645|  2.34M|                        }
 1646|  1.17M|                    }
 1647|       |                    /* update motion vectors */
 1648|  1.75M|                    for(i4_ys = 0; i4_ys < 2; i4_ys++)
  ------------------
  |  Branch (1648:36): [True: 1.17M, False: 586k]
  ------------------
 1649|  1.17M|                    {
 1650|  3.51M|                        for(i4_xs = 0; i4_xs < 2; i4_xs++)
  ------------------
  |  Branch (1650:40): [True: 2.34M, False: 1.17M]
  ------------------
 1651|  2.34M|                        {
 1652|  2.34M|                            mv_pred_t *ps_temp;
 1653|  2.34M|                            ps_temp = ps_motion_pred + (i4_xp << 1) + i4_xs;
 1654|  2.34M|                            ps_temp += 4 * ((i4_yp << 1) + i4_ys);
 1655|       |
 1656|       |                            /* check if the current part reference index is */
 1657|       |                            /* not choosen as the final reference index */
 1658|       |                            /* if not copy the neighbours MV */
 1659|  2.34M|                            if(ai4_temp_ref_indx[i4_i][2 * i4_yp + i4_xp] !=
  ------------------
  |  Branch (1659:32): [True: 10.5k, False: 2.33M]
  ------------------
 1660|  2.34M|                               ps_temp->i1_ref_frame[i4_i])
 1661|  10.5k|                            {
 1662|  10.5k|                                mv_pred_t *ps_temp_1;
 1663|  10.5k|                                WORD32 i4_updated_flag = SVCD_FALSE;
  ------------------
  |  |   45|  10.5k|#define SVCD_FALSE 0
  ------------------
 1664|       |
 1665|  10.5k|                                ps_temp_1 = ps_motion_pred + (i4_xp << 1) + (1 - i4_xs);
 1666|  10.5k|                                ps_temp_1 += 4 * ((i4_yp << 1) + i4_ys);
 1667|       |
 1668|       |                                /* store the appropriate neighbours */
 1669|  10.5k|                                if(ai4_temp_ref_indx[i4_i][2 * i4_yp + i4_xp] ==
  ------------------
  |  Branch (1669:36): [True: 4.63k, False: 5.95k]
  ------------------
 1670|  10.5k|                                   ps_temp_1->i1_ref_frame[i4_i])
 1671|  4.63k|                                {
 1672|  4.63k|                                    ps_temp->i2_mv[2 * i4_i] = ps_temp_1->i2_mv[2 * i4_i];
 1673|       |
 1674|  4.63k|                                    ps_temp->i2_mv[1 + (2 * i4_i)] =
 1675|  4.63k|                                        ps_temp_1->i2_mv[1 + (2 * i4_i)];
 1676|  4.63k|                                    i4_updated_flag = SVCD_TRUE;
  ------------------
  |  |   46|  4.63k|#define SVCD_TRUE 1
  ------------------
 1677|  4.63k|                                }
 1678|       |
 1679|  10.5k|                                if(SVCD_FALSE == i4_updated_flag)
  ------------------
  |  |   45|  10.5k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1679:36): [True: 5.95k, False: 4.63k]
  ------------------
 1680|  5.95k|                                {
 1681|  5.95k|                                    ps_temp_1 = ps_motion_pred + (i4_xp << 1) + i4_xs;
 1682|       |
 1683|  5.95k|                                    ps_temp_1 += 4 * ((i4_yp << 1) + 1 - i4_ys);
 1684|       |
 1685|  5.95k|                                    if(ai4_temp_ref_indx[i4_i][2 * i4_yp + i4_xp] ==
  ------------------
  |  Branch (1685:40): [True: 5.67k, False: 281]
  ------------------
 1686|  5.95k|                                       ps_temp_1->i1_ref_frame[i4_i])
 1687|  5.67k|                                    {
 1688|  5.67k|                                        ps_temp->i2_mv[2 * i4_i] = ps_temp_1->i2_mv[2 * i4_i];
 1689|       |
 1690|  5.67k|                                        ps_temp->i2_mv[1 + (2 * i4_i)] =
 1691|  5.67k|                                            ps_temp_1->i2_mv[1 + (2 * i4_i)];
 1692|  5.67k|                                        i4_updated_flag = SVCD_TRUE;
  ------------------
  |  |   46|  5.67k|#define SVCD_TRUE 1
  ------------------
 1693|  5.67k|                                    }
 1694|  5.95k|                                }
 1695|  10.5k|                                if(SVCD_FALSE == i4_updated_flag)
  ------------------
  |  |   45|  10.5k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1695:36): [True: 281, False: 10.3k]
  ------------------
 1696|    281|                                {
 1697|    281|                                    ps_temp_1 = ps_motion_pred + (i4_xp << 1) + (1 - i4_xs);
 1698|    281|                                    ps_temp_1 += 4 * ((i4_yp << 1) + 1 - i4_ys);
 1699|       |
 1700|    281|                                    ps_temp->i2_mv[2 * i4_i] = ps_temp_1->i2_mv[2 * i4_i];
 1701|       |
 1702|    281|                                    ps_temp->i2_mv[1 + (2 * i4_i)] =
 1703|    281|                                        ps_temp_1->i2_mv[1 + (2 * i4_i)];
 1704|       |
 1705|    281|                                    i4_updated_flag = SVCD_TRUE;
  ------------------
  |  |   46|    281|#define SVCD_TRUE 1
  ------------------
 1706|    281|                                }
 1707|  10.5k|                            } /* end of replacement of mv based on ref indx */
 1708|  2.34M|                        }     /* end of loop over sub partition xs */
 1709|  1.17M|                    }         /* end of loop over sub partition ys */
 1710|   586k|                }             /* end of loop over partition xp */
 1711|   293k|            }                 /* end of loop over partition yp */
 1712|   146k|        }                     /* end of loop over lists */
 1713|   121k|    }
 1714|       |
 1715|       |    /************************************************************************/
 1716|       |    /* if restircted saptial resolution change flag is 0                    */
 1717|       |    /* modify the reference indixes and motion vectors                      */
 1718|       |    /************************************************************************/
 1719|   121k|    if((SVCD_FALSE == ps_lyr_mem->ps_curr_lyr_res_prms->u1_rstrct_res_change_flag) &&
  ------------------
  |  |   45|   121k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1719:8): [True: 121k, False: 0]
  ------------------
 1720|   121k|       (2 == i4_listx) && (SVCD_TRUE == ps_ctxt->u1_direct_8x8_inference_flag))
  ------------------
  |  |   46|  25.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1720:8): [True: 25.0k, False: 96.4k]
  |  Branch (1720:27): [True: 7.46k, False: 17.5k]
  ------------------
 1721|  7.46k|    {
 1722|       |        /* only applicable for EB Slice */
 1723|       |        /* store the corner 4x4 motion vectors to the whole block */
 1724|       |        /* 2 lists and 4 partitions */
 1725|  7.46k|        mot_vec_t s_temp_mv[2][4];
 1726|  7.46k|        WORD32 i4_xp, i4_yp;
 1727|  7.46k|        memset(&s_temp_mv[0][0], 0, sizeof(s_temp_mv));
 1728|       |
 1729|  22.3k|        for(i4_i = 0; i4_i < i4_listx; i4_i++)
  ------------------
  |  Branch (1729:23): [True: 14.9k, False: 7.46k]
  ------------------
 1730|  14.9k|        {
 1731|  14.9k|            s_temp_mv[i4_i][0].i2_mv_x = ps_motion_pred[0].i2_mv[2 * i4_i];
 1732|  14.9k|            s_temp_mv[i4_i][0].i2_mv_y = ps_motion_pred[0].i2_mv[1 + (2 * i4_i)];
 1733|       |
 1734|  14.9k|            s_temp_mv[i4_i][1].i2_mv_x = ps_motion_pred[3].i2_mv[2 * i4_i];
 1735|  14.9k|            s_temp_mv[i4_i][1].i2_mv_y = ps_motion_pred[3].i2_mv[1 + (2 * i4_i)];
 1736|       |
 1737|  14.9k|            s_temp_mv[i4_i][2].i2_mv_x = ps_motion_pred[12].i2_mv[2 * i4_i];
 1738|  14.9k|            s_temp_mv[i4_i][2].i2_mv_y = ps_motion_pred[12].i2_mv[1 + (2 * i4_i)];
 1739|       |
 1740|  14.9k|            s_temp_mv[i4_i][3].i2_mv_x = ps_motion_pred[15].i2_mv[2 * i4_i];
 1741|  14.9k|            s_temp_mv[i4_i][3].i2_mv_y = ps_motion_pred[15].i2_mv[1 + (2 * i4_i)];
 1742|       |
 1743|  14.9k|        } /* end of loop over lists */
 1744|       |
 1745|       |        /* replace the motion vectors */
 1746|  22.3k|        for(i4_i = 0; i4_i < i4_listx; i4_i++)
  ------------------
  |  Branch (1746:23): [True: 14.9k, False: 7.46k]
  ------------------
 1747|  14.9k|        {
 1748|  74.6k|            for(i4_yp = 0; i4_yp < 4; i4_yp++)
  ------------------
  |  Branch (1748:28): [True: 59.6k, False: 14.9k]
  ------------------
 1749|  59.6k|            {
 1750|   298k|                for(i4_xp = 0; i4_xp < 4; i4_xp++)
  ------------------
  |  Branch (1750:32): [True: 238k, False: 59.6k]
  ------------------
 1751|   238k|                {
 1752|   238k|                    mv_pred_t *ps_temp;
 1753|   238k|                    ps_temp = ps_motion_pred + i4_xp;
 1754|   238k|                    ps_temp += 4 * i4_yp;
 1755|       |
 1756|   238k|                    ps_temp->i2_mv[2 * i4_i] =
 1757|   238k|                        s_temp_mv[i4_i][2 * (i4_yp >> 1) + (i4_xp >> 1)].i2_mv_x;
 1758|       |
 1759|   238k|                    ps_temp->i2_mv[1 + (2 * i4_i)] =
 1760|   238k|                        s_temp_mv[i4_i][2 * (i4_yp >> 1) + (i4_xp >> 1)].i2_mv_y;
 1761|       |
 1762|   238k|                } /* end of loop over sub partitions xp */
 1763|  59.6k|            }     /* end of loop over sub partitions yp */
 1764|  14.9k|        }         /* end of loop over lists */
 1765|  7.46k|    }
 1766|       |
 1767|       |    /* store the final reference index for all sub partitions */
 1768|       |    /* approporiate reference index is stored for each 4x4 belonging to 8x8 */
 1769|   121k|    {
 1770|   121k|        WORD32 i4_xp, i4_yp;
 1771|       |
 1772|   268k|        for(i4_i = 0; i4_i < i4_listx; i4_i++)
  ------------------
  |  Branch (1772:23): [True: 146k, False: 121k]
  ------------------
 1773|   146k|        {
 1774|   732k|            for(i4_yp = 0; i4_yp < 4; i4_yp++)
  ------------------
  |  Branch (1774:28): [True: 586k, False: 146k]
  ------------------
 1775|   586k|            {
 1776|  2.93M|                for(i4_xp = 0; i4_xp < 4; i4_xp++)
  ------------------
  |  Branch (1776:32): [True: 2.34M, False: 586k]
  ------------------
 1777|  2.34M|                {
 1778|  2.34M|                    mv_pred_t *ps_temp;
 1779|  2.34M|                    ps_temp = ps_motion_pred + i4_xp;
 1780|  2.34M|                    ps_temp += 4 * i4_yp;
 1781|       |
 1782|  2.34M|                    ps_temp->i1_ref_frame[i4_i] =
 1783|  2.34M|                        ai4_temp_ref_indx[i4_i][2 * (i4_yp >> 1) + (i4_xp >> 1)];
 1784|       |
 1785|  2.34M|                } /* end of loop over partition xp */
 1786|   586k|            }     /* end of loop over partition yp */
 1787|   146k|        }         /* end of loop over lists */
 1788|   121k|    }
 1789|       |
 1790|       |    /********************************************************************/
 1791|       |    /* modify the motion vectors for non dyadic cases, set the mv       */
 1792|       |    /* threshold appropraitely to derive the sub MB type                */
 1793|       |    /********************************************************************/
 1794|   121k|    if(SVCD_FALSE == ps_lyr_mem->ps_curr_lyr_res_prms->u1_rstrct_res_change_flag)
  ------------------
  |  |   45|   121k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1794:8): [True: 121k, False: 0]
  ------------------
 1795|   121k|    {
 1796|       |        /* non dyadic cases set the mv treshold to 1 */
 1797|   121k|        i4_mv_treshold = 1;
 1798|   121k|    }
 1799|      0|    else
 1800|      0|    {
 1801|       |        /* dyadic cases set the mv treshold to 0 */
 1802|      0|        i4_mv_treshold = 0;
 1803|      0|    }
 1804|       |
 1805|       |    /* modify the motion vectors and get sub mb mode if base mode flag is 1 */
 1806|   121k|    if((SVCD_FALSE == ps_lyr_mem->ps_curr_lyr_res_prms->u1_rstrct_res_change_flag) ||
  ------------------
  |  |   45|   121k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1806:8): [True: 121k, False: 0]
  ------------------
 1807|      0|       (1 == ps_svc_mb_params->u1_base_mode_flag))
  ------------------
  |  Branch (1807:8): [True: 0, False: 0]
  ------------------
 1808|   121k|    {
 1809|   121k|        WORD32 i4_xp, i4_yp;
 1810|   364k|        for(i4_yp = 0; i4_yp < 2; i4_yp++)
  ------------------
  |  Branch (1810:24): [True: 243k, False: 121k]
  ------------------
 1811|   243k|        {
 1812|   729k|            for(i4_xp = 0; i4_xp < 2; i4_xp++)
  ------------------
  |  Branch (1812:28): [True: 486k, False: 243k]
  ------------------
 1813|   486k|            {
 1814|   486k|                mv_pred_t *ps_temp;
 1815|   486k|                WORD32 i4_part_size = 0;
 1816|   486k|                WORD32 i4_horz1_match, i4_vert1_match;
 1817|   486k|                WORD32 i4_horz2_match, i4_vert2_match;
 1818|   486k|                WORD32 i4_diag_match;
 1819|       |
 1820|   486k|                WORD32 i4_8x8_match, i4_horz_match, i4_vert_match;
 1821|   486k|                WORD32 i4_mv_x, i4_mv_y;
 1822|       |
 1823|   486k|                ps_temp = ps_motion_pred + (i4_xp << 1);
 1824|   486k|                ps_temp += 4 * ((i4_yp << 1));
 1825|       |
 1826|       |                /* default init */
 1827|   486k|                i4_8x8_match = i4_horz_match = i4_vert_match = SVCD_TRUE;
  ------------------
  |  |   46|   486k|#define SVCD_TRUE 1
  ------------------
 1828|       |
 1829|       |                /* check if the mv diff in horz direction is under threshold*/
 1830|   486k|                i4_horz1_match =
 1831|   486k|                    isvcd_check_mv_diff(ps_temp, (ps_temp + 1), i4_listx, i4_mv_treshold);
 1832|       |
 1833|   486k|                i4_horz2_match =
 1834|   486k|                    isvcd_check_mv_diff((ps_temp + 4), (ps_temp + 4 + 1), i4_listx, i4_mv_treshold);
 1835|       |
 1836|       |                /* check if the mv diff in horz direction is under threshold*/
 1837|   486k|                i4_vert1_match =
 1838|   486k|                    isvcd_check_mv_diff(ps_temp, (ps_temp + 4), i4_listx, i4_mv_treshold);
 1839|       |
 1840|   486k|                i4_vert2_match =
 1841|   486k|                    isvcd_check_mv_diff((ps_temp + 1), (ps_temp + 4 + 1), i4_listx, i4_mv_treshold);
 1842|       |
 1843|       |                /* check if in diagonal direction is under threshold*/
 1844|   486k|                i4_diag_match =
 1845|   486k|                    isvcd_check_mv_diff(ps_temp, (ps_temp + 4 + 1), i4_listx, i4_mv_treshold);
 1846|       |
 1847|       |                /* calculate the excat matching points*/
 1848|   486k|                i4_8x8_match = i4_8x8_match && i4_horz1_match && i4_vert1_match && i4_diag_match;
  ------------------
  |  Branch (1848:32): [True: 486k, False: 0]
  |  Branch (1848:48): [True: 477k, False: 8.27k]
  |  Branch (1848:66): [True: 471k, False: 6.60k]
  |  Branch (1848:84): [True: 471k, False: 206]
  ------------------
 1849|   486k|                i4_horz_match = i4_horz_match && i4_horz1_match && i4_horz2_match;
  ------------------
  |  Branch (1849:33): [True: 486k, False: 0]
  |  Branch (1849:50): [True: 477k, False: 8.27k]
  |  Branch (1849:68): [True: 477k, False: 340]
  ------------------
 1850|   486k|                i4_vert_match = i4_vert_match && i4_vert1_match && i4_vert2_match;
  ------------------
  |  Branch (1850:33): [True: 486k, False: 0]
  |  Branch (1850:50): [True: 479k, False: 7.05k]
  |  Branch (1850:68): [True: 478k, False: 346]
  ------------------
 1851|       |
 1852|       |                /* modify the motion vectors appropriately */
 1853|       |
 1854|  1.07M|                for(i4_i = 0; i4_i < i4_listx; i4_i++)
  ------------------
  |  Branch (1854:31): [True: 586k, False: 486k]
  ------------------
 1855|   586k|                {
 1856|       |                    /* 8x8 mode all the 4 blocks are under threshold */
 1857|   586k|                    if(SVCD_TRUE == i4_8x8_match)
  ------------------
  |  |   46|   586k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1857:24): [True: 569k, False: 16.6k]
  ------------------
 1858|   569k|                    {
 1859|       |                        /* calculate the avarage */
 1860|   569k|                        i4_mv_x =
 1861|   569k|                            ((ps_temp[0].i2_mv[2 * i4_i]) + (ps_temp[1].i2_mv[2 * i4_i]) +
 1862|   569k|                             (ps_temp[4].i2_mv[2 * i4_i]) + (ps_temp[5].i2_mv[2 * i4_i] + 2)) >>
 1863|   569k|                            2;
 1864|       |
 1865|   569k|                        i4_mv_y = ((ps_temp[0].i2_mv[1 + (2 * i4_i)]) +
 1866|   569k|                                   (ps_temp[1].i2_mv[1 + (2 * i4_i)]) +
 1867|   569k|                                   (ps_temp[4].i2_mv[1 + (2 * i4_i)]) +
 1868|   569k|                                   (ps_temp[5].i2_mv[1 + (2 * i4_i)] + 2)) >>
 1869|   569k|                                  2;
 1870|       |
 1871|       |                        /* store the modified motion vectors */
 1872|   569k|                        ps_temp[0].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1873|   569k|                        ps_temp[1].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1874|   569k|                        ps_temp[4].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1875|   569k|                        ps_temp[5].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1876|       |
 1877|   569k|                        ps_temp[0].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1878|   569k|                        ps_temp[1].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1879|   569k|                        ps_temp[4].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1880|   569k|                        ps_temp[5].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1881|       |
 1882|       |                        /* store the sub mb partition size */
 1883|   569k|                        i4_part_size = SUBMB_8x8;
  ------------------
  |  |  501|   569k|#define SUBMB_8x8    0
  ------------------
 1884|   569k|                    }
 1885|       |                    /* 8x4 mode  */
 1886|  16.6k|                    else if(SVCD_TRUE == i4_horz_match)
  ------------------
  |  |   46|  16.6k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1886:29): [True: 7.87k, False: 8.79k]
  ------------------
 1887|  7.87k|                    {
 1888|       |                        /* horizontal directional merging */
 1889|       |                        /* calculate the average of first two and store back*/
 1890|  7.87k|                        i4_mv_x =
 1891|  7.87k|                            ((ps_temp[0].i2_mv[2 * i4_i]) + (ps_temp[1].i2_mv[2 * i4_i] + 1)) >> 1;
 1892|       |
 1893|  7.87k|                        i4_mv_y = ((ps_temp[0].i2_mv[1 + (2 * i4_i)]) +
 1894|  7.87k|                                   (ps_temp[1].i2_mv[1 + (2 * i4_i)] + 1)) >>
 1895|  7.87k|                                  1;
 1896|       |
 1897|  7.87k|                        ps_temp[0].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1898|  7.87k|                        ps_temp[1].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1899|       |
 1900|  7.87k|                        ps_temp[0].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1901|  7.87k|                        ps_temp[1].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1902|       |
 1903|       |                        /* calculate the average of next two and store back*/
 1904|  7.87k|                        i4_mv_x =
 1905|  7.87k|                            ((ps_temp[4].i2_mv[2 * i4_i]) + (ps_temp[5].i2_mv[2 * i4_i] + 1)) >> 1;
 1906|       |
 1907|  7.87k|                        i4_mv_y = ((ps_temp[4].i2_mv[1 + (2 * i4_i)]) +
 1908|  7.87k|                                   (ps_temp[5].i2_mv[1 + (2 * i4_i)] + 1)) >>
 1909|  7.87k|                                  1;
 1910|       |
 1911|  7.87k|                        ps_temp[4].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1912|  7.87k|                        ps_temp[5].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1913|       |
 1914|  7.87k|                        ps_temp[4].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1915|  7.87k|                        ps_temp[5].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1916|       |                        /* store the sub mb partition size */
 1917|  7.87k|                        i4_part_size = SUBMB_8x4;
  ------------------
  |  |  502|  7.87k|#define SUBMB_8x4    1
  ------------------
 1918|  7.87k|                    }
 1919|       |                    /* 4x8 mode all the 4 blocks are under threshold */
 1920|  8.79k|                    else if(SVCD_TRUE == i4_vert_match)
  ------------------
  |  |   46|  8.79k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1920:29): [True: 7.83k, False: 960]
  ------------------
 1921|  7.83k|                    {
 1922|       |                        /* vertical directional merging */
 1923|  7.83k|                        i4_mv_x =
 1924|  7.83k|                            ((ps_temp[0].i2_mv[2 * i4_i]) + (ps_temp[4].i2_mv[2 * i4_i] + 1)) >> 1;
 1925|       |
 1926|  7.83k|                        i4_mv_y = ((ps_temp[0].i2_mv[1 + (2 * i4_i)]) +
 1927|  7.83k|                                   (ps_temp[4].i2_mv[1 + (2 * i4_i)] + 1)) >>
 1928|  7.83k|                                  1;
 1929|       |
 1930|  7.83k|                        ps_temp[0].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1931|  7.83k|                        ps_temp[4].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1932|       |
 1933|  7.83k|                        ps_temp[0].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1934|  7.83k|                        ps_temp[4].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1935|       |
 1936|       |                        /* calculate the average of next two and store back*/
 1937|  7.83k|                        i4_mv_x =
 1938|  7.83k|                            ((ps_temp[1].i2_mv[2 * i4_i]) + (ps_temp[5].i2_mv[2 * i4_i] + 1)) >> 1;
 1939|       |
 1940|  7.83k|                        i4_mv_y = ((ps_temp[1].i2_mv[1 + (2 * i4_i)]) +
 1941|  7.83k|                                   (ps_temp[5].i2_mv[1 + (2 * i4_i)] + 1)) >>
 1942|  7.83k|                                  1;
 1943|       |
 1944|  7.83k|                        ps_temp[1].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1945|  7.83k|                        ps_temp[5].i2_mv[2 * i4_i] = (WORD16) i4_mv_x;
 1946|       |
 1947|  7.83k|                        ps_temp[1].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1948|  7.83k|                        ps_temp[5].i2_mv[1 + (2 * i4_i)] = (WORD16) i4_mv_y;
 1949|       |                        /* store the sub mb partition size */
 1950|  7.83k|                        i4_part_size = SUBMB_4x8;
  ------------------
  |  |  503|  7.83k|#define SUBMB_4x8    2
  ------------------
 1951|  7.83k|                    }
 1952|    960|                    else
 1953|    960|                    {
 1954|       |                        /* store the sub mb partition size */
 1955|    960|                        i4_part_size = SUBMB_4x4;
  ------------------
  |  |  504|    960|#define SUBMB_4x4    3
  ------------------
 1956|    960|                    }
 1957|       |
 1958|   586k|                } /* end of loop over lists */
 1959|       |
 1960|       |                /* store the sub MB type B slice */
 1961|   486k|                if(2 == i4_listx)
  ------------------
  |  Branch (1961:20): [True: 100k, False: 385k]
  ------------------
 1962|   100k|                {
 1963|   100k|                    WORD32 i4_part_mode_a;
 1964|   100k|                    WORD32 i4_indx;
 1965|       |
 1966|   100k|                    i4_part_mode_a = 0;
 1967|       |                    /* check the 0th partiton reference indices */
 1968|   100k|                    if(0 <= ps_temp[0].i1_ref_frame[0])
  ------------------
  |  Branch (1968:24): [True: 84.0k, False: 16.1k]
  ------------------
 1969|  84.0k|                    {
 1970|  84.0k|                        i4_part_mode_a += 1;
 1971|  84.0k|                    }
 1972|   100k|                    if(0 <= ps_temp[0].i1_ref_frame[1])
  ------------------
  |  Branch (1972:24): [True: 53.0k, False: 47.1k]
  ------------------
 1973|  53.0k|                    {
 1974|  53.0k|                        i4_part_mode_a += 2;
 1975|  53.0k|                    }
 1976|   100k|                    i4_indx = 3 * i4_part_size + (i4_part_mode_a - 1);
 1977|       |
 1978|   100k|                    pi4_sub_mb_mode[2 * i4_yp + i4_xp] = g_au1_eb_submb_type[i4_indx];
 1979|   100k|                }
 1980|       |                /* P slice */
 1981|   385k|                else
 1982|   385k|                {
 1983|   385k|                    pi4_sub_mb_mode[2 * i4_yp + i4_xp] = g_au1_ep_submb_type[i4_part_size];
 1984|   385k|                }
 1985|   486k|            } /* end of loop over partition xp */
 1986|       |
 1987|   243k|        }     /* end of loop over partition yp */
 1988|   121k|    }
 1989|       |
 1990|   121k|    return;
 1991|   121k|}
isvcd_interlyr_mbmode_pred_bmb:
 2020|  35.5k|{
 2021|  35.5k|    WORD32 i4_part_mode_a, i4_part_mode_b;
 2022|  35.5k|    WORD32 i4_idx;
 2023|  35.5k|    dec_mb_info_t *ps_mb_params = (dec_mb_info_t *) pv_mb_params;
 2024|  35.5k|    parse_part_params_t *ps_part = (parse_part_params_t *) pv_part;
 2025|       |
 2026|  35.5k|    UNUSED(ps_ctxt);
  ------------------
  |  |   45|  35.5k|#define UNUSED(x) ((void)(x))
  ------------------
 2027|       |
 2028|  35.5k|    i4_part_mode_a = 0;
 2029|       |
 2030|       |    /* check the 0th partiton reference indices */
 2031|  35.5k|    if(PRED_8x8 != i4_part_size)
  ------------------
  |  |  453|  35.5k|#define PRED_8x8    3
  ------------------
  |  Branch (2031:8): [True: 29.3k, False: 6.25k]
  ------------------
 2032|  29.3k|    {
 2033|  29.3k|        if(0 <= ps_motion_pred[0].i1_ref_frame[0])
  ------------------
  |  Branch (2033:12): [True: 25.5k, False: 3.77k]
  ------------------
 2034|  25.5k|        {
 2035|  25.5k|            i4_part_mode_a += 1;
 2036|  25.5k|        }
 2037|  29.3k|        if(0 <= ps_motion_pred[0].i1_ref_frame[1])
  ------------------
  |  Branch (2037:12): [True: 13.6k, False: 15.6k]
  ------------------
 2038|  13.6k|        {
 2039|  13.6k|            i4_part_mode_a += 2;
 2040|  13.6k|        }
 2041|  29.3k|    }
 2042|       |
 2043|       |    /* check the 15th partiton reference indices */
 2044|       |    /* this done since all the reference indices will be replicated */
 2045|  35.5k|    i4_part_mode_b = 0;
 2046|       |
 2047|  35.5k|    if((PRED_16x8 == i4_part_size) || (PRED_8x16 == i4_part_size))
  ------------------
  |  |  451|  35.5k|#define PRED_16x8   1
  ------------------
                  if((PRED_16x8 == i4_part_size) || (PRED_8x16 == i4_part_size))
  ------------------
  |  |  452|  31.6k|#define PRED_8x16   2
  ------------------
  |  Branch (2047:8): [True: 3.94k, False: 31.6k]
  |  Branch (2047:39): [True: 2.02k, False: 29.6k]
  ------------------
 2048|  5.96k|    {
 2049|  5.96k|        ps_motion_pred += (3 * i4_cur_mot_stride) + 3;
 2050|       |
 2051|  5.96k|        if(0 <= ps_motion_pred[0].i1_ref_frame[0])
  ------------------
  |  Branch (2051:12): [True: 4.55k, False: 1.41k]
  ------------------
 2052|  4.55k|        {
 2053|  4.55k|            i4_part_mode_b += 1;
 2054|  4.55k|        }
 2055|  5.96k|        if(0 <= ps_motion_pred[0].i1_ref_frame[1])
  ------------------
  |  Branch (2055:12): [True: 2.28k, False: 3.68k]
  ------------------
 2056|  2.28k|        {
 2057|  2.28k|            i4_part_mode_b += 2;
 2058|  2.28k|        }
 2059|  5.96k|    }
 2060|       |    /* update the pred modes for B cases */
 2061|       |    /* If partition size is not equal to 8x8 */
 2062|       |    /* then update the prediciton mode of    */
 2063|       |    /* partitions                            */
 2064|  35.5k|    if(PRED_8x8 != i4_part_size)
  ------------------
  |  |  453|  35.5k|#define PRED_8x8    3
  ------------------
  |  Branch (2064:8): [True: 29.3k, False: 6.25k]
  ------------------
 2065|  29.3k|    {
 2066|  29.3k|        UWORD8 u1_pred_mode_part0;
 2067|  29.3k|        UWORD8 u1_pred_mode_part1;
 2068|       |
 2069|  29.3k|        i4_idx = 3 * i4_part_size;
 2070|  29.3k|        i4_idx += 3 * (i4_part_mode_a - 1);
 2071|  29.3k|        i4_part_mode_b = (i4_part_mode_b > 0) ? i4_part_mode_b : 1;
  ------------------
  |  Branch (2071:26): [True: 5.96k, False: 23.3k]
  ------------------
 2072|  29.3k|        i4_idx += (i4_part_mode_b - 1);
 2073|  29.3k|        i4_idx = (i4_idx < 0) ? 0 : i4_idx;
  ------------------
  |  Branch (2073:18): [True: 0, False: 29.3k]
  ------------------
 2074|       |        /* Get the mb type                     */
 2075|       |        /* From mb type - get prediciton modes */
 2076|       |        /*  of parttions                       */
 2077|       |        /* Update the prediciton mode parma of */
 2078|       |        /* mb param structure                  */
 2079|       |
 2080|  29.3k|        ps_mb_params->u1_mb_type = g_au1_eb_mb_type[i4_idx + (6 * i4_part_size)];
 2081|  29.3k|        u1_pred_mode_part0 = g_au1_mb_pred_mode[0][5 + ps_mb_params->u1_mb_type];
 2082|  29.3k|        u1_pred_mode_part1 = g_au1_mb_pred_mode[1][5 + ps_mb_params->u1_mb_type];
 2083|  29.3k|        ps_part[0].u1_pred_mode = u1_pred_mode_part0;
 2084|  29.3k|        ps_part[1].u1_pred_mode = u1_pred_mode_part1;
 2085|  29.3k|    }
 2086|  6.25k|    else
 2087|  6.25k|    {
 2088|  6.25k|        WORD32 i4_i, i4_ctr, i4_num_submb_part;
 2089|  6.25k|        UWORD8 u1_sub_mb_type, u1_sub_mb_mc_mode;
 2090|  6.25k|        UWORD8 u1_pred_mode;
 2091|       |
 2092|  6.25k|        ps_mb_params->u1_mb_type = B_8x8;
  ------------------
  |  |  480|  6.25k|#define B_8x8    22
  ------------------
 2093|       |
 2094|  31.2k|        for(i4_i = 0; i4_i < NUM_MB_PARTS; i4_i++)
  ------------------
  |  |   59|  31.2k|#define NUM_MB_PARTS 4
  ------------------
  |  Branch (2094:23): [True: 25.0k, False: 6.25k]
  ------------------
 2095|  25.0k|        {
 2096|  25.0k|            u1_sub_mb_type = (UWORD8) pi4_sub_mb_mode[i4_i];
 2097|       |
 2098|  25.0k|            u1_sub_mb_mc_mode = gau1_ih264d_submb_mc_mode[4 + u1_sub_mb_type];
 2099|  25.0k|            i4_num_submb_part = g_au1_num_sub_mb_part[u1_sub_mb_mc_mode];
 2100|  25.0k|            *pu1_col_info |= (u1_sub_mb_mc_mode << 4);
 2101|  25.0k|            pu1_col_info++;
 2102|  25.0k|            u1_pred_mode = g_au1_sub_mb_pred_mode[4 + u1_sub_mb_type];
 2103|  82.1k|            for(i4_ctr = 0; i4_ctr < i4_num_submb_part; i4_ctr++)
  ------------------
  |  Branch (2103:29): [True: 57.0k, False: 25.0k]
  ------------------
 2104|  57.0k|            {
 2105|  57.0k|                ps_part->u1_pred_mode = u1_pred_mode;
 2106|  57.0k|                ps_part++;
 2107|  57.0k|            }
 2108|  25.0k|        }
 2109|  6.25k|    }
 2110|       |
 2111|  35.5k|    return;
 2112|  35.5k|}
isvcd_populate_ref_idx:
 2139|  26.9k|{
 2140|  26.9k|    UWORD8 u1_mot_pred_flag;
 2141|  26.9k|    WORD32 i4_lx;
 2142|       |
 2143|  67.8k|    for(i4_lx = 0; i4_lx < i4_listx; i4_lx++)
  ------------------
  |  Branch (2143:20): [True: 40.8k, False: 26.9k]
  ------------------
 2144|  40.8k|    {
 2145|  40.8k|        u1_mot_pred_flag = ps_svc_mb_params->au1_motion_pred_flag[i4_lx];
 2146|       |
 2147|  40.8k|        if((PRED_16x16 == ps_mb_params->u1_mb_mc_mode) && (u1_mot_pred_flag & 0x1))
  ------------------
  |  |  450|  40.8k|#define PRED_16x16  0
  ------------------
  |  Branch (2147:12): [True: 14.9k, False: 25.9k]
  |  Branch (2147:59): [True: 10.3k, False: 4.59k]
  ------------------
 2148|  10.3k|        {
 2149|  10.3k|            ps_mb_part_info->i1_ref_idx[i4_lx][0] = ps_motion_pred[0].i1_ref_frame[i4_lx];
 2150|  10.3k|        }
 2151|  30.4k|        else if((PRED_8x16 == ps_mb_params->u1_mb_mc_mode))
  ------------------
  |  |  452|  30.4k|#define PRED_8x16   2
  ------------------
  |  Branch (2151:17): [True: 10.8k, False: 19.6k]
  ------------------
 2152|  10.8k|        {
 2153|  10.8k|            if(u1_mot_pred_flag & 0x01)
  ------------------
  |  Branch (2153:16): [True: 3.05k, False: 7.78k]
  ------------------
 2154|  3.05k|            {
 2155|  3.05k|                ps_mb_part_info->i1_ref_idx[i4_lx][0] = ps_motion_pred[0].i1_ref_frame[i4_lx];
 2156|  3.05k|            }
 2157|  10.8k|            if(u1_mot_pred_flag & 0x02)
  ------------------
  |  Branch (2157:16): [True: 5.92k, False: 4.91k]
  ------------------
 2158|  5.92k|            {
 2159|  5.92k|                ps_mb_part_info->i1_ref_idx[i4_lx][1] = ps_motion_pred[2].i1_ref_frame[i4_lx];
 2160|  5.92k|            }
 2161|  10.8k|        }
 2162|  19.6k|        else if((PRED_16x8 == ps_mb_params->u1_mb_mc_mode))
  ------------------
  |  |  451|  19.6k|#define PRED_16x8   1
  ------------------
  |  Branch (2162:17): [True: 6.20k, False: 13.4k]
  ------------------
 2163|  6.20k|        {
 2164|  6.20k|            if(u1_mot_pred_flag & 0x01)
  ------------------
  |  Branch (2164:16): [True: 3.95k, False: 2.24k]
  ------------------
 2165|  3.95k|            {
 2166|  3.95k|                ps_mb_part_info->i1_ref_idx[i4_lx][0] = ps_motion_pred[0].i1_ref_frame[i4_lx];
 2167|  3.95k|            }
 2168|  6.20k|            if(u1_mot_pred_flag & 0x02)
  ------------------
  |  Branch (2168:16): [True: 4.49k, False: 1.71k]
  ------------------
 2169|  4.49k|            {
 2170|  4.49k|                ps_mb_part_info->i1_ref_idx[i4_lx][1] = ps_motion_pred[8].i1_ref_frame[i4_lx];
 2171|  4.49k|            }
 2172|  6.20k|        }
 2173|  13.4k|        else if((PRED_8x8 == ps_mb_params->u1_mb_mc_mode))
  ------------------
  |  |  453|  13.4k|#define PRED_8x8    3
  ------------------
  |  Branch (2173:17): [True: 8.81k, False: 4.64k]
  ------------------
 2174|  8.81k|        {
 2175|  8.81k|            if(u1_mot_pred_flag & 0x01)
  ------------------
  |  Branch (2175:16): [True: 3.66k, False: 5.14k]
  ------------------
 2176|  3.66k|            {
 2177|  3.66k|                ps_mb_part_info->i1_ref_idx[i4_lx][0] = ps_motion_pred[0].i1_ref_frame[i4_lx];
 2178|  3.66k|            }
 2179|  8.81k|            if(u1_mot_pred_flag & 0x02)
  ------------------
  |  Branch (2179:16): [True: 3.47k, False: 5.34k]
  ------------------
 2180|  3.47k|            {
 2181|  3.47k|                ps_mb_part_info->i1_ref_idx[i4_lx][1] = ps_motion_pred[2].i1_ref_frame[i4_lx];
 2182|  3.47k|            }
 2183|  8.81k|            if(u1_mot_pred_flag & 0x04)
  ------------------
  |  Branch (2183:16): [True: 4.44k, False: 4.37k]
  ------------------
 2184|  4.44k|            {
 2185|  4.44k|                ps_mb_part_info->i1_ref_idx[i4_lx][2] = ps_motion_pred[8].i1_ref_frame[i4_lx];
 2186|  4.44k|            }
 2187|  8.81k|            if(u1_mot_pred_flag & 0x08)
  ------------------
  |  Branch (2187:16): [True: 3.68k, False: 5.13k]
  ------------------
 2188|  3.68k|            {
 2189|  3.68k|                ps_mb_part_info->i1_ref_idx[i4_lx][3] = ps_motion_pred[10].i1_ref_frame[i4_lx];
 2190|  3.68k|            }
 2191|  8.81k|        }
 2192|  40.8k|    }
 2193|  26.9k|}
isvcd_interlyr_mbmode_pred:
 2223|   111k|{
 2224|       |    /*! Flow of the module is as follows                                   */
 2225|       |    /*! 1. it checks if all the sub mb modes are 8x8 modes                 */
 2226|       |    /*! 2. it matches the motion vectors at 8x8 level and computes the
 2227|       |           partiton size. store the same in the part type of mb params     */
 2228|       |    /*! 3. stores the pred modes based on slcie type and reference indices */
 2229|       |    /*! 4. stores the sub mb type in the mb params if teh part size is 8x8 */
 2230|   111k|    mode_motion_ctxt_t *ps_ctxt;
 2231|   111k|    mv_pred_t *ps_motion_pred;
 2232|   111k|    dec_mb_info_t *ps_mb_params;
 2233|   111k|    WORD32 i4_listx;
 2234|   111k|    WORD32 i4_part_size;
 2235|   111k|    WORD32 i4_mb_mode_flag;
 2236|   111k|    WORD32 i4_i;
 2237|   111k|    WORD32 i4_blk_mode;
 2238|   111k|    parse_part_params_t *ps_part = (parse_part_params_t *) pv_part;
 2239|   111k|    parse_pmbarams_t *ps_mb_part_info = (parse_pmbarams_t *) pv_mb_part_info;
 2240|   111k|    UWORD8 *pu1_col_info = ps_mb_part_info->u1_col_info;
 2241|   111k|    UNUSED(pv_dec);
  ------------------
  |  |   45|   111k|#define UNUSED(x) ((void)(x))
  ------------------
 2242|       |
 2243|   111k|    ps_ctxt = (mode_motion_ctxt_t *) pv_comp_mode_mv_ctxt;
 2244|   111k|    ps_motion_pred = ps_ctxt->ps_motion_pred_struct;
 2245|   111k|    ps_mb_params = (dec_mb_info_t *) pv_mb_params;
 2246|       |
 2247|       |    /*********** store the MB mode as inter *************************/
 2248|   111k|    *pi4_mb_mode = SVC_INTER_MB;
  ------------------
  |  |  114|   111k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 2249|       |
 2250|       |    /***********************************************************************/
 2251|       |    /* derivation of part type                                             */
 2252|       |    /***********************************************************************/
 2253|   111k|    i4_listx = ps_ctxt->i4_listx;
 2254|       |
 2255|       |    /* set the mb mode derivation flag to false */
 2256|   111k|    i4_mb_mode_flag = SVCD_FALSE;
  ------------------
  |  |   45|   111k|#define SVCD_FALSE 0
  ------------------
 2257|       |
 2258|       |    /* for B and P slice different blk mod treshold */
 2259|   111k|    if(2 == i4_listx)
  ------------------
  |  Branch (2259:8): [True: 20.7k, False: 90.9k]
  ------------------
 2260|  20.7k|    {
 2261|  20.7k|        i4_blk_mode = B_BI_8x8;
  ------------------
  |  |  468|  20.7k|#define B_BI_8x8        3
  ------------------
 2262|  20.7k|    }
 2263|  90.9k|    else
 2264|  90.9k|    {
 2265|  90.9k|        i4_blk_mode = P_L0_8x8;
  ------------------
  |  |  459|  90.9k|#define P_L0_8x8    0
  ------------------
 2266|  90.9k|    }
 2267|       |
 2268|       |    /* set the mode derivation flag to true base on conditions */
 2269|   111k|    if((i4_blk_mode >= pi4_sub_mb_mode[0]) && (i4_blk_mode >= pi4_sub_mb_mode[1]) &&
  ------------------
  |  Branch (2269:8): [True: 109k, False: 2.24k]
  |  Branch (2269:47): [True: 105k, False: 3.70k]
  ------------------
 2270|   105k|       (i4_blk_mode >= pi4_sub_mb_mode[2]) && (i4_blk_mode >= pi4_sub_mb_mode[3]))
  ------------------
  |  Branch (2270:8): [True: 103k, False: 2.07k]
  |  Branch (2270:47): [True: 102k, False: 734]
  ------------------
 2271|   102k|    {
 2272|   102k|        i4_mb_mode_flag = SVCD_TRUE;
  ------------------
  |  |   46|   102k|#define SVCD_TRUE 1
  ------------------
 2273|   102k|    }
 2274|       |
 2275|       |    /* store the default 8x8 mode */
 2276|   111k|    ps_mb_part_info->u1_num_part = 4;
 2277|   111k|    i4_part_size = PRED_8x8;
  ------------------
  |  |  453|   111k|#define PRED_8x8    3
  ------------------
 2278|       |
 2279|       |    /* further check is present if all are 8x8 mode */
 2280|   111k|    if(SVCD_TRUE == i4_mb_mode_flag)
  ------------------
  |  |   46|   111k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2280:8): [True: 102k, False: 8.75k]
  ------------------
 2281|   102k|    {
 2282|   102k|        WORD32 i4_horz_match, i4_vert_match;
 2283|       |
 2284|       |        /* check if the motion in horz direction are same*/
 2285|   102k|        i4_horz_match = isvcd_check_motion(ps_motion_pred, (ps_motion_pred + 2), i4_listx);
 2286|   102k|        i4_horz_match += isvcd_check_motion((ps_motion_pred + 8), (ps_motion_pred + 10), i4_listx);
 2287|       |
 2288|       |        /* check if the motion in vertical direction is same */
 2289|   102k|        i4_vert_match = isvcd_check_motion(ps_motion_pred, (ps_motion_pred + 8), i4_listx);
 2290|   102k|        i4_vert_match += isvcd_check_motion((ps_motion_pred + 2), (ps_motion_pred + 10), i4_listx);
 2291|       |
 2292|       |        /* decide the partition size based on the results of matching */
 2293|   102k|        if((2 == i4_horz_match) && (2 == i4_vert_match))
  ------------------
  |  Branch (2293:12): [True: 92.4k, False: 10.4k]
  |  Branch (2293:36): [True: 85.7k, False: 6.71k]
  ------------------
 2294|  85.7k|        {
 2295|  85.7k|            ps_mb_params->u1_mb_type = P_L0_16x16;
 2296|  85.7k|            i4_part_size = PRED_16x16;
  ------------------
  |  |  450|  85.7k|#define PRED_16x16  0
  ------------------
 2297|  85.7k|            ps_mb_part_info->u1_num_part = 1;
 2298|  85.7k|            *pu1_col_info++ = (PRED_16x16 << 6);
  ------------------
  |  |  450|  85.7k|#define PRED_16x16  0
  ------------------
 2299|  85.7k|            ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred->i1_ref_frame[0];
 2300|  85.7k|            if(2 == i4_listx) ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred->i1_ref_frame[1];
  ------------------
  |  Branch (2300:16): [True: 11.7k, False: 74.0k]
  ------------------
 2301|       |
 2302|  85.7k|            ps_part->u1_partwidth = 4;  // interms of 4x4
 2303|  85.7k|            ps_part->u1_partheight = 4;
 2304|  85.7k|            ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  85.7k|#define PRED_L0   1
  ------------------
 2305|  85.7k|            ps_part->u1_is_direct = 0;
 2306|  85.7k|            ps_part->u1_sub_mb_num = 0;
 2307|  85.7k|        }
 2308|  17.1k|        else if(2 == i4_horz_match)
  ------------------
  |  Branch (2308:17): [True: 6.71k, False: 10.4k]
  ------------------
 2309|  6.71k|        {
 2310|  6.71k|            i4_part_size = PRED_16x8;
  ------------------
  |  |  451|  6.71k|#define PRED_16x8   1
  ------------------
 2311|  6.71k|            ps_mb_params->u1_mb_type = P_L0_L0_16x8;
 2312|  6.71k|            ps_mb_part_info->u1_num_part = 2;
 2313|  6.71k|            *pu1_col_info++ = (PRED_16x8 << 6);
  ------------------
  |  |  451|  6.71k|#define PRED_16x8   1
  ------------------
 2314|  6.71k|            *pu1_col_info++ = (PRED_16x8 << 6);
  ------------------
  |  |  451|  6.71k|#define PRED_16x8   1
  ------------------
 2315|       |
 2316|  6.71k|            ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred->i1_ref_frame[0];
 2317|  6.71k|            ps_mb_part_info->i1_ref_idx[0][1] = ps_motion_pred[8].i1_ref_frame[0];
 2318|  6.71k|            if(2 == i4_listx)
  ------------------
  |  Branch (2318:16): [True: 3.79k, False: 2.91k]
  ------------------
 2319|  3.79k|            {
 2320|  3.79k|                ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred->i1_ref_frame[1];
 2321|  3.79k|                ps_mb_part_info->i1_ref_idx[1][1] = ps_motion_pred[8].i1_ref_frame[1];
 2322|  3.79k|            }
 2323|  6.71k|            ps_part->u1_partwidth = 4;  // interms of 4x4
 2324|  6.71k|            ps_part->u1_partheight = 2;
 2325|  6.71k|            ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  6.71k|#define PRED_L0   1
  ------------------
 2326|  6.71k|            ps_part->u1_is_direct = 0;
 2327|  6.71k|            ps_part->u1_sub_mb_num = 0;
 2328|       |
 2329|  6.71k|            ps_part++;
 2330|  6.71k|            ps_part->u1_partwidth = 4;
 2331|  6.71k|            ps_part->u1_partheight = 2;
 2332|  6.71k|            ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  6.71k|#define PRED_L0   1
  ------------------
 2333|  6.71k|            ps_part->u1_is_direct = 0;
 2334|  6.71k|            ps_part->u1_sub_mb_num = 8;
 2335|  6.71k|        }
 2336|  10.4k|        else if(2 == i4_vert_match)
  ------------------
  |  Branch (2336:17): [True: 4.09k, False: 6.38k]
  ------------------
 2337|  4.09k|        {
 2338|  4.09k|            ps_mb_params->u1_mb_type = P_L0_L0_8x16;
 2339|  4.09k|            i4_part_size = PRED_8x16;
  ------------------
  |  |  452|  4.09k|#define PRED_8x16   2
  ------------------
 2340|  4.09k|            ps_mb_part_info->u1_num_part = 2;
 2341|  4.09k|            *pu1_col_info++ = (PRED_8x16 << 6);
  ------------------
  |  |  452|  4.09k|#define PRED_8x16   2
  ------------------
 2342|  4.09k|            *pu1_col_info++ = (PRED_8x16 << 6);
  ------------------
  |  |  452|  4.09k|#define PRED_8x16   2
  ------------------
 2343|       |
 2344|  4.09k|            ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred->i1_ref_frame[0];
 2345|  4.09k|            ps_mb_part_info->i1_ref_idx[0][1] = ps_motion_pred[2].i1_ref_frame[0];
 2346|  4.09k|            if(2 == i4_listx)
  ------------------
  |  Branch (2346:16): [True: 1.52k, False: 2.56k]
  ------------------
 2347|  1.52k|            {
 2348|  1.52k|                ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred->i1_ref_frame[1];
 2349|  1.52k|                ps_mb_part_info->i1_ref_idx[1][1] = ps_motion_pred[2].i1_ref_frame[1];
 2350|  1.52k|            }
 2351|  4.09k|            ps_part->u1_partwidth = 2;  // interms of 4x4
 2352|  4.09k|            ps_part->u1_partheight = 4;
 2353|  4.09k|            ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  4.09k|#define PRED_L0   1
  ------------------
 2354|  4.09k|            ps_part->u1_is_direct = 0;
 2355|  4.09k|            ps_part->u1_sub_mb_num = 0;
 2356|       |
 2357|  4.09k|            ps_part++;
 2358|  4.09k|            ps_part->u1_partwidth = 2;
 2359|  4.09k|            ps_part->u1_partheight = 4;
 2360|  4.09k|            ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  4.09k|#define PRED_L0   1
  ------------------
 2361|  4.09k|            ps_part->u1_is_direct = 0;
 2362|  4.09k|            ps_part->u1_sub_mb_num = 2;
 2363|  4.09k|        }
 2364|   102k|    }
 2365|       |
 2366|       |    /* store the part size to the mb params */
 2367|   111k|    ps_mb_params->u1_mb_mc_mode = i4_part_size;
 2368|       |
 2369|       |    /* in case of slice derive the partition modes */
 2370|       |
 2371|   111k|    {
 2372|       |        /* store the sub MB modes if 8x8 mode is choosen */
 2373|   111k|        if(PRED_8x8 == i4_part_size)
  ------------------
  |  |  453|   111k|#define PRED_8x8    3
  ------------------
  |  Branch (2373:12): [True: 15.1k, False: 96.5k]
  ------------------
 2374|  15.1k|        {
 2375|  15.1k|            UWORD8 u1_sub_mb_type, u1_sub_mb_mc_mode = 0;
 2376|       |
 2377|       |            /* for P_MB sub part type is same as sub mb type */
 2378|  15.1k|            ps_mb_params->u1_mb_type = P_8x8;
 2379|  15.1k|            ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred[0].i1_ref_frame[0];
 2380|  15.1k|            ps_mb_part_info->i1_ref_idx[0][1] = ps_motion_pred[2].i1_ref_frame[0];
 2381|  15.1k|            ps_mb_part_info->i1_ref_idx[0][2] = ps_motion_pred[8].i1_ref_frame[0];
 2382|  15.1k|            ps_mb_part_info->i1_ref_idx[0][3] = ps_motion_pred[10].i1_ref_frame[0];
 2383|  15.1k|            if(2 == i4_listx)
  ------------------
  |  Branch (2383:16): [True: 3.69k, False: 11.4k]
  ------------------
 2384|  3.69k|            {
 2385|  3.69k|                ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred[0].i1_ref_frame[1];
 2386|  3.69k|                ps_mb_part_info->i1_ref_idx[1][1] = ps_motion_pred[2].i1_ref_frame[1];
 2387|  3.69k|                ps_mb_part_info->i1_ref_idx[1][2] = ps_motion_pred[8].i1_ref_frame[1];
 2388|  3.69k|                ps_mb_part_info->i1_ref_idx[1][3] = ps_motion_pred[10].i1_ref_frame[1];
 2389|  3.69k|            }
 2390|       |
 2391|  15.1k|            ps_mb_part_info->u1_num_part = 0;
 2392|  75.6k|            for(i4_i = 0; i4_i < NUM_MB_PARTS; i4_i++)
  ------------------
  |  |   59|  75.6k|#define NUM_MB_PARTS 4
  ------------------
  |  Branch (2392:27): [True: 60.5k, False: 15.1k]
  ------------------
 2393|  60.5k|            {
 2394|  60.5k|                WORD32 i4_num_submb_part, i4_part_width, i4_part_height, i4_ctr;
 2395|  60.5k|                u1_sub_mb_type = (UWORD8) pi4_sub_mb_mode[i4_i];
 2396|       |
 2397|  60.5k|                if(1 == i4_listx)
  ------------------
  |  Branch (2397:20): [True: 45.7k, False: 14.7k]
  ------------------
 2398|  45.7k|                {
 2399|  45.7k|                    u1_sub_mb_mc_mode = gau1_ih264d_submb_mc_mode[u1_sub_mb_type];
 2400|  45.7k|                }
 2401|  14.7k|                else if(2 == i4_listx)
  ------------------
  |  Branch (2401:25): [True: 14.7k, False: 0]
  ------------------
 2402|  14.7k|                {
 2403|  14.7k|                    u1_sub_mb_mc_mode = gau1_ih264d_submb_mc_mode[4 + u1_sub_mb_type];
 2404|  14.7k|                }
 2405|  60.5k|                i4_num_submb_part = g_au1_num_sub_mb_part[u1_sub_mb_mc_mode];
 2406|       |
 2407|  60.5k|                ps_mb_part_info->u1_num_part += i4_num_submb_part;
 2408|       |
 2409|  60.5k|                i4_part_width = g_au1_sub_mb_part_wd[u1_sub_mb_mc_mode];
 2410|  60.5k|                i4_part_height = g_au1_sub_mb_part_ht[u1_sub_mb_mc_mode];
 2411|  60.5k|                *pu1_col_info++ = (PRED_8x8 << 6) | (u1_sub_mb_mc_mode << 4);
  ------------------
  |  |  453|  60.5k|#define PRED_8x8    3
  ------------------
 2412|   137k|                for(i4_ctr = 0; i4_ctr < i4_num_submb_part; i4_ctr++)
  ------------------
  |  Branch (2412:33): [True: 77.0k, False: 60.5k]
  ------------------
 2413|  77.0k|                {
 2414|  77.0k|                    ps_part->u1_partwidth = i4_part_width;  // interms of 4x4
 2415|  77.0k|                    ps_part->u1_partheight = i4_part_height;
 2416|  77.0k|                    ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  77.0k|#define PRED_L0   1
  ------------------
 2417|  77.0k|                    ps_part->u1_is_direct = 0;
 2418|  77.0k|                    ps_part->u1_sub_mb_num = (i4_i & 0x01) * 2 + (i4_i >> 1) * 8;
 2419|  77.0k|                    if(i4_num_submb_part == 2)
  ------------------
  |  Branch (2419:24): [True: 27.5k, False: 49.4k]
  ------------------
 2420|  27.5k|                    {
 2421|  27.5k|                        ps_part->u1_sub_mb_num +=
 2422|  27.5k|                            i4_ctr ? (((i4_part_width - 1) << 2) + (i4_part_height - 1)) : 0;
  ------------------
  |  Branch (2422:29): [True: 13.7k, False: 13.7k]
  ------------------
 2423|  27.5k|                    }
 2424|  49.4k|                    else if(i4_num_submb_part == 4)
  ------------------
  |  Branch (2424:29): [True: 3.61k, False: 45.8k]
  ------------------
 2425|  3.61k|                    {
 2426|  3.61k|                        ps_part->u1_sub_mb_num += ((i4_ctr >> 1) << 2) + (i4_ctr & 0x01);
 2427|  3.61k|                    }
 2428|       |
 2429|  77.0k|                    ps_part++;
 2430|  77.0k|                }
 2431|  60.5k|            }
 2432|  15.1k|        }
 2433|   111k|        if(2 == i4_listx)
  ------------------
  |  Branch (2433:12): [True: 20.7k, False: 90.9k]
  ------------------
 2434|  20.7k|        {
 2435|  20.7k|            ps_part = (parse_part_params_t *) pv_part;
 2436|  20.7k|            pu1_col_info = ps_mb_part_info->u1_col_info;
 2437|       |            /* B_MBs */
 2438|  20.7k|            isvcd_interlyr_mbmode_pred_bmb(ps_ctxt, ps_motion_pred, 4, i4_part_size,
 2439|  20.7k|                                           pi4_sub_mb_mode, ps_mb_params, ps_part, pu1_col_info);
 2440|  20.7k|        }
 2441|   111k|    }
 2442|       |
 2443|   111k|    return;
 2444|   111k|}
isvcd_compute_interlyr_motion_mode:
 2474|   130k|{
 2475|       |    /*! Flow of the module is as follows                                   */
 2476|       |    /*! 1. it calls the ref part idc compute function                      */
 2477|       |    /*! 2. if the intra flag comes back set and base mode flag is 1, the   */
 2478|       |    /*!    MB mode becomes SVC_IBL_MB and residual prediction is cleared   */
 2479|       |    /*! 3. else it calls the motion vectors and submb mode derive function */
 2480|       |    /*! 4. and then the mode prediction module if base mode flag is 1      */
 2481|       |
 2482|   130k|    mode_motion_ctxt_t *ps_ctxt;
 2483|   130k|    WORD32 i4_intra_flag;
 2484|   130k|    WORD32 ai4_sub_mb_mode[NUM_MB_PARTS] = {0};
 2485|   130k|    dec_mb_info_t *ps_mb_params;
 2486|   130k|    dec_svc_mb_info_t *ps_svc_mb_params;
 2487|   130k|    dec_struct_t *ps_dec = (dec_struct_t *) pv_dec;
 2488|   130k|    WORD32 i4_mb_mode = -1;
 2489|   130k|    parse_pmbarams_t *ps_mb_part_info = (parse_pmbarams_t *) pv_mb_part_info;
 2490|   130k|    parse_part_params_t *ps_part = (parse_part_params_t *) pv_part;
 2491|       |
 2492|   130k|    ps_ctxt = (mode_motion_ctxt_t *) pv_comp_mode_mv_ctxt;
 2493|   130k|    ps_mb_params = (dec_mb_info_t *) pv_mb_params;
 2494|   130k|    ps_svc_mb_params = (dec_svc_mb_info_t *) pv_svc_mb_params;
 2495|       |
 2496|   130k|    i4_intra_flag = SVCD_FALSE;
  ------------------
  |  |   45|   130k|#define SVCD_FALSE 0
  ------------------
 2497|       |
 2498|   130k|    isvcd_ref_lyr_part_idc(pv_comp_mode_mv_ctxt, ps_ctxt->ai4_ref_part_idc, &i4_intra_flag,
 2499|   130k|                           pv_mb_params);
 2500|       |
 2501|       |    /* If base is Intra */
 2502|   130k|    if(SVCD_TRUE == i4_intra_flag)
  ------------------
  |  |   46|   130k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2502:8): [True: 9.11k, False: 121k]
  ------------------
 2503|  9.11k|    {
 2504|  9.11k|        if(1 == ps_svc_mb_params->u1_base_mode_flag)
  ------------------
  |  Branch (2504:12): [True: 8.88k, False: 232]
  ------------------
 2505|  8.88k|        {
 2506|  8.88k|            i4_mb_mode = SVC_IBL_MB;
  ------------------
  |  |  117|  8.88k|#define SVC_IBL_MB (1 << 3)         /*!< I_BL MB always inferred */
  ------------------
 2507|  8.88k|            ps_svc_mb_params->u1_residual_prediction_flag = 0;
 2508|  8.88k|        }
 2509|  9.11k|    }
 2510|   121k|    else
 2511|   121k|    {
 2512|       |        /* derive the motion and reference index by inter layer prediction */
 2513|   121k|        isvcd_interlyr_motion_submbmode_pred(pv_comp_mode_mv_ctxt, ps_mb_params, ps_svc_mb_params,
 2514|   121k|                                             ps_ctxt->ai4_ref_part_idc, ai4_sub_mb_mode, pv_dec);
 2515|       |
 2516|       |        /* derive the MB mode; without base mode flag only the ref indices are populated */
 2517|   121k|        if(1 == ps_svc_mb_params->u1_base_mode_flag)
  ------------------
  |  Branch (2517:12): [True: 111k, False: 9.82k]
  ------------------
 2518|   111k|        {
 2519|   111k|            isvcd_interlyr_mbmode_pred(pv_comp_mode_mv_ctxt, pv_mb_params, ai4_sub_mb_mode,
 2520|   111k|                                       &i4_mb_mode, ps_dec, ps_mb_part_info, ps_part);
 2521|   111k|        }
 2522|  9.82k|        else
 2523|  9.82k|        {
 2524|  9.82k|            isvcd_populate_ref_idx(ps_mb_params, ps_svc_mb_params, ps_ctxt->ps_motion_pred_struct,
 2525|  9.82k|                                   ps_mb_part_info, ps_ctxt->i4_listx);
 2526|  9.82k|        }
 2527|   121k|    }
 2528|       |    /* still the initial -1 unless set to SVC_IBL_MB above or written through the pointer given to the mode prediction call */
 2529|   130k|    return i4_mb_mode;
 2530|   130k|}
isvcd_interlyr_motion_mode_pred_dyadic:
 2564|  68.3k|{
 2565|  68.3k|    mode_motion_ctxt_t *ps_ctxt;
 2566|  68.3k|    mode_motion_lyr_ctxt *ps_lyr_mem;
 2567|  68.3k|    dec_mb_info_t *ps_mb_params;
 2568|  68.3k|    dec_svc_mb_info_t *ps_svc_mb_params;
 2569|  68.3k|    WORD32 i4_listx;
 2570|  68.3k|    WORD32 i4_mb_pic_x, i4_mb_pic_y;
 2571|  68.3k|    WORD32 i4_ref_x, i4_ref_y;
 2572|  68.3k|    UWORD8 u1_base_mode_flag;
 2573|  68.3k|    dec_struct_t *ps_dec = (dec_struct_t *) pv_dec;
 2574|  68.3k|    WORD32 i4_mb_mode = -1;
 2575|  68.3k|    parse_pmbarams_t *ps_mb_part_info = (parse_pmbarams_t *) pv_mb_part_info;
 2576|  68.3k|    UWORD8 *pu1_col_info = ps_mb_part_info->u1_col_info;
 2577|  68.3k|    parse_part_params_t *ps_part = (parse_part_params_t *) pv_part;
 2578|       |
 2579|  68.3k|    ps_ctxt = (mode_motion_ctxt_t *) pv_comp_mode_mv_ctxt;
 2580|       |
 2581|       |    /* get the current layer ctxt */
 2582|  68.3k|    ps_lyr_mem = &ps_ctxt->as_res_lyr_mem[ps_ctxt->i4_res_id];
 2583|       |
 2584|  68.3k|    ps_mb_params = (dec_mb_info_t *) pv_mb_params;
 2585|  68.3k|    ps_svc_mb_params = (dec_svc_mb_info_t *) pv_svc_mb_params;
 2586|  68.3k|    i4_listx = ps_ctxt->i4_listx;
 2587|       |
 2588|  68.3k|    {
 2589|  68.3k|        WORD32 i4_mb_x, i4_mb_y;
 2590|       |        /* derive the MB_X and MB_Y for the current MB */
 2591|  68.3k|        i4_mb_x = ps_mb_params->u2_mbx;
 2592|  68.3k|        i4_mb_y = ps_mb_params->u2_mby;
 2593|       |
 2594|       |        /* get the colocated position in the refernce layer */
 2595|  68.3k|        i4_ref_x = ps_lyr_mem->pi2_ref_loc_x[i4_mb_x << 4];
 2596|  68.3k|        i4_ref_y = ps_lyr_mem->pi2_ref_loc_y[i4_mb_y << 4];
 2597|  68.3k|        i4_ref_x = CLIP3(0, ((ps_lyr_mem->i4_ref_width) - 1), i4_ref_x);
  ------------------
  |  |   77|  68.3k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 68.3k]
  |  |  |  Branch (77:54): [True: 0, False: 68.3k]
  |  |  ------------------
  ------------------
 2598|  68.3k|        i4_ref_y = CLIP3(0, ((ps_lyr_mem->i4_ref_height) - 1), i4_ref_y);
  ------------------
  |  |   77|  68.3k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 68.3k]
  |  |  |  Branch (77:54): [True: 391, False: 67.9k]
  |  |  ------------------
  ------------------
 2599|       |
 2600|       |        /* convert into picture units */
 2601|  68.3k|        i4_mb_pic_x = i4_mb_x << 4;
 2602|  68.3k|        i4_mb_pic_y = i4_mb_y << 4;
 2603|  68.3k|    }
 2604|       |
 2605|       |    /* ref layer mb mode */
 2606|  68.3k|    {
 2607|  68.3k|        inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
 2608|  68.3k|        WORD32 i4_inter_lyr_mb_prms_stride;
 2609|  68.3k|        WORD32 i4_ref_mb_x, i4_ref_mb_y;
 2610|  68.3k|        WORD8 i1_ref_mb_mode;
 2611|       |
 2612|  68.3k|        ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) ps_lyr_mem->s_ref_mb_mode.pv_buffer;
 2613|  68.3k|        i4_inter_lyr_mb_prms_stride = ps_lyr_mem->s_ref_mb_mode.i4_num_element_stride;
 2614|       |
 2615|       |        /* get the reference mb x and y */
 2616|  68.3k|        i4_ref_mb_x = (i4_ref_x >> 4);
 2617|  68.3k|        i4_ref_mb_y = (i4_ref_y >> 4);
 2618|       |
 2619|       |        /* get the appropriate mb params in reference layer */
 2620|  68.3k|        ps_inter_lyr_mb_prms += i4_ref_mb_x;
 2621|  68.3k|        ps_inter_lyr_mb_prms += i4_ref_mb_y * i4_inter_lyr_mb_prms_stride;
 2622|  68.3k|        i1_ref_mb_mode = ps_inter_lyr_mb_prms->i1_mb_mode;
 2623|  68.3k|        u1_base_mode_flag = ps_svc_mb_params->u1_base_mode_flag;
 2624|       |
 2625|       |        /* check if the MB mode of the refernce MB is Intra*/
 2626|  68.3k|        if(i1_ref_mb_mode > SVC_INTER_MB)
  ------------------
  |  |  114|  68.3k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (2626:12): [True: 14.7k, False: 53.6k]
  ------------------
 2627|  14.7k|        {
 2628|  14.7k|            if(1 == u1_base_mode_flag)
  ------------------
  |  Branch (2628:16): [True: 14.4k, False: 268]
  ------------------
 2629|  14.4k|            {
 2630|  14.4k|                i4_mb_mode = SVC_IBL_MB;
  ------------------
  |  |  117|  14.4k|#define SVC_IBL_MB (1 << 3)         /*!< I_BL MB always inferred */
  ------------------
 2631|  14.4k|                ps_svc_mb_params->u1_residual_prediction_flag = 0;
 2632|  14.4k|            }
 2633|  14.7k|            return i4_mb_mode;
 2634|  14.7k|        }
 2635|  68.3k|    }
 2636|       |
 2637|       |    /*-----------------------------------------------------------------------*/
 2638|       |    /* Inter MB upsampling process                                           */
 2639|       |    /*-----------------------------------------------------------------------*/
 2640|  53.6k|    {
 2641|  53.6k|        mv_pred_t *ps_motion_pred;
 2642|  53.6k|        WORD32 i4_16x16_flag;
 2643|  53.6k|        WORD32 i4_part_idc;
 2644|  53.6k|        WORD32 i4_blk_idx;
 2645|  53.6k|        WORD32 i4_curr_mot_stride;
 2646|       |
 2647|       |        /* choose the appropriate mv bank pointer and stride */
 2648|  53.6k|        if(1 == u1_base_mode_flag)
  ------------------
  |  Branch (2648:12): [True: 36.4k, False: 17.1k]
  ------------------
 2649|  36.4k|        {
 2650|  36.4k|            i4_mb_mode = SVC_INTER_MB;
  ------------------
  |  |  114|  36.4k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 2651|  36.4k|        }
 2652|       |
 2653|  53.6k|        ps_motion_pred = ps_ctxt->ps_motion_pred_struct;
 2654|  53.6k|        i4_curr_mot_stride = 4;
 2655|       |
 2656|       |        /* call the motion upsampling for 1st 4x4 */
 2657|  53.6k|        i4_part_idc = (i4_ref_y << 16) + i4_ref_x;
 2658|  53.6k|        i4_16x16_flag = isvcd_interlyr_motion_scale(
 2659|  53.6k|            pv_comp_mode_mv_ctxt, &i4_part_idc, ps_mb_params, ps_motion_pred, i4_listx,
 2660|  53.6k|            (i4_mb_pic_x + 1), (i4_mb_pic_y + 1), ps_dec->ppv_map_ref_idx_to_poc);
 2661|       |
 2662|       |        /* ---------- reference layer MB is 16x16 ------------------*/
 2663|  53.6k|        if(i4_16x16_flag)
  ------------------
  |  Branch (2663:12): [True: 0, False: 53.6k]
  ------------------
 2664|      0|        {
 2665|      0|            if(1 == u1_base_mode_flag)
  ------------------
  |  Branch (2665:16): [True: 0, False: 0]
  ------------------
 2666|      0|            {
 2667|      0|                ps_mb_params->u1_mb_type = P_L0_16x16;
 2668|      0|                ps_mb_params->u1_mb_mc_mode = PRED_16x16;
  ------------------
  |  |  450|      0|#define PRED_16x16  0
  ------------------
 2669|      0|                ps_mb_part_info->u1_num_part = 1;
 2670|      0|                *pu1_col_info++ = (PRED_16x16 << 6);
  ------------------
  |  |  450|      0|#define PRED_16x16  0
  ------------------
 2671|      0|                ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred->i1_ref_frame[0];
 2672|       |
 2673|      0|                ps_part->u1_partwidth = 4;  // interms of 4x4
 2674|      0|                ps_part->u1_partheight = 4;
 2675|      0|                ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|      0|#define PRED_L0   1
  ------------------
 2676|      0|                ps_part->u1_is_direct = 0;
 2677|      0|                ps_part->u1_sub_mb_num = 0;
 2678|       |
 2679|      0|                if(2 == i4_listx)
  ------------------
  |  Branch (2679:20): [True: 0, False: 0]
  ------------------
 2680|      0|                {
 2681|      0|                    WORD32 i4_part_mode_a = 0;
 2682|      0|                    WORD32 i4_temp;
 2683|       |
 2684|      0|                    ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred->i1_ref_frame[1];
 2685|      0|                    if(0 <= ps_motion_pred[0].i1_ref_frame[0])
  ------------------
  |  Branch (2685:24): [True: 0, False: 0]
  ------------------
 2686|      0|                    {
 2687|      0|                        i4_part_mode_a += 1;
 2688|      0|                    }
 2689|      0|                    if(0 <= ps_motion_pred[0].i1_ref_frame[1])
  ------------------
  |  Branch (2689:24): [True: 0, False: 0]
  ------------------
 2690|      0|                    {
 2691|      0|                        i4_part_mode_a += 2;
 2692|      0|                    }
 2693|       |
 2694|      0|                    i4_temp = 3 * PRED_16x16;
  ------------------
  |  |  450|      0|#define PRED_16x16  0
  ------------------
 2695|      0|                    i4_temp += (3 * (i4_part_mode_a - 1) - 1);
 2696|      0|                    i4_temp = (i4_temp < 0) ? 0 : i4_temp;
  ------------------
  |  Branch (2696:31): [True: 0, False: 0]
  ------------------
 2697|      0|                    i4_temp = g_au1_eb_mb_type[i4_temp];
 2698|      0|                    ps_mb_params->u1_mb_type = i4_temp;
 2699|      0|                    ps_part->u1_pred_mode = g_au1_mb_pred_mode[0][5 + i4_temp];
 2700|      0|                }
 2701|      0|            }
 2702|      0|            else
 2703|      0|            {
 2704|       |                /* motion prediction flag cases replicate the motion vectors for entire MB */
 2705|      0|                isvcd_store_motion_map(ps_motion_pred, (ps_motion_pred), 0, i4_curr_mot_stride,
 2706|      0|                                       NUM_MB_PARTS, NUM_MB_PARTS, SVCD_FALSE);
  ------------------
  |  |   59|      0|#define NUM_MB_PARTS 4
  ------------------
                                                     NUM_MB_PARTS, NUM_MB_PARTS, SVCD_FALSE);
  ------------------
  |  |   59|      0|#define NUM_MB_PARTS 4
  ------------------
                                                     NUM_MB_PARTS, NUM_MB_PARTS, SVCD_FALSE);
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
 2707|       |
 2708|      0|                isvcd_populate_ref_idx(ps_mb_params, ps_svc_mb_params, ps_motion_pred,
 2709|      0|                                       ps_mb_part_info, i4_listx);
 2710|      0|            }
 2711|      0|            return i4_mb_mode;
 2712|      0|        }
 2713|       |        /* ---------- reference layer MB is non 16x16 ------------------ */
 2714|  53.6k|        else
 2715|  53.6k|        {
 2716|  53.6k|            WORD32 ai4_sub_mb_mode[NUM_MB_PARTS] = {0};
 2717|       |
 2718|       |            /* replicate the motion vectors for 8x8 */
 2719|  53.6k|            isvcd_store_motion_map(ps_motion_pred, ps_motion_pred, 0, i4_curr_mot_stride, 2, 2,
 2720|  53.6k|                                   SVCD_FALSE);
  ------------------
  |  |   45|  53.6k|#define SVCD_FALSE 0
  ------------------
 2721|       |
 2722|  53.6k|            if(2 == i4_listx)
  ------------------
  |  Branch (2722:16): [True: 24.3k, False: 29.2k]
  ------------------
 2723|  24.3k|            {
 2724|  24.3k|                WORD32 i4_indx = 0;
 2725|       |
 2726|       |                /* replicate the motion vectors for 8x8 */
 2727|       |                /* check the 0th partiton reference indices */
 2728|  24.3k|                if(0 <= ps_motion_pred[0].i1_ref_frame[0])
  ------------------
  |  Branch (2728:20): [True: 23.6k, False: 742]
  ------------------
 2729|  23.6k|                {
 2730|  23.6k|                    i4_indx += 1;
 2731|  23.6k|                }
 2732|  24.3k|                if(0 <= ps_motion_pred[0].i1_ref_frame[1])
  ------------------
  |  Branch (2732:20): [True: 8.54k, False: 15.8k]
  ------------------
 2733|  8.54k|                {
 2734|  8.54k|                    i4_indx += 2;
 2735|  8.54k|                }
 2736|       |
 2737|  24.3k|                i4_indx = 3 * PRED_8x8 + (i4_indx - 1);
  ------------------
  |  |  453|  24.3k|#define PRED_8x8    3
  ------------------
 2738|  24.3k|                ai4_sub_mb_mode[0] = g_au1_eb_submb_type[i4_indx];
 2739|  24.3k|            }
 2740|       |
 2741|       |            /* derive the motion vectors and reference indices of 3 rem partitions */
 2742|   214k|            for(i4_blk_idx = 1; i4_blk_idx < NUM_MB_PARTS; i4_blk_idx++)
  ------------------
  |  |   59|   214k|#define NUM_MB_PARTS 4
  ------------------
  |  Branch (2742:33): [True: 160k, False: 53.6k]
  ------------------
 2743|   160k|            {
 2744|   160k|                WORD32 i4_blk_y, i4_blk_x;
 2745|   160k|                mv_pred_t *ps_temp;
 2746|       |
 2747|   160k|                i4_blk_x = i4_blk_idx & 1;
 2748|   160k|                i4_blk_y = i4_blk_idx >> 1;
 2749|       |
 2750|   160k|                ps_temp = ps_motion_pred + (i4_blk_x << 1);
 2751|   160k|                ps_temp += (i4_blk_y * i4_curr_mot_stride << 1);
 2752|       |
 2753|       |                /* store the reference layer positions */
 2754|   160k|                i4_part_idc = ((i4_ref_y + (i4_blk_y << 2)) << 16) + (i4_ref_x + (i4_blk_x << 2));
 2755|   160k|                isvcd_interlyr_motion_scale(pv_comp_mode_mv_ctxt, &i4_part_idc, ps_mb_params,
 2756|   160k|                                            ps_temp, i4_listx, (i4_mb_pic_x + (i4_blk_x << 2) + 1),
 2757|   160k|                                            (i4_mb_pic_y + (i4_blk_y << 2) + 1),
 2758|   160k|                                            ps_dec->ppv_map_ref_idx_to_poc);
 2759|       |
 2760|       |                /* replicate the motion vectors for 8x8 */
 2761|   160k|                isvcd_store_motion_map(ps_temp, ps_temp, 0, i4_curr_mot_stride, 2, 2, SVCD_FALSE);
  ------------------
  |  |   45|   160k|#define SVCD_FALSE 0
  ------------------
 2762|       |
 2763|   160k|                if(2 == i4_listx)
  ------------------
  |  Branch (2763:20): [True: 73.1k, False: 87.6k]
  ------------------
 2764|  73.1k|                {
 2765|  73.1k|                    WORD32 i4_indx = 0;
 2766|       |
 2767|       |                    /* check the 0th partiton reference indices */
 2768|  73.1k|                    if(0 <= ps_temp[0].i1_ref_frame[0])
  ------------------
  |  Branch (2768:24): [True: 70.9k, False: 2.22k]
  ------------------
 2769|  70.9k|                    {
 2770|  70.9k|                        i4_indx += 1;
 2771|  70.9k|                    }
 2772|  73.1k|                    if(0 <= ps_temp[0].i1_ref_frame[1])
  ------------------
  |  Branch (2772:24): [True: 26.4k, False: 46.7k]
  ------------------
 2773|  26.4k|                    {
 2774|  26.4k|                        i4_indx += 2;
 2775|  26.4k|                    }
 2776|       |
 2777|  73.1k|                    i4_indx = 3 * PRED_8x8 + (i4_indx - 1);
  ------------------
  |  |  453|  73.1k|#define PRED_8x8    3
  ------------------
 2778|       |
 2779|  73.1k|                    ai4_sub_mb_mode[i4_blk_idx] = g_au1_eb_submb_type[i4_indx];
 2780|  73.1k|                }
 2781|   160k|            }
 2782|       |
 2783|       |            /* if MB mode has to derivied */
 2784|  53.6k|            if(1 == u1_base_mode_flag)
  ------------------
  |  Branch (2784:16): [True: 36.4k, False: 17.1k]
  ------------------
 2785|  36.4k|            {
 2786|  36.4k|                WORD32 i4_horz_match, i4_vert_match;
 2787|  36.4k|                WORD32 i4_part_size = PRED_8x8;
  ------------------
  |  |  453|  36.4k|#define PRED_8x8    3
  ------------------
 2788|       |
 2789|  36.4k|                mv_pred_t *ps_motion_1;
 2790|  36.4k|                mv_pred_t *ps_motion_2;
 2791|  36.4k|                mv_pred_t *ps_motion_3;
 2792|       |
 2793|  36.4k|                ps_motion_1 = ps_motion_pred + 2;
 2794|  36.4k|                ps_motion_2 = ps_motion_pred + (i4_curr_mot_stride << 1);
 2795|  36.4k|                ps_motion_3 = ps_motion_2 + 2;
 2796|       |
 2797|       |                /* check if the motion in horz direction are same*/
 2798|  36.4k|                i4_horz_match = isvcd_check_motion(ps_motion_pred, ps_motion_1, i4_listx);
 2799|  36.4k|                i4_horz_match += isvcd_check_motion(ps_motion_2, ps_motion_3, i4_listx);
 2800|       |
 2801|       |                /* check if the motion in vertical direction is same */
 2802|  36.4k|                i4_vert_match = isvcd_check_motion(ps_motion_pred, ps_motion_2, i4_listx);
 2803|  36.4k|                i4_vert_match += isvcd_check_motion(ps_motion_1, ps_motion_3, i4_listx);
 2804|       |
 2805|  36.4k|                ps_mb_part_info->u1_num_part = 4;
 2806|       |
 2807|       |                /* decide the partition size based on the results of matching */
 2808|  36.4k|                if((2 == i4_horz_match) && (2 == i4_vert_match))
  ------------------
  |  Branch (2808:20): [True: 32.2k, False: 4.19k]
  |  Branch (2808:44): [True: 31.8k, False: 441]
  ------------------
 2809|  31.8k|                {
 2810|  31.8k|                    ps_mb_params->u1_mb_type = P_L0_16x16;
 2811|  31.8k|                    i4_part_size = PRED_16x16;
  ------------------
  |  |  450|  31.8k|#define PRED_16x16  0
  ------------------
 2812|  31.8k|                    ps_mb_part_info->u1_num_part = 1;
 2813|  31.8k|                    *pu1_col_info++ = (PRED_16x16 << 6);
  ------------------
  |  |  450|  31.8k|#define PRED_16x16  0
  ------------------
 2814|       |
 2815|  31.8k|                    ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred->i1_ref_frame[0];
 2816|  31.8k|                    if(2 == i4_listx)
  ------------------
  |  Branch (2816:24): [True: 11.6k, False: 20.1k]
  ------------------
 2817|  11.6k|                        ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred->i1_ref_frame[1];
 2818|       |
 2819|  31.8k|                    ps_part->u1_partwidth = 4;  // interms of 4x4
 2820|  31.8k|                    ps_part->u1_partheight = 4;
 2821|  31.8k|                    ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  31.8k|#define PRED_L0   1
  ------------------
 2822|  31.8k|                    ps_part->u1_is_direct = 0;
 2823|  31.8k|                    ps_part->u1_sub_mb_num = 0;
 2824|  31.8k|                }
 2825|  4.63k|                else if(2 == i4_horz_match)
  ------------------
  |  Branch (2825:25): [True: 441, False: 4.19k]
  ------------------
 2826|    441|                {
 2827|    441|                    ps_mb_params->u1_mb_type = P_L0_L0_16x8;
 2828|    441|                    i4_part_size = PRED_16x8;
  ------------------
  |  |  451|    441|#define PRED_16x8   1
  ------------------
 2829|    441|                    ps_mb_part_info->u1_num_part = 2;
 2830|    441|                    *pu1_col_info++ = (PRED_16x8 << 6);
  ------------------
  |  |  451|    441|#define PRED_16x8   1
  ------------------
 2831|    441|                    *pu1_col_info++ = (PRED_16x8 << 6);
  ------------------
  |  |  451|    441|#define PRED_16x8   1
  ------------------
 2832|       |
 2833|    441|                    ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred->i1_ref_frame[0];
 2834|    441|                    ps_mb_part_info->i1_ref_idx[0][1] = ps_motion_pred[8].i1_ref_frame[0];
 2835|    441|                    if(2 == i4_listx)
  ------------------
  |  Branch (2835:24): [True: 143, False: 298]
  ------------------
 2836|    143|                    {
 2837|    143|                        ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred->i1_ref_frame[1];
 2838|    143|                        ps_mb_part_info->i1_ref_idx[1][1] = ps_motion_pred[8].i1_ref_frame[1];
 2839|    143|                    }
 2840|    441|                    ps_part->u1_partwidth = 4;  // interms of 4x4
 2841|    441|                    ps_part->u1_partheight = 2;
 2842|    441|                    ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|    441|#define PRED_L0   1
  ------------------
 2843|    441|                    ps_part->u1_is_direct = 0;
 2844|    441|                    ps_part->u1_sub_mb_num = 0;
 2845|       |
 2846|    441|                    ps_part++;
 2847|    441|                    ps_part->u1_partwidth = 4;
 2848|    441|                    ps_part->u1_partheight = 2;
 2849|    441|                    ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|    441|#define PRED_L0   1
  ------------------
 2850|    441|                    ps_part->u1_is_direct = 0;
 2851|    441|                    ps_part->u1_sub_mb_num = 8;
 2852|    441|                }
 2853|  4.19k|                else if(2 == i4_vert_match)
  ------------------
  |  Branch (2853:25): [True: 753, False: 3.43k]
  ------------------
 2854|    753|                {
 2855|    753|                    ps_mb_params->u1_mb_type = P_L0_L0_8x16;
 2856|    753|                    i4_part_size = PRED_8x16;
  ------------------
  |  |  452|    753|#define PRED_8x16   2
  ------------------
 2857|    753|                    ps_mb_part_info->u1_num_part = 2;
 2858|    753|                    *pu1_col_info++ = (PRED_8x16 << 6);
  ------------------
  |  |  452|    753|#define PRED_8x16   2
  ------------------
 2859|    753|                    *pu1_col_info++ = (PRED_8x16 << 6);
  ------------------
  |  |  452|    753|#define PRED_8x16   2
  ------------------
 2860|       |
 2861|    753|                    ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred->i1_ref_frame[0];
 2862|    753|                    ps_mb_part_info->i1_ref_idx[0][1] = ps_motion_pred[2].i1_ref_frame[0];
 2863|    753|                    if(2 == i4_listx)
  ------------------
  |  Branch (2863:24): [True: 502, False: 251]
  ------------------
 2864|    502|                    {
 2865|    502|                        ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred->i1_ref_frame[1];
 2866|    502|                        ps_mb_part_info->i1_ref_idx[1][1] = ps_motion_pred[2].i1_ref_frame[1];
 2867|    502|                    }
 2868|    753|                    ps_part->u1_partwidth = 2;  // interms of 4x4
 2869|    753|                    ps_part->u1_partheight = 4;
 2870|    753|                    ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|    753|#define PRED_L0   1
  ------------------
 2871|    753|                    ps_part->u1_is_direct = 0;
 2872|    753|                    ps_part->u1_sub_mb_num = 0;
 2873|       |
 2874|    753|                    ps_part++;
 2875|    753|                    ps_part->u1_partwidth = 2;
 2876|    753|                    ps_part->u1_partheight = 4;
 2877|    753|                    ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|    753|#define PRED_L0   1
  ------------------
 2878|    753|                    ps_part->u1_is_direct = 0;
 2879|    753|                    ps_part->u1_sub_mb_num = 2;
 2880|    753|                }
 2881|       |
 2882|       |                /* store the part size to the mb params */
 2883|  36.4k|                ps_mb_params->u1_mb_mc_mode = i4_part_size;
 2884|       |
 2885|       |                /* store the sub partition size */
 2886|  36.4k|                if(PRED_8x8 == i4_part_size)
  ------------------
  |  |  453|  36.4k|#define PRED_8x8    3
  ------------------
  |  Branch (2886:20): [True: 3.43k, False: 33.0k]
  ------------------
 2887|  3.43k|                {
 2888|  3.43k|                    UWORD8 u1_ctr;
 2889|       |                    /* for P_MB sub part type is P_L0_8x8*/
 2890|       |
 2891|  3.43k|                    ps_mb_params->u1_mb_type = P_8x8;
 2892|  3.43k|                    ps_mb_part_info->i1_ref_idx[0][0] = ps_motion_pred[0].i1_ref_frame[0];
 2893|  3.43k|                    ps_mb_part_info->i1_ref_idx[0][1] = ps_motion_pred[2].i1_ref_frame[0];
 2894|  3.43k|                    ps_mb_part_info->i1_ref_idx[0][2] = ps_motion_pred[8].i1_ref_frame[0];
 2895|  3.43k|                    ps_mb_part_info->i1_ref_idx[0][3] = ps_motion_pred[10].i1_ref_frame[0];
 2896|  3.43k|                    if(2 == i4_listx)
  ------------------
  |  Branch (2896:24): [True: 2.55k, False: 884]
  ------------------
 2897|  2.55k|                    {
 2898|  2.55k|                        ps_mb_part_info->i1_ref_idx[1][0] = ps_motion_pred[0].i1_ref_frame[1];
 2899|  2.55k|                        ps_mb_part_info->i1_ref_idx[1][1] = ps_motion_pred[2].i1_ref_frame[1];
 2900|  2.55k|                        ps_mb_part_info->i1_ref_idx[1][2] = ps_motion_pred[8].i1_ref_frame[1];
 2901|  2.55k|                        ps_mb_part_info->i1_ref_idx[1][3] = ps_motion_pred[10].i1_ref_frame[1];
 2902|  2.55k|                    }
 2903|       |
 2904|  17.1k|                    for(u1_ctr = 0; u1_ctr < 4; u1_ctr++)
  ------------------
  |  Branch (2904:37): [True: 13.7k, False: 3.43k]
  ------------------
 2905|  13.7k|                    {
 2906|  13.7k|                        *pu1_col_info++ = (PRED_8x8 << 6);
  ------------------
  |  |  453|  13.7k|#define PRED_8x8    3
  ------------------
 2907|       |
 2908|  13.7k|                        ps_part->u1_partwidth = 2;  // interms of 4x4
 2909|  13.7k|                        ps_part->u1_partheight = 2;
 2910|  13.7k|                        ps_part->u1_pred_mode = PRED_L0;
  ------------------
  |  |  483|  13.7k|#define PRED_L0   1
  ------------------
 2911|  13.7k|                        ps_part->u1_is_direct = 0;
 2912|  13.7k|                        ps_part->u1_sub_mb_num = (u1_ctr & 0x01) * 2 + (u1_ctr >> 1) * 8;
 2913|  13.7k|                        ps_part++;
 2914|  13.7k|                    }
 2915|  3.43k|                }
 2916|       |
 2917|  36.4k|                if(2 == i4_listx)
  ------------------
  |  Branch (2917:20): [True: 14.8k, False: 21.6k]
  ------------------
 2918|  14.8k|                {
 2919|  14.8k|                    ps_part = (parse_part_params_t *) pv_part;
 2920|  14.8k|                    pu1_col_info = ps_mb_part_info->u1_col_info;
 2921|  14.8k|                    isvcd_interlyr_mbmode_pred_bmb(ps_ctxt, ps_motion_pred, i4_curr_mot_stride,
 2922|  14.8k|                                                   i4_part_size, &ai4_sub_mb_mode[0], ps_mb_params,
 2923|  14.8k|                                                   ps_part, pu1_col_info);
 2924|  14.8k|                }
 2925|  36.4k|            } /* end of mode derivation */
 2926|  17.1k|            else
 2927|  17.1k|            {
 2928|  17.1k|                isvcd_populate_ref_idx(ps_mb_params, ps_svc_mb_params, ps_motion_pred,
 2929|  17.1k|                                       ps_mb_part_info, i4_listx);
 2930|       |
 2931|  17.1k|            } /* non 16x16 mv mode derivation */
 2932|  53.6k|        }
 2933|  53.6k|    }
 2934|  53.6k|    return i4_mb_mode;
 2935|  53.6k|}
isvcd_compute_scaled_offsets:
 2968|  34.0k|{
 2969|  34.0k|    WORD32 i4_offset_x, i4_offset_y;
 2970|  34.0k|    UWORD32 i4_scaled_ref_lyr_width;
 2971|  34.0k|    UWORD32 i4_scaled_ref_lyr_height;
 2972|  34.0k|    UWORD32 i4_ref_lyr_width;
 2973|  34.0k|    UWORD32 i4_ref_lyr_height;
 2974|  34.0k|    UWORD32 i4_shift_x, i4_shift_y;
 2975|  34.0k|    UWORD32 i4_scale_x, i4_scale_y;
 2976|  34.0k|    WORD32 i4_cntr;
 2977|  34.0k|    WORD32 i4_scale_add_x, i4_scale_add_y;
 2978|  34.0k|    WORD32 i4_curr_lyr_width, i4_curr_lyr_height;
 2979|       |
 2980|  34.0k|    if((NULL == ps_curr_res_prms) || (NULL == ps_ref_res_prms) || (NULL == pi2_offset_x) ||
  ------------------
  |  Branch (2980:8): [True: 0, False: 34.0k]
  |  Branch (2980:38): [True: 0, False: 34.0k]
  |  Branch (2980:67): [True: 0, False: 34.0k]
  ------------------
 2981|  34.0k|       (NULL == pi2_offset_y))
  ------------------
  |  Branch (2981:8): [True: 0, False: 34.0k]
  ------------------
 2982|      0|    {
 2983|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 2984|      0|    }
 2985|       |    /* initial calculation */
 2986|  34.0k|    i4_offset_x = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_left;
 2987|  34.0k|    i4_offset_y = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_top;
 2988|       |
 2989|       |    /* get the width and height */
 2990|  34.0k|    i4_scaled_ref_lyr_width = ps_curr_res_prms->u2_scaled_ref_width;
 2991|  34.0k|    i4_scaled_ref_lyr_height = ps_curr_res_prms->u2_scaled_ref_height;
 2992|  34.0k|    i4_ref_lyr_width = ps_ref_res_prms->i4_res_width;
 2993|  34.0k|    i4_ref_lyr_height = ps_ref_res_prms->i4_res_height;
 2994|  34.0k|    i4_curr_lyr_width = ps_curr_res_prms->i4_res_width;
 2995|  34.0k|    i4_curr_lyr_height = ps_curr_res_prms->i4_res_height;
 2996|       |
 2997|       |    /* derive shift x and y based on level idd */
 2998|  34.0k|    if(u1_level_idc <= 30)
  ------------------
  |  Branch (2998:8): [True: 31.4k, False: 2.57k]
  ------------------
 2999|  31.4k|    {
 3000|  31.4k|        i4_shift_x = 16;
 3001|  31.4k|        i4_shift_y = 16;
 3002|  31.4k|    }
 3003|  2.57k|    else
 3004|  2.57k|    {
 3005|  2.57k|        i4_shift_x = 31 - isvcd_get_ceil_log2(i4_ref_lyr_width);
 3006|  2.57k|        i4_shift_y = 31 - isvcd_get_ceil_log2(i4_ref_lyr_height);
 3007|  2.57k|    }
 3008|       |
 3009|       |    /* assert on max ranges of width and shift values */
 3010|  34.0k|    if((i4_ref_lyr_width > H264_MAX_FRAME_WIDTH) ||
  ------------------
  |  |   39|  34.0k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (3010:8): [True: 0, False: 34.0k]
  ------------------
 3011|  34.0k|       (i4_scaled_ref_lyr_width > H264_MAX_FRAME_WIDTH) ||
  ------------------
  |  |   39|  34.0k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (3011:8): [True: 0, False: 34.0k]
  ------------------
 3012|  34.0k|       (i4_ref_lyr_height > H264_MAX_FRAME_HEIGHT) ||
  ------------------
  |  |   40|  34.0k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (3012:8): [True: 0, False: 34.0k]
  ------------------
 3013|  34.0k|       (i4_scaled_ref_lyr_height > H264_MAX_FRAME_HEIGHT) ||
  ------------------
  |  |   40|  34.0k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (3013:8): [True: 0, False: 34.0k]
  ------------------
 3014|  34.0k|       (i4_curr_lyr_width > H264_MAX_FRAME_WIDTH) || (i4_curr_lyr_height > H264_MAX_FRAME_HEIGHT))
  ------------------
  |  |   39|  34.0k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                     (i4_curr_lyr_width > H264_MAX_FRAME_WIDTH) || (i4_curr_lyr_height > H264_MAX_FRAME_HEIGHT))
  ------------------
  |  |   40|  34.0k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (3014:8): [True: 0, False: 34.0k]
  |  Branch (3014:54): [True: 0, False: 34.0k]
  ------------------
 3015|      0|    {
 3016|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3017|      0|    }
 3018|       |
 3019|       |    /* calculate scale factor x and y */
 3020|  34.0k|    i4_scale_x = (((UWORD32) i4_ref_lyr_width << i4_shift_x) + (i4_scaled_ref_lyr_width >> 1)) /
 3021|  34.0k|                 i4_scaled_ref_lyr_width;
 3022|       |
 3023|  34.0k|    i4_scale_y = (((UWORD32) i4_ref_lyr_height << i4_shift_y) + (i4_scaled_ref_lyr_height >> 1)) /
 3024|  34.0k|                 i4_scaled_ref_lyr_height;
 3025|       |
 3026|       |    /* calcualte the values to be added based on left and top offset */
 3027|  34.0k|    i4_scale_add_x = (1 << (i4_shift_x - 1)) - (i4_offset_x * (WORD32) i4_scale_x);
 3028|  34.0k|    i4_scale_add_y = (1 << (i4_shift_y - 1)) - (i4_offset_y * (WORD32) i4_scale_y);
 3029|       |
 3030|       |    /* derive the projected locations in the reference layer */
 3031|  1.93M|    for(i4_cntr = 0; i4_cntr < i4_curr_lyr_width; i4_cntr++)
  ------------------
  |  Branch (3031:22): [True: 1.90M, False: 34.0k]
  ------------------
 3032|  1.90M|    {
 3033|  1.90M|        WORD32 i4_ref_x;
 3034|  1.90M|        i4_ref_x = (i4_cntr * i4_scale_x + i4_scale_add_x) >> i4_shift_x;
 3035|  1.90M|        *pi2_offset_x++ = (WORD16) i4_ref_x;
 3036|  1.90M|    }
 3037|       |
 3038|       |    /* derive the projected locations in the reference layer */
 3039|  5.04M|    for(i4_cntr = 0; i4_cntr < i4_curr_lyr_height; i4_cntr++)
  ------------------
  |  Branch (3039:22): [True: 5.01M, False: 34.0k]
  ------------------
 3040|  5.01M|    {
 3041|  5.01M|        WORD32 i4_ref_y;
 3042|  5.01M|        i4_ref_y = (i4_cntr * i4_scale_y + i4_scale_add_y) >> i4_shift_y;
 3043|  5.01M|        *pi2_offset_y++ = (WORD16) i4_ref_y;
 3044|  5.01M|    }
 3045|  34.0k|    return OK;
  ------------------
  |  |  114|  34.0k|#define OK        0
  ------------------
 3046|  34.0k|}
isvcd_comp_mode_mv_res_init:
 3077|   132k|{
 3078|       |    /*! Flow of the module is as follows                                   */
 3079|       |    /*! 1. calculates the scale factors for dyadic cases                   */
 3080|       |    /*! 2. calculaets the loop counts and part width and height based on
 3081|       |           dyadic scale factor                                             */
 3082|       |    /*! 2. calculate the MV scale factors                                  */
 3083|       |    /*! 3. initialises the default mv ped structure with deafult values    */
 3084|       |
 3085|   132k|    mode_motion_ctxt_t *ps_ctxt;
 3086|   132k|    mode_motion_lyr_ctxt *ps_lyr_mem;
 3087|   132k|    dec_seq_params_t *ps_sps;
 3088|   132k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
 3089|   132k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 3090|   132k|    svc_dec_lyr_struct_t *ps_svc_dec_ref_layer;
 3091|   132k|    WORD32 ret;
 3092|   132k|    WORD32 i4_scaled_ref_lyr_width;
 3093|   132k|    WORD32 i4_scaled_ref_lyr_height;
 3094|   132k|    WORD32 i4_ref_lyr_width;
 3095|   132k|    WORD32 i4_ref_lyr_height;
 3096|   132k|    res_prms_t *ps_curr_lyr_res_prms = &ps_svc_lyr_dec->s_res_prms;
 3097|       |
 3098|   132k|    ps_svc_dec_ref_layer = ps_svc_lyr_dec->ps_dec_svc_ref_layer;
 3099|   132k|    if(NULL == ps_curr_lyr_res_prms)
  ------------------
  |  Branch (3099:8): [True: 0, False: 132k]
  ------------------
 3100|      0|    {
 3101|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3102|      0|    }
 3103|       |
 3104|   132k|    ps_ctxt = (mode_motion_ctxt_t *) ps_svc_lyr_dec->pv_mode_mv_sample_ctxt;
 3105|   132k|    ps_ctxt->u1_direct_8x8_inference_flag = ps_curr_lyr_res_prms->u1_direct_8x8_inference_flag;
 3106|       |
 3107|       |    /* if called for base resolution store deafult values */
 3108|   132k|    if(SVCD_TRUE == ps_svc_lyr_dec->u1_base_res_flag)
  ------------------
  |  |   46|   132k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3108:8): [True: 98.8k, False: 34.0k]
  ------------------
 3109|  98.8k|    {
 3110|  98.8k|        ps_ctxt->i4_res_id = -1;
 3111|  98.8k|        ps_ctxt->i4_ref_width = ps_curr_lyr_res_prms->i4_res_width;
 3112|  98.8k|        ps_ctxt->i4_ref_height = ps_curr_lyr_res_prms->i4_res_height;
 3113|  98.8k|        return OK;
  ------------------
  |  |  114|  98.8k|#define OK        0
  ------------------
 3114|  98.8k|    }
 3115|       |
 3116|       |    /* call the function which populates the projected ref locations */
 3117|  34.0k|    ps_sps = ps_dec->ps_cur_sps;
 3118|       |
 3119|       |    /* store the res id appropriately */
 3120|  34.0k|    ps_ctxt->i4_res_id = ps_svc_lyr_dec->u1_layer_id - 1;
 3121|       |
 3122|       |    /* get the current layer ctxt */
 3123|  34.0k|    ps_lyr_mem = &ps_ctxt->as_res_lyr_mem[ps_ctxt->i4_res_id];
 3124|       |
 3125|       |    /* store the current and reference res params to the context */
 3126|  34.0k|    ps_lyr_mem->ps_curr_lyr_res_prms = ps_curr_lyr_res_prms;
 3127|       |
 3128|       |    /* store the reference layer mv bank pointer */
 3129|  34.0k|    ps_lyr_mem->pv_ref_mv_bank_l0 = ps_svc_dec_ref_layer->s_dec.s_cur_pic.ps_mv;
 3130|       |
 3131|       |    /* store the reference layer mb mode pointer */
 3132|  34.0k|    ps_lyr_mem->s_ref_mb_mode.pv_buffer = ps_svc_dec_ref_layer->ps_inter_lyr_mb_prms_frm_start;
 3133|  34.0k|    ps_lyr_mem->s_ref_mb_mode.i4_num_element_stride =
 3134|  34.0k|        ps_svc_dec_ref_layer->u2_inter_lyr_mb_prms_stride;
 3135|  34.0k|    ps_lyr_mem->s_ref_mb_mode.i4_element_size = sizeof(inter_lyr_mb_prms_t);
 3136|       |
 3137|       |    /* check for recomputation of mapping required */
 3138|  34.0k|    if(SVCD_TRUE == ps_curr_lyr_res_prms->u1_remap_req_flag)
  ------------------
  |  |   46|  34.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3138:8): [True: 34.0k, False: 0]
  ------------------
 3139|  34.0k|    {
 3140|  34.0k|        res_prms_t s_ref_res_prms = {0};
 3141|       |
 3142|       |        /* store the reference layer resolution width and height */
 3143|  34.0k|        s_ref_res_prms.i4_res_width = ps_ctxt->i4_ref_width;
 3144|  34.0k|        s_ref_res_prms.i4_res_height = ps_ctxt->i4_ref_height;
 3145|       |
 3146|       |        /* call projection map calculation function */
 3147|  34.0k|        ret = isvcd_compute_scaled_offsets(ps_curr_lyr_res_prms, &s_ref_res_prms,
 3148|  34.0k|                                           ps_lyr_mem->pi2_ref_loc_x, ps_lyr_mem->pi2_ref_loc_y,
 3149|  34.0k|                                           ps_sps->u1_level_idc);
 3150|  34.0k|        if(OK != ret)
  ------------------
  |  |  114|  34.0k|#define OK        0
  ------------------
  |  Branch (3150:12): [True: 0, False: 34.0k]
  ------------------
 3151|      0|        {
 3152|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3153|      0|        }
 3154|       |
 3155|       |        /* derive the scaling variables */
 3156|  34.0k|        ps_lyr_mem->i4_offset_x = ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_left;
 3157|       |
 3158|  34.0k|        ps_lyr_mem->i4_offset_y = ps_curr_lyr_res_prms->s_ref_lyr_scaled_offset.i2_top;
 3159|       |
 3160|       |        /* get the width and heights */
 3161|  34.0k|        i4_scaled_ref_lyr_width = ps_curr_lyr_res_prms->u2_scaled_ref_width;
 3162|  34.0k|        i4_scaled_ref_lyr_height = ps_curr_lyr_res_prms->u2_scaled_ref_height;
 3163|  34.0k|        i4_ref_lyr_width = ps_ctxt->i4_ref_width;
 3164|  34.0k|        i4_ref_lyr_height = ps_ctxt->i4_ref_height;
 3165|       |
 3166|       |        /*store the reference layer width adn height */
 3167|  34.0k|        ps_lyr_mem->i4_ref_width = ps_ctxt->i4_ref_width;
 3168|  34.0k|        ps_lyr_mem->i4_ref_height = ps_ctxt->i4_ref_height;
 3169|       |
 3170|  34.0k|        if((i4_ref_lyr_width > H264_MAX_FRAME_WIDTH) || (i4_ref_lyr_width <= 0)) return NOT_OK;
  ------------------
  |  |   39|  34.0k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                      if((i4_ref_lyr_width > H264_MAX_FRAME_WIDTH) || (i4_ref_lyr_width <= 0)) return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (3170:12): [True: 0, False: 34.0k]
  |  Branch (3170:57): [True: 0, False: 34.0k]
  ------------------
 3171|  34.0k|        if((i4_scaled_ref_lyr_width > H264_MAX_FRAME_WIDTH) || (i4_scaled_ref_lyr_width <= 0))
  ------------------
  |  |   39|  34.0k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (3171:12): [True: 0, False: 34.0k]
  |  Branch (3171:64): [True: 0, False: 34.0k]
  ------------------
 3172|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3173|  34.0k|        if((i4_ref_lyr_height > H264_MAX_FRAME_HEIGHT) || (i4_ref_lyr_height <= 0)) return NOT_OK;
  ------------------
  |  |   40|  34.0k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                      if((i4_ref_lyr_height > H264_MAX_FRAME_HEIGHT) || (i4_ref_lyr_height <= 0)) return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (3173:12): [True: 0, False: 34.0k]
  |  Branch (3173:59): [True: 0, False: 34.0k]
  ------------------
 3174|  34.0k|        if((i4_scaled_ref_lyr_height > H264_MAX_FRAME_HEIGHT) || (i4_scaled_ref_lyr_height <= 0))
  ------------------
  |  |   40|  34.0k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (3174:12): [True: 0, False: 34.0k]
  |  Branch (3174:66): [True: 0, False: 34.0k]
  ------------------
 3175|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3176|       |
 3177|       |        /* derivation of variables for dyadic cases cropping should be MB aligned */
 3178|       |        /* default values for flags */
 3179|  34.0k|        ps_lyr_mem->pf_inter_lyr_pred = &isvcd_compute_interlyr_motion_mode;
 3180|       |
 3181|  34.0k|        if(SVCD_TRUE == ps_curr_lyr_res_prms->u1_dyadic_flag)
  ------------------
  |  |   46|  34.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (3181:12): [True: 16.6k, False: 17.3k]
  ------------------
 3182|  16.6k|        {
 3183|  16.6k|            ps_lyr_mem->pf_inter_lyr_pred = &isvcd_interlyr_motion_mode_pred_dyadic;
 3184|  16.6k|        }
 3185|       |
 3186|       |        /* Store the Dyadic flag */
 3187|  34.0k|        ps_lyr_mem->i4_dyadic_flag = ps_curr_lyr_res_prms->u1_dyadic_flag;
 3188|       |
 3189|       |        /* derive the scaling factors for motion upscaling */
 3190|       |        /* this is derived assuming no crop change flag is present */
 3191|  34.0k|        ps_lyr_mem->i4_scale_mv_x =
 3192|  34.0k|            ((i4_scaled_ref_lyr_width << 16) + (i4_ref_lyr_width >> 1)) / i4_ref_lyr_width;
 3193|       |
 3194|  34.0k|        ps_lyr_mem->i4_scale_mv_y =
 3195|  34.0k|            ((i4_scaled_ref_lyr_height << 16) + (i4_ref_lyr_height >> 1)) / i4_ref_lyr_height;
 3196|  34.0k|    }
 3197|      0|    else
 3198|      0|    {
 3199|       |        /* should take false value */
 3200|      0|        if(SVCD_FALSE != ps_curr_lyr_res_prms->u1_remap_req_flag)
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  |  Branch (3200:12): [True: 0, False: 0]
  ------------------
 3201|      0|        {
 3202|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 3203|      0|        }
 3204|      0|    }
 3205|       |
 3206|       |    /* store the current layer width and height to context */
 3207|  34.0k|    ps_ctxt->i4_ref_width = ps_curr_lyr_res_prms->i4_res_width;
 3208|  34.0k|    ps_ctxt->i4_ref_height = ps_curr_lyr_res_prms->i4_res_height;
 3209|       |
 3210|  34.0k|    return OK;
  ------------------
  |  |  114|  34.0k|#define OK        0
  ------------------
 3211|  34.0k|}

isvcd_nal_buf_reset:
  146|  2.31M|{
  147|  2.31M|    nal_buf_t *ps_nal_buf = pv_nal_buf;
  148|       |
  149|  2.31M|    ps_nal_buf->i4_valid_flag = SVCD_FALSE;
  ------------------
  |  |   45|  2.31M|#define SVCD_FALSE 0
  ------------------
  150|  2.31M|    ps_nal_buf->i4_buf_size = 0;
  151|  2.31M|    ps_nal_buf->u4_max_bits = 0;
  152|       |    ps_nal_buf->pu1_buf = NULL;
  153|  2.31M|}
isvcd_nal_find_start_code:
  184|  1.23M|{
  185|  1.23M|    UWORD8 *pu1_buf = pu1_buf_start + i4_cur_pos;
  186|  1.23M|    WORD32 i4_i;
  187|       |
  188|   134M|    for(i4_i = 0; i4_i < (i4_max_num_bytes - i4_cur_pos); i4_i++)
  ------------------
  |  Branch (188:19): [True: 134M, False: 34.4k]
  ------------------
  189|   134M|    {
  190|       |        /*-------------------------------------------------------------------*/
  191|       |        /* If zero increment the zero byte counter                           */
  192|       |        /*-------------------------------------------------------------------*/
  193|   134M|        if(0 == *pu1_buf)
  ------------------
  |  Branch (193:12): [True: 29.8M, False: 104M]
  ------------------
  194|  29.8M|        {
  195|  29.8M|            (*pi4_zero_cnt)++;
  196|  29.8M|        }
  197|       |
  198|       |        /*-------------------------------------------------------------------*/
  199|       |        /* If start code found then increment the byte consumed and return   */
  200|       |        /*-------------------------------------------------------------------*/
  201|   104M|        else if(0x01 == *pu1_buf && *pi4_zero_cnt >= NUM_OF_ZERO_BYTES_BEFORE_START_CODE)
  ------------------
  |  |   54|  36.1M|#define NUM_OF_ZERO_BYTES_BEFORE_START_CODE (2)
  ------------------
  |  Branch (201:17): [True: 36.1M, False: 68.3M]
  |  Branch (201:37): [True: 1.19M, False: 34.9M]
  ------------------
  202|  1.19M|        {
  203|  1.19M|            (*pu4_bytes_consumed)++;
  204|  1.19M|            return (SC_FOUND);
  ------------------
  |  |   52|  1.19M|#define SC_FOUND 1
  ------------------
  205|  1.19M|        }
  206|       |        /*-------------------------------------------------------------------*/
  207|       |        /* If non zero byte and value is not equal to 1 a then reset zero    */
  208|       |        /* byte counter                                                      */
  209|       |        /*-------------------------------------------------------------------*/
  210|   103M|        else
  211|   103M|        {
  212|   103M|            *pi4_zero_cnt = 0;
  213|   103M|        }
  214|       |
  215|   133M|        (*pu4_bytes_consumed)++;
  216|   133M|        pu1_buf++;
  217|   133M|    }
  218|       |
  219|  34.4k|    return (SC_NOT_FOUND);
  ------------------
  |  |   51|  34.4k|#define SC_NOT_FOUND (-1)
  ------------------
  220|  1.23M|}
isvcd_get_first_start_code:
  248|  22.5k|{
  249|  22.5k|    WORD32 i4_zero_cnt = 0, i4_status;
  250|  22.5k|    UWORD32 u4_bytes_consumed_temp = 0;
  251|       |
  252|  22.5k|    i4_status = isvcd_nal_find_start_code(pu1_stream_buffer, 0, *pu4_num_bytes, &i4_zero_cnt,
  253|  22.5k|                                          &u4_bytes_consumed_temp);
  254|       |
  255|       |    /*-----------------------------------------------------------------------*/
  256|       |    /* If start code is not found then return and start searching for it     */
  257|       |    /* again in the next process call. This process is repeated till we      */
  258|       |    /* get a start code                                                      */
  259|       |    /*-----------------------------------------------------------------------*/
  260|  22.5k|    if(SC_NOT_FOUND == i4_status)
  ------------------
  |  |   51|  22.5k|#define SC_NOT_FOUND (-1)
  ------------------
  |  Branch (260:8): [True: 22, False: 22.5k]
  ------------------
  261|     22|    {
  262|     22|        *pu4_bytes_consumed += u4_bytes_consumed_temp;
  263|     22|        return (i4_status);
  264|     22|    }
  265|  22.5k|    else
  266|  22.5k|    {
  267|       |        /*-------------------------------------------------------------------*/
  268|       |        /* If start code found then proceed with bitstream extraction        */
  269|       |        /*-------------------------------------------------------------------*/
  270|  22.5k|        *pu4_bytes_consumed += u4_bytes_consumed_temp;
  271|  22.5k|        return (i4_status);
  272|  22.5k|    }
  273|  22.5k|}
isvcd_get_annex_b_nal_unit:
  312|  1.20M|{
  313|  1.20M|    nal_unit_t *ps_nal_unit = (nal_unit_t *) pv_nal_unit;
  314|  1.20M|    WORD32 i4_status, i4_nal_start_flag = SVCD_FALSE;
  ------------------
  |  |   45|  1.20M|#define SVCD_FALSE 0
  ------------------
  315|       |
  316|       |    /*-----------------------------------------------------------------------*/
  317|       |    /* Initialization                                                        */
  318|       |    /*-----------------------------------------------------------------------*/
  319|  1.20M|    *pu4_bytes_consumed = 0;
  320|  1.20M|    *pi4_more_data_flag = SVCD_TRUE;
  ------------------
  |  |   46|  1.20M|#define SVCD_TRUE 1
  ------------------
  321|       |
  322|       |    /*------------------------ check ----------------------------------------*/
  323|       |    /* Assumptions is that this fucntion should not be called with this state*/
  324|       |    /* hence it is responsibility of the caller to reset the state after the */
  325|       |    /* NAL_END.                                                              */
  326|       |    /*-----------------------------------------------------------------------*/
  327|  1.20M|    if(NAL_END == *pi4_state)
  ------------------
  |  Branch (327:8): [True: 97, False: 1.20M]
  ------------------
  328|     97|    {
  329|     97|        return i4_nal_start_flag;
  330|     97|    }
  331|       |
  332|       |    /*-----------------------------------------------------------------------*/
  333|       |    /* ps_nal_unit->apu1_bufs[0] is expected to point to start of buffer of  */
  334|       |    /* current NAL unit of the current process call. If a NAL unit is frag-  */
  335|       |    /* -mented across multiple process call then this buffer should point to */
  336|       |    /* start address of buffers. But when start of NAL is present in the     */
  337|       |    /* buffer of current process call then ps_nal_unit->apu1_bufs[0] is      */
  338|       |    /* expected to point to start adress of NAL unit (should be pointing to) */
  339|       |    /* NAL header)                                                           */
  340|       |    /*-----------------------------------------------------------------------*/
  341|  1.20M|    ps_nal_unit->pu1_bufs = pu1_buf_start + i4_cur_pos;
  342|       |
  343|  1.20M|    if(NAL_START == *pi4_state)
  ------------------
  |  Branch (343:8): [True: 1.20M, False: 26]
  ------------------
  344|  1.20M|    {
  345|  1.20M|        if(0 != *pi4_zero_byte_cnt)
  ------------------
  |  Branch (345:12): [True: 0, False: 1.20M]
  ------------------
  346|      0|        {
  347|      0|            return i4_nal_start_flag;
  348|      0|        }
  349|  1.20M|        i4_nal_start_flag = SVCD_TRUE;
  ------------------
  |  |   46|  1.20M|#define SVCD_TRUE 1
  ------------------
  350|  1.20M|        ps_nal_unit->i4_num_bufs = 1;
  351|  1.20M|        ps_nal_unit->i4_buf_sizes = 0;
  352|  1.20M|        *pi4_state = FIND_NAL_END;
  353|  1.20M|    }
  354|       |
  355|  1.20M|    i4_status = isvcd_nal_find_start_code(pu1_buf_start, i4_cur_pos, i4_max_num_bytes,
  356|  1.20M|                                          pi4_zero_byte_cnt, pu4_bytes_consumed);
  357|       |
  358|  1.20M|    if(SC_NOT_FOUND == i4_status)
  ------------------
  |  |   51|  1.20M|#define SC_NOT_FOUND (-1)
  ------------------
  |  Branch (358:8): [True: 34.2k, False: 1.17M]
  ------------------
  359|  34.2k|    {
  360|       |        /*-------------------------------------------------------------------*/
  361|       |        /* If start code is not found then there are 2 possibilities         */
  362|       |        /* 1. We are in the middle of decoding the start code. This means    */
  363|       |        /*    that we might have decoded the one or 2 zeroes of the start    */
  364|       |        /*    code. In such cases, we should not consume these bytes. Though */
  365|       |        /*    doing so we might encounter spurious cases where 0's are not   */
  366|       |        /*    actually corresponds to start code but these will not harm us  */
  367|       |        /* 2. Not of above case. Straightforward one                         */
  368|       |        /*-------------------------------------------------------------------*/
  369|  34.2k|        ps_nal_unit->i4_buf_sizes = *pu4_bytes_consumed;
  370|  34.2k|        *pi4_more_data_flag = SVCD_FALSE;
  ------------------
  |  |   45|  34.2k|#define SVCD_FALSE 0
  ------------------
  371|       |
  372|  34.2k|        return (i4_nal_start_flag);
  373|  34.2k|    }
  374|  1.17M|    else
  375|  1.17M|    {
  376|       |        /*-------------------------------------------------------------------*/
  377|       |        /* If NAL END is found then increment the bytes consumed appropriatly*/
  378|       |        /* reset the zero byte counter                                       */
  379|       |        /*-------------------------------------------------------------------*/
  380|  1.17M|        *pi4_state = NAL_END;
  381|  1.17M|        ps_nal_unit->i4_buf_sizes = *pu4_bytes_consumed - 1;
  382|  1.17M|        *pi4_zero_byte_cnt = 0;
  383|  1.17M|        return (i4_nal_start_flag);
  384|  1.17M|    }
  385|  1.20M|}
isvcd_nal_rbsp_to_sodb:
  413|   552k|{
  414|   552k|    UWORD32 u4_last_word_pos;
  415|   552k|    UWORD32 u4_word, u4_max_bit_offset;
  416|   552k|    UWORD8 i4_num_bits;
  417|   552k|    WORD32 i4_i;
  418|   552k|    WORD64 i8_nal_len;
  419|   552k|    UWORD32 *pu4_buf;
  420|       |
  421|   552k|    if(0 >= i4_nal_len_in_bytes)
  ------------------
  |  Branch (421:8): [True: 14.2k, False: 537k]
  ------------------
  422|  14.2k|    {
  423|  14.2k|        return (0);
  424|  14.2k|    }
  425|       |
  426|       |    /* Get offset in bits */
  427|   537k|    i8_nal_len = (WORD64) i4_nal_len_in_bytes << 3;
  428|   537k|    u4_max_bit_offset = (UWORD32) i8_nal_len;
  429|       |
  430|       |    /* If NAL is coded in CABAC then SODB */
  431|       |    /* length has to account for CABAC    */
  432|       |    /* ZERO WORDS also                    */
  433|   537k|    if(1 == u1_ecd_mode)
  ------------------
  |  Branch (433:8): [True: 62.8k, False: 474k]
  ------------------
  434|  62.8k|    {
  435|  62.8k|        return (u4_max_bit_offset);
  436|  62.8k|    }
  437|       |
  438|       |    /* Calculate the position of last word */
  439|   474k|    u4_last_word_pos = i4_nal_len_in_bytes >> 2;
  440|       |
  441|       |    /* Load the last word                 */
  442|   474k|    i4_i = i4_nal_len_in_bytes & 0x03;
  443|   474k|    if(0 != i4_i)
  ------------------
  |  Branch (443:8): [True: 345k, False: 129k]
  ------------------
  444|   345k|    {
  445|   345k|        pu4_buf = (UWORD32 *) pu1_buf;
  446|   345k|        pu4_buf += u4_last_word_pos;
  447|   345k|        u4_word = *pu4_buf;
  448|   345k|        i4_num_bits = i4_i << 3;
  449|   345k|        u4_word >>= (32 - i4_num_bits);
  450|   345k|    }
  451|   129k|    else
  452|   129k|    {
  453|   129k|        pu4_buf = (UWORD32 *) pu1_buf;
  454|   129k|        pu4_buf += (u4_last_word_pos - 1);
  455|   129k|        u4_word = *pu4_buf;
  456|   129k|        i4_num_bits = 32;
  457|   129k|    }
  458|       |
  459|       |    /* Search for RBSP stop bit          */
  460|   474k|    do
  461|   500k|    {
  462|  2.36M|        for(i4_i = 0; (i4_i < i4_num_bits) && !CHECKBIT(u4_word, i4_i); i4_i++)
  ------------------
  |  |   54|  2.33M|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
  |  Branch (462:23): [True: 2.33M, False: 27.5k]
  |  Branch (462:47): [True: 1.86M, False: 472k]
  ------------------
  463|  1.86M|            ;
  464|       |
  465|   500k|        u4_max_bit_offset -= i4_i;
  466|       |
  467|       |        /* RBSP stop bit is found then   */
  468|       |        /* come out of the loop          */
  469|   500k|        if(0 != CHECKBIT(u4_word, i4_i))
  ------------------
  |  |   54|   500k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  ------------------
  |  Branch (469:12): [True: 472k, False: 27.5k]
  ------------------
  470|   472k|        {
  471|       |            /* Remove RBSP stop bit */
  472|   472k|            u4_max_bit_offset -= 1;
  473|   472k|            break;
  474|   472k|        }
  475|       |
  476|  27.5k|        pu4_buf -= 1;
  477|  27.5k|        u4_word = *pu4_buf;
  478|  27.5k|        i4_num_bits = 32;
  479|  27.5k|    } while(u4_max_bit_offset > 0);
  ------------------
  |  Branch (479:13): [True: 25.2k, False: 2.28k]
  ------------------
  480|       |
  481|   474k|    return (u4_max_bit_offset);
  482|   537k|}
isvcd_reset_emulation_ctxt:
  510|  1.80M|{
  511|  1.80M|    emulation_prevent_ctxt_t *ps_emulation_ctxt = (emulation_prevent_ctxt_t *) pv_emulation_ctxt;
  512|       |
  513|       |    /*! Reset the emulation prevention context */
  514|  1.80M|    ps_emulation_ctxt->i4_state = NOT_STUFFED_BYTE;
  515|  1.80M|    ps_emulation_ctxt->i4_zeroes_cnt = 0;
  516|  1.80M|    ps_emulation_ctxt->u4_bytes_in_word = 0;
  517|  1.80M|    ps_emulation_ctxt->u4_word = 0;
  518|  1.80M|}
isvcd_nal_byte_swap_emulation:
  564|  1.46M|{
  565|  1.46M|    UWORD32 u4_i, u4_num_bytes, u4_offset;
  566|  1.46M|    UWORD8 u1_cur_byte;
  567|  1.46M|    emulation_prevent_ctxt_t *ps_emulation_ctxt = (emulation_prevent_ctxt_t *) pv_emulation_ctxt;
  568|       |
  569|  1.46M|    u4_offset = ps_emulation_ctxt->u4_bytes_in_word;
  570|  1.46M|    u4_num_bytes = ps_emulation_ctxt->u4_bytes_in_word;
  571|       |
  572|  70.5M|    for(u4_i = 0; u4_i < u4_in_len; u4_i++)
  ------------------
  |  Branch (572:19): [True: 69.1M, False: 1.33M]
  ------------------
  573|  69.1M|    {
  574|  69.1M|        UWORD8 u1_cur_byte_emu, u1_cur_byte_sc;
  575|  69.1M|        UWORD64 u8_sft_word;
  576|       |
  577|  69.1M|        u1_cur_byte = *pu1_in_stream++;
  578|  69.1M|        u1_cur_byte_emu = (EMULATION_PREVENTION_BYTE == u1_cur_byte);
  ------------------
  |  |   57|  69.1M|#define EMULATION_PREVENTION_BYTE (0x03)
  ------------------
  579|  69.1M|        u1_cur_byte_sc = (START_CODE_BYTE == u1_cur_byte);
  ------------------
  |  |   55|  69.1M|#define START_CODE_BYTE (0x01)
  ------------------
  580|       |
  581|  69.1M|        if((ps_emulation_ctxt->i4_zeroes_cnt >= i4_0s_bfr_sc) & (u1_cur_byte_emu | u1_cur_byte_sc) &
  ------------------
  |  Branch (581:12): [True: 142k, False: 69.0M]
  ------------------
  582|  69.1M|           (NOT_STUFFED_BYTE == ps_emulation_ctxt->i4_state))
  583|   142k|        {
  584|   142k|            if(u1_cur_byte_sc)
  ------------------
  |  Branch (584:16): [True: 136k, False: 5.68k]
  ------------------
  585|   136k|            {
  586|   136k|                break;
  587|   136k|            }
  588|  5.68k|            ps_emulation_ctxt->i4_zeroes_cnt = 0;
  589|  5.68k|            ps_emulation_ctxt->i4_state = STUFFED_BYTE;
  590|  5.68k|            continue;
  591|   142k|        }
  592|       |
  593|  69.0M|        u8_sft_word = (UWORD64) ps_emulation_ctxt->u4_word << 8;
  594|  69.0M|        ps_emulation_ctxt->u4_word = (UWORD32) (u8_sft_word | u1_cur_byte);
  595|  69.0M|        ps_emulation_ctxt->u4_bytes_in_word++;
  596|  69.0M|        u4_num_bytes++;
  597|  69.0M|        ps_emulation_ctxt->i4_zeroes_cnt++;
  598|  69.0M|        if(u1_cur_byte != 0x00)
  ------------------
  |  Branch (598:12): [True: 45.0M, False: 23.9M]
  ------------------
  599|  45.0M|        {
  600|  45.0M|            ps_emulation_ctxt->i4_zeroes_cnt = 0;
  601|  45.0M|        }
  602|       |
  603|  69.0M|        if((u4_num_bytes & 0x03) == 0x00)
  ------------------
  |  Branch (603:12): [True: 16.7M, False: 52.3M]
  ------------------
  604|  16.7M|        {
  605|  16.7M|            *pu4_out_stream = ps_emulation_ctxt->u4_word;
  606|  16.7M|            ps_emulation_ctxt->u4_bytes_in_word = 0;
  607|  16.7M|            pu4_out_stream++;
  608|  16.7M|        }
  609|       |
  610|  69.0M|        ps_emulation_ctxt->i4_state = NOT_STUFFED_BYTE;
  611|  69.0M|    }
  612|       |
  613|  1.46M|    if(ps_emulation_ctxt->u4_bytes_in_word)
  ------------------
  |  Branch (613:8): [True: 1.15M, False: 313k]
  ------------------
  614|  1.15M|    {
  615|  1.15M|        UWORD64 temp_out_stream = (UWORD64) ps_emulation_ctxt->u4_word
  616|  1.15M|                                  << ((4 - ps_emulation_ctxt->u4_bytes_in_word) << 3);
  617|  1.15M|        *pu4_out_stream = (UWORD32) temp_out_stream;
  618|  1.15M|    }
  619|       |
  620|  1.46M|    *pu4_out_len = (u4_num_bytes - u4_offset);
  621|  1.46M|    return ((u4_num_bytes & 0xFFFFFFFC));
  622|  1.46M|}
isvcd_set_default_nal_prms:
  650|  1.42M|{
  651|  1.42M|    nal_prms_t *ps_nal_prms;
  652|  1.42M|    ps_nal_prms = (nal_prms_t *) pv_nal_prms;
  653|       |
  654|       |    /* Set default values */
  655|  1.42M|    ps_nal_prms->i4_dependency_id = 0;
  656|  1.42M|    ps_nal_prms->i4_derived_nal_type = 0xFF;
  657|  1.42M|    ps_nal_prms->i4_idr_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|  1.42M|#define SVCD_FALSE 0
  ------------------
  658|  1.42M|    ps_nal_prms->i4_nal_header_len = 0;
  659|  1.42M|    ps_nal_prms->i4_nal_ref_idc = 0xFF;
  660|  1.42M|    ps_nal_prms->i4_nal_unit_type = 0xFF;
  661|  1.42M|    ps_nal_prms->i4_no_int_lyr_pred = 1;
  662|  1.42M|    ps_nal_prms->i4_priority_id = 0;
  663|  1.42M|    ps_nal_prms->i4_quality_id = 0;
  664|  1.42M|    ps_nal_prms->i4_discard_flag = 0;
  665|  1.42M|    ps_nal_prms->i4_dqid = 0;
  666|  1.42M|    ps_nal_prms->i4_use_ref_base_pic_flag = 0;
  667|  1.42M|    ps_nal_prms->i4_temporal_id = 0;
  668|  1.42M|    ps_nal_prms->i4_idr_pic_num = 0;
  669|  1.42M|    ps_nal_prms->u2_frm_num = 0;
  670|  1.42M|    ps_nal_prms->i4_poc_lsb = 0;
  671|  1.42M|    ps_nal_prms->i4_delta_poc_bot = 0;
  672|  1.42M|    ps_nal_prms->ai4_delta_poc[0] = 0;
  673|  1.42M|    ps_nal_prms->ai4_delta_poc[1] = 0;
  674|  1.42M|    ps_nal_prms->u1_pps_id = 0;
  675|  1.42M|}
isvcd_dec_nal_hdr:
  705|  1.40M|{
  706|  1.40M|    nal_prms_t *ps_nal_prms;
  707|  1.40M|    nal_prms_t *ps_prefix_nal_prms;
  708|  1.40M|    nal_buf_t *ps_prefix_nal_buf;
  709|  1.40M|    dec_bit_stream_t s_stream_ctxt = {0};
  710|  1.40M|    WORD32 i4_forbidden_zero_bit;
  711|       |
  712|       |    /* byte swapping */
  713|  1.40M|    UWORD8 *pu1_buf = (UWORD8 *) pv_nal_header_buf;
  714|  1.40M|    UWORD8 *pu1_src = (UWORD8 *) pv_buf_ptr;
  715|       |
  716|  1.40M|    ps_nal_prms = (nal_prms_t *) pv_nal_prms;
  717|  1.40M|    ps_prefix_nal_prms = (nal_prms_t *) pv_prefix_nal_prms;
  718|  1.40M|    ps_prefix_nal_buf = (nal_buf_t *) pv_prefix_nal_buf;
  719|       |
  720|       |    /* The NAL header syntax elements are read through bitstream fucntions.  */
  721|       |    /* Hence bitstream context structure initializaton is needed before      */
  722|       |    /* parsing from the bitstream                                            */
  723|       |    /* Also bitstream fucntions assume the buffer is byteswapped. Hence the  */
  724|       |    /* byte swapping is also done for 4 bytes                                */
  725|  1.40M|    s_stream_ctxt.u4_ofst = 0;
  726|  1.40M|    s_stream_ctxt.pu4_buffer = pv_nal_header_buf;
  727|  1.40M|    s_stream_ctxt.u4_max_ofst = (i4_buf_size << 3);
  728|       |
  729|  1.40M|    *pu4_err_code = 0;
  730|       |
  731|       |    /* Check the size of bitstream buffer */
  732|  1.40M|    if(s_stream_ctxt.u4_max_ofst < 8)
  ------------------
  |  Branch (732:8): [True: 7.76k, False: 1.39M]
  ------------------
  733|  7.76k|    {
  734|  7.76k|        *pu4_err_code = (UWORD32) NAL_INSUFFICIENT_DATA;
  735|  7.76k|        return;
  736|  7.76k|    }
  737|       |
  738|  1.39M|    if(s_stream_ctxt.u4_max_ofst >= 32)
  ------------------
  |  Branch (738:8): [True: 1.12M, False: 269k]
  ------------------
  739|  1.12M|    {
  740|  1.12M|        *pu1_buf++ = *(pu1_src + 3);
  741|  1.12M|        *pu1_buf++ = *(pu1_src + 2);
  742|  1.12M|        *pu1_buf++ = *(pu1_src + 1);
  743|  1.12M|        *pu1_buf++ = *pu1_src;
  744|  1.12M|    }
  745|   269k|    else
  746|   269k|    {
  747|   269k|        *pu1_buf++ = *pu1_src;
  748|   269k|    }
  749|       |
  750|       |    /*-----------------------------------------------------------------------*/
  751|       |    /*! Parse the NAL header and update the NAL header structure members     */
  752|       |    /*-----------------------------------------------------------------------*/
  753|       |    /* Read forbidden 0 bit */
  754|  1.39M|    i4_forbidden_zero_bit = ih264d_get_bit_h264(&s_stream_ctxt);
  755|       |
  756|  1.39M|    if(0 != i4_forbidden_zero_bit)
  ------------------
  |  Branch (756:8): [True: 25.0k, False: 1.36M]
  ------------------
  757|  25.0k|    {
  758|  25.0k|        *pu4_err_code = (UWORD32) NAL_CORRUPT_DATA;
  759|  25.0k|        return;
  760|  25.0k|    }
  761|       |
  762|       |    /*---------------- Read NAL ref idc -----------------------------*/
  763|  1.36M|    ps_nal_prms->i4_nal_ref_idc = ih264d_get_bits_h264(&s_stream_ctxt, 2);
  764|       |
  765|       |    /*----------------- Read NAL type -------------------------------*/
  766|  1.36M|    ps_nal_prms->i4_nal_unit_type = ih264d_get_bits_h264(&s_stream_ctxt, 5);
  767|  1.36M|    if(ps_nal_prms->i4_nal_unit_type > CODED_SLICE_EXTENSION_NAL)
  ------------------
  |  |   66|  1.36M|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (767:8): [True: 4.66k, False: 1.36M]
  ------------------
  768|  4.66k|    {
  769|  4.66k|        *pu4_err_code = (UWORD32) NAL_CORRUPT_DATA;
  770|  4.66k|        return;
  771|  4.66k|    }
  772|  1.36M|    if(ACCESS_UNIT_DELIMITER_RBSP == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |  332|  1.36M|#define ACCESS_UNIT_DELIMITER_RBSP      9
  ------------------
  |  Branch (772:8): [True: 11.0k, False: 1.35M]
  ------------------
  773|  11.0k|    {
  774|  11.0k|        ps_nal_prms->i4_derived_nal_type = NON_VCL_NAL;
  775|  11.0k|        return;
  776|  11.0k|    }
  777|       |
  778|       |    /* set idr pic flag */
  779|  1.35M|    if(IDR_SLICE_NAL == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |  328|  1.35M|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (779:8): [True: 400k, False: 951k]
  ------------------
  780|   400k|    {
  781|   400k|        ps_nal_prms->i4_idr_pic_flag = SVCD_TRUE;
  ------------------
  |  |   46|   400k|#define SVCD_TRUE 1
  ------------------
  782|   400k|    }
  783|   951k|    else
  784|   951k|    {
  785|   951k|        ps_nal_prms->i4_idr_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|   951k|#define SVCD_FALSE 0
  ------------------
  786|   951k|    }
  787|       |
  788|       |    /*----------------- Read SVC extension NAL header ---------------*/
  789|  1.35M|    if(CODED_SLICE_EXTENSION_NAL == ps_nal_prms->i4_nal_unit_type ||
  ------------------
  |  |   66|  1.35M|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (789:8): [True: 138k, False: 1.21M]
  ------------------
  790|  1.21M|       PREFIX_UNIT_NAL == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   64|  1.21M|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (790:8): [True: 7.55k, False: 1.20M]
  ------------------
  791|   146k|    {
  792|   146k|        WORD32 i4_svc_extension_flag, i4_idr_flag;
  793|       |
  794|       |        /* check the size of the buffer */
  795|   146k|        if(s_stream_ctxt.u4_max_ofst < 32)
  ------------------
  |  Branch (795:12): [True: 2.79k, False: 143k]
  ------------------
  796|  2.79k|        {
  797|  2.79k|            *pu4_err_code = (UWORD32) NAL_INSUFFICIENT_DATA;
  798|  2.79k|            return;
  799|  2.79k|        }
  800|       |
  801|   143k|        i4_svc_extension_flag = ih264d_get_bit_h264(&s_stream_ctxt);
  802|   143k|        UNUSED(i4_svc_extension_flag);
  ------------------
  |  |   45|   143k|#define UNUSED(x) ((void)(x))
  ------------------
  803|       |
  804|   143k|        i4_idr_flag = ih264d_get_bit_h264(&s_stream_ctxt);
  805|       |
  806|       |        /* Set idr pic flag based on idr flag */
  807|   143k|        if(1 == i4_idr_flag)
  ------------------
  |  Branch (807:12): [True: 107k, False: 36.2k]
  ------------------
  808|   107k|        {
  809|   107k|            ps_nal_prms->i4_idr_pic_flag = SVCD_TRUE;
  ------------------
  |  |   46|   107k|#define SVCD_TRUE 1
  ------------------
  810|   107k|        }
  811|  36.2k|        else
  812|  36.2k|        {
  813|  36.2k|            ps_nal_prms->i4_idr_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|  36.2k|#define SVCD_FALSE 0
  ------------------
  814|  36.2k|        }
  815|       |
  816|       |        /* parse priorit id */
  817|   143k|        ps_nal_prms->i4_priority_id = ih264d_get_bits_h264(&s_stream_ctxt, 6);
  818|       |
  819|       |        /* parse the no inter layer prediction flag */
  820|   143k|        ps_nal_prms->i4_no_int_lyr_pred = ih264d_get_bit_h264(&s_stream_ctxt);
  821|       |
  822|       |        /* parse dependency id */
  823|   143k|        ps_nal_prms->i4_dependency_id = ih264d_get_bits_h264(&s_stream_ctxt, 3);
  824|       |
  825|       |        /* parse quality id */
  826|   143k|        ps_nal_prms->i4_quality_id = ih264d_get_bits_h264(&s_stream_ctxt, 4);
  827|       |
  828|   143k|        if((ps_nal_prms->i4_quality_id > 0) || (ps_nal_prms->i4_dependency_id > 2))
  ------------------
  |  Branch (828:12): [True: 6.08k, False: 137k]
  |  Branch (828:48): [True: 460, False: 137k]
  ------------------
  829|  6.54k|        {
  830|  6.54k|            *pu4_err_code = (UWORD32) NAL_CORRUPT_DATA;
  831|  6.54k|            return;
  832|  6.54k|        }
  833|       |        /* parse temporal id */
  834|   137k|        ps_nal_prms->i4_temporal_id = ih264d_get_bits_h264(&s_stream_ctxt, 3);
  835|       |
  836|       |        /* parse use ref base pic flag */
  837|   137k|        ps_nal_prms->i4_use_ref_base_pic_flag = ih264d_get_bit_h264(&s_stream_ctxt);
  838|       |
  839|   137k|        if(0 != ps_nal_prms->i4_use_ref_base_pic_flag)
  ------------------
  |  Branch (839:12): [True: 1.72k, False: 135k]
  ------------------
  840|  1.72k|        {
  841|  1.72k|            *pu4_err_code = (UWORD32) NAL_CORRUPT_DATA;
  842|  1.72k|            return;
  843|  1.72k|        }
  844|       |        /* parse discrad flag */
  845|   135k|        ps_nal_prms->i4_discard_flag = ih264d_get_bit_h264(&s_stream_ctxt);
  846|       |
  847|       |        /* parse the reserved bits */
  848|   135k|        ih264d_get_bits_h264(&s_stream_ctxt, 3);
  849|   135k|    }
  850|       |
  851|       |    /* update NAL hedaer length in bytes */
  852|  1.34M|    ps_nal_prms->i4_nal_header_len = s_stream_ctxt.u4_ofst >> 3;
  853|       |
  854|       |    /*************************************************************************/
  855|       |    /* PREFIX NAL UNIT ASSOCIATION WITH ASSOCIATED NAL UNIT                  */
  856|       |    /*************************************************************************/
  857|       |
  858|       |    /* if current NAL is not a AVC NAL unit then */
  859|       |    /* discard the prefix NAL unit if present    */
  860|  1.34M|    if(CODED_SLICE_EXTENSION_NAL == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   66|  1.34M|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (860:8): [True: 129k, False: 1.21M]
  ------------------
  861|   129k|    {
  862|   129k|        isvcd_nal_buf_reset(ps_prefix_nal_buf);
  863|   129k|    }
  864|       |
  865|  1.34M|    if(SVCD_TRUE == ps_prefix_nal_buf->i4_valid_flag)
  ------------------
  |  |   46|  1.34M|#define SVCD_TRUE 1
  ------------------
  |  Branch (865:8): [True: 3.82k, False: 1.33M]
  ------------------
  866|  3.82k|    {
  867|       |        /* Copy the required parameters from the prefix NAL unit */
  868|  3.82k|        ps_nal_prms->i4_dependency_id = ps_prefix_nal_prms->i4_dependency_id;
  869|  3.82k|        ps_nal_prms->i4_quality_id = ps_prefix_nal_prms->i4_quality_id;
  870|  3.82k|        ps_nal_prms->i4_priority_id = ps_prefix_nal_prms->i4_priority_id;
  871|  3.82k|        ps_nal_prms->i4_temporal_id = ps_prefix_nal_prms->i4_temporal_id;
  872|  3.82k|        ps_nal_prms->i4_no_int_lyr_pred = ps_prefix_nal_prms->i4_no_int_lyr_pred;
  873|  3.82k|        ps_nal_prms->i4_use_ref_base_pic_flag = ps_prefix_nal_prms->i4_use_ref_base_pic_flag;
  874|  3.82k|        ps_nal_prms->i4_discard_flag = ps_prefix_nal_prms->i4_discard_flag;
  875|  3.82k|    }
  876|       |
  877|       |    /*-----------------------------------------------------------------------*/
  878|       |    /* Set the derived NAL unit type and also update the DQID for VCL NAL    */
  879|       |    /*  units                                                                */
  880|       |    /*-----------------------------------------------------------------------*/
  881|  1.34M|    if(CODED_SLICE_EXTENSION_NAL == ps_nal_prms->i4_nal_unit_type ||
  ------------------
  |  |   66|  1.34M|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (881:8): [True: 129k, False: 1.21M]
  ------------------
  882|  1.21M|       SLICE_NAL == ps_nal_prms->i4_nal_unit_type ||
  ------------------
  |  |  324|  1.21M|#define SLICE_NAL                       1
  ------------------
  |  Branch (882:8): [True: 143k, False: 1.06M]
  ------------------
  883|  1.06M|       IDR_SLICE_NAL == ps_nal_prms->i4_nal_unit_type ||
  ------------------
  |  |  328|  1.06M|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (883:8): [True: 400k, False: 668k]
  ------------------
  884|   668k|       PREFIX_UNIT_NAL == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   64|   668k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (884:8): [True: 5.88k, False: 662k]
  ------------------
  885|   679k|    {
  886|   679k|        ps_nal_prms->i4_derived_nal_type = VCL_NAL;
  887|       |
  888|       |        /* calculate the DQID and modified DQID */
  889|   679k|        ps_nal_prms->i4_dqid = (ps_nal_prms->i4_dependency_id << 4) + ps_nal_prms->i4_quality_id;
  890|   679k|    }
  891|   662k|    else
  892|   662k|    {
  893|   662k|        ps_nal_prms->i4_derived_nal_type = NON_VCL_NAL;
  894|   662k|    }
  895|  1.34M|}
isvcd_parse_part_slice_hdr:
  928|   573k|{
  929|   573k|    UWORD32 u4_slice_type;
  930|   573k|    dec_seq_params_t *ps_sps = (dec_seq_params_t *) pv_sps;
  931|   573k|    dec_pic_params_t *ps_pps = (dec_pic_params_t *) pv_pps;
  932|   573k|    dec_bit_stream_t s_stream_ctxt = {0};
  933|   573k|    dec_bit_stream_t *ps_stream_ctxt;
  934|   573k|    UWORD32 *pu4_bitstrm_buf;
  935|   573k|    UWORD32 *pu4_bitstrm_ofst;
  936|       |
  937|   573k|    *pi4_sps_pps_status = NAL_CORRUPT_DATA;
  938|       |    /* Perform the emulation prevention and byte swap */
  939|   573k|    {
  940|   573k|        emulation_prevent_ctxt_t s_emulation_ctxt = {0};
  941|   573k|        WORD32 i4_size, i4_temp;
  942|       |
  943|   573k|        isvcd_reset_emulation_ctxt((void *) &s_emulation_ctxt);
  944|   573k|        i4_size = MIN(i4_input_buf_size, HEADER_BUFFER_LEN_BEFORE_EP);
  ------------------
  |  |   61|   573k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 404k, False: 168k]
  |  |  ------------------
  ------------------
  945|       |
  946|   573k|        isvcd_nal_byte_swap_emulation((UWORD32 *) pu1_temp_buf, (UWORD32 *) &i4_temp, pu1_input_buf,
  947|   573k|                                      (UWORD32) i4_size, NUM_OF_ZERO_BYTES_BEFORE_START_CODE,
  ------------------
  |  |   54|   573k|#define NUM_OF_ZERO_BYTES_BEFORE_START_CODE (2)
  ------------------
  948|   573k|                                      &s_emulation_ctxt);
  949|       |
  950|       |        /* Initialize the stream context structure */
  951|   573k|        s_stream_ctxt.pu4_buffer = (UWORD32 *) pu1_temp_buf;
  952|   573k|        s_stream_ctxt.u4_ofst = 0;
  953|   573k|        s_stream_ctxt.u4_max_ofst = (i4_size << 3);
  954|   573k|    }
  955|       |
  956|   573k|    ps_stream_ctxt = &s_stream_ctxt;
  957|       |
  958|       |    /* Parse the first mb address in slice */
  959|   573k|    pu4_bitstrm_buf = ps_stream_ctxt->pu4_buffer;
  960|   573k|    pu4_bitstrm_ofst = &ps_stream_ctxt->u4_ofst;
  961|   573k|    ps_nal_prms->u4_first_mb_addr = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  962|   573k|    if(ps_nal_prms->u4_first_mb_addr >= (MAX_MBS_LEVEL_51))
  ------------------
  |  |  307|   573k|#define MAX_MBS_LEVEL_51 36864
  ------------------
  |  Branch (962:8): [True: 19.5k, False: 553k]
  ------------------
  963|  19.5k|    {
  964|  19.5k|        return ERROR_CORRUPTED_SLICE;
  965|  19.5k|    }
  966|       |    /* Parse slice type */
  967|   553k|    u4_slice_type = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  968|       |
  969|   553k|    if(u4_slice_type > 9) return ERROR_INV_SLC_TYPE_T;
  ------------------
  |  Branch (969:8): [True: 28.3k, False: 525k]
  ------------------
  970|       |
  971|       |    /* Check the validity of slice prms */
  972|   525k|    switch(u4_slice_type)
  973|   525k|    {
  974|   290k|        case 0:
  ------------------
  |  Branch (974:9): [True: 290k, False: 234k]
  ------------------
  975|   312k|        case 5:
  ------------------
  |  Branch (975:9): [True: 21.9k, False: 503k]
  ------------------
  976|   312k|            u4_slice_type = P_SLICE;
  ------------------
  |  |  368|   312k|#define P_SLICE  0
  ------------------
  977|       |            /* P slice */
  978|   312k|            break;
  979|   138k|        case 1:
  ------------------
  |  Branch (979:9): [True: 138k, False: 387k]
  ------------------
  980|   146k|        case 6:
  ------------------
  |  Branch (980:9): [True: 8.04k, False: 517k]
  ------------------
  981|   146k|            u4_slice_type = B_SLICE;
  ------------------
  |  |  369|   146k|#define B_SLICE  1
  ------------------
  982|       |            /* B slice */
  983|   146k|            break;
  984|  39.2k|        case 2:
  ------------------
  |  Branch (984:9): [True: 39.2k, False: 486k]
  ------------------
  985|  47.1k|        case 7:
  ------------------
  |  Branch (985:9): [True: 7.93k, False: 517k]
  ------------------
  986|       |            /* I slice */
  987|  47.1k|            u4_slice_type = I_SLICE;
  ------------------
  |  |  370|  47.1k|#define I_SLICE  2
  ------------------
  988|  47.1k|            break;
  989|  19.4k|        default:
  ------------------
  |  Branch (989:9): [True: 19.4k, False: 505k]
  ------------------
  990|  19.4k|            break;
  991|   525k|    }
  992|       |
  993|       |    /* Parse the pps id */
  994|   525k|    ps_nal_prms->u1_pps_id = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  995|   525k|    if(ps_nal_prms->u1_pps_id & MASK_ERR_PIC_SET_ID) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  527|   525k|#define MASK_ERR_PIC_SET_ID   (0xFFFFFF00)
  ------------------
  |  Branch (995:8): [True: 0, False: 525k]
  ------------------
  996|       |
  997|       |    /* validate pps id */
  998|   525k|    ps_pps += ps_nal_prms->u1_pps_id;
  999|   525k|    if(0 == ps_pps->u1_is_valid)
  ------------------
  |  Branch (999:8): [True: 69.1k, False: 456k]
  ------------------
 1000|  69.1k|    {
 1001|  69.1k|        return NOT_OK;
  ------------------
  |  |  116|  69.1k|#define NOT_OK    -1
  ------------------
 1002|  69.1k|    }
 1003|       |    /* Derive sps id */
 1004|   456k|    ps_sps = ps_pps->ps_sps;
 1005|       |
 1006|   456k|    ps_nal_prms->u1_sps_id = ps_sps->u1_seq_parameter_set_id;
 1007|   456k|    if(CODED_SLICE_EXTENSION_NAL == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   66|   456k|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (1007:8): [True: 95.9k, False: 360k]
  ------------------
 1008|  95.9k|    {
 1009|  95.9k|        ps_sps += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  95.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 1010|  95.9k|        ps_nal_prms->u1_sps_id = ps_sps->u1_seq_parameter_set_id;
 1011|  95.9k|        ps_nal_prms->u1_sps_id += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  95.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 1012|  95.9k|    }
 1013|       |
 1014|   456k|    if(NULL == ps_sps)
  ------------------
  |  Branch (1014:8): [True: 0, False: 456k]
  ------------------
 1015|      0|    {
 1016|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1017|      0|    }
 1018|   456k|    if(FALSE == ps_sps->u1_is_valid)
  ------------------
  |  |  592|   456k|#define FALSE   0
  ------------------
  |  Branch (1018:8): [True: 12.7k, False: 443k]
  ------------------
 1019|  12.7k|    {
 1020|  12.7k|        return ERROR_INV_SLICE_HDR_T;
 1021|  12.7k|    }
 1022|   443k|    if(ps_nal_prms->u4_first_mb_addr > (ps_sps->u2_frm_ht_in_mbs * ps_sps->u2_frm_wd_in_mbs))
  ------------------
  |  Branch (1022:8): [True: 32.9k, False: 410k]
  ------------------
 1023|  32.9k|    {
 1024|  32.9k|        return ERROR_CORRUPTED_SLICE;
 1025|  32.9k|    }
 1026|   410k|    *pi4_sps_pps_status = 0;
 1027|       |
 1028|       |    /* Parse frame number */
 1029|   410k|    ps_nal_prms->u2_frm_num = ih264d_get_bits_h264(ps_stream_ctxt, ps_sps->u1_bits_in_frm_num);
 1030|       |
 1031|       |    /* IDR picture number */
 1032|   410k|    if(SVCD_TRUE == ps_nal_prms->i4_idr_pic_flag)
  ------------------
  |  |   46|   410k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1032:8): [True: 321k, False: 88.9k]
  ------------------
 1033|   321k|    {
 1034|   321k|        ps_nal_prms->i4_idr_pic_num = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1035|       |
 1036|   321k|        if(ps_nal_prms->i4_idr_pic_num > 65535) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (1036:12): [True: 7.68k, False: 313k]
  ------------------
 1037|   321k|    }
 1038|       |
 1039|       |    /* Poc lsb */
 1040|   402k|    if(0 == ps_sps->u1_pic_order_cnt_type)
  ------------------
  |  Branch (1040:8): [True: 311k, False: 91.1k]
  ------------------
 1041|   311k|    {
 1042|   311k|        ps_nal_prms->i4_poc_lsb =
 1043|   311k|            ih264d_get_bits_h264(ps_stream_ctxt, ps_sps->u1_log2_max_pic_order_cnt_lsb_minus);
 1044|       |
 1045|   311k|        if(ps_nal_prms->i4_poc_lsb < 0 ||
  ------------------
  |  Branch (1045:12): [True: 0, False: 311k]
  ------------------
 1046|   311k|           ps_nal_prms->i4_poc_lsb >= ps_sps->i4_max_pic_order_cntLsb)
  ------------------
  |  Branch (1046:12): [True: 0, False: 311k]
  ------------------
 1047|      0|            return ERROR_INV_SLICE_HDR_T;
 1048|   311k|        if(SVCD_TRUE == ps_pps->u1_pic_order_present_flag)
  ------------------
  |  |   46|   311k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1048:12): [True: 108k, False: 203k]
  ------------------
 1049|   108k|        {
 1050|   108k|            ps_nal_prms->i4_delta_poc_bot = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1051|   108k|        }
 1052|   311k|    }
 1053|  91.1k|    else if((1 == ps_sps->u1_pic_order_cnt_type) && (!ps_sps->u1_delta_pic_order_always_zero_flag))
  ------------------
  |  Branch (1053:13): [True: 81.6k, False: 9.49k]
  |  Branch (1053:53): [True: 73.0k, False: 8.57k]
  ------------------
 1054|  73.0k|    {
 1055|  73.0k|        ps_nal_prms->ai4_delta_poc[0] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1056|       |
 1057|  73.0k|        if(SVCD_TRUE == ps_pps->u1_pic_order_present_flag)
  ------------------
  |  |   46|  73.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1057:12): [True: 65.0k, False: 7.97k]
  ------------------
 1058|  65.0k|        {
 1059|  65.0k|            ps_nal_prms->ai4_delta_poc[1] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1060|  65.0k|        }
 1061|  73.0k|    }
 1062|       |
 1063|   402k|    *pu4_err_code = 0;
 1064|   402k|    return (OK);
  ------------------
  |  |  114|   402k|#define OK        0
  ------------------
 1065|   402k|}
isvcd_get_int_tgt_lyr_attr:
 1093|   774k|{
 1094|   774k|    WORD32 i4_dep_id;
 1095|   774k|    WORD32 i4_quality_id;
 1096|   774k|    WORD32 i4_temp_id;
 1097|   774k|    WORD32 i4_prior_id;
 1098|       |
 1099|       |    /* sanity checks */
 1100|   774k|    if((NULL == ps_app_attr) || (NULL == ps_int_attr) || (NULL == ps_nal_prms))
  ------------------
  |  Branch (1100:8): [True: 0, False: 774k]
  |  Branch (1100:33): [True: 0, False: 774k]
  |  Branch (1100:58): [True: 0, False: 774k]
  ------------------
 1101|      0|    {
 1102|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1103|      0|    }
 1104|       |
 1105|   774k|    i4_dep_id = ps_int_attr->i4_dependency_id;
 1106|   774k|    i4_quality_id = ps_int_attr->i4_quality_id;
 1107|   774k|    i4_temp_id = ps_int_attr->i4_temporal_id;
 1108|   774k|    i4_prior_id = ps_int_attr->i4_priority_id;
 1109|       |
 1110|       |    /* check for idr pic flag                                  */
 1111|       |    /* dependency & temporal id is updated only for IDR picture */
 1112|   774k|    if(SVCD_TRUE == ps_nal_prms->i4_idr_pic_flag)
  ------------------
  |  |   46|   774k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1112:8): [True: 320k, False: 453k]
  ------------------
 1113|   320k|    {
 1114|   320k|        if(ps_int_attr->i4_dependency_id < ps_app_attr->i4_dependency_id)
  ------------------
  |  Branch (1114:12): [True: 93.7k, False: 226k]
  ------------------
 1115|  93.7k|        {
 1116|       |            /* update the internal attributes only if             */
 1117|       |            /* current dep_id -1 == highest dep id decoded so far */
 1118|       |            /* and quality id is equal to 0                       */
 1119|  93.7k|            if((ps_nal_prms->i4_dependency_id - 1 == ps_int_attr->i4_dependency_id) &&
  ------------------
  |  Branch (1119:16): [True: 26.5k, False: 67.1k]
  ------------------
 1120|  26.5k|               (0 == ps_nal_prms->i4_quality_id))
  ------------------
  |  Branch (1120:16): [True: 26.5k, False: 0]
  ------------------
 1121|  26.5k|            {
 1122|       |                /* Set revised target dependency id */
 1123|  26.5k|                i4_dep_id = ps_nal_prms->i4_dependency_id;
 1124|  26.5k|                i4_temp_id = ps_app_attr->i4_temporal_id;
 1125|  26.5k|                i4_prior_id = ps_app_attr->i4_priority_id;
 1126|  26.5k|            }
 1127|  93.7k|        }
 1128|   226k|        else
 1129|   226k|        {
 1130|       |            /* cases when the curr dep is greater than or equal to app dep */
 1131|   226k|            i4_dep_id = ps_app_attr->i4_dependency_id;
 1132|   226k|            i4_temp_id = ps_app_attr->i4_temporal_id;
 1133|   226k|            i4_prior_id = ps_app_attr->i4_priority_id;
 1134|   226k|        }
 1135|   320k|    }
 1136|       |
 1137|       |    /* Set quality id */
 1138|   774k|    if(i4_dep_id == ps_app_attr->i4_dependency_id)
  ------------------
  |  Branch (1138:8): [True: 599k, False: 174k]
  ------------------
 1139|   599k|    {
 1140|   599k|        i4_quality_id = ps_app_attr->i4_quality_id;
 1141|   599k|    }
 1142|   174k|    else
 1143|   174k|    {
 1144|   174k|        i4_quality_id = MAX_QUALITY_ID;
  ------------------
  |  |  102|   174k|#define MAX_QUALITY_ID 0
  ------------------
 1145|   174k|    }
 1146|       |
 1147|       |    /* Update the internal attributes */
 1148|   774k|    ps_int_attr->i4_dependency_id = i4_dep_id;
 1149|   774k|    ps_int_attr->i4_quality_id = i4_quality_id;
 1150|   774k|    ps_int_attr->i4_temporal_id = i4_temp_id;
 1151|   774k|    ps_int_attr->i4_priority_id = i4_prior_id;
 1152|       |
 1153|   774k|    return (OK);
  ------------------
  |  |  114|   774k|#define OK        0
  ------------------
 1154|   774k|}
isvcd_discard_nal:
 1188|  1.34M|{
 1189|  1.34M|    WORD32 i4_discard_nal_flag;
 1190|  1.34M|    nal_prms_t *ps_nal_prms;
 1191|  1.34M|    target_lyr_attr_t *ps_app_attr;
 1192|  1.34M|    target_lyr_attr_t *ps_int_attr;
 1193|  1.34M|    WORD32 i4_status;
 1194|       |
 1195|  1.34M|    ps_nal_prms = (nal_prms_t *) pv_nal_prms;
 1196|  1.34M|    ps_app_attr = (target_lyr_attr_t *) pv_app_attr;
 1197|  1.34M|    ps_int_attr = (target_lyr_attr_t *) pv_int_attr;
 1198|       |
 1199|       |    /* Get the updated target layer attributes */
 1200|  1.34M|    if(SVCD_TRUE == i4_update_flag)
  ------------------
  |  |   46|  1.34M|#define SVCD_TRUE 1
  ------------------
  |  Branch (1200:8): [True: 774k, False: 567k]
  ------------------
 1201|   774k|    {
 1202|   774k|        i4_status = isvcd_get_int_tgt_lyr_attr(ps_app_attr, ps_int_attr, ps_nal_prms);
 1203|   774k|        if(OK != i4_status)
  ------------------
  |  |  114|   774k|#define OK        0
  ------------------
  |  Branch (1203:12): [True: 0, False: 774k]
  ------------------
 1204|      0|        {
 1205|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1206|      0|        }
 1207|   774k|    }
 1208|       |
 1209|  1.34M|    i4_discard_nal_flag = SVCD_FALSE;
  ------------------
  |  |   45|  1.34M|#define SVCD_FALSE 0
  ------------------
 1210|       |
 1211|  1.34M|    if(VCL_NAL == ps_nal_prms->i4_derived_nal_type)
  ------------------
  |  Branch (1211:8): [True: 679k, False: 662k]
  ------------------
 1212|   679k|    {
 1213|       |        /*-------------------------------------------------------------------*/
 1214|       |        /*!Discard VCL NAL if any of following is true                       */
 1215|       |        /*! - Dependency id is greater than target dependency id             */
 1216|       |        /*! - Dependency id is equal to target dependency id but quality id  */
 1217|       |        /*!   is greater than target quality id                              */
 1218|       |        /*! - priority id is greater than target priority id                 */
 1219|       |        /*! - Temporal id is greater than target temporal id                 */
 1220|       |        /*! - If dependency id is greater than a NAL unit for which discard  */
 1221|       |        /*!   flag of the NAL header is set                                  */
 1222|       |        /*-------------------------------------------------------------------*/
 1223|   679k|        if(PREFIX_UNIT_NAL != ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   64|   679k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (1223:12): [True: 673k, False: 5.88k]
  ------------------
 1224|   673k|        {
 1225|   673k|            if(ps_nal_prms->i4_dependency_id > ps_int_attr->i4_dependency_id)
  ------------------
  |  Branch (1225:16): [True: 99.9k, False: 573k]
  ------------------
 1226|  99.9k|            {
 1227|  99.9k|                i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|  99.9k|#define SVCD_TRUE 1
  ------------------
 1228|  99.9k|            }
 1229|       |
 1230|   673k|            if(ps_nal_prms->i4_dependency_id == ps_int_attr->i4_dependency_id &&
  ------------------
  |  Branch (1230:16): [True: 366k, False: 306k]
  ------------------
 1231|   366k|               ps_nal_prms->i4_quality_id > ps_int_attr->i4_quality_id)
  ------------------
  |  Branch (1231:16): [True: 0, False: 366k]
  ------------------
 1232|      0|            {
 1233|      0|                i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 1234|      0|            }
 1235|       |
 1236|   673k|            if(ps_nal_prms->i4_temporal_id > ps_int_attr->i4_temporal_id)
  ------------------
  |  Branch (1236:16): [True: 2.04k, False: 671k]
  ------------------
 1237|  2.04k|            {
 1238|  2.04k|                i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|  2.04k|#define SVCD_TRUE 1
  ------------------
 1239|  2.04k|            }
 1240|       |
 1241|   673k|            if(ps_nal_prms->i4_priority_id > ps_int_attr->i4_priority_id)
  ------------------
  |  Branch (1241:16): [True: 0, False: 673k]
  ------------------
 1242|      0|            {
 1243|      0|                i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 1244|      0|            }
 1245|   673k|        }
 1246|  5.88k|        else
 1247|  5.88k|        {
 1248|  5.88k|            if(0 == ps_int_attr->i4_quality_id && 0 == ps_int_attr->i4_dependency_id)
  ------------------
  |  Branch (1248:16): [True: 5.88k, False: 0]
  |  Branch (1248:51): [True: 2.03k, False: 3.85k]
  ------------------
 1249|  2.03k|            {
 1250|  2.03k|                i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|  2.03k|#define SVCD_TRUE 1
  ------------------
 1251|  2.03k|            }
 1252|  5.88k|        }
 1253|   679k|    }
 1254|       |
 1255|  1.34M|    return (i4_discard_nal_flag);
 1256|  1.34M|}

isvcd_get_nal_buf:
  108|   911k|{
  109|   911k|    nal_prms_t *ps_nal_prms;
  110|   911k|    nal_buf_t *ps_nal_buf;
  111|       |
  112|   911k|    ps_nal_prms = &ps_nal_parse_ctxt->s_nal_prms;
  113|       |
  114|       |    /* Get the NAL buffer structure */
  115|   911k|    if(PREFIX_UNIT_NAL == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   64|   911k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (115:8): [True: 3.28k, False: 908k]
  ------------------
  116|  3.28k|    {
  117|  3.28k|        ps_nal_buf = &ps_nal_parse_ctxt->s_prefix_nal_buf;
  118|       |
  119|       |        /* Note: This reset will cause a prefix NAL unit */
  120|       |        /* which is followed by another prefix NAL unit  */
  121|       |        /* to be ignored by the module. This is indeed   */
  122|       |        /* a desired behaviour                           */
  123|  3.28k|        isvcd_nal_buf_reset(ps_nal_buf);
  124|  3.28k|    }
  125|   908k|    else
  126|   908k|    {
  127|   908k|        ps_nal_buf = &ps_nal_parse_ctxt->s_nal_buf;
  128|   908k|    }
  129|       |
  130|       |    /* Initialize the buffer structure */
  131|   911k|    ps_nal_buf->i4_valid_flag = SVCD_TRUE;
  ------------------
  |  |   46|   911k|#define SVCD_TRUE 1
  ------------------
  132|   911k|    if(VCL_NAL == ps_nal_prms->i4_derived_nal_type)
  ------------------
  |  Branch (132:8): [True: 259k, False: 651k]
  ------------------
  133|   259k|    {
  134|   259k|        ps_nal_buf->pu1_buf = ps_nal_parse_ctxt->pu1_vcl_nal_buf;
  135|   259k|    }
  136|   651k|    else if(NON_VCL_NAL == ps_nal_prms->i4_derived_nal_type)
  ------------------
  |  Branch (136:13): [True: 639k, False: 12.3k]
  ------------------
  137|   639k|    {
  138|   639k|        ps_nal_buf->pu1_buf = ps_nal_parse_ctxt->pu1_non_vcl_nal_buf;
  139|   639k|    }
  140|  12.3k|    else
  141|  12.3k|    {
  142|  12.3k|        ps_nal_buf->pu1_buf = NULL;
  143|  12.3k|        return;
  144|  12.3k|    }
  145|       |
  146|   899k|    *pps_nal_buf = ps_nal_buf;
  147|   899k|}
isvcd_dqid_ctxt_reset:
  172|   290k|{
  173|   290k|    WORD32 i4_lyr_idx;
  174|   290k|    WORD32 i4_max_num_lyrs;
  175|   290k|    dqid_node_t *ps_dqid_node;
  176|       |
  177|       |    /* sanity checks */
  178|   290k|    if(NULL == ps_dqid_ctxt)
  ------------------
  |  Branch (178:8): [True: 0, False: 290k]
  ------------------
  179|      0|    {
  180|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  181|      0|    }
  182|       |
  183|   290k|    i4_max_num_lyrs = ps_dqid_ctxt->i4_max_num_lyrs;
  184|   290k|    ps_dqid_node = ps_dqid_ctxt->ps_dqid_node;
  185|       |
  186|       |    /* Loop over all the layers */
  187|  1.16M|    for(i4_lyr_idx = 0; i4_lyr_idx < i4_max_num_lyrs; i4_lyr_idx++)
  ------------------
  |  Branch (187:25): [True: 871k, False: 290k]
  ------------------
  188|   871k|    {
  189|       |        /* Reset the valid flag */
  190|   871k|        ps_dqid_node->u1_valid_flag = SVCD_FALSE;
  ------------------
  |  |   45|   871k|#define SVCD_FALSE 0
  ------------------
  191|       |
  192|       |        /* Loop updates */
  193|   871k|        ps_dqid_node += 1;
  194|   871k|    } /* loop over all the layers */
  195|       |
  196|   290k|    return (OK);
  ------------------
  |  |  114|   290k|#define OK        0
  ------------------
  197|   290k|}
isvcd_get_dqid_node:
  226|   927k|{
  227|   927k|    WORD32 i4_lyr_idx;
  228|   927k|    WORD32 i4_max_num_lyrs;
  229|   927k|    dqid_node_t *ps_dqid_node;
  230|   927k|    dqid_node_t *ps_rqrd_dqid_node;
  231|       |
  232|       |    /* sanity checks */
  233|   927k|    if((NULL == ps_dqid_ctxt) || (NULL == pps_dqid_node))
  ------------------
  |  Branch (233:8): [True: 0, False: 927k]
  |  Branch (233:34): [True: 0, False: 927k]
  ------------------
  234|      0|    {
  235|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  236|      0|    }
  237|       |
  238|   927k|    i4_max_num_lyrs = ps_dqid_ctxt->i4_max_num_lyrs;
  239|   927k|    ps_dqid_node = ps_dqid_ctxt->ps_dqid_node;
  240|       |
  241|       |    /*Initialization */
  242|   927k|    ps_rqrd_dqid_node = NULL;
  243|       |
  244|       |    /* Loop over all the buffer nodes */
  245|  2.17M|    for(i4_lyr_idx = 0; i4_lyr_idx < i4_max_num_lyrs; i4_lyr_idx++)
  ------------------
  |  Branch (245:25): [True: 1.78M, False: 388k]
  ------------------
  246|  1.78M|    {
  247|  1.78M|        if((SVCD_TRUE == ps_dqid_node->u1_valid_flag) && (u1_dqid == ps_dqid_node->u1_dqid))
  ------------------
  |  |   46|  1.78M|#define SVCD_TRUE 1
  ------------------
  |  Branch (247:12): [True: 691k, False: 1.09M]
  |  Branch (247:58): [True: 539k, False: 152k]
  ------------------
  248|   539k|        {
  249|   539k|            ps_rqrd_dqid_node = ps_dqid_node;
  250|   539k|            break;
  251|   539k|        }
  252|       |        /* Loop updates */
  253|  1.24M|        ps_dqid_node += 1;
  254|  1.24M|    } /* Loop over all the buffer nodes */
  255|       |
  256|   927k|    if(NULL == ps_rqrd_dqid_node)
  ------------------
  |  Branch (256:8): [True: 388k, False: 539k]
  ------------------
  257|   388k|    {
  258|       |        /* If vcl node is not allocated for the requested DQID then allocate buffer */
  259|   388k|        ps_dqid_node = ps_dqid_ctxt->ps_dqid_node;
  260|   461k|        for(i4_lyr_idx = 0; i4_lyr_idx < i4_max_num_lyrs; i4_lyr_idx++)
  ------------------
  |  Branch (260:29): [True: 461k, False: 0]
  ------------------
  261|   461k|        {
  262|   461k|            if(SVCD_FALSE == ps_dqid_node->u1_valid_flag)
  ------------------
  |  |   45|   461k|#define SVCD_FALSE 0
  ------------------
  |  Branch (262:16): [True: 388k, False: 73.0k]
  ------------------
  263|   388k|            {
  264|   388k|                break;
  265|   388k|            }
  266|       |            /* Loop updates */
  267|  73.0k|            ps_dqid_node += 1;
  268|  73.0k|        } /* Loop over all the nodes */
  269|       |        /* Update the node structure */
  270|   388k|        ps_rqrd_dqid_node = ps_dqid_node;
  271|   388k|    }
  272|       |
  273|       |    /* sanity checks */
  274|   927k|    if(NULL == ps_rqrd_dqid_node)
  ------------------
  |  Branch (274:8): [True: 0, False: 927k]
  ------------------
  275|      0|    {
  276|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  277|      0|    }
  278|   927k|    *pps_dqid_node = ps_rqrd_dqid_node;
  279|       |
  280|   927k|    return (OK);
  ------------------
  |  |  114|   927k|#define OK        0
  ------------------
  281|   927k|}
isvcd_nal_reset_ctxt:
  307|  1.23M|{
  308|  1.23M|    nal_unit_t *ps_nal_unit;
  309|       |
  310|  1.23M|    if(NULL == ps_nal_parse_ctxt)
  ------------------
  |  Branch (310:8): [True: 0, False: 1.23M]
  ------------------
  311|      0|    {
  312|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  313|      0|    }
  314|       |
  315|       |    /* Reset the NAL boundary detetction */
  316|  1.23M|    ps_nal_parse_ctxt->i4_find_nal_state = NAL_START;
  317|  1.23M|    ps_nal_parse_ctxt->i4_zero_byte_cnt = 0;
  318|  1.23M|    ps_nal_unit = ps_nal_parse_ctxt->pv_nal_unit;
  319|  1.23M|    ps_nal_unit->i4_num_bufs = 0;
  320|       |
  321|       |    /*Reset emulation prevention */
  322|  1.23M|    isvcd_reset_emulation_ctxt(&ps_nal_parse_ctxt->s_emulation_ctxt);
  323|       |
  324|       |    /*Reset the NAL header prms */
  325|  1.23M|    isvcd_set_default_nal_prms(&ps_nal_parse_ctxt->s_nal_prms);
  326|       |
  327|       |    /* Reset other NAL level tracking variables */
  328|  1.23M|    ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_FALSE;
  ------------------
  |  |   45|  1.23M|#define SVCD_FALSE 0
  ------------------
  329|       |
  330|       |    /*Reset NAL buffer structure*/
  331|  1.23M|    isvcd_nal_buf_reset(&ps_nal_parse_ctxt->s_nal_buf);
  332|       |
  333|  1.23M|    return (OK);
  ------------------
  |  |  114|  1.23M|#define OK        0
  ------------------
  334|  1.23M|}
isvcd_pic_reset_ctxt:
  361|   290k|{
  362|   290k|    WORD32 i4_status;
  363|       |
  364|       |    /*-----------------------------------------------------------------------*/
  365|       |    /*! Reset NAL boundary detetction logic                                  */
  366|       |    /*-----------------------------------------------------------------------*/
  367|   290k|    i4_status = isvcd_nal_reset_ctxt(ps_nal_parse_ctxt);
  368|       |
  369|   290k|    UNUSED(i4_status);
  ------------------
  |  |   45|   290k|#define UNUSED(x) ((void)(x))
  ------------------
  370|       |
  371|       |    /*-----------------------------------------------------------------------*/
  372|       |    /*! Reset picture boundary detctetion logic                              */
  373|       |    /*-----------------------------------------------------------------------*/
  374|   290k|    ps_nal_parse_ctxt->i4_is_frst_vcl_nal_in_au = SVCD_TRUE;
  ------------------
  |  |   46|   290k|#define SVCD_TRUE 1
  ------------------
  375|       |
  376|       |    /*-----------------------------------------------------------------------*/
  377|       |    /*! Reset VCL and non VCL NAL buffer tracking variables                  */
  378|       |    /*-----------------------------------------------------------------------*/
  379|   290k|    ps_nal_parse_ctxt->pu1_non_vcl_nal_buf = ps_nal_parse_ctxt->pv_non_vcl_nal_buf;
  380|   290k|    ps_nal_parse_ctxt->pu1_vcl_nal_buf = ps_nal_parse_ctxt->pv_vcl_nal_buf;
  381|       |
  382|       |    /* reset the bytes left to buffer size */
  383|   290k|    ps_nal_parse_ctxt->u4_bytes_left_vcl = MAX_VCL_NAL_BUFF_SIZE;
  ------------------
  |  |   69|   290k|#define MAX_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
  384|       |
  385|   290k|    ps_nal_parse_ctxt->u4_bytes_left_non_vcl = MAX_NON_VCL_NAL_BUFF_SIZE;
  ------------------
  |  |   70|   290k|#define MAX_NON_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
  386|       |
  387|       |    /* Offset the buffer to start of vcl data */
  388|   290k|    UPDATE_NAL_BUF_PTR(&ps_nal_parse_ctxt->pu1_non_vcl_nal_buf, NON_VCL_NAL,
  389|   290k|                       &ps_nal_parse_ctxt->u4_bytes_left_non_vcl);
  390|       |
  391|   290k|    UPDATE_NAL_BUF_PTR(&ps_nal_parse_ctxt->pu1_vcl_nal_buf, VCL_NAL,
  392|   290k|                       &ps_nal_parse_ctxt->u4_bytes_left_vcl);
  393|       |
  394|       |    /* Reset previous field */
  395|   290k|    ps_nal_parse_ctxt->ps_prev_non_vcl_buf = NULL;
  396|   290k|    ps_nal_parse_ctxt->i4_idr_pic_err_flag = 0;
  397|       |
  398|       |    /*-----------------------------------------------------------------------*/
  399|       |    /*! Reset other NAL related tracking variables                           */
  400|       |    /*-----------------------------------------------------------------------*/
  401|   290k|    ps_nal_parse_ctxt->i4_num_non_vcl_nals = 0;
  402|       |
  403|       |    /* Reset the vcl nal node buffer context */
  404|   290k|    i4_status = isvcd_dqid_ctxt_reset(&ps_nal_parse_ctxt->s_dqid_ctxt);
  405|       |
  406|       |    /* Reset target layer update flag */
  407|   290k|    ps_nal_parse_ctxt->i4_tgt_lyr_update = SVCD_TRUE;
  ------------------
  |  |   46|   290k|#define SVCD_TRUE 1
  ------------------
  408|   290k|}
isvcd_get_nal_prms:
  443|  1.40M|{
  444|  1.40M|    UWORD8 *pu1_input_buf;
  445|  1.40M|    WORD32 i4_status;
  446|  1.40M|    dec_seq_params_t *ps_sps;
  447|  1.40M|    dec_pic_params_t *ps_pps;
  448|       |
  449|  1.40M|    ps_sps = ps_nal_parse_ctxt->pv_seq_prms;
  450|  1.40M|    ps_pps = ps_nal_parse_ctxt->pv_pic_prms;
  451|       |
  452|  1.40M|    *pu4_err_code = 0;
  453|  1.40M|    *pi4_sps_pps_status = NAL_CORRUPT_DATA;
  454|       |
  455|       |    /* Decode the NAL header */
  456|  1.40M|    isvcd_dec_nal_hdr(pu1_buf, i4_buf_size, ps_nal_parse_ctxt->pv_nal_header_buf, ps_nal_prms,
  457|  1.40M|                      ps_prefix_nal_buf, ps_prefix_nal_prms, pu4_err_code);
  458|       |
  459|       |    /* If encountered with error return fail */
  460|  1.40M|    if(0 != *pu4_err_code)
  ------------------
  |  Branch (460:8): [True: 48.5k, False: 1.35M]
  ------------------
  461|  48.5k|    {
  462|  48.5k|        return (NOT_OK);
  ------------------
  |  |  116|  48.5k|#define NOT_OK    -1
  ------------------
  463|  48.5k|    }
  464|       |
  465|  1.35M|    if(ACCESS_UNIT_DELIMITER_RBSP == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |  332|  1.35M|#define ACCESS_UNIT_DELIMITER_RBSP      9
  ------------------
  |  Branch (465:8): [True: 11.0k, False: 1.34M]
  ------------------
  466|  11.0k|    {
  467|  11.0k|        *pi4_nal_discard_flag = 1;
  468|  11.0k|        return OK;
  ------------------
  |  |  114|  11.0k|#define OK        0
  ------------------
  469|  11.0k|    }
  470|       |
  471|       |    /* Set the discard flag */
  472|  1.34M|    *pi4_nal_discard_flag = isvcd_discard_nal(
  473|  1.34M|        (void *) ps_nal_prms, (void *) &ps_nal_parse_ctxt->s_app_attr,
  474|  1.34M|        (void *) &ps_nal_parse_ctxt->s_int_attr, ps_nal_parse_ctxt->i4_tgt_lyr_update);
  475|       |
  476|       |    /* Parse the slice header if all the following */
  477|       |    /* conditions are true                         */
  478|       |    /* 1. NAL is a VCL NAL unit                    */
  479|       |    /* 2. NAL is not a prefix NAL unit             */
  480|       |    /* 3. NAL is not discarded                     */
  481|  1.34M|    if((NON_VCL_NAL == ps_nal_prms->i4_derived_nal_type) ||
  ------------------
  |  Branch (481:8): [True: 662k, False: 679k]
  ------------------
  482|   679k|       (PREFIX_UNIT_NAL == ps_nal_prms->i4_nal_unit_type) || (SVCD_TRUE == *pi4_nal_discard_flag))
  ------------------
  |  |   64|   679k|#define PREFIX_UNIT_NAL 14
  ------------------
                     (PREFIX_UNIT_NAL == ps_nal_prms->i4_nal_unit_type) || (SVCD_TRUE == *pi4_nal_discard_flag))
  ------------------
  |  |   46|   673k|#define SVCD_TRUE 1
  ------------------
  |  Branch (482:8): [True: 5.88k, False: 673k]
  |  Branch (482:62): [True: 99.9k, False: 573k]
  ------------------
  483|   768k|    {
  484|   768k|        return (OK);
  ------------------
  |  |  114|   768k|#define OK        0
  ------------------
  485|   768k|    }
  486|       |
  487|   573k|    pu1_input_buf = pu1_buf;
  488|   573k|    pu1_input_buf += ps_nal_prms->i4_nal_header_len;
  489|   573k|    i4_buf_size -= ps_nal_prms->i4_nal_header_len;
  490|       |
  491|   573k|    i4_status =
  492|   573k|        isvcd_parse_part_slice_hdr(pu1_input_buf, i4_buf_size, ps_nal_parse_ctxt->pv_nal_header_buf,
  493|   573k|                                   ps_sps, ps_pps, ps_nal_prms, pu4_err_code, pi4_sps_pps_status);
  494|       |
  495|   573k|    return (i4_status);
  496|  1.34M|}
isvcd_compare_nal_prms:
  534|   493k|{
  535|   493k|    dqid_node_t *ps_dqid_node;
  536|   493k|    vcl_node_t *ps_vcl_node;
  537|   493k|    WORD32 i4_status;
  538|       |
  539|       |    /* If DQID is lesser than the DQID of the previous */
  540|       |    /* NAL then declare the picture boundary           */
  541|   493k|    *pi4_pic_bound_type = PIC_BOUND_DQID;
  542|   493k|    if(i4_prev_dqid > ps_nal_prms->i4_dqid)
  ------------------
  |  Branch (542:8): [True: 50.9k, False: 442k]
  ------------------
  543|  50.9k|    {
  544|  50.9k|        *pi4_pic_bound_status = PIC_BOUNDARY_TRUE;
  545|  50.9k|        return (OK);
  ------------------
  |  |  114|  50.9k|#define OK        0
  ------------------
  546|  50.9k|    }
  547|       |
  548|       |    /* Perform the picture boundary detection only for */
  549|       |    /* the layers with quality id equal to 0           */
  550|   442k|    if((FIRST_PASS == i4_pass) && (0 != (ps_nal_prms->i4_dqid & 0x0F)))
  ------------------
  |  |   61|   442k|#define FIRST_PASS 0
  ------------------
  |  Branch (550:8): [True: 311k, False: 131k]
  |  Branch (550:35): [True: 0, False: 311k]
  ------------------
  551|      0|    {
  552|      0|        *pi4_pic_bound_status = PIC_BOUNDARY_FALSE;
  553|      0|        return (OK);
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  554|      0|    }
  555|       |
  556|       |    /* Get the DQID node */
  557|   442k|    i4_status =
  558|   442k|        isvcd_get_dqid_node(&ps_nal_parse_ctxt->s_dqid_ctxt, (UWORD8) i4_prev_dqid, &ps_dqid_node);
  559|   442k|    if((OK != i4_status) || (NULL == ps_dqid_node))
  ------------------
  |  |  114|   442k|#define OK        0
  ------------------
  |  Branch (559:8): [True: 0, False: 442k]
  |  Branch (559:29): [True: 0, False: 442k]
  ------------------
  560|      0|    {
  561|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  562|      0|    }
  563|       |    /* If the current slice is first slice in the layer */
  564|       |    /* then do not compare                              */
  565|   442k|    if(SVCD_FALSE == ps_dqid_node->u1_valid_flag)
  ------------------
  |  |   45|   442k|#define SVCD_FALSE 0
  ------------------
  |  Branch (565:8): [True: 155k, False: 286k]
  ------------------
  566|   155k|    {
  567|   155k|        *pi4_pic_bound_status = PIC_BOUNDARY_FALSE;
  568|   155k|        return (OK);
  ------------------
  |  |  114|   155k|#define OK        0
  ------------------
  569|   155k|    }
  570|       |
  571|   286k|    *pi4_pic_bound_type = PIC_BOUND_SLICE_PRMS;
  572|   286k|    *pi4_pic_bound_status = PIC_BOUNDARY_TRUE;
  573|   286k|    ps_vcl_node = ps_dqid_node->ps_vcl_node;
  574|       |
  575|       |    /* Compare NAL ref idc */
  576|   286k|    {
  577|   286k|        WORD32 i4_prev_ref_pic_flag;
  578|   286k|        WORD32 i4_cur_ref_pic_flag;
  579|       |
  580|   286k|        i4_prev_ref_pic_flag = (0 != ps_vcl_node->i4_nal_ref_idc);
  581|   286k|        i4_cur_ref_pic_flag = (0 != ps_nal_prms->i4_nal_ref_idc);
  582|       |
  583|   286k|        if(i4_prev_ref_pic_flag != i4_cur_ref_pic_flag)
  ------------------
  |  Branch (583:12): [True: 41.8k, False: 244k]
  ------------------
  584|  41.8k|        {
  585|  41.8k|            return (OK);
  ------------------
  |  |  114|  41.8k|#define OK        0
  ------------------
  586|  41.8k|        }
  587|   286k|    }
  588|       |
  589|       |    /* Compare IDR picture flag */
  590|   244k|    if(ps_vcl_node->i4_idr_pic_flag != ps_nal_prms->i4_idr_pic_flag)
  ------------------
  |  Branch (590:8): [True: 24.4k, False: 220k]
  ------------------
  591|  24.4k|    {
  592|  24.4k|        return (OK);
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  593|  24.4k|    }
  594|       |
  595|       |    /* Compare PPS id */
  596|   220k|    if(ps_vcl_node->u1_pps_id != ps_nal_prms->u1_pps_id)
  ------------------
  |  Branch (596:8): [True: 52.7k, False: 167k]
  ------------------
  597|  52.7k|    {
  598|  52.7k|        return (OK);
  ------------------
  |  |  114|  52.7k|#define OK        0
  ------------------
  599|  52.7k|    }
  600|       |
  601|       |    /* Compare idr pic num */
  602|   167k|    if((SVCD_TRUE == ps_nal_prms->i4_idr_pic_flag) &&
  ------------------
  |  |   46|   167k|#define SVCD_TRUE 1
  ------------------
  |  Branch (602:8): [True: 127k, False: 39.7k]
  ------------------
  603|   127k|       (ps_vcl_node->i4_idr_pic_num != ps_nal_prms->i4_idr_pic_num))
  ------------------
  |  Branch (603:8): [True: 33.0k, False: 94.9k]
  ------------------
  604|  33.0k|    {
  605|  33.0k|        return (OK);
  ------------------
  |  |  114|  33.0k|#define OK        0
  ------------------
  606|  33.0k|    }
  607|       |
  608|       |    /* Compare frame number */
  609|   134k|    if(ps_vcl_node->u2_frm_num != ps_nal_prms->u2_frm_num)
  ------------------
  |  Branch (609:8): [True: 70.1k, False: 64.5k]
  ------------------
  610|  70.1k|    {
  611|  70.1k|        return (OK);
  ------------------
  |  |  114|  70.1k|#define OK        0
  ------------------
  612|  70.1k|    }
  613|       |
  614|       |    /* Compare poc lsb */
  615|  64.5k|    if(ps_dqid_node->i4_poc_lsb != ps_nal_prms->i4_poc_lsb)
  ------------------
  |  Branch (615:8): [True: 3.54k, False: 61.0k]
  ------------------
  616|  3.54k|    {
  617|  3.54k|        return (OK);
  ------------------
  |  |  114|  3.54k|#define OK        0
  ------------------
  618|  3.54k|    }
  619|       |
  620|       |    /* Compare delta poc bottom */
  621|  61.0k|    if(ps_dqid_node->i4_delta_poc_bot != ps_nal_prms->i4_delta_poc_bot)
  ------------------
  |  Branch (621:8): [True: 1.36k, False: 59.6k]
  ------------------
  622|  1.36k|    {
  623|  1.36k|        return (OK);
  ------------------
  |  |  114|  1.36k|#define OK        0
  ------------------
  624|  1.36k|    }
  625|       |
  626|       |    /* Compare delta poc [0] */
  627|  59.6k|    if(ps_dqid_node->ai4_delta_poc[0] != ps_nal_prms->ai4_delta_poc[0])
  ------------------
  |  Branch (627:8): [True: 3.15k, False: 56.5k]
  ------------------
  628|  3.15k|    {
  629|  3.15k|        return (OK);
  ------------------
  |  |  114|  3.15k|#define OK        0
  ------------------
  630|  3.15k|    }
  631|       |
  632|       |    /* Compare delta poc [1] */
  633|  56.5k|    if(ps_dqid_node->ai4_delta_poc[1] != ps_nal_prms->ai4_delta_poc[1])
  ------------------
  |  Branch (633:8): [True: 730, False: 55.7k]
  ------------------
  634|    730|    {
  635|    730|        return (OK);
  ------------------
  |  |  114|    730|#define OK        0
  ------------------
  636|    730|    }
  637|       |
  638|  55.7k|    *pi4_pic_bound_status = PIC_BOUNDARY_FALSE;
  639|  55.7k|    return (OK);
  ------------------
  |  |  114|  55.7k|#define OK        0
  ------------------
  640|  56.5k|}
isvcd_detect_pic_boundary_annex_b:
  696|   406k|{
  697|   406k|    UWORD32 u4_err_code;
  698|   406k|    WORD32 i4_zero_cnt;
  699|   406k|    WORD32 i4_status;
  700|   406k|    nal_prms_t s_nal_prms = {0};
  701|   406k|    nal_prms_t s_prefix_nal_prms = {0};
  702|   406k|    nal_buf_t s_prefix_nal_buf = {0};
  703|   406k|    WORD32 i4_pic_bound_type;
  704|   406k|    WORD32 i4_pic_bound_status;
  705|   406k|    UWORD8 *pu1_buf;
  706|   406k|    WORD32 i4_buf_size;
  707|   406k|    WORD32 i4_more_data_flag;
  708|   406k|    WORD32 i4_new_lyr_flag;
  709|   406k|    WORD32 i4_prev_dqid;
  710|   406k|    WORD32 i4_nal_discard_flag;
  711|       |
  712|       |    /* Initializations */
  713|   406k|    i4_zero_cnt = 0;
  714|   406k|    s_prefix_nal_buf.i4_valid_flag = SVCD_FALSE;
  ------------------
  |  |   45|   406k|#define SVCD_FALSE 0
  ------------------
  715|   406k|    *pi4_pic_bound_status = PIC_BOUNDARY_FALSE;
  716|   406k|    i4_new_lyr_flag = SVCD_TRUE;
  ------------------
  |  |   46|   406k|#define SVCD_TRUE 1
  ------------------
  717|       |
  718|       |    /* Get the previous layer's DQID                    */
  719|   406k|    if(SVCD_TRUE == ps_nal_parse_ctxt->i4_is_frst_vcl_nal_in_au)
  ------------------
  |  |   46|   406k|#define SVCD_TRUE 1
  ------------------
  |  Branch (719:8): [True: 155k, False: 250k]
  ------------------
  720|   155k|    {
  721|   155k|        ps_nal_parse_ctxt->i4_prev_dq_id = ps_nal_prms->i4_dqid;
  722|   155k|        ps_nal_parse_ctxt->i4_is_frst_vcl_nal_in_au = SVCD_FALSE;
  ------------------
  |  |   45|   155k|#define SVCD_FALSE 0
  ------------------
  723|   155k|    }
  724|   406k|    i4_prev_dqid = ps_nal_parse_ctxt->i4_prev_dq_id;
  725|   406k|    ps_nal_parse_ctxt->i4_prev_dq_id = ps_nal_prms->i4_dqid;
  726|       |
  727|       |    /* Detect the picture boundary */
  728|   406k|    if(ps_nal_prms->i4_dqid <= i4_prev_dqid)
  ------------------
  |  Branch (728:8): [True: 359k, False: 46.8k]
  ------------------
  729|   359k|    {
  730|   359k|        i4_status =
  731|   359k|            isvcd_compare_nal_prms(ps_nal_prms, FIRST_PASS, i4_prev_dqid, &i4_pic_bound_type,
  ------------------
  |  |   61|   359k|#define FIRST_PASS 0
  ------------------
  732|   359k|                                   &i4_pic_bound_status, ps_nal_parse_ctxt);
  733|   359k|        if(OK != i4_status)
  ------------------
  |  |  114|   359k|#define OK        0
  ------------------
  |  Branch (733:12): [True: 0, False: 359k]
  ------------------
  734|      0|        {
  735|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  736|      0|        }
  737|   359k|        i4_new_lyr_flag = SVCD_FALSE;
  ------------------
  |  |   45|   359k|#define SVCD_FALSE 0
  ------------------
  738|       |
  739|       |        /* Check whether the picture boundary is detected */
  740|       |        /* or not */
  741|   359k|        if(PIC_BOUNDARY_FALSE == i4_pic_bound_status)
  ------------------
  |  Branch (741:12): [True: 199k, False: 160k]
  ------------------
  742|   199k|        {
  743|   199k|            return (OK);
  ------------------
  |  |  114|   199k|#define OK        0
  ------------------
  744|   199k|        }
  745|       |
  746|       |        /* Otherwise look for next nal and compare again */
  747|   160k|        *pi4_pic_bound_status = PIC_BOUNDARY_TRUE;
  748|   160k|    }
  749|       |
  750|   207k|    do
  751|   210k|    {
  752|   210k|        WORD32 i4_sps_pps_corrupt_status;
  753|   210k|        WORD32 i4_tgt_lyr_bckup;
  754|       |        /* If following conditions are true then there */
  755|       |        /* is no data left to decode next NAL and hence*/
  756|       |        /* no further processing is required           */
  757|   210k|        if((NAL_END != ps_nal_parse_ctxt->i4_find_nal_state) ||
  ------------------
  |  Branch (757:12): [True: 13.1k, False: 196k]
  ------------------
  758|   196k|           ((WORD64) i4_cur_pos >= (WORD64) *pu4_num_bytes))
  ------------------
  |  Branch (758:12): [True: 336, False: 196k]
  ------------------
  759|  13.4k|        {
  760|  13.4k|            return (OK);
  ------------------
  |  |  114|  13.4k|#define OK        0
  ------------------
  761|  13.4k|        }
  762|       |
  763|       |        /* Otherwise fill the parameters */
  764|   196k|        pu1_buf = pu1_stream_buffer;
  765|   196k|        pu1_buf += i4_cur_pos;
  766|   196k|        i4_buf_size = *pu4_num_bytes - i4_cur_pos;
  767|       |
  768|       |        /* Get the NAL prms. This involves the following things*/
  769|       |        /* 1. Decode the NAL header                            */
  770|       |        /* 2. Set the discard flag                             */
  771|       |        /* 3. Decode the slice header if needed                */
  772|   196k|        isvcd_set_default_nal_prms(&s_nal_prms);
  773|       |
  774|       |        /* take a back up of tgt lyr update flag */
  775|   196k|        i4_tgt_lyr_bckup = ps_nal_parse_ctxt->i4_tgt_lyr_update;
  776|       |
  777|       |        /* the tgt attributes should not be updated during pic boundary detection */
  778|   196k|        ps_nal_parse_ctxt->i4_tgt_lyr_update = SVCD_FALSE;
  ------------------
  |  |   45|   196k|#define SVCD_FALSE 0
  ------------------
  779|       |
  780|   196k|        i4_status = isvcd_get_nal_prms(pu1_buf, i4_buf_size, &s_nal_prms, &s_prefix_nal_prms,
  781|   196k|                                       &s_prefix_nal_buf, &u4_err_code, &i4_sps_pps_corrupt_status,
  782|   196k|                                       &i4_nal_discard_flag, ps_nal_parse_ctxt);
  783|       |        /* restore back the tgt lyr update flag */
  784|   196k|        ps_nal_parse_ctxt->i4_tgt_lyr_update = i4_tgt_lyr_bckup;
  785|       |        /* If the error code by the nal prms decoder then declare*/
  786|       |        /* picture boundary                                     */
  787|   196k|        if(0 != u4_err_code)
  ------------------
  |  Branch (787:12): [True: 7.44k, False: 189k]
  ------------------
  788|  7.44k|        {
  789|  7.44k|            return (OK);
  ------------------
  |  |  114|  7.44k|#define OK        0
  ------------------
  790|  7.44k|        }
  791|       |
  792|   189k|        i4_more_data_flag = SVCD_FALSE;
  ------------------
  |  |   45|   189k|#define SVCD_FALSE 0
  ------------------
  793|       |
  794|       |        /* If prefix NAL unit comes then save the nal prms*/
  795|   189k|        if(PREFIX_UNIT_NAL == s_nal_prms.i4_nal_unit_type)
  ------------------
  |  |   64|   189k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (795:12): [True: 3.14k, False: 185k]
  ------------------
  796|  3.14k|        {
  797|  3.14k|            UWORD32 u4_bytes_consumed;
  798|  3.14k|            WORD32 i4_status;
  799|       |
  800|       |            /* If prefix NAL is not discarded then set the variables */
  801|       |            /* appropriately */
  802|  3.14k|            if(SVCD_FALSE == i4_nal_discard_flag)
  ------------------
  |  |   45|  3.14k|#define SVCD_FALSE 0
  ------------------
  |  Branch (802:16): [True: 1.54k, False: 1.60k]
  ------------------
  803|  1.54k|            {
  804|  1.54k|                s_prefix_nal_buf.i4_valid_flag = SVCD_TRUE;
  ------------------
  |  |   46|  1.54k|#define SVCD_TRUE 1
  ------------------
  805|  1.54k|                memcpy(&s_prefix_nal_prms, &s_nal_prms, sizeof(nal_prms_t));
  806|  1.54k|            }
  807|       |
  808|       |            /* Go to next start code */
  809|  3.14k|            i4_zero_cnt = 0;
  810|  3.14k|            u4_bytes_consumed = 0;
  811|  3.14k|            i4_status = isvcd_nal_find_start_code(pu1_stream_buffer, i4_cur_pos, *pu4_num_bytes,
  812|  3.14k|                                                  &i4_zero_cnt, &u4_bytes_consumed);
  813|       |            /* If associated NAL unit is not present then return */
  814|  3.14k|            if(SC_FOUND != i4_status)
  ------------------
  |  |   52|  3.14k|#define SC_FOUND 1
  ------------------
  |  Branch (814:16): [True: 190, False: 2.95k]
  ------------------
  815|    190|            {
  816|    190|                return (OK);
  ------------------
  |  |  114|    190|#define OK        0
  ------------------
  817|    190|            }
  818|  2.95k|            i4_cur_pos += u4_bytes_consumed;
  819|  2.95k|            i4_more_data_flag = SVCD_TRUE;
  ------------------
  |  |   46|  2.95k|#define SVCD_TRUE 1
  ------------------
  820|  2.95k|        }
  821|   189k|    } while(SVCD_TRUE == i4_more_data_flag);
  ------------------
  |  |   46|   188k|#define SVCD_TRUE 1
  ------------------
  |  Branch (821:13): [True: 2.95k, False: 185k]
  ------------------
  822|       |
  823|       |    /* Do further picture boundary detection only for */
  824|       |    /* VCL NAL unit (excluding prefix NAL unit)       */
  825|   185k|    if((NON_VCL_NAL == s_nal_prms.i4_derived_nal_type) ||
  ------------------
  |  Branch (825:8): [True: 26.2k, False: 159k]
  ------------------
  826|   159k|       (PREFIX_UNIT_NAL == s_nal_prms.i4_nal_unit_type) || (SVCD_TRUE == i4_nal_discard_flag))
  ------------------
  |  |   64|   159k|#define PREFIX_UNIT_NAL 14
  ------------------
                     (PREFIX_UNIT_NAL == s_nal_prms.i4_nal_unit_type) || (SVCD_TRUE == i4_nal_discard_flag))
  ------------------
  |  |   46|   159k|#define SVCD_TRUE 1
  ------------------
  |  Branch (826:8): [True: 0, False: 159k]
  |  Branch (826:60): [True: 3.90k, False: 155k]
  ------------------
  827|  30.1k|    {
  828|  30.1k|        return (OK);
  ------------------
  |  |  114|  30.1k|#define OK        0
  ------------------
  829|  30.1k|    }
  830|       |
  831|   155k|    if(SVCD_FALSE == i4_new_lyr_flag)
  ------------------
  |  |   45|   155k|#define SVCD_FALSE 0
  ------------------
  |  Branch (831:8): [True: 117k, False: 38.4k]
  ------------------
  832|   117k|    {
  833|   117k|        if(PIC_BOUND_DQID == i4_pic_bound_type)
  ------------------
  |  Branch (833:12): [True: 36.9k, False: 80.3k]
  ------------------
  834|  36.9k|        {
  835|       |            /* If picture boundary was detected based on change */
  836|       |            /* in DQID then declare picture boundary if DQID of the third slice is different */
  837|  36.9k|            if(i4_prev_dqid != s_nal_prms.i4_dqid)
  ------------------
  |  Branch (837:16): [True: 18.4k, False: 18.5k]
  ------------------
  838|  18.4k|            {
  839|  18.4k|                return (OK);
  ------------------
  |  |  114|  18.4k|#define OK        0
  ------------------
  840|  18.4k|            }
  841|  36.9k|        }
  842|  80.3k|        else
  843|  80.3k|        {
  844|       |            /* If picture boundary was detected based on change in slice parameters */
  845|       |            /* then declare picture boundary if dependency id of third slice is different */
  846|  80.3k|            if(PIC_BOUND_SLICE_PRMS != i4_pic_bound_type)
  ------------------
  |  Branch (846:16): [True: 0, False: 80.3k]
  ------------------
  847|      0|            {
  848|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  849|      0|            }
  850|       |
  851|  80.3k|            if((i4_prev_dqid & 0xF) != (s_nal_prms.i4_dqid & 0xF))
  ------------------
  |  Branch (851:16): [True: 0, False: 80.3k]
  ------------------
  852|      0|            {
  853|      0|                return (OK);
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  854|      0|            }
  855|  80.3k|        }
  856|       |
  857|  98.8k|        isvcd_compare_nal_prms(&s_nal_prms, SECOND_PASS, i4_prev_dqid, &i4_pic_bound_type,
  ------------------
  |  |   62|  98.8k|#define SECOND_PASS 1
  ------------------
  858|  98.8k|                               &i4_pic_bound_status, ps_nal_parse_ctxt);
  859|  98.8k|        *pi4_pic_bound_status = i4_pic_bound_status;
  860|       |
  861|  98.8k|        if(PIC_BOUNDARY_FALSE == i4_pic_bound_status)
  ------------------
  |  Branch (861:12): [True: 11.0k, False: 87.8k]
  ------------------
  862|  11.0k|        {
  863|  11.0k|            ps_nal_parse_ctxt->i4_prev_dq_id = i4_prev_dqid;
  864|  11.0k|        }
  865|  98.8k|    }
  866|  38.4k|    else
  867|  38.4k|    {
  868|  38.4k|        if(SVCD_TRUE != i4_new_lyr_flag)
  ------------------
  |  |   46|  38.4k|#define SVCD_TRUE 1
  ------------------
  |  Branch (868:12): [True: 0, False: 38.4k]
  ------------------
  869|      0|        {
  870|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  871|      0|        }
  872|       |        /* The NAL header is not corrupted only if any of the following conditions are true */
  873|       |        /* 1. The DQID of the first slice differs with DQID of the third slice */
  874|       |        /* 2. Picture boundary is detected between first slice and third slice */
  875|  38.4k|        if(i4_prev_dqid == s_nal_prms.i4_dqid)
  ------------------
  |  Branch (875:12): [True: 34.8k, False: 3.66k]
  ------------------
  876|  34.8k|        {
  877|  34.8k|            isvcd_compare_nal_prms(&s_nal_prms, SECOND_PASS, i4_prev_dqid, &i4_pic_bound_type,
  ------------------
  |  |   62|  34.8k|#define SECOND_PASS 1
  ------------------
  878|  34.8k|                                   &i4_pic_bound_status, ps_nal_parse_ctxt);
  879|       |            /* NAL header is corrupted and hence correct it  */
  880|  34.8k|            if(PIC_BOUNDARY_FALSE == i4_pic_bound_status)
  ------------------
  |  Branch (880:16): [True: 1.00k, False: 33.8k]
  ------------------
  881|  1.00k|            {
  882|  1.00k|                ps_nal_prms->i4_dqid = s_nal_prms.i4_dqid;
  883|  1.00k|                ps_nal_prms->i4_dependency_id = s_nal_prms.i4_dependency_id;
  884|  1.00k|                ps_nal_prms->i4_quality_id = s_nal_prms.i4_quality_id;
  885|  1.00k|                ps_nal_parse_ctxt->i4_prev_dq_id = ps_nal_prms->i4_dqid;
  886|  1.00k|            }
  887|  34.8k|        }
  888|  38.4k|        *pi4_pic_bound_status = PIC_BOUNDARY_FALSE;
  889|  38.4k|    }
  890|   137k|    return (OK);
  ------------------
  |  |  114|   137k|#define OK        0
  ------------------
  891|   155k|}
isvcd_insert_vcl_node:
  917|   232k|{
  918|   232k|    vcl_node_t *ps_bot_node;
  919|   232k|    vcl_node_t *ps_top_node;
  920|   232k|    vcl_node_t *ps_node;
  921|   232k|    WORD32 i4_rqrd_dqid;
  922|       |
  923|       |    /* sanity checks */
  924|   232k|    if((NULL == ps_vcl_nal) || (NULL == ps_vcl_node))
  ------------------
  |  Branch (924:8): [True: 0, False: 232k]
  |  Branch (924:32): [True: 0, False: 232k]
  ------------------
  925|      0|    {
  926|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  927|      0|    }
  928|       |
  929|   232k|    i4_rqrd_dqid = (ps_vcl_node->i4_dependency_id << 4);
  930|   232k|    i4_rqrd_dqid += ps_vcl_node->i4_quality_id;
  931|   232k|    ps_node = ps_vcl_nal->ps_bot_node;
  932|       |
  933|       |    /* Search for node which has a DQID which is */
  934|       |    /* lesser than that of the node to be inserted */
  935|   301k|    while(NULL != ps_node)
  ------------------
  |  Branch (935:11): [True: 72.8k, False: 229k]
  ------------------
  936|  72.8k|    {
  937|  72.8k|        WORD32 i4_dqid;
  938|       |
  939|  72.8k|        i4_dqid = (ps_node->i4_dependency_id << 4);
  940|  72.8k|        i4_dqid += ps_node->i4_quality_id;
  941|       |
  942|       |        /* If we get a DQID which is greater than*/
  943|       |        /* the DQID of the  node to be inserted  */
  944|       |        /* then break out of the loop and update */
  945|  72.8k|        if(i4_dqid > i4_rqrd_dqid)
  ------------------
  |  Branch (945:12): [True: 3.14k, False: 69.6k]
  ------------------
  946|  3.14k|        {
  947|  3.14k|            ps_bot_node = ps_node->ps_bot_node;
  948|  3.14k|            break;
  949|  3.14k|        }
  950|       |
  951|  69.6k|        ps_node = ps_node->ps_top_node;
  952|  69.6k|    }
  953|       |
  954|       |    /* If none of the nodes in the list have DQId */
  955|       |    /* greater than the node to be inserted then  */
  956|       |    /* bottom node will be top most node          */
  957|   232k|    if(NULL == ps_node)
  ------------------
  |  Branch (957:8): [True: 229k, False: 3.14k]
  ------------------
  958|   229k|    {
  959|   229k|        ps_bot_node = ps_vcl_nal->ps_top_node;
  960|   229k|    }
  961|       |
  962|       |    /* Insert the node into DQID list */
  963|   232k|    if(NULL != ps_bot_node)
  ------------------
  |  Branch (963:8): [True: 69.6k, False: 162k]
  ------------------
  964|  69.6k|    {
  965|  69.6k|        ps_top_node = ps_bot_node->ps_top_node;
  966|  69.6k|    }
  967|   162k|    else
  968|   162k|    {
  969|   162k|        ps_top_node = ps_vcl_nal->ps_bot_node;
  970|   162k|    }
  971|       |
  972|       |    /* Join previous node and specified node */
  973|   232k|    if(NULL != ps_bot_node)
  ------------------
  |  Branch (973:8): [True: 69.6k, False: 162k]
  ------------------
  974|  69.6k|    {
  975|  69.6k|        ps_bot_node->ps_top_node = ps_vcl_node;
  976|  69.6k|    }
  977|   162k|    else
  978|   162k|    {
  979|   162k|        ps_vcl_nal->ps_bot_node = ps_vcl_node;
  980|   162k|    }
  981|   232k|    ps_vcl_node->ps_bot_node = ps_bot_node;
  982|       |
  983|       |    /* Join next node and specified node */
  984|   232k|    if(NULL != ps_top_node)
  ------------------
  |  Branch (984:8): [True: 3.14k, False: 229k]
  ------------------
  985|  3.14k|    {
  986|  3.14k|        ps_top_node->ps_bot_node = ps_vcl_node;
  987|  3.14k|    }
  988|   229k|    else
  989|   229k|    {
  990|   229k|        ps_vcl_nal->ps_top_node = ps_vcl_node;
  991|   229k|    }
  992|   232k|    ps_vcl_node->ps_top_node = ps_top_node;
  993|       |
  994|   232k|    return (OK);
  ------------------
  |  |  114|   232k|#define OK        0
  ------------------
  995|   232k|}
isvcd_update_nal_ctxt:
 1024|   891k|{
 1025|       |    /*! If current NAL is VCL NAL then
 1026|       |          - Insert a VCL node into DQID list if necessary
 1027|       |          - update the information part of NAL unit */
 1028|       |    /*! Otherwise, populate the buffer parameters into non vcl output
 1029|       |    structure */
 1030|   891k|    nal_prms_t *ps_nal_prms;
 1031|   891k|    nal_buf_t *ps_nal_buf, *ps_prefix_nal_buf;
 1032|       |
 1033|   891k|    ps_nal_prms = &ps_nal_parse_ctxt->s_nal_prms;
 1034|   891k|    ps_nal_prms = &ps_nal_parse_ctxt->s_nal_prms;
 1035|   891k|    ps_nal_buf = &ps_nal_parse_ctxt->s_nal_buf;
 1036|   891k|    ps_prefix_nal_buf = &ps_nal_parse_ctxt->s_prefix_nal_buf;
 1037|       |
 1038|       |    /* If prefix NAL unit then          */
 1039|       |    /* - calculate the SODB length      */
 1040|   891k|    if(PREFIX_UNIT_NAL == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   64|   891k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (1040:8): [True: 2.28k, False: 888k]
  ------------------
 1041|  2.28k|    {
 1042|       |        /* Since we consume the zeroes in start code also */
 1043|       |        /* size has to reduced                            */
 1044|  2.28k|        if(NAL_END == ps_nal_parse_ctxt->i4_find_nal_state)
  ------------------
  |  Branch (1044:12): [True: 2.28k, False: 0]
  ------------------
 1045|  2.28k|        {
 1046|  2.28k|            ps_prefix_nal_buf->i4_buf_size -= 2;
 1047|  2.28k|        }
 1048|       |
 1049|  2.28k|        ps_prefix_nal_buf->u4_max_bits =
 1050|  2.28k|            isvcd_nal_rbsp_to_sodb(ps_prefix_nal_buf->pu1_buf, ps_prefix_nal_buf->i4_buf_size, 0);
 1051|  2.28k|        memcpy(&ps_nal_parse_ctxt->s_prefix_nal_prms, &ps_nal_parse_ctxt->s_nal_prms,
 1052|  2.28k|               sizeof(nal_prms_t));
 1053|  2.28k|        return;
 1054|  2.28k|    }
 1055|       |
 1056|   888k|    if(ANNEX_B == ps_nal_parse_ctxt->i4_input_bitstream_mode)
  ------------------
  |  |   64|   888k|#define ANNEX_B 0     /*!< Annex B stream*/
  ------------------
  |  Branch (1056:8): [True: 888k, False: 0]
  ------------------
 1057|   888k|    {
 1058|       |        /* Since we consume the zeroes in start code also */
 1059|       |        /* size has to reduced                            */
 1060|   888k|        if(NAL_END == ps_nal_parse_ctxt->i4_find_nal_state)
  ------------------
  |  Branch (1060:12): [True: 888k, False: 0]
  ------------------
 1061|   888k|        {
 1062|   888k|            ps_nal_buf->i4_buf_size -= 2;
 1063|   888k|        }
 1064|   888k|    }
 1065|       |
 1066|   888k|    if(VCL_NAL == ps_nal_prms->i4_derived_nal_type)
  ------------------
  |  Branch (1066:8): [True: 256k, False: 632k]
  ------------------
 1067|   256k|    {
 1068|   256k|        dqid_node_t *ps_dqid_node;
 1069|   256k|        vcl_node_t *ps_node;
 1070|   256k|        WORD32 i4_status;
 1071|   256k|        dec_pic_params_t *ps_pps;
 1072|   256k|        dec_seq_params_t *ps_sps;
 1073|   256k|        vcl_buf_hdr_t *ps_vcl_hdr;
 1074|   256k|        vcl_buf_hdr_t *ps_prev_vcl_hdr;
 1075|   256k|        WORD32 i4_slice_offset;
 1076|       |
 1077|   256k|        ps_sps = ps_nal_parse_ctxt->pv_seq_prms;
 1078|   256k|        ps_sps += ps_nal_prms->u1_sps_id;
 1079|   256k|        ps_pps = ps_nal_parse_ctxt->pv_pic_prms;
 1080|   256k|        ps_pps += ps_nal_prms->u1_pps_id;
 1081|       |
 1082|       |        /* Get the VCL NAL node */
 1083|   256k|        i4_status = isvcd_get_dqid_node(&ps_nal_parse_ctxt->s_dqid_ctxt,
 1084|   256k|                                        (UWORD8) ps_nal_parse_ctxt->i4_prev_dq_id, &ps_dqid_node);
 1085|       |
 1086|   256k|        ps_node = ps_dqid_node->ps_vcl_node;
 1087|       |
 1088|   256k|        if(NULL == ps_node)
  ------------------
  |  Branch (1088:12): [True: 0, False: 256k]
  ------------------
 1089|      0|        {
 1090|       |            /* no active node has been acquired */
 1091|      0|            return;
 1092|      0|        }
 1093|       |
 1094|       |        /*-------------------------------------------------------------------*/
 1095|       |        /* The DQID list updation should happen only once in a               */
 1096|       |        /* layer. Hence a flag used to determine whether the                 */
 1097|       |        /* layer is already initialized or not.                              */
 1098|       |        /*-------------------------------------------------------------------*/
 1099|   256k|        if(SVCD_FALSE == ps_dqid_node->u1_valid_flag)
  ------------------
  |  |   45|   256k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1099:12): [True: 200k, False: 55.6k]
  ------------------
 1100|   200k|        {
 1101|       |            /* Update the DQID node */
 1102|   200k|            ps_dqid_node->u1_valid_flag = SVCD_TRUE;
  ------------------
  |  |   46|   200k|#define SVCD_TRUE 1
  ------------------
 1103|   200k|            ps_dqid_node->u1_dqid = (ps_nal_prms->i4_dependency_id << 4);
 1104|   200k|            ps_dqid_node->u1_dqid += ps_nal_prms->i4_quality_id;
 1105|   200k|            ps_dqid_node->i4_poc_lsb = ps_nal_prms->i4_poc_lsb;
 1106|   200k|            ps_dqid_node->i4_delta_poc_bot = ps_nal_prms->i4_delta_poc_bot;
 1107|   200k|            ps_dqid_node->ai4_delta_poc[0] = ps_nal_prms->ai4_delta_poc[0];
 1108|   200k|            ps_dqid_node->ai4_delta_poc[1] = ps_nal_prms->ai4_delta_poc[1];
 1109|       |
 1110|       |            /* Update the VCL node */
 1111|   200k|            ps_node->i4_quality_id = ps_nal_prms->i4_quality_id;
 1112|   200k|            ps_node->i4_dependency_id = ps_nal_prms->i4_dependency_id;
 1113|   200k|            ps_node->i4_temporal_id = ps_nal_prms->i4_temporal_id;
 1114|   200k|            ps_node->i4_priority_id = ps_nal_prms->i4_priority_id;
 1115|   200k|            ps_node->i4_idr_pic_flag = ps_nal_prms->i4_idr_pic_flag;
 1116|   200k|            ps_node->i4_nal_ref_idc = ps_nal_prms->i4_nal_ref_idc;
 1117|   200k|            ps_node->i4_nal_unit_type = ps_nal_prms->i4_nal_unit_type;
 1118|   200k|            ps_node->i4_use_ref_base = ps_nal_prms->i4_use_ref_base_pic_flag;
 1119|   200k|            ps_node->i4_nal_ref_idc = ps_nal_prms->i4_nal_ref_idc;
 1120|   200k|            ps_node->u1_sps_id = ps_nal_prms->u1_sps_id;
 1121|   200k|            ps_node->u1_pps_id = ps_nal_prms->u1_pps_id;
 1122|   200k|            ps_node->u2_frm_num = ps_nal_prms->u2_frm_num;
 1123|   200k|            ps_node->i4_idr_pic_num = ps_nal_prms->i4_idr_pic_num;
 1124|   200k|            ps_node->i4_num_slices = 0;
 1125|   200k|            ps_node->u1_acc_no_int_pred = 1;
 1126|   200k|            if(0 == ps_sps->u1_pic_order_cnt_type)
  ------------------
  |  Branch (1126:16): [True: 162k, False: 37.9k]
  ------------------
 1127|   162k|            {
 1128|   162k|                ps_node->i4_poc_syntax = ps_nal_prms->i4_poc_lsb;
 1129|   162k|            }
 1130|  37.9k|            else
 1131|  37.9k|            {
 1132|  37.9k|                ps_node->i4_poc_syntax = ps_nal_prms->ai4_delta_poc[0];
 1133|  37.9k|            }
 1134|       |
 1135|       |            /* Insert the node into DQID list */
 1136|   200k|            i4_status = isvcd_insert_vcl_node(ps_vcl_nal, ps_node);
 1137|   200k|            if(OK != i4_status)
  ------------------
  |  |  114|   200k|#define OK        0
  ------------------
  |  Branch (1137:16): [True: 0, False: 200k]
  ------------------
 1138|      0|            {
 1139|      0|                return;
 1140|      0|            }
 1141|       |
 1142|       |            /* Reset the previous field */
 1143|   200k|            ps_nal_parse_ctxt->ps_prev_vcl_buf = NULL;
 1144|   200k|            ps_node->ps_first_vcl_nal = NULL;
 1145|   200k|        }
 1146|       |
 1147|       |        /* Update accumulated no inter layer prediction */
 1148|   256k|        ps_node->u1_acc_no_int_pred &= (UWORD8) ps_nal_prms->i4_no_int_lyr_pred;
 1149|       |
 1150|       |        /****************** Fill VCL BUF header ************/
 1151|       |
 1152|       |        /* If prefix NAL unit is present then update  */
 1153|       |        /* the following                              */
 1154|       |        /* - Start of buffer header will be present in*/
 1155|       |        /*   before the start of prefix NAL unit's SODB*/
 1156|       |        /*   data.                                    */
 1157|       |        /*   Note: The memory left for buffer header  */
 1158|       |        /*   of the prefix NAL unit will have junk    */
 1159|       |        /*   values                                   */
 1160|       |
 1161|   256k|        if(NULL == ps_nal_buf->pu1_buf)
  ------------------
  |  Branch (1161:12): [True: 0, False: 256k]
  ------------------
 1162|      0|        {
 1163|       |            /* no nal needs to be added into the list hence return */
 1164|      0|            return;
 1165|      0|        }
 1166|   256k|        else
 1167|   256k|        {
 1168|   256k|            ps_vcl_hdr = (vcl_buf_hdr_t *) (ps_nal_buf->pu1_buf - GET_NAL_BUF_INC(VCL_NAL));
 1169|   256k|        }
 1170|       |
 1171|   256k|        i4_slice_offset = 0;
 1172|   256k|        if(SVCD_TRUE == ps_prefix_nal_buf->i4_valid_flag)
  ------------------
  |  |   46|   256k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1172:12): [True: 767, False: 255k]
  ------------------
 1173|    767|        {
 1174|    767|            ps_vcl_hdr = (vcl_buf_hdr_t *) (ps_prefix_nal_buf->pu1_buf - GET_NAL_BUF_INC(VCL_NAL));
 1175|    767|            i4_slice_offset = ps_nal_buf->pu1_buf - ps_prefix_nal_buf->pu1_buf;
 1176|    767|        }
 1177|       |
 1178|       |        /* Update the next field of the previous nal  */
 1179|       |        /* unit or if it is the first NAL then update */
 1180|       |        /* VCL node information                       */
 1181|   256k|        ps_prev_vcl_hdr = ps_nal_parse_ctxt->ps_prev_vcl_buf;
 1182|   256k|        if(NULL != ps_prev_vcl_hdr)
  ------------------
  |  Branch (1182:12): [True: 55.6k, False: 200k]
  ------------------
 1183|  55.6k|        {
 1184|  55.6k|            ps_prev_vcl_hdr->ps_next = ps_vcl_hdr;
 1185|  55.6k|        }
 1186|   200k|        else
 1187|   200k|        {
 1188|   200k|            ps_node->ps_first_vcl_nal = ps_vcl_hdr;
 1189|   200k|        }
 1190|       |
 1191|       |        /* Fill the VCL buffer header */
 1192|   256k|        ps_vcl_hdr->ps_next = NULL;
 1193|   256k|        ps_vcl_hdr->i4_no_int_lyr_pred = ps_nal_prms->i4_no_int_lyr_pred;
 1194|   256k|        ps_vcl_hdr->i4_first_mb_addr = ps_nal_prms->u4_first_mb_addr;
 1195|   256k|        ps_vcl_hdr->u4_prefix_nal_bits = ps_prefix_nal_buf->u4_max_bits;
 1196|   256k|        ps_vcl_hdr->i4_slice_offset = 0;
 1197|   256k|        ps_vcl_hdr->i4_buf_offset = GET_NAL_BUF_INC(VCL_NAL);
 1198|   256k|        ps_vcl_hdr->i4_slice_offset = i4_slice_offset;
 1199|       |
 1200|       |        /* Determine max num bits */
 1201|   256k|        ps_nal_buf->u4_max_bits = isvcd_nal_rbsp_to_sodb(
 1202|   256k|            ps_nal_buf->pu1_buf, ps_nal_buf->i4_buf_size, ps_pps->u1_entropy_coding_mode);
 1203|   256k|        ps_vcl_hdr->u4_max_bits = ps_nal_buf->u4_max_bits;
 1204|       |
 1205|       |        /* Updates */
 1206|   256k|        ps_nal_parse_ctxt->ps_prev_vcl_buf = ps_vcl_hdr;
 1207|   256k|        ps_node->i4_num_slices += 1;
 1208|   256k|    }
 1209|       |    /*-----------------------------------------------------------------------*/
 1210|       |    /* If start of NAL and if its a NON VCL NAL then update the              */
 1211|       |    /* start address of the NON VCL NAL                                      */
 1212|       |    /*-----------------------------------------------------------------------*/
 1213|   632k|    else
 1214|   632k|    {
 1215|   632k|        non_vcl_buf_hdr_t *ps_non_vcl_buf_hdr;
 1216|   632k|        non_vcl_buf_hdr_t *ps_prev_non_vcl_buf_hdr;
 1217|       |
 1218|   632k|        ps_non_vcl_buf_hdr =
 1219|   632k|            (non_vcl_buf_hdr_t *) (ps_nal_buf->pu1_buf - GET_NAL_BUF_INC(NON_VCL_NAL));
 1220|       |
 1221|       |        /* Update NON VCL structure */
 1222|   632k|        ps_non_vcl_buf_hdr->i4_nal_unit_type = ps_nal_prms->i4_nal_unit_type;
 1223|   632k|        ps_non_vcl_buf_hdr->ps_next = NULL;
 1224|   632k|        ps_non_vcl_buf_hdr->i4_buf_offset = GET_NAL_BUF_INC(NON_VCL_NAL);
 1225|   632k|        ps_non_vcl_buf_hdr->i4_buf_size = ps_nal_buf->i4_buf_size;
 1226|       |
 1227|       |        /* Update the next field and first non vcl fields of */
 1228|       |        /* non vcl buffer header structure and non vcl       */
 1229|       |        /* structure respectively                            */
 1230|   632k|        ps_prev_non_vcl_buf_hdr = ps_nal_parse_ctxt->ps_prev_non_vcl_buf;
 1231|   632k|        if(NULL != ps_prev_non_vcl_buf_hdr)
  ------------------
  |  Branch (1231:12): [True: 546k, False: 86.0k]
  ------------------
 1232|   546k|        {
 1233|   546k|            ps_prev_non_vcl_buf_hdr->ps_next = ps_non_vcl_buf_hdr;
 1234|   546k|        }
 1235|  86.0k|        else
 1236|  86.0k|        {
 1237|  86.0k|            ps_non_vcl_nal->ps_first_non_vcl_nal = ps_non_vcl_buf_hdr;
 1238|  86.0k|        }
 1239|       |
 1240|       |        /* Updates */
 1241|   632k|        ps_nal_parse_ctxt->i4_num_non_vcl_nals += 1;
 1242|   632k|        ps_non_vcl_nal->i4_num_non_vcl_nals = ps_nal_parse_ctxt->i4_num_non_vcl_nals;
 1243|   632k|        ps_nal_parse_ctxt->ps_prev_non_vcl_buf = ps_non_vcl_buf_hdr;
 1244|   632k|    }
 1245|   888k|}
isvcd_refine_dqid_list:
 1340|   156k|{
 1341|   156k|    vcl_node_t *ps_node;
 1342|   156k|    target_lyr_attr_t *ps_int_attr;
 1343|   156k|    dqid_ctxt_t *ps_dqid_ctxt;
 1344|   156k|    UWORD8 u1_dep_id;
 1345|   156k|    WORD32 i4_status;
 1346|   156k|    WORD32 i4_dep_id;
 1347|       |
 1348|   156k|    ps_int_attr = &ps_nal_parse_ctxt->s_int_attr;
 1349|   156k|    ps_dqid_ctxt = &ps_nal_parse_ctxt->s_dqid_ctxt;
 1350|   156k|    i4_dep_id = -1;
 1351|       |
 1352|   384k|    for(u1_dep_id = 0; u1_dep_id <= ps_int_attr->i4_dependency_id; u1_dep_id++)
  ------------------
  |  Branch (1352:24): [True: 228k, False: 156k]
  ------------------
 1353|   228k|    {
 1354|   228k|        dqid_node_t *ps_dqid_node;
 1355|       |
 1356|       |        /* Get a DQID node */
 1357|   228k|        i4_status = isvcd_get_dqid_node(ps_dqid_ctxt, (UWORD8) (u1_dep_id << 4), &ps_dqid_node);
 1358|   228k|        if(OK != i4_status)
  ------------------
  |  |  114|   228k|#define OK        0
  ------------------
  |  Branch (1358:12): [True: 0, False: 228k]
  ------------------
 1359|      0|        {
 1360|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1361|      0|        }
 1362|       |
 1363|       |        /* If node does not exist already then insert a dummy node */
 1364|   228k|        if(SVCD_FALSE == ps_dqid_node->u1_valid_flag)
  ------------------
  |  |   45|   228k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1364:12): [True: 31.6k, False: 196k]
  ------------------
 1365|  31.6k|        {
 1366|  31.6k|            if(1 == ps_nal_parse_ctxt->i4_idr_pic_err_flag)
  ------------------
  |  Branch (1366:16): [True: 301, False: 31.3k]
  ------------------
 1367|    301|            {
 1368|    301|                ps_int_attr->i4_dependency_id = i4_dep_id;
 1369|    301|                ps_int_attr->i4_quality_id = MAX_QUALITY_ID;
  ------------------
  |  |  102|    301|#define MAX_QUALITY_ID 0
  ------------------
 1370|       |
 1371|       |                /* remove all the nodes from dependency list */
 1372|       |                /* which are at higher dependency than the   */
 1373|       |                /* value set in init attributes              */
 1374|    445|                while(NULL != ps_vcl_nal->ps_top_node)
  ------------------
  |  Branch (1374:23): [True: 231, False: 214]
  ------------------
 1375|    231|                {
 1376|       |                    /* if higher dependency */
 1377|    231|                    if(ps_vcl_nal->ps_top_node->i4_dependency_id > i4_dep_id)
  ------------------
  |  Branch (1377:24): [True: 144, False: 87]
  ------------------
 1378|    144|                    {
 1379|    144|                        ps_vcl_nal->ps_top_node = ps_vcl_nal->ps_top_node->ps_bot_node;
 1380|    144|                    }
 1381|     87|                    else
 1382|     87|                    {
 1383|     87|                        break;
 1384|     87|                    }
 1385|    231|                }
 1386|       |
 1387|       |                /* if no node exists in the dependency list */
 1388|    301|                if(NULL == ps_vcl_nal->ps_top_node)
  ------------------
  |  Branch (1388:20): [True: 214, False: 87]
  ------------------
 1389|    214|                {
 1390|    214|                    ps_vcl_nal->ps_bot_node = NULL;
 1391|    214|                }
 1392|     87|                else if(ps_vcl_nal->ps_top_node == ps_vcl_nal->ps_bot_node)
  ------------------
  |  Branch (1392:25): [True: 87, False: 0]
  ------------------
 1393|     87|                {
 1394|       |                    /* if a single node exists */
 1395|     87|                    ps_vcl_nal->ps_top_node->ps_bot_node = NULL;
 1396|     87|                    ps_vcl_nal->ps_bot_node->ps_top_node = NULL;
 1397|     87|                }
 1398|       |
 1399|    301|                return (NOT_OK);
  ------------------
  |  |  116|    301|#define NOT_OK    -1
  ------------------
 1400|    301|            }
 1401|  31.3k|            else
 1402|  31.3k|            {
 1403|  31.3k|                ps_dqid_node->u1_valid_flag = SVCD_TRUE;
  ------------------
  |  |   46|  31.3k|#define SVCD_TRUE 1
  ------------------
 1404|  31.3k|                ps_dqid_node->u1_dqid = (u1_dep_id << 4);
 1405|       |
 1406|       |                /* Fill VCL node information */
 1407|  31.3k|                ps_node = ps_dqid_node->ps_vcl_node;
 1408|  31.3k|                ps_node->i4_dependency_id = u1_dep_id;
 1409|  31.3k|                ps_node->i4_quality_id = 0;
 1410|  31.3k|                ps_node->ps_first_vcl_nal = NULL;
 1411|  31.3k|            }
 1412|       |
 1413|       |            /* Insert node into DQID list */
 1414|  31.3k|            i4_status = isvcd_insert_vcl_node(ps_vcl_nal, ps_node);
 1415|  31.3k|            if(OK != i4_status)
  ------------------
  |  |  114|  31.3k|#define OK        0
  ------------------
  |  Branch (1415:16): [True: 0, False: 31.3k]
  ------------------
 1416|      0|            {
 1417|      0|                return (NOT_OK);
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1418|      0|            }
 1419|  31.3k|        }
 1420|       |
 1421|   228k|        i4_dep_id++;
 1422|   228k|    } /* End of loop over all the dependency id */
 1423|   156k|    return (OK);
  ------------------
  |  |  114|   156k|#define OK        0
  ------------------
 1424|   156k|}
isvcd_nal_parse_set_target_attr:
 1457|  22.5k|{
 1458|  22.5k|    nal_parse_ctxt_t *ps_nal_parse_ctxt;
 1459|  22.5k|    target_lyr_attr_t *ps_app_attr;
 1460|       |
 1461|  22.5k|    if((i4_target_quality_id > MAX_QUALITY_ID) || (i4_target_dependency_id > MAX_DEPENDENCY_ID))
  ------------------
  |  |  102|  22.5k|#define MAX_QUALITY_ID 0
  ------------------
                  if((i4_target_quality_id > MAX_QUALITY_ID) || (i4_target_dependency_id > MAX_DEPENDENCY_ID))
  ------------------
  |  |  103|  22.5k|#define MAX_DEPENDENCY_ID 4
  ------------------
  |  Branch (1461:8): [True: 0, False: 22.5k]
  |  Branch (1461:51): [True: 0, False: 22.5k]
  ------------------
 1462|      0|    {
 1463|      0|        return IV_FAIL;
 1464|      0|    }
 1465|       |
 1466|  22.5k|    ps_nal_parse_ctxt = (nal_parse_ctxt_t *) pv_nal_parse_ctxt;
 1467|  22.5k|    ps_app_attr = &ps_nal_parse_ctxt->s_app_attr;
 1468|       |
 1469|       |    /*-----------------------------------------------------------------------*/
 1470|       |    /*! Register the target information into context structure               */
 1471|       |    /*-----------------------------------------------------------------------*/
 1472|  22.5k|    ps_app_attr->i4_quality_id = i4_target_quality_id;
 1473|  22.5k|    ps_app_attr->i4_dependency_id = i4_target_dependency_id;
 1474|  22.5k|    ps_app_attr->i4_temporal_id = i4_target_temporal_id;
 1475|  22.5k|    ps_app_attr->i4_priority_id = i4_target_priority_id;
 1476|  22.5k|    return IV_SUCCESS;
 1477|  22.5k|}
isvcd_nal_parse_reset_ctxt:
 1510|  22.5k|{
 1511|  22.5k|    nal_parse_ctxt_t *ps_nal_parse_ctxt = (nal_parse_ctxt_t *) pv_nal_parse_ctxt;
 1512|  22.5k|    UNUSED(i4_input_mode);
  ------------------
  |  |   45|  22.5k|#define UNUSED(x) ((void)(x))
  ------------------
 1513|       |
 1514|       |    /*-----------------------------------------------------------------------*/
 1515|       |    /*! Set the input bitstream mode of context structure                    */
 1516|       |    /*-----------------------------------------------------------------------*/
 1517|  22.5k|    switch(i4_input_bitstream_mode)
 1518|  22.5k|    {
 1519|  22.5k|        case ANNEX_B:
  ------------------
  |  |   64|  22.5k|#define ANNEX_B 0     /*!< Annex B stream*/
  ------------------
  |  Branch (1519:9): [True: 22.5k, False: 0]
  ------------------
 1520|  22.5k|        case NON_ANNEX_B:
  ------------------
  |  |   65|  22.5k|#define NON_ANNEX_B 1 /*!< Non Annex B RFC stream */
  ------------------
  |  Branch (1520:9): [True: 0, False: 22.5k]
  ------------------
 1521|  22.5k|            break;
 1522|      0|        default:
  ------------------
  |  Branch (1522:9): [True: 0, False: 22.5k]
  ------------------
 1523|      0|            break;
 1524|  22.5k|    }
 1525|       |
 1526|  22.5k|    ps_nal_parse_ctxt->i4_input_bitstream_mode = i4_input_bitstream_mode;
 1527|       |
 1528|       |    /*-----------------------------------------------------------------------*/
 1529|       |    /*! Perform the picture level initialization                             */
 1530|       |    /*-----------------------------------------------------------------------*/
 1531|  22.5k|    isvcd_pic_reset_ctxt(pv_nal_parse_ctxt);
 1532|       |
 1533|       |    /* Reset the prefix nal unit buffer structure */
 1534|  22.5k|    isvcd_nal_buf_reset(&ps_nal_parse_ctxt->s_prefix_nal_buf);
 1535|       |
 1536|       |    /*-----------------------------------------------------------------------*/
 1537|       |    /*! Reset other varaibles                                                */
 1538|       |    /*-----------------------------------------------------------------------*/
 1539|  22.5k|    ps_nal_parse_ctxt->i4_dec_frst_sc_flag = SVCD_TRUE;
  ------------------
  |  |   46|  22.5k|#define SVCD_TRUE 1
  ------------------
 1540|  22.5k|    ps_nal_parse_ctxt->i4_eos_flag = SVCD_FALSE;
  ------------------
  |  |   45|  22.5k|#define SVCD_FALSE 0
  ------------------
 1541|  22.5k|    ps_nal_parse_ctxt->u1_pic_boundary_aud_flag = 0;
 1542|  22.5k|    ps_nal_parse_ctxt->u4_bytes_left = 0;
 1543|       |
 1544|       |    /* Reset target layer attributes */
 1545|  22.5k|    {
 1546|  22.5k|        target_lyr_attr_t *ps_app_attr;
 1547|  22.5k|        target_lyr_attr_t *ps_int_attr;
 1548|       |
 1549|  22.5k|        ps_app_attr = &ps_nal_parse_ctxt->s_app_attr;
 1550|  22.5k|        ps_int_attr = &ps_nal_parse_ctxt->s_int_attr;
 1551|       |
 1552|  22.5k|        ps_app_attr->i4_dependency_id = MAX_DEPENDENCY_ID;
  ------------------
  |  |  103|  22.5k|#define MAX_DEPENDENCY_ID 4
  ------------------
 1553|  22.5k|        ps_app_attr->i4_quality_id = MAX_QUALITY_ID;
  ------------------
  |  |  102|  22.5k|#define MAX_QUALITY_ID 0
  ------------------
 1554|  22.5k|        ps_app_attr->i4_temporal_id = MAX_TEMPORAL_ID;
  ------------------
  |  |  104|  22.5k|#define MAX_TEMPORAL_ID 7
  ------------------
 1555|  22.5k|        ps_app_attr->i4_priority_id = MAX_PRIORITY_ID;
  ------------------
  |  |  105|  22.5k|#define MAX_PRIORITY_ID 63
  ------------------
 1556|       |
 1557|  22.5k|        ps_int_attr->i4_dependency_id = -1;
 1558|  22.5k|        ps_int_attr->i4_quality_id = MAX_QUALITY_ID;
  ------------------
  |  |  102|  22.5k|#define MAX_QUALITY_ID 0
  ------------------
 1559|  22.5k|        ps_int_attr->i4_temporal_id = 0;
 1560|  22.5k|        ps_int_attr->i4_priority_id = MAX_PRIORITY_ID;
  ------------------
  |  |  105|  22.5k|#define MAX_PRIORITY_ID 63
  ------------------
 1561|  22.5k|    }
 1562|  22.5k|}
isvcd_nal_parse_partial_signal_eos:
 1593|     18|{
 1594|     18|    nal_parse_ctxt_t *ps_nal_parse_ctxt;
 1595|     18|    vcl_nal_t *ps_vcl_nal;
 1596|       |
 1597|     18|    ps_nal_parse_ctxt = (nal_parse_ctxt_t *) pv_nal_parse_ctxt;
 1598|     18|    ps_vcl_nal = (vcl_nal_t *) pv_out_vcl_nal;
 1599|       |
 1600|       |    /* for RFC mode */
 1601|     18|    if(NON_ANNEX_B == ps_nal_parse_ctxt->i4_input_bitstream_mode)
  ------------------
  |  |   65|     18|#define NON_ANNEX_B 1 /*!< Non Annex B RFC stream */
  ------------------
  |  Branch (1601:8): [True: 0, False: 18]
  ------------------
 1602|      0|    {
 1603|       |        /* Reset the end of stream flag so that in    */
 1604|      0|        ps_nal_parse_ctxt->i4_eos_flag = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 1605|      0|    }
 1606|       |
 1607|     18|    if(1 == ps_nal_parse_ctxt->u1_pic_boundary_aud_flag)
  ------------------
  |  Branch (1607:8): [True: 14, False: 4]
  ------------------
 1608|     14|    {
 1609|     14|        ps_nal_parse_ctxt->i4_eos_flag = SVCD_TRUE;
  ------------------
  |  |   46|     14|#define SVCD_TRUE 1
  ------------------
 1610|     14|    }
 1611|       |    /* Update VCL node if it is first call in the */
 1612|       |    /* flush mode                                 */
 1613|     18|    if(SVCD_FALSE == ps_nal_parse_ctxt->i4_eos_flag)
  ------------------
  |  |   45|     18|#define SVCD_FALSE 0
  ------------------
  |  Branch (1613:8): [True: 4, False: 14]
  ------------------
 1614|      4|    {
 1615|      4|        WORD32 i4_status;
 1616|       |
 1617|       |        /* Update the unfinished NAL into VCL node if */
 1618|       |        /* all the following conditions are true      */
 1619|       |        /* 1. We have not found the start code and    */
 1620|       |        /*    NAL boundary is not detected yet        */
 1621|       |        /* 2. NAL is not discarded                    */
 1622|      4|        if((FIND_NAL_END == ps_nal_parse_ctxt->i4_find_nal_state) &&
  ------------------
  |  Branch (1622:12): [True: 0, False: 4]
  ------------------
 1623|      0|           (SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag))
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  |  Branch (1623:12): [True: 0, False: 0]
  ------------------
 1624|      0|        {
 1625|      0|            isvcd_update_nal_ctxt(ps_nal_parse_ctxt, pv_out_vcl_nal, pv_out_non_vcl_nal);
 1626|      0|        }
 1627|       |
 1628|      4|        ps_nal_parse_ctxt->i4_idr_pic_err_flag = 0;
 1629|       |        /* Refine based on the no inter layer pred flag*/
 1630|      4|        i4_status = isvcd_refine_dqid_list(ps_vcl_nal, ps_nal_parse_ctxt);
 1631|       |
 1632|      4|        if(!(OK == i4_status))
  ------------------
  |  |  114|      4|#define OK        0
  ------------------
  |  Branch (1632:12): [True: 0, False: 4]
  ------------------
 1633|      0|        {
 1634|      0|            return i4_status;
 1635|      0|        }
 1636|      4|        UNUSED(i4_status);
  ------------------
  |  |   45|      4|#define UNUSED(x) ((void)(x))
  ------------------
 1637|       |
 1638|       |        /* Reset the context structure variables */
 1639|      4|        isvcd_nal_reset_ctxt(ps_nal_parse_ctxt);
 1640|       |
 1641|       |        /* Reset the end of stream flag so that in    */
 1642|       |        /* the next flush call the above steps need   */
 1643|       |        /* not be performed                           */
 1644|      4|        ps_nal_parse_ctxt->i4_eos_flag = SVCD_TRUE;
  ------------------
  |  |   46|      4|#define SVCD_TRUE 1
  ------------------
 1645|       |
 1646|      4|        return (PIC_BOUNDARY_TRUE);
 1647|      4|    }
 1648|     14|    else
 1649|     14|    {
 1650|     14|        return (FLUSH_DECODED_PICTURE);
 1651|     14|    }
 1652|     18|}
isvcd_nal_parse_pic_bound_proc:
 1677|   156k|{
 1678|   156k|    WORD32 i4_status;
 1679|       |
 1680|   156k|    i4_status = isvcd_refine_dqid_list(ps_vcl_nal, ps_nal_parse_ctxt);
 1681|       |
 1682|       |    /* in case of IDR pictures if the node     */
 1683|       |    /* which has to be added into dependency   */
 1684|       |    /* list is not valied then the layer below */
 1685|       |    /* that node is set as target layer        */
 1686|       |
 1687|   156k|    if(NOT_OK == i4_status)
  ------------------
  |  |  116|   156k|#define NOT_OK    -1
  ------------------
  |  Branch (1687:8): [True: 301, False: 156k]
  ------------------
 1688|    301|    {
 1689|    301|        ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|    301|#define SVCD_TRUE 1
  ------------------
 1690|    301|        ps_vcl_nal->i1_nal_ref_id_next = -1;
 1691|    301|    }
 1692|   156k|    else
 1693|   156k|    {
 1694|       |        /* update the next access unit params */
 1695|       |        /* will be used by lower level decoder*/
 1696|       |        /* for concealment of frame number    */
 1697|       |        /* applicable for single layer cases  */
 1698|   156k|        ps_vcl_nal->i1_nal_ref_id_next = ps_nal_prms->i4_nal_ref_idc;
 1699|       |
 1700|   156k|        ps_vcl_nal->u2_frm_num_next = ps_nal_prms->u2_frm_num;
 1701|   156k|    }
 1702|       |
 1703|       |    /* -------- reset few variables in context structure ----*/
 1704|   156k|    isvcd_pic_reset_ctxt(ps_nal_parse_ctxt);
 1705|   156k|}
isvcd_nal_parse_vcl_nal_partial:
 1741|   161k|{
 1742|       |    /*! - Search for the NAL boundary
 1743|       |        - If NAL boundary is not found and bytes consumed is lesser than
 1744|       |          minimum buffer size then break out of the loop
 1745|       |        - if it is start of NAL then read the NAL header
 1746|       |        - If it is a VCL NAL then invoke picture boundary detection logic and
 1747|       |          picture boundary is detected then break out of the loop without
 1748|       |          updating the bytes consumed variable
 1749|       |        - NAL discard logic determines whther the current NAL has to be
 1750|       |          discarded or not
 1751|       |        - If NAL is not discarded then populate the vcl or non vcl output
 1752|       |          structures
 1753|       |    */
 1754|   161k|    nal_parse_ctxt_t *ps_nal_parse_ctxt;
 1755|   161k|    vcl_nal_t *ps_vcl_nal;
 1756|   161k|    non_vcl_nal_t *ps_non_vcl_nal;
 1757|   161k|    nal_unit_t *ps_nal_unit;
 1758|   161k|    WORD32 i4_nal_start_flag, i4_cur_pos, i4_status;
 1759|   161k|    WORD32 i4_nal_header_len, i4_more_data_flag;
 1760|   161k|    UWORD32 u4_bytes_consumed_temp = 0;
 1761|   161k|    UWORD8 **ppu1_out_buf;
 1762|   161k|    nal_prms_t *ps_nal_prms;
 1763|   161k|    WORD32 i4_pic_bound_status;
 1764|       |
 1765|   161k|    ps_nal_parse_ctxt = (nal_parse_ctxt_t *) pv_nal_parse_ctxt;
 1766|   161k|    ps_vcl_nal = (vcl_nal_t *) pv_out_vcl_nal;
 1767|   161k|    ps_non_vcl_nal = (non_vcl_nal_t *) pv_out_non_vcl_nal;
 1768|   161k|    ps_nal_unit = (nal_unit_t *) ps_nal_parse_ctxt->pv_nal_unit;
 1769|   161k|    ps_nal_prms = &ps_nal_parse_ctxt->s_nal_prms;
 1770|       |
 1771|       |    /* Initialization */
 1772|   161k|    i4_cur_pos = 0;
 1773|   161k|    *pu4_bytes_consumed = 0;
 1774|   161k|    i4_nal_header_len = 0;
 1775|   161k|    i4_nal_start_flag = SVCD_FALSE;
  ------------------
  |  |   45|   161k|#define SVCD_FALSE 0
  ------------------
 1776|   161k|    i4_more_data_flag = SVCD_TRUE;
  ------------------
  |  |   46|   161k|#define SVCD_TRUE 1
  ------------------
 1777|   161k|    i4_pic_bound_status = PIC_BOUNDARY_FALSE;
 1778|       |
 1779|   161k|    ps_non_vcl_nal->i4_num_non_vcl_nals = ps_nal_parse_ctxt->i4_num_non_vcl_nals;
 1780|       |
 1781|       |    /* Since we do not perform the picture boundary detection */
 1782|       |    /* on the prefix NAL unit, the current picture's prefix   */
 1783|       |    /* NAL unit will be at the bottom of the buffer. Hence    */
 1784|       |    /* it should be copied to top of the buffer               */
 1785|   161k|    if(SVCD_TRUE == ps_nal_parse_ctxt->i4_is_frst_vcl_nal_in_au)
  ------------------
  |  |   46|   161k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1785:8): [True: 161k, False: 115]
  ------------------
 1786|   161k|    {
 1787|   161k|        nal_buf_t *ps_prefix_nal_buf;
 1788|       |
 1789|   161k|        ps_prefix_nal_buf = &ps_nal_parse_ctxt->s_prefix_nal_buf;
 1790|   161k|        if(SVCD_TRUE == ps_prefix_nal_buf->i4_valid_flag)
  ------------------
  |  |   46|   161k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1790:12): [True: 543, False: 161k]
  ------------------
 1791|    543|        {
 1792|    543|            WORD32 i4_buf_size;
 1793|    543|            UWORD8 *pu1_vcl_nal;
 1794|       |
 1795|    543|            if(ps_prefix_nal_buf->i4_buf_size > 0)
  ------------------
  |  Branch (1795:16): [True: 92, False: 451]
  ------------------
 1796|     92|            {
 1797|     92|                i4_buf_size = ps_prefix_nal_buf->i4_buf_size;
 1798|     92|                i4_buf_size = UP_ALIGN_8(i4_buf_size + BUFFER_ALIGN_4);
  ------------------
  |  |   51|     92|#define UP_ALIGN_8(x) (((((UWORD64) x) + 7) >> 3) << 3)
  ------------------
 1799|     92|            }
 1800|    451|            else
 1801|    451|            {
 1802|    451|                i4_buf_size = 0;
 1803|    451|            }
 1804|       |
 1805|    543|            pu1_vcl_nal = ps_nal_parse_ctxt->pu1_vcl_nal_buf + i4_buf_size;
 1806|       |
 1807|    543|            memmove(ps_nal_parse_ctxt->pu1_vcl_nal_buf, ps_prefix_nal_buf->pu1_buf, i4_buf_size);
 1808|    543|            ps_prefix_nal_buf->pu1_buf = ps_nal_parse_ctxt->pu1_vcl_nal_buf;
 1809|    543|            ps_nal_parse_ctxt->pu1_vcl_nal_buf = pu1_vcl_nal;
 1810|       |
 1811|       |            /* subtract the buffer size left */
 1812|    543|            ps_nal_parse_ctxt->u4_bytes_left_vcl -= i4_buf_size;
 1813|    543|        }
 1814|       |        /* Reset the top and bottom node */
 1815|   161k|        ps_vcl_nal->ps_top_node = NULL;
 1816|   161k|        ps_vcl_nal->ps_bot_node = NULL;
 1817|   161k|        ps_vcl_nal->i1_nal_ref_id_next = -1;
 1818|   161k|        ps_vcl_nal->u2_frm_num_next = 0;
 1819|   161k|    }
 1820|       |
 1821|       |    /* If number of bytes left in the previous process call  */
 1822|       |    /* is is greater or equal to number of bytes in input    */
 1823|       |    /* buffer of the current process call then declare that  */
 1824|       |    /* end of bitstream has occurred and consume the bytes   */
 1825|       |    /* but do not decode                                     */
 1826|   161k|    if(ps_nal_parse_ctxt->u4_bytes_left >= (UWORD32) *pu4_num_bytes)
  ------------------
  |  Branch (1826:8): [True: 18, False: 161k]
  ------------------
 1827|     18|    {
 1828|     18|        ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|     18|#define SVCD_TRUE 1
  ------------------
 1829|     18|        *pu4_bytes_consumed = *pu4_num_bytes;
 1830|       |
 1831|     18|        i4_status =
 1832|     18|            isvcd_nal_parse_partial_signal_eos(ps_nal_parse_ctxt, ps_vcl_nal, ps_non_vcl_nal);
 1833|       |        /* set the next AU params to default values */
 1834|     18|        ps_vcl_nal->i1_nal_ref_id_next = -1;
 1835|     18|        ps_vcl_nal->u2_frm_num_next = 0;
 1836|       |
 1837|     18|        return (i4_status);
 1838|     18|    }
 1839|   161k|    ps_nal_parse_ctxt->u4_bytes_left = 0;
 1840|       |
 1841|       |    /*************************************************************************/
 1842|       |    /*                      LOOP OVER NALs                                   */
 1843|       |    /*************************************************************************/
 1844|   161k|    do
 1845|   805k|    {
 1846|   805k|        nal_buf_t *ps_nal_buf;
 1847|   805k|        UWORD32 *pu4_bytes_left;
 1848|       |
 1849|       |        /* Find NAL boundary                */
 1850|   805k|        if(ANNEX_B == ps_nal_parse_ctxt->i4_input_bitstream_mode)
  ------------------
  |  |   64|   805k|#define ANNEX_B 0     /*!< Annex B stream*/
  ------------------
  |  Branch (1850:12): [True: 805k, False: 0]
  ------------------
 1851|   805k|        {
 1852|   805k|            i4_nal_start_flag = isvcd_get_annex_b_nal_unit(
 1853|   805k|                pu1_stream_buffer, i4_cur_pos, *pu4_num_bytes,
 1854|   805k|                &ps_nal_parse_ctxt->i4_find_nal_state, &ps_nal_parse_ctxt->i4_zero_byte_cnt,
 1855|   805k|                &u4_bytes_consumed_temp, ps_nal_parse_ctxt->pv_nal_unit, &i4_more_data_flag);
 1856|       |
 1857|   805k|            i4_cur_pos += u4_bytes_consumed_temp;
 1858|   805k|        }
 1859|       |
 1860|       |        /*********************************************************************/
 1861|       |        /*          READ NAL HEADER AND NAL DISCARD LOGIC                    */
 1862|       |        /*********************************************************************/
 1863|       |
 1864|       |        /* If it is the start of NAL header perform the following */
 1865|       |        /* 1. Decode NAL header                                   */
 1866|       |        /* 2. Determine whether the NAL has to be discarded or not*/
 1867|       |        /* 3. Detect the picture boundary                         */
 1868|   805k|        if(SVCD_TRUE == i4_nal_start_flag)
  ------------------
  |  |   46|   805k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1868:12): [True: 805k, False: 123]
  ------------------
 1869|   805k|        {
 1870|   805k|            UWORD32 u4_err_code;
 1871|   805k|            WORD32 i4_sps_pps_corrupt_status;
 1872|   805k|            WORD32 i4_internal_dep_id_prev;
 1873|       |
 1874|       |            /* Get the NAL prms. This involves the following things*/
 1875|       |            /* 1. Decode the NAL header                            */
 1876|       |            /* 2. Set the discard flag                             */
 1877|       |            /* 3. Decode the slice header if needed                */
 1878|       |
 1879|       |            /* get the dependency id at which the NAl parse is currently */
 1880|       |            /* present */
 1881|   805k|            i4_internal_dep_id_prev = ps_nal_parse_ctxt->s_int_attr.i4_dependency_id;
 1882|       |
 1883|   805k|            i4_status = isvcd_get_nal_prms(
 1884|   805k|                ps_nal_unit->pu1_bufs, ps_nal_unit->i4_buf_sizes, ps_nal_prms,
 1885|   805k|                &ps_nal_parse_ctxt->s_prefix_nal_prms, &ps_nal_parse_ctxt->s_prefix_nal_buf,
 1886|   805k|                &u4_err_code, &i4_sps_pps_corrupt_status, &ps_nal_parse_ctxt->i4_discard_nal_flag,
 1887|   805k|                ps_nal_parse_ctxt);
 1888|       |
 1889|   805k|            if(NON_ANNEX_B == ps_nal_parse_ctxt->i4_input_bitstream_mode)
  ------------------
  |  |   65|   805k|#define NON_ANNEX_B 1 /*!< Non Annex B RFC stream */
  ------------------
  |  Branch (1889:16): [True: 0, False: 805k]
  ------------------
 1890|      0|            {
 1891|      0|                ps_nal_parse_ctxt->i4_prev_dq_id = ps_nal_prms->i4_dqid;
 1892|      0|            }
 1893|       |
 1894|       |            /* If the error code returned by the "picture boundary" */
 1895|       |            /* detetction is                                        */
 1896|       |            /* 1. Insufficient bitstream size: then store the bytes */
 1897|       |            /*    left and break out of the loop                    */
 1898|       |            /* 2. Corrupted slice: then discard the slice           */
 1899|   805k|            if((NAL_INSUFFICIENT_DATA == (WORD32) u4_err_code) &&
  ------------------
  |  Branch (1899:16): [True: 3.21k, False: 801k]
  ------------------
 1900|  3.21k|               (NAL_END != ps_nal_parse_ctxt->i4_find_nal_state))
  ------------------
  |  Branch (1900:16): [True: 2.39k, False: 817]
  ------------------
 1901|  2.39k|            {
 1902|  2.39k|                ps_nal_parse_ctxt->u4_bytes_left = *pu4_num_bytes - *pu4_bytes_consumed;
 1903|       |
 1904|       |                /* Reset the NAL level tracking variables */
 1905|  2.39k|                isvcd_nal_reset_ctxt(ps_nal_parse_ctxt);
 1906|  2.39k|                break;
 1907|  2.39k|            }
 1908|   802k|            else if(0 != u4_err_code)
  ------------------
  |  Branch (1908:21): [True: 21.0k, False: 781k]
  ------------------
 1909|  21.0k|            {
 1910|  21.0k|                ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|  21.0k|#define SVCD_TRUE 1
  ------------------
 1911|       |
 1912|  21.0k|                if(SVCD_TRUE == ps_nal_prms->i4_idr_pic_flag)
  ------------------
  |  |   46|  21.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1912:20): [True: 1.26k, False: 19.7k]
  ------------------
 1913|  1.26k|                {
 1914|       |                    /* IDR Error handler is called       */
 1915|       |                    /* only if for a given layer the NAL */
 1916|       |                    /* haeder and partial slice decode   */
 1917|       |                    /* routine comes out as no SPS PPS   */
 1918|       |                    /* error. But for Lowest layer in    */
 1919|       |                    /* access unit it is doen always     */
 1920|  1.26k|                    if(i4_internal_dep_id_prev != ps_nal_parse_ctxt->s_int_attr.i4_dependency_id)
  ------------------
  |  Branch (1920:24): [True: 0, False: 1.26k]
  ------------------
 1921|      0|                    {
 1922|       |                        /* if the target dependency id has been */
 1923|       |                        /* changed while decoding current NAL   */
 1924|       |
 1925|      0|                        if((0 != i4_sps_pps_corrupt_status) ||
  ------------------
  |  Branch (1925:28): [True: 0, False: 0]
  ------------------
 1926|      0|                           (-1 == ps_nal_parse_ctxt->i4_prev_dq_id))
  ------------------
  |  Branch (1926:28): [True: 0, False: 0]
  ------------------
 1927|      0|                        {
 1928|      0|                            i4_status =
 1929|      0|                                isvcd_idr_err_hdlr(ps_vcl_nal, ps_nal_prms, ps_nal_parse_ctxt);
 1930|      0|                            if(OK != i4_status)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1930:32): [True: 0, False: 0]
  ------------------
 1931|      0|                            {
 1932|      0|                                return i4_status;
 1933|      0|                            }
 1934|      0|                            UNUSED(i4_status);
  ------------------
  |  |   45|      0|#define UNUSED(x) ((void)(x))
  ------------------
 1935|       |
 1936|      0|                            ps_nal_parse_ctxt->i4_tgt_lyr_update = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
 1937|      0|                        }
 1938|      0|                        else
 1939|      0|                        {
 1940|      0|                            if(0 == ps_nal_prms->i4_quality_id)
  ------------------
  |  Branch (1940:32): [True: 0, False: 0]
  ------------------
 1941|      0|                            {
 1942|       |                                /* over write the frame number */
 1943|      0|                                ps_nal_parse_ctxt->s_nal_prms.u2_frm_num = 0;
 1944|       |
 1945|       |                                /* Get the previous layer's DQID */
 1946|      0|                                if(ps_nal_parse_ctxt->i4_prev_dq_id < ps_nal_prms->i4_dqid)
  ------------------
  |  Branch (1946:36): [True: 0, False: 0]
  ------------------
 1947|      0|                                {
 1948|      0|                                    ps_nal_parse_ctxt->i4_prev_dq_id = ps_nal_prms->i4_dqid;
 1949|      0|                                    ps_nal_parse_ctxt->i4_is_frst_vcl_nal_in_au = SVCD_FALSE;
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
 1950|      0|                                }
 1951|       |
 1952|       |                                /* update the nal context with the nal */
 1953|       |                                /* header params */
 1954|      0|                                isvcd_update_nal_ctxt(ps_nal_parse_ctxt, ps_vcl_nal,
 1955|      0|                                                      ps_non_vcl_nal);
 1956|      0|                            }
 1957|      0|                        }
 1958|      0|                    }
 1959|  1.26k|                }
 1960|  21.0k|            }
 1961|       |
 1962|       |            /* Populate the derived nal type into bitstream extract*/
 1963|       |            /* context structure                                   */
 1964|   802k|            i4_nal_header_len = ps_nal_prms->i4_nal_header_len;
 1965|   802k|            ps_nal_parse_ctxt->i4_nal_type = ps_nal_prms->i4_derived_nal_type;
 1966|       |
 1967|       |            /* get the accumulated idr pic error flag */
 1968|   802k|            ps_nal_parse_ctxt->i4_idr_pic_err_flag |=
 1969|   802k|                ((SVCD_TRUE == ps_nal_prms->i4_idr_pic_flag) &&
  ------------------
  |  |   46|   802k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1969:18): [True: 321k, False: 480k]
  ------------------
 1970|   321k|                 (SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag) &&
  ------------------
  |  |   45|   321k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1970:18): [True: 319k, False: 2.28k]
  ------------------
 1971|   319k|                 (i4_internal_dep_id_prev != ps_nal_parse_ctxt->s_int_attr.i4_dependency_id));
  ------------------
  |  Branch (1971:18): [True: 26.3k, False: 293k]
  ------------------
 1972|       |
 1973|   802k|            if(ACCESS_UNIT_DELIMITER_RBSP == ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |  332|   802k|#define ACCESS_UNIT_DELIMITER_RBSP      9
  ------------------
  |  Branch (1973:16): [True: 7.63k, False: 795k]
  ------------------
 1974|  7.63k|            {
 1975|  7.63k|                i4_pic_bound_status = PIC_BOUNDARY_TRUE;
 1976|  7.63k|                ps_nal_parse_ctxt->u1_pic_boundary_aud_flag = 1;
 1977|       |                /* If picture boundary is detected then come out of  */
 1978|       |                /* the loop                                          */
 1979|  7.63k|                if(PIC_BOUNDARY_TRUE == i4_pic_bound_status)
  ------------------
  |  Branch (1979:20): [True: 7.63k, False: 0]
  ------------------
 1980|  7.63k|                {
 1981|  7.63k|                    isvcd_nal_parse_pic_bound_proc(ps_nal_parse_ctxt, ps_vcl_nal, ps_nal_prms);
 1982|  7.63k|                    break;
 1983|  7.63k|                }
 1984|  7.63k|            }
 1985|       |            /* Perform the picture boundary detection if all the   */
 1986|       |            /* following conditions are TRUE                       */
 1987|       |            /*  1. VCL NAL                                         */
 1988|       |            /*  2. Not a prefix NAL                                */
 1989|       |            /*  3. Not a discardable NAL                           */
 1990|   795k|            if((VCL_NAL == ps_nal_prms->i4_derived_nal_type) &&
  ------------------
  |  Branch (1990:16): [True: 413k, False: 381k]
  ------------------
 1991|   413k|               (PREFIX_UNIT_NAL != ps_nal_prms->i4_nal_unit_type) &&
  ------------------
  |  |   64|   413k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (1991:16): [True: 411k, False: 1.64k]
  ------------------
 1992|   411k|               (SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag))
  ------------------
  |  |   45|   411k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1992:16): [True: 406k, False: 5.10k]
  ------------------
 1993|   406k|            {
 1994|   406k|                if(ANNEX_B == ps_nal_parse_ctxt->i4_input_bitstream_mode)
  ------------------
  |  |   64|   406k|#define ANNEX_B 0     /*!< Annex B stream*/
  ------------------
  |  Branch (1994:20): [True: 406k, False: 0]
  ------------------
 1995|   406k|                {
 1996|   406k|                    ps_nal_parse_ctxt->u1_pic_boundary_aud_flag = 0;
 1997|       |
 1998|   406k|                    i4_status = isvcd_detect_pic_boundary_annex_b(ps_nal_prms, pu1_stream_buffer,
 1999|   406k|                                                                  i4_cur_pos, &i4_pic_bound_status,
 2000|   406k|                                                                  ps_nal_parse_ctxt, pu4_num_bytes);
 2001|   406k|                }
 2002|       |
 2003|       |                /* If picture boundary is detected then come out of  */
 2004|       |                /* the loop                                          */
 2005|   406k|                if(PIC_BOUNDARY_TRUE == i4_pic_bound_status)
  ------------------
  |  Branch (2005:20): [True: 149k, False: 257k]
  ------------------
 2006|   149k|                {
 2007|   149k|                    isvcd_nal_parse_pic_bound_proc(ps_nal_parse_ctxt, ps_vcl_nal, ps_nal_prms);
 2008|   149k|                    break;
 2009|   149k|                }
 2010|   406k|            }
 2011|       |
 2012|   645k|            if(SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag)
  ------------------
  |  |   45|   645k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2012:16): [True: 619k, False: 26.5k]
  ------------------
 2013|   619k|            {
 2014|       |                /* Set the active NAL buffer structure and initialize */
 2015|       |                /* the nal buffer structure                           */
 2016|   619k|                isvcd_get_nal_buf(ps_nal_parse_ctxt, &ps_nal_buf);
 2017|   619k|                ps_nal_parse_ctxt->ps_nal_buf = ps_nal_buf;
 2018|   619k|            }
 2019|  26.5k|            else
 2020|  26.5k|            {
 2021|  26.5k|                ps_nal_parse_ctxt->ps_nal_buf = NULL;
 2022|  26.5k|            }
 2023|   645k|        }
 2024|       |
 2025|       |        /*-------------------------------------------------------------------*/
 2026|       |        /* In RFC based bitstreams, this is a dummy update (in this mode, the*/
 2027|       |        /* bytes consumed update is done by picture boundary detection).     */
 2028|       |        /* But for Annex B based streams this is valid update                */
 2029|       |        /*-------------------------------------------------------------------*/
 2030|   645k|        *pu4_bytes_consumed += u4_bytes_consumed_temp;
 2031|       |
 2032|       |        /*********************************************************************/
 2033|       |        /*          EMULATION PREVENTION AND BYTE SWAPPING                   */
 2034|       |        /*********************************************************************/
 2035|       |
 2036|       |        /* Determine output buffer */
 2037|   645k|        ps_nal_buf = ps_nal_parse_ctxt->ps_nal_buf;
 2038|       |
 2039|   645k|        if(VCL_NAL == ps_nal_parse_ctxt->i4_nal_type)
  ------------------
  |  Branch (2039:12): [True: 264k, False: 381k]
  ------------------
 2040|   264k|        {
 2041|   264k|            ppu1_out_buf = &ps_nal_parse_ctxt->pu1_vcl_nal_buf;
 2042|   264k|            pu4_bytes_left = &ps_nal_parse_ctxt->u4_bytes_left_vcl;
 2043|   264k|            if(*pu4_bytes_left < (MAX_VCL_NAL_BUFF_SIZE * 0.05))
  ------------------
  |  |   69|   264k|#define MAX_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
  |  Branch (2043:16): [True: 0, False: 264k]
  ------------------
 2044|      0|            {
 2045|      0|                return (VCL_NAL_FOUND_FALSE);
 2046|      0|            }
 2047|   264k|        }
 2048|   381k|        else
 2049|   381k|        {
 2050|   381k|            ppu1_out_buf = &ps_nal_parse_ctxt->pu1_non_vcl_nal_buf;
 2051|   381k|            pu4_bytes_left = &ps_nal_parse_ctxt->u4_bytes_left_non_vcl;
 2052|   381k|            if(*pu4_bytes_left < (MAX_NON_VCL_NAL_BUFF_SIZE * 0.05))
  ------------------
  |  |   70|   381k|#define MAX_NON_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
  |  Branch (2052:16): [True: 98, False: 381k]
  ------------------
 2053|     98|            {
 2054|     98|                return (VCL_NAL_FOUND_FALSE);
 2055|     98|            }
 2056|   381k|        }
 2057|       |
 2058|       |        /* if 0 bytes left then discard the current NAL */
 2059|   645k|        if(0 >= (WORD32) *pu4_bytes_left)
  ------------------
  |  Branch (2059:12): [True: 0, False: 645k]
  ------------------
 2060|      0|        {
 2061|      0|            ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2062|      0|        }
 2063|       |
 2064|       |        /* Perform the emulation prevention and byte swap */
 2065|   645k|        if(SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag)
  ------------------
  |  |   45|   645k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2065:12): [True: 619k, False: 26.5k]
  ------------------
 2066|   619k|        {
 2067|   619k|            UWORD32 u4_output_bytes, u4_buf_inc;
 2068|       |
 2069|       |            /* Do emulation prevention and byte swapping on all the packets  */
 2070|       |            /* of RFC or current partial or full Annex B NAL unit            */
 2071|   619k|            {
 2072|   619k|                UWORD32 u4_buf_size;
 2073|       |
 2074|       |                /* clip the size before emulation prevention */
 2075|   619k|                u4_buf_size = (UWORD32) CLIP3(0, (WORD32) *pu4_bytes_left,
  ------------------
  |  |   77|   619k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 619k]
  |  |  |  Branch (77:54): [True: 0, False: 619k]
  |  |  ------------------
  ------------------
 2076|   619k|                                              (ps_nal_unit->i4_buf_sizes - i4_nal_header_len));
 2077|       |
 2078|   619k|                u4_buf_inc = isvcd_nal_byte_swap_emulation(
 2079|   619k|                    (UWORD32 *) *ppu1_out_buf, &u4_output_bytes,
 2080|   619k|                    ps_nal_unit->pu1_bufs + i4_nal_header_len, u4_buf_size,
 2081|   619k|                    NUM_OF_ZERO_BYTES_BEFORE_START_CODE, &ps_nal_parse_ctxt->s_emulation_ctxt);
  ------------------
  |  |   54|   619k|#define NUM_OF_ZERO_BYTES_BEFORE_START_CODE (2)
  ------------------
 2082|       |
 2083|   619k|                i4_nal_header_len = 0;
 2084|   619k|                u4_buf_inc = UP_ALIGN_8(u4_buf_inc);
  ------------------
  |  |   51|   619k|#define UP_ALIGN_8(x) (((((UWORD64) x) + 7) >> 3) << 3)
  ------------------
 2085|   619k|                *ppu1_out_buf += u4_buf_inc;
 2086|   619k|                *pu4_bytes_left -= u4_buf_inc;
 2087|   619k|                ps_nal_buf->i4_buf_size += u4_output_bytes;
 2088|   619k|            }
 2089|   619k|        }
 2090|       |
 2091|       |        /*********************************************************************/
 2092|       |        /*                UPDATE VARIABLES                                   */
 2093|       |        /*********************************************************************/
 2094|   645k|        if(NAL_END == ps_nal_parse_ctxt->i4_find_nal_state)
  ------------------
  |  Branch (2094:12): [True: 643k, False: 2.30k]
  ------------------
 2095|   643k|        {
 2096|   643k|            if(SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag)
  ------------------
  |  |   45|   643k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2096:16): [True: 617k, False: 26.1k]
  ------------------
 2097|   617k|            {
 2098|       |                /* This function updates output nal ctxt - vcl nal structure */
 2099|       |                /* and non vcl nal structure depending upon the current NAL  */
 2100|       |                /* type.                                                     */
 2101|       |                /* This will only update parameters which are available at   */
 2102|       |                /* end of NAL unit like nal unit's total size                */
 2103|   617k|                isvcd_update_nal_ctxt(ps_nal_parse_ctxt, ps_vcl_nal, ps_non_vcl_nal);
 2104|       |
 2105|   617k|                UPDATE_NAL_BUF_PTR(ppu1_out_buf, ps_nal_prms->i4_derived_nal_type, pu4_bytes_left);
 2106|   617k|            }
 2107|       |
 2108|       |            /* If the prefix NAL unit is not immediately followed by */
 2109|       |            /* an AVC NAL unit it shall be discarded and hence reset */
 2110|       |            /* is done                                               */
 2111|       |            /* Also if prefix NAL unit is discarded then we should   */
 2112|       |            /* not associate the prefix NAL unit with AVC NAL unit   */
 2113|       |            /* and hence a reset is required                         */
 2114|   643k|            if((PREFIX_UNIT_NAL != ps_nal_prms->i4_nal_unit_type) ||
  ------------------
  |  |   64|   643k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (2114:16): [True: 641k, False: 2.22k]
  ------------------
 2115|  2.22k|               (SVCD_TRUE == ps_nal_parse_ctxt->i4_discard_nal_flag))
  ------------------
  |  |   46|  2.22k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2115:16): [True: 1.00k, False: 1.22k]
  ------------------
 2116|   642k|            {
 2117|   642k|                isvcd_nal_buf_reset(&ps_nal_parse_ctxt->s_prefix_nal_buf);
 2118|   642k|            }
 2119|       |
 2120|       |            /* Reset the nal level tracking variables */
 2121|   643k|            isvcd_nal_reset_ctxt(ps_nal_parse_ctxt);
 2122|   643k|        }
 2123|       |
 2124|       |        /*------------- while loop ends here --------------------------------*/
 2125|   645k|    } while(SVCD_TRUE == i4_more_data_flag);
  ------------------
  |  |   46|   645k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2125:13): [True: 643k, False: 2.30k]
  ------------------
 2126|       |
 2127|   161k|    return (i4_pic_bound_status);
 2128|   161k|}
isvcd_nal_parse_non_vcl_nal:
 2164|   110k|{
 2165|       |    /*! - Search for the NAL boundary
 2166|       |        - If NAL boundary is not found and bytes consumed is lesser than
 2167|       |          minimum buffer size then break out of the loop
 2168|       |        - if it is start of NAL then read the NAL header
 2169|       |        - If it is a VCL NAL then return from this function saying that
 2170|       |          VCL NAL found
 2171|       |        - NAL discard logic determines whether the current NAL has to be
 2172|       |          discarded or not
 2173|       |        - If NAL is not discarded then populate the vcl or non vcl output
 2174|       |          structures
 2175|       |    */
 2176|       |
 2177|   110k|    nal_parse_ctxt_t *ps_nal_parse_ctxt;
 2178|   110k|    non_vcl_nal_t *ps_non_vcl_nal;
 2179|   110k|    nal_unit_t *ps_nal_unit;
 2180|   110k|    WORD32 i4_nal_start_flag, i4_cur_pos, i4_status;
 2181|   110k|    WORD32 i4_nal_header_len, i4_more_data_flag;
 2182|   110k|    UWORD32 u4_bytes_consumed_temp = 0;
 2183|   110k|    UWORD8 **ppu1_out_buf;
 2184|   110k|    nal_prms_t *ps_nal_prms;
 2185|       |
 2186|   110k|    ps_nal_parse_ctxt = (nal_parse_ctxt_t *) pv_nal_parse_ctxt;
 2187|   110k|    ps_non_vcl_nal = (non_vcl_nal_t *) pv_out_non_vcl_nal;
 2188|   110k|    ps_nal_unit = (nal_unit_t *) ps_nal_parse_ctxt->pv_nal_unit;
 2189|   110k|    ps_nal_prms = &ps_nal_parse_ctxt->s_nal_prms;
 2190|       |
 2191|       |    /* Initialization */
 2192|   110k|    i4_cur_pos = 0;
 2193|   110k|    *pu4_bytes_consumed = 0;
 2194|   110k|    i4_nal_header_len = 0;
 2195|   110k|    i4_nal_start_flag = SVCD_FALSE;
  ------------------
  |  |   45|   110k|#define SVCD_FALSE 0
  ------------------
 2196|   110k|    i4_more_data_flag = SVCD_TRUE;
  ------------------
  |  |   46|   110k|#define SVCD_TRUE 1
  ------------------
 2197|   110k|    i4_status = PIC_BOUNDARY_FALSE;
 2198|       |
 2199|       |    /* reset the target layer update flag */
 2200|   110k|    ps_nal_parse_ctxt->i4_tgt_lyr_update = SVCD_FALSE;
  ------------------
  |  |   45|   110k|#define SVCD_FALSE 0
  ------------------
 2201|       |    /*************************************************************************/
 2202|       |    /*              SEARCHING FOR THE START OF BITSTREAM                     */
 2203|       |    /*************************************************************************/
 2204|       |
 2205|       |    /*-----------------------------------------------------------------------*/
 2206|       |    /* For Annex B based bitstreams the first start code has to decoded      */
 2207|       |    /* The first start code can come after multiple process call also. This  */
 2208|       |    /* has to be carefully handled                                           */
 2209|       |    /*-----------------------------------------------------------------------*/
 2210|       |
 2211|   110k|    if(ANNEX_B == ps_nal_parse_ctxt->i4_input_bitstream_mode &&
  ------------------
  |  |   64|   110k|#define ANNEX_B 0     /*!< Annex B stream*/
  ------------------
  |  Branch (2211:8): [True: 110k, False: 0]
  ------------------
 2212|   110k|       SVCD_TRUE == ps_nal_parse_ctxt->i4_dec_frst_sc_flag)
  ------------------
  |  |   46|   110k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2212:8): [True: 22.5k, False: 88.4k]
  ------------------
 2213|  22.5k|    {
 2214|  22.5k|        WORD32 i4_status;
 2215|       |
 2216|  22.5k|        i4_status =
 2217|  22.5k|            isvcd_get_first_start_code(pu1_stream_buffer, pu4_bytes_consumed, pu4_num_bytes);
 2218|       |
 2219|       |        /*-------------------------------------------------------------------*/
 2220|       |        /* If start code found then proceed with bitstream extraction        */
 2221|       |        /*-------------------------------------------------------------------*/
 2222|       |
 2223|  22.5k|        if(i4_status == SC_NOT_FOUND)
  ------------------
  |  |   51|  22.5k|#define SC_NOT_FOUND (-1)
  ------------------
  |  Branch (2223:12): [True: 22, False: 22.5k]
  ------------------
 2224|     22|        {
 2225|     22|            return (VCL_NAL_FOUND_FALSE);
 2226|     22|        }
 2227|       |
 2228|  22.5k|        i4_cur_pos = *pu4_bytes_consumed;
 2229|  22.5k|        ps_nal_parse_ctxt->i4_dec_frst_sc_flag = SVCD_FALSE;
  ------------------
  |  |   45|  22.5k|#define SVCD_FALSE 0
  ------------------
 2230|  22.5k|    }
 2231|       |
 2232|       |    /* If number of bytes left in the previous process call  */
 2233|       |    /* is greater than or equal to number of bytes in input  */
 2234|       |    /* buffer of the current process call then declare that  */
 2235|       |    /* end of bitstream has occurred and consume the bytes   */
 2236|       |    /* but do not decode                                     */
 2237|   110k|    if(ps_nal_parse_ctxt->u4_bytes_left >= (UWORD32) *pu4_num_bytes)
  ------------------
  |  Branch (2237:8): [True: 0, False: 110k]
  ------------------
 2238|      0|    {
 2239|      0|        ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2240|      0|        *pu4_bytes_consumed = *pu4_num_bytes;
 2241|       |
 2242|      0|        i4_status = isvcd_nal_parse_partial_signal_eos(ps_nal_parse_ctxt, NULL, ps_non_vcl_nal);
 2243|      0|        return (i4_status);
 2244|      0|    }
 2245|       |
 2246|   110k|    do
 2247|   399k|    {
 2248|   399k|        nal_buf_t *ps_nal_buf;
 2249|   399k|        UWORD32 *pu4_bytes_left;
 2250|       |
 2251|       |        /*********************************************************************/
 2252|       |        /*                  NAL BOUNDARY DETECTION                           */
 2253|       |        /*********************************************************************/
 2254|       |        /*-------------------------------------------------------------------*/
 2255|       |        /* Detect NAL boundary                                               */
 2256|       |        /* After return, this NAL boundary detection logic might be in       */
 2257|       |        /* one of following states:                                          */
 2258|       |        /*  - NAL_START                                                      */
 2259|       |        /*  - FIND_NAL_END                                                   */
 2260|       |        /*  - NAL_END                                                        */
 2261|       |        /*-------------------------------------------------------------------*/
 2262|   399k|        if(ANNEX_B == ps_nal_parse_ctxt->i4_input_bitstream_mode)
  ------------------
  |  |   64|   399k|#define ANNEX_B 0     /*!< Annex B stream*/
  ------------------
  |  Branch (2262:12): [True: 399k, False: 0]
  ------------------
 2263|   399k|        {
 2264|   399k|            i4_nal_start_flag = isvcd_get_annex_b_nal_unit(
 2265|   399k|                pu1_stream_buffer, i4_cur_pos, *pu4_num_bytes,
 2266|   399k|                &ps_nal_parse_ctxt->i4_find_nal_state, &ps_nal_parse_ctxt->i4_zero_byte_cnt,
 2267|   399k|                &u4_bytes_consumed_temp, ps_nal_parse_ctxt->pv_nal_unit, &i4_more_data_flag);
 2268|       |
 2269|   399k|            i4_cur_pos += u4_bytes_consumed_temp;
 2270|   399k|        }
 2271|       |
 2272|       |        /* If current NAL unit is start of new NAL unit then parse the NAL
 2273|       |            header. If the current NAL unit type is VCL NAL then return from
 2274|       |            this function. otherwise apply NAL discard logic and discard the
 2275|       |            NAL if discard NAL flag is true                                  */
 2276|       |
 2277|   399k|        if(SVCD_TRUE == i4_nal_start_flag)
  ------------------
  |  |   46|   399k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2277:12): [True: 399k, False: 0]
  ------------------
 2278|   399k|        {
 2279|   399k|            UWORD32 u4_err_code;
 2280|   399k|            WORD32 i4_sps_pps_corrupt_status;
 2281|       |
 2282|       |            /* Get the NAL prms. This involves the following things*/
 2283|       |            /* 1. Decode the NAL header                            */
 2284|       |            /* 2. Set the discard flag                             */
 2285|       |            /* 3. Decode the slice header if needed                */
 2286|   399k|            isvcd_get_nal_prms(ps_nal_unit->pu1_bufs, ps_nal_unit->i4_buf_sizes, ps_nal_prms,
 2287|   399k|                               &ps_nal_parse_ctxt->s_prefix_nal_prms,
 2288|   399k|                               &ps_nal_parse_ctxt->s_prefix_nal_buf, &u4_err_code,
 2289|   399k|                               &i4_sps_pps_corrupt_status, &ps_nal_parse_ctxt->i4_discard_nal_flag,
 2290|   399k|                               ps_nal_parse_ctxt);
 2291|       |            /* If the error code returned by the "picture boundary" */
 2292|       |            /* detection is                                         */
 2293|       |            /* 1. Insufficient bitstream size: then store the bytes */
 2294|       |            /*    left and break out of the loop                    */
 2295|       |            /* 2. Corrupted slice: then discard the slice           */
 2296|   399k|            if((NAL_INSUFFICIENT_DATA == (WORD32) u4_err_code) &&
  ------------------
  |  Branch (2296:16): [True: 7.32k, False: 391k]
  ------------------
 2297|  7.32k|               (NAL_END != ps_nal_parse_ctxt->i4_find_nal_state))
  ------------------
  |  Branch (2297:16): [True: 5.40k, False: 1.92k]
  ------------------
 2298|  5.40k|            {
 2299|  5.40k|                ps_nal_parse_ctxt->u4_bytes_left = *pu4_num_bytes - *pu4_bytes_consumed;
 2300|       |
 2301|       |                /* Reset the NAL level tracking variables */
 2302|  5.40k|                isvcd_nal_reset_ctxt(ps_nal_parse_ctxt);
 2303|  5.40k|                break;
 2304|  5.40k|            }
 2305|   393k|            else if(0 != u4_err_code)
  ------------------
  |  Branch (2305:21): [True: 12.3k, False: 381k]
  ------------------
 2306|  12.3k|            {
 2307|  12.3k|                ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|  12.3k|#define SVCD_TRUE 1
  ------------------
 2308|  12.3k|            }
 2309|       |
 2310|       |            /* Populate other parameters based on the nal prms */
 2311|   393k|            ps_nal_parse_ctxt->i4_nal_type = ps_nal_prms->i4_derived_nal_type;
 2312|   393k|            i4_nal_header_len = ps_nal_prms->i4_nal_header_len;
 2313|       |
 2314|       |            /* If derived NAL unit is VCL_NAL then return from this function */
 2315|   393k|            if(VCL_NAL == ps_nal_prms->i4_derived_nal_type &&
  ------------------
  |  Branch (2315:16): [True: 102k, False: 291k]
  ------------------
 2316|   102k|               PREFIX_UNIT_NAL != ps_nal_prms->i4_nal_unit_type)
  ------------------
  |  |   64|   102k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (2316:16): [True: 101k, False: 1.09k]
  ------------------
 2317|   101k|            {
 2318|   101k|                isvcd_pic_reset_ctxt(ps_nal_parse_ctxt);
 2319|       |
 2320|   101k|                return (VCL_NAL_FOUND_TRUE);
 2321|   101k|            }
 2322|       |
 2323|       |            /* Set the active NAL buffer structure and initialize */
 2324|       |            /* the nal buffer structure                           */
 2325|   292k|            isvcd_get_nal_buf(ps_nal_parse_ctxt, &ps_nal_buf);
 2326|       |
 2327|   292k|            ps_nal_parse_ctxt->ps_nal_buf = ps_nal_buf;
 2328|   292k|        }
 2329|       |
 2330|       |        /* Update the bytes consumed variable */
 2331|       |
 2332|   292k|        *pu4_bytes_consumed += u4_bytes_consumed_temp;
 2333|       |
 2334|   292k|        ps_nal_buf = ps_nal_parse_ctxt->ps_nal_buf;
 2335|   292k|        if(VCL_NAL == ps_nal_parse_ctxt->i4_nal_type)
  ------------------
  |  Branch (2335:12): [True: 1.09k, False: 291k]
  ------------------
 2336|  1.09k|        {
 2337|  1.09k|            ppu1_out_buf = &ps_nal_parse_ctxt->pu1_vcl_nal_buf;
 2338|  1.09k|            pu4_bytes_left = &ps_nal_parse_ctxt->u4_bytes_left_vcl;
 2339|  1.09k|            if(*pu4_bytes_left < (MAX_VCL_NAL_BUFF_SIZE * 0.05))
  ------------------
  |  |   69|  1.09k|#define MAX_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
  |  Branch (2339:16): [True: 0, False: 1.09k]
  ------------------
 2340|      0|            {
 2341|      0|                return (VCL_NAL_FOUND_FALSE);
 2342|      0|            }
 2343|  1.09k|        }
 2344|   291k|        else
 2345|   291k|        {
 2346|   291k|            ppu1_out_buf = &ps_nal_parse_ctxt->pu1_non_vcl_nal_buf;
 2347|   291k|            pu4_bytes_left = &ps_nal_parse_ctxt->u4_bytes_left_non_vcl;
 2348|   291k|            if(*pu4_bytes_left < (MAX_NON_VCL_NAL_BUFF_SIZE * 0.05))
  ------------------
  |  |   70|   291k|#define MAX_NON_VCL_NAL_BUFF_SIZE (1024 * 1024 * 2)
  ------------------
  |  Branch (2348:16): [True: 0, False: 291k]
  ------------------
 2349|      0|            {
 2350|      0|                return (VCL_NAL_FOUND_FALSE);
 2351|      0|            }
 2352|   291k|        }
 2353|       |
 2354|       |        /* if 0 bytes left then discard the current NAL */
 2355|   292k|        if(0 >= (WORD32) *pu4_bytes_left)
  ------------------
  |  Branch (2355:12): [True: 0, False: 292k]
  ------------------
 2356|      0|        {
 2357|      0|            ps_nal_parse_ctxt->i4_discard_nal_flag = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2358|      0|        }
 2359|       |
 2360|       |        /* If NAL is not discarded then :
 2361|       |            1) Perform emulation prevention and byte swapping on the RBSP data
 2362|       |            2) Update the NAL unit ctxt:
 2363|       |                a) If VCL NAL then update DQID list
 2364|       |                b) If NON VCL NAL then update the non vcl output structure   */
 2365|       |
 2366|   292k|        if(SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag)
  ------------------
  |  |   45|   292k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2366:12): [True: 276k, False: 15.3k]
  ------------------
 2367|   276k|        {
 2368|   276k|            UWORD32 u4_output_bytes, u4_buf_inc;
 2369|       |
 2370|   276k|            {
 2371|   276k|                UWORD32 u4_buf_size;
 2372|       |
 2373|       |                /* clip the size before emulation prevention */
 2374|   276k|                u4_buf_size = (UWORD32) CLIP3(0, (WORD32) *pu4_bytes_left,
  ------------------
  |  |   77|   276k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 0, False: 276k]
  |  |  |  Branch (77:54): [True: 0, False: 276k]
  |  |  ------------------
  ------------------
 2375|   276k|                                              (ps_nal_unit->i4_buf_sizes - i4_nal_header_len));
 2376|       |
 2377|   276k|                u4_buf_inc = isvcd_nal_byte_swap_emulation(
 2378|   276k|                    (UWORD32 *) *ppu1_out_buf, &u4_output_bytes,
 2379|   276k|                    ps_nal_unit->pu1_bufs + i4_nal_header_len, u4_buf_size,
 2380|   276k|                    NUM_OF_ZERO_BYTES_BEFORE_START_CODE, &ps_nal_parse_ctxt->s_emulation_ctxt);
  ------------------
  |  |   54|   276k|#define NUM_OF_ZERO_BYTES_BEFORE_START_CODE (2)
  ------------------
 2381|   276k|                i4_nal_header_len = 0;
 2382|       |
 2383|   276k|                u4_buf_inc = UP_ALIGN_8(u4_buf_inc);
  ------------------
  |  |   51|   276k|#define UP_ALIGN_8(x) (((((UWORD64) x) + 7) >> 3) << 3)
  ------------------
 2384|   276k|                *ppu1_out_buf += u4_buf_inc;
 2385|   276k|                *pu4_bytes_left -= u4_buf_inc;
 2386|   276k|                ps_nal_buf->i4_buf_size += u4_output_bytes;
 2387|   276k|            }
 2388|   276k|        }
 2389|       |
 2390|       |        /*********************************************************************/
 2391|       |        /*                UPDATE VARIABLES                                   */
 2392|       |        /*********************************************************************/
 2393|       |
 2394|   292k|        if(NAL_END == ps_nal_parse_ctxt->i4_find_nal_state)
  ------------------
  |  Branch (2394:12): [True: 288k, False: 3.90k]
  ------------------
 2395|   288k|        {
 2396|       |            /*---------------------------------------------------------------*/
 2397|       |            /* - Update the total bits in the NAL. While doing so bits       */
 2398|       |            /* calculated so far should be converted to SODB length          */
 2399|       |            /*---------------------------------------------------------------*/
 2400|   288k|            if(SVCD_FALSE == ps_nal_parse_ctxt->i4_discard_nal_flag)
  ------------------
  |  |   45|   288k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2400:16): [True: 273k, False: 14.7k]
  ------------------
 2401|   273k|            {
 2402|   273k|                isvcd_update_nal_ctxt(ps_nal_parse_ctxt, NULL, ps_non_vcl_nal);
 2403|       |
 2404|   273k|                UPDATE_NAL_BUF_PTR(ppu1_out_buf, ps_nal_prms->i4_derived_nal_type, pu4_bytes_left);
 2405|   273k|            }
 2406|       |
 2407|       |            /* If the prefix NAL unit is not immediatly followed by */
 2408|       |            /* a AVC NAL unit it shall be discarded and hence reset */
 2409|       |            /* is done                                              */
 2410|       |            /* Also if prefix NAL unit is discarded then we should  */
 2411|       |            /* not associate the prefix NAL unit with AVC NAL unit  */
 2412|       |            /* and hence a reset is required                        */
 2413|   288k|            if((PREFIX_UNIT_NAL != ps_nal_prms->i4_nal_unit_type) ||
  ------------------
  |  |   64|   288k|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (2413:16): [True: 286k, False: 1.98k]
  ------------------
 2414|  1.98k|               (SVCD_TRUE == ps_nal_parse_ctxt->i4_discard_nal_flag))
  ------------------
  |  |   46|  1.98k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2414:16): [True: 915, False: 1.06k]
  ------------------
 2415|   287k|            {
 2416|   287k|                isvcd_nal_buf_reset(&ps_nal_parse_ctxt->s_prefix_nal_buf);
 2417|   287k|            }
 2418|       |
 2419|       |            /* Reset NAL level tracking variables */
 2420|   288k|            isvcd_nal_reset_ctxt(ps_nal_parse_ctxt);
 2421|   288k|        }
 2422|       |
 2423|   292k|        i4_nal_header_len = 0;
 2424|       |        /*------------- while loop ends here --------------------------------*/
 2425|   292k|    } while(SVCD_TRUE == i4_more_data_flag);
  ------------------
  |  |   46|   292k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2425:13): [True: 288k, False: 3.90k]
  ------------------
 2426|       |
 2427|  9.30k|    if(i4_more_data_flag == 0)
  ------------------
  |  Branch (2427:8): [True: 9.30k, False: 0]
  ------------------
 2428|  9.30k|    {
 2429|  9.30k|        isvcd_pic_reset_ctxt(ps_nal_parse_ctxt);
 2430|  9.30k|        return (VCL_NAL_FOUND_TRUE);
 2431|  9.30k|    }
 2432|       |
 2433|      0|    return (VCL_NAL_FOUND_FALSE);
 2434|  9.30k|}

isvcd_nal_parse.c:UPDATE_NAL_BUF_PTR:
   86|  1.47M|{
   87|  1.47M|    UWORD8 *pu1_buf_ptr;
   88|  1.47M|    UWORD64 u4_inc;
   89|       |
   90|       |    /* Align the start of the structure */
   91|       |
   92|  1.47M|    pu1_buf_ptr = *ppu1_buf;
   93|       |
   94|       |    /* Account for the vcl or non-vcl header */
   95|  1.47M|    u4_inc = GET_NAL_BUF_INC(i4_derived_nal_type);
   96|  1.47M|    u4_inc = UP_ALIGN_8(u4_inc);
  ------------------
  |  |   51|  1.47M|#define UP_ALIGN_8(x) (((((UWORD64) x) + 7) >> 3) << 3)
  ------------------
   97|  1.47M|    pu1_buf_ptr += u4_inc;
   98|       |
   99|       |    /* Update the pointers */
  100|  1.47M|    if(*pu4_bytes_left >= u4_inc)
  ------------------
  |  Branch (100:8): [True: 1.47M, False: 0]
  ------------------
  101|  1.47M|    {
  102|  1.47M|        *pu4_bytes_left -= u4_inc;
  103|  1.47M|    }
  104|  1.47M|    *ppu1_buf = pu1_buf_ptr;
  105|  1.47M|}
isvcd_nal_parse.c:GET_NAL_BUF_INC:
   68|  3.25M|{
   69|  3.25M|    UWORD32 u4_buf_inc;
   70|       |
   71|  3.25M|    if(VCL_NAL == i4_derived_nal_type)
  ------------------
  |  Branch (71:8): [True: 1.06M, False: 2.18M]
  ------------------
   72|  1.06M|    {
   73|  1.06M|        u4_buf_inc = sizeof(vcl_buf_hdr_t);
   74|  1.06M|    }
   75|  2.18M|    else
   76|  2.18M|    {
   77|  2.18M|        u4_buf_inc = sizeof(non_vcl_buf_hdr_t);
   78|  2.18M|    }
   79|       |
   80|  3.25M|    u4_buf_inc = UP_ALIGN_8(u4_buf_inc);
  ------------------
  |  |   51|  3.25M|#define UP_ALIGN_8(x) (((((UWORD64) x) + 7) >> 3) << 3)
  ------------------
   81|  3.25M|    return (u4_buf_inc);
   82|  3.25M|}

isvcd_parse_bmb_ref_index_cavlc_range1:
   90|  6.17k|{
   91|  6.17k|    UWORD32 u4_i;
   92|  6.17k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
   93|  6.17k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
   94|  6.17k|    UNUSED(u4_num_ref_idx_active_minus1);
  ------------------
  |  |   45|  6.17k|#define UNUSED(x) ((void)(x))
  ------------------
   95|  14.3k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (95:19): [True: 8.20k, False: 6.17k]
  ------------------
   96|  8.20k|    {
   97|  8.20k|        if(pi1_ref_idx[u4_i] > -1 && (((*pu1_motion_prediction_flag >> u4_i) & 0x01) == 0))
  ------------------
  |  Branch (97:12): [True: 2.80k, False: 5.40k]
  |  Branch (97:38): [True: 1.38k, False: 1.41k]
  ------------------
   98|  1.38k|        {
   99|  1.38k|            UWORD32 u4_ref_idx;
  100|  1.38k|            u4_ref_idx = ih264d_tev_range1(pu4_bitstream_off, pu4_bitstrm_buf);
  101|       |
  102|       |            /* Storing Reference Idx Information */
  103|  1.38k|            pi1_ref_idx[u4_i] = (WORD8) u4_ref_idx;
  104|  1.38k|        }
  105|  8.20k|    }
  106|  6.17k|}
isvcd_parse_bmb_ref_index_cavlc:
  138|  14.2k|{
  139|  14.2k|    UWORD32 u4_i;
  140|  14.2k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  141|  14.2k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
  142|       |
  143|  36.2k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (143:19): [True: 23.9k, False: 12.2k]
  ------------------
  144|  23.9k|    {
  145|  23.9k|        if(pi1_ref_idx[u4_i] > -1 && (((*pu1_motion_prediction_flag >> u4_i) & 0x01) == 0))
  ------------------
  |  Branch (145:12): [True: 12.0k, False: 11.9k]
  |  Branch (145:38): [True: 8.03k, False: 4.01k]
  ------------------
  146|  8.03k|        {
  147|  8.03k|            UWORD32 u4_ref_idx;
  148|       |            // inlining ih264d_uev
  149|  8.03k|            UWORD32 u4_bitstream_offset = *pu4_bitstream_off;
  150|  8.03k|            UWORD32 u4_word, u4_ldz;
  151|       |
  152|       |            /***************************************************************/
  153|       |            /* Find leading zeros in next 32 bits                          */
  154|       |            /***************************************************************/
  155|  8.03k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  8.03k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  8.03k|{                                                                           \
  |  |  152|  8.03k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  8.03k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  8.03k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  8.03k|                                                                            \
  |  |  156|  8.03k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  8.03k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 7.65k, False: 375]
  |  |  ------------------
  |  |  158|  8.03k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  7.65k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  8.03k|}
  ------------------
  156|  8.03k|            u4_ldz = CLZ(u4_word);
  157|       |            /* Flush the ps_bitstrm */
  158|  8.03k|            u4_bitstream_offset += (u4_ldz + 1);
  159|       |            /* Read the suffix from the ps_bitstrm */
  160|  8.03k|            u4_word = 0;
  161|  8.03k|            if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  5.54k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  5.54k|{                                                                           \
  |  |  122|  5.54k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  5.54k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  5.54k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  5.54k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  5.54k|                                                                            \
  |  |  127|  5.54k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 5.07k, False: 473]
  |  |  ------------------
  |  |  128|  5.54k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  5.07k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  5.54k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  5.54k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  5.54k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  5.54k|}                                                                           \
  ------------------
  |  Branch (161:16): [True: 5.54k, False: 2.48k]
  ------------------
  162|  8.03k|            *pu4_bitstream_off = u4_bitstream_offset;
  163|  8.03k|            u4_ref_idx = ((1 << u4_ldz) + u4_word - 1);
  164|       |            // inlining ih264d_uev
  165|  8.03k|            if(u4_ref_idx > u4_num_ref_idx_active_minus1) return ERROR_REF_IDX;
  ------------------
  |  Branch (165:16): [True: 1.95k, False: 6.07k]
  ------------------
  166|       |
  167|       |            /* Storing Reference Idx Information */
  168|  6.07k|            pi1_ref_idx[u4_i] = (WORD8) u4_ref_idx;
  169|  6.07k|        }
  170|  23.9k|    }
  171|  12.2k|    return OK;
  ------------------
  |  |  114|  12.2k|#define OK        0
  ------------------
  172|  14.2k|}
isvcd_parse_pmb_ref_index_cavlc:
  205|  8.46k|{
  206|  8.46k|    UWORD32 u4_i;
  207|  8.46k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  208|  8.46k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
  209|       |
  210|  21.7k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (210:19): [True: 13.5k, False: 8.24k]
  ------------------
  211|  13.5k|    {
  212|  13.5k|        if(((*pu1_motion_prediction_flag >> u4_i) & 0x01) == 0)
  ------------------
  |  Branch (212:12): [True: 7.28k, False: 6.22k]
  ------------------
  213|  7.28k|        {
  214|  7.28k|            UWORD32 u4_ref_idx;
  215|       |            // Inlined ih264d_uev
  216|  7.28k|            UWORD32 u4_bitstream_offset = *pu4_bitstream_off;
  217|  7.28k|            UWORD32 u4_word, u4_ldz;
  218|       |
  219|       |            /***************************************************************/
  220|       |            /* Find leading zeros in next 32 bits                          */
  221|       |            /***************************************************************/
  222|  7.28k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  7.28k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  7.28k|{                                                                           \
  |  |  152|  7.28k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  7.28k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  7.28k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  7.28k|                                                                            \
  |  |  156|  7.28k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  7.28k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 6.99k, False: 290]
  |  |  ------------------
  |  |  158|  7.28k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  6.99k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  7.28k|}
  ------------------
  223|  7.28k|            u4_ldz = CLZ(u4_word);
  224|       |            /* Flush the ps_bitstrm */
  225|  7.28k|            u4_bitstream_offset += (u4_ldz + 1);
  226|       |            /* Read the suffix from the ps_bitstrm */
  227|  7.28k|            u4_word = 0;
  228|  7.28k|            if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  2.85k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.85k|{                                                                           \
  |  |  122|  2.85k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.85k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.85k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.85k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.85k|                                                                            \
  |  |  127|  2.85k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.42k, False: 431]
  |  |  ------------------
  |  |  128|  2.85k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.42k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.85k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.85k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.85k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.85k|}                                                                           \
  ------------------
  |  Branch (228:16): [True: 2.85k, False: 4.43k]
  ------------------
  229|  7.28k|            *pu4_bitstream_off = u4_bitstream_offset;
  230|  7.28k|            u4_ref_idx = ((1 << u4_ldz) + u4_word - 1);
  231|       |
  232|       |            // Inlined ih264d_uev
  233|  7.28k|            if(u4_ref_idx > u4_num_ref_idx_active_minus1) return ERROR_REF_IDX;
  ------------------
  |  Branch (233:16): [True: 226, False: 7.06k]
  ------------------
  234|       |
  235|       |            /* Storing Reference Idx Information */
  236|  7.06k|            pi1_ref_idx[u4_i] = (WORD8) u4_ref_idx;
  237|  7.06k|        }
  238|  13.5k|    }
  239|  8.24k|    return OK;
  ------------------
  |  |  114|  8.24k|#define OK        0
  ------------------
  240|  8.46k|}
isvcd_parse_pmb_ref_index_cavlc_range1:
  272|  3.34k|{
  273|  3.34k|    UWORD32 u4_i;
  274|  3.34k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  275|  3.34k|    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
  276|  3.34k|    UNUSED(u4_num_ref_idx_active_minus1);
  ------------------
  |  |   45|  3.34k|#define UNUSED(x) ((void)(x))
  ------------------
  277|  9.68k|    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
  ------------------
  |  Branch (277:19): [True: 6.34k, False: 3.34k]
  ------------------
  278|  6.34k|    {
  279|  6.34k|        if(((*pu1_motion_prediction_flag >> u4_i) & 0x01) == 0)
  ------------------
  |  Branch (279:12): [True: 2.05k, False: 4.29k]
  ------------------
  280|  2.05k|        {
  281|  2.05k|            UWORD32 u4_ref_idx;
  282|  2.05k|            u4_ref_idx = ih264d_tev_range1(pu4_bitstream_off, pu4_bitstrm_buf);
  283|       |
  284|       |            /* Storing Reference Idx Information */
  285|  2.05k|            pi1_ref_idx[u4_i] = (WORD8) u4_ref_idx;
  286|  2.05k|        }
  287|  6.34k|    }
  288|  3.34k|}

isvcd_parse_bmb_cabac:
  102|  14.8k|{
  103|  14.8k|    UWORD8 u1_cbp = 0;
  104|  14.8k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  105|  14.8k|    deblk_mb_t *ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u4_mb_num;
  106|  14.8k|    const UWORD8 *puc_mb_mc_mode = (const UWORD8 *) gau1_ih264d_mb_mc_mode;
  107|  14.8k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  108|  14.8k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
  109|  14.8k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
  110|  14.8k|    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
  111|  14.8k|    WORD32 ret;
  112|  14.8k|    UWORD8 u1_Bdirect_tranform_read = 1;
  113|  14.8k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
  114|       |
  115|  14.8k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
  116|  14.8k|    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1;
  117|  14.8k|    ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type];
  118|  14.8k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  119|  14.8k|    ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
  ------------------
  |  |  384|  14.8k|#define D_B_SLICE         4
  ------------------
  120|       |
  121|  14.8k|    if(u1_mb_type != B_DIRECT)
  ------------------
  |  |  482|  14.8k|#define B_DIRECT  0
  ------------------
  |  Branch (121:8): [True: 13.8k, False: 1.03k]
  ------------------
  122|  13.8k|    {
  123|  13.8k|        ret = isvcd_parse_bmb_non_direct_cabac(ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info,
  124|  13.8k|                                               u4_mb_num, u4_num_mbsNby2);
  125|  13.8k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  13.8k|#define OK        0
  ------------------
  |  Branch (125:12): [True: 181, False: 13.6k]
  ------------------
  126|  13.8k|    }
  127|  1.03k|    else
  128|  1.03k|    {
  129|       |        /************ STORING PARTITION INFO ***********/
  130|  1.03k|        parse_part_params_t *ps_part_info;
  131|  1.03k|        ps_part_info = ps_dec->ps_part;
  132|  1.03k|        ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  1.03k|#define PART_DIRECT_16x16              2
  ------------------
  133|  1.03k|        ps_part_info->u1_sub_mb_num = 0;
  134|  1.03k|        ps_dec->ps_part++;
  135|  1.03k|        p_curr_ctxt->u1_mb_type = CAB_BD16x16;
  ------------------
  |  |  396|  1.03k|#define CAB_BD16x16       0x04 /* 0000 0100 */
  ------------------
  136|       |
  137|  1.03k|        MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|  1.03k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  1.03k|{                                                               \
  |  |  654|  1.03k|    memset(pu4_start,value,16);                                 \
  |  |  655|  1.03k|}
  ------------------
  138|  1.03k|        memset(ps_dec->pi1_left_ref_idx_ctxt_inc, 0, 4);
  139|  1.03k|        MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
  ------------------
  |  |  652|  1.03k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  1.03k|{                                                               \
  |  |  654|  1.03k|    memset(pu4_start,value,16);                                 \
  |  |  655|  1.03k|}
  ------------------
  140|  1.03k|        memset(p_curr_ctxt->i1_ref_idx, 0, 4);
  141|       |
  142|       |        /* check whether transform8x8 u4_flag to be read or not */
  143|  1.03k|        u1_Bdirect_tranform_read = ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
  144|  1.03k|    }
  145|       |
  146|  14.6k|    if(ps_svc_slice_params->u1_adaptive_residual_prediction_flag &&
  ------------------
  |  Branch (146:8): [True: 3.66k, False: 11.0k]
  ------------------
  147|  3.66k|       ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (147:8): [True: 3.41k, False: 244]
  ------------------
  148|  3.41k|    {
  149|  3.41k|        ps_svc_cur_mb_info->u1_residual_prediction_flag = ih264d_decode_bin(
  150|  3.41k|            1, ps_svc_lyr_dec->ps_residual_prediction_flag, ps_bitstrm, ps_cab_env);
  151|  3.41k|        COPYTHECONTEXT("SVC ext: u1_residual_prediction_flag",
  152|  3.41k|                       ps_svc_cur_mb_info->u1_residual_prediction_flag);
  153|  3.41k|    }
  154|  11.2k|    else
  155|  11.2k|    {
  156|       |        /*residual flag inference code */
  157|  11.2k|        if(1 == ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (157:12): [True: 11.0k, False: 244]
  ------------------
  158|  11.0k|           1 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (158:12): [True: 0, False: 11.0k]
  ------------------
  159|      0|        {
  160|      0|            ps_svc_cur_mb_info->u1_residual_prediction_flag =
  161|      0|                ps_svc_slice_params->u1_default_residual_prediction_flag;
  162|      0|        }
  163|  11.2k|        else
  164|  11.2k|        {
  165|  11.2k|            ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
  166|  11.2k|        }
  167|  11.2k|    }
  168|       |
  169|  14.6k|    if(ps_svc_slice_params->u1_scan_idx_end >= ps_svc_slice_params->u1_scan_idx_start)
  ------------------
  |  Branch (169:8): [True: 14.6k, False: 0]
  ------------------
  170|  14.6k|    {
  171|       |        /* Read the Coded block pattern */
  172|  14.6k|        u1_cbp = (WORD8) ih264d_parse_ctx_cbp_cabac(ps_dec);
  173|  14.6k|        p_curr_ctxt->u1_cbp = u1_cbp;
  174|  14.6k|        ps_cur_mb_info->u1_cbp = u1_cbp;
  175|       |
  176|  14.6k|        if(u1_cbp > 47) return ERROR_CBP;
  ------------------
  |  Branch (176:12): [True: 0, False: 14.6k]
  ------------------
  177|  14.6k|        COPYTHECONTEXT("coded_block_pattern", u1_cbp);
  178|       |
  179|  14.6k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
  180|  14.6k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  181|       |
  182|  14.6k|        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & (0xf)) &&
  ------------------
  |  Branch (182:12): [True: 3.32k, False: 11.3k]
  |  Branch (182:64): [True: 2.60k, False: 719]
  ------------------
  183|  2.60k|           (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag) && (u1_Bdirect_tranform_read))
  ------------------
  |  Branch (183:12): [True: 2.17k, False: 427]
  |  Branch (183:73): [True: 1.53k, False: 645]
  ------------------
  184|  1.53k|        {
  185|  1.53k|            ps_cur_mb_info->u1_tran_form8x8 =
  186|  1.53k|                ih264d_parse_transform8x8flag_cabac(ps_dec, ps_cur_mb_info);
  187|  1.53k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  188|       |
  189|  1.53k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  190|  1.53k|            p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
  191|  1.53k|        }
  192|  13.1k|        else
  193|  13.1k|        {
  194|  13.1k|            p_curr_ctxt->u1_transform8x8_ctxt = 0;
  195|  13.1k|        }
  196|  14.6k|    }
  197|       |
  198|  14.6k|    p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
  199|  14.6k|    p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
  200|  14.6k|    ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
  201|       |
  202|       |    /* Read mb_qp_delta */
  203|  14.6k|    if(u1_cbp)
  ------------------
  |  Branch (203:8): [True: 4.58k, False: 10.0k]
  ------------------
  204|  4.58k|    {
  205|  4.58k|        WORD8 c_temp;
  206|  4.58k|        ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
  207|  4.58k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  4.58k|#define OK        0
  ------------------
  |  Branch (207:12): [True: 113, False: 4.47k]
  ------------------
  208|  4.47k|        COPYTHECONTEXT("mb_qp_delta", c_temp);
  209|  4.47k|        if(c_temp)
  ------------------
  |  Branch (209:12): [True: 1.48k, False: 2.98k]
  ------------------
  210|  1.48k|        {
  211|  1.48k|            ret = ih264d_update_qp(ps_dec, c_temp);
  212|  1.48k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  1.48k|#define OK        0
  ------------------
  |  Branch (212:16): [True: 0, False: 1.48k]
  ------------------
  213|  1.48k|        }
  214|  4.47k|    }
  215|  10.0k|    else
  216|  10.0k|        ps_dec->i1_prev_mb_qp_delta = 0;
  217|       |
  218|       |    /*RESIDUAL FOR Start to end idx*/
  219|  14.5k|    ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0);
  220|  14.5k|    if(EXCEED_OFFSET(ps_dec->ps_bitstrm)) return ERROR_EOB_TERMINATE_T;
  ------------------
  |  |   93|  14.5k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 612, False: 13.9k]
  |  |  ------------------
  ------------------
  221|  13.9k|    return OK;
  ------------------
  |  |  114|  13.9k|#define OK        0
  ------------------
  222|  14.5k|}
isvcd_mv_pred_ref_tfr_nby2_ebmb:
  235|  42.1k|{
  236|  42.1k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) ps_dec;
  237|  42.1k|    parse_pmbarams_t *ps_mb_part_info;
  238|  42.1k|    parse_part_params_t *ps_part;
  239|  42.1k|    mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start;
  240|  42.1k|    pic_buffer_t *ps_ref_frame;
  241|  42.1k|    UWORD8 u1_direct_mode_width;
  242|  42.1k|    UWORD8 i, j;
  243|  42.1k|    dec_mb_info_t *ps_cur_mb_info;
  244|  42.1k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
  245|  42.1k|    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  246|  42.1k|    UWORD8 u1_field;
  247|  42.1k|    WORD32 ret = 0;
  248|  42.1k|    WORD16 i2_mv_x, i2_mv_y;
  249|       |
  250|  42.1k|    ps_dec->i4_submb_ofst -= (u4_num_mbs - u4_mb_idx) << 4;
  251|  42.1k|    ps_mb_part_info = ps_dec->ps_parse_mb_data;
  252|  42.1k|    ps_part = ps_dec->ps_parse_part_params;
  253|       |
  254|       |    /* N/2 Mb MvPred and Transfer Setup Loop */
  255|   181k|    for(i = u4_mb_idx; i < u4_num_mbs; i++, ps_mb_part_info++)
  ------------------
  |  Branch (255:24): [True: 139k, False: 41.8k]
  ------------------
  256|   139k|    {
  257|   139k|        UWORD8 u1_colz = 0;
  258|   139k|        ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
  ------------------
  |  |  562|   139k|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   139k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   139k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
  259|       |        /* Restore the slice scratch MbX and MbY context */
  260|   139k|        ps_cur_mb_info = ps_dec->ps_nmb_info + i;
  261|   139k|        ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + i;
  262|   139k|        u1_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  263|   139k|        ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4);
  264|   139k|        ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
  265|   139k|        ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
  266|   139k|        ps_dec->u1_currB_type = 0;
  267|   139k|        ps_dec->u2_mv_2mb[i & 0x1] = 0;
  268|       |
  269|       |        /* Look for MV Prediction and Reference Transfer in Non-I Mbs */
  270|   139k|        if(!ps_mb_part_info->u4_isI_mb)
  ------------------
  |  Branch (270:12): [True: 139k, False: 492]
  ------------------
  271|   139k|        {
  272|   139k|            UWORD8 u1_blk_no;
  273|   139k|            WORD16 i1_ref_idx, i1_ref_idx1;
  274|   139k|            UWORD8 u1_pred_mode;
  275|   139k|            UWORD8 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num;
  276|   139k|            UWORD8 u1_lx, u1_lx_start, u1_lxend, u1_tmp_lx;
  277|   139k|            UWORD8 u1_num_part, u1_num_ref, u1_wd, u1_ht;
  278|   139k|            UWORD32 *pu4_wt_offst;
  279|   139k|            UWORD8 u1_scale_ref, u4_bot_mb;
  280|   139k|            deblk_mb_t *ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i;
  281|   139k|            WORD8(*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] = ps_mb_part_info->i1_ref_idx;
  282|   139k|            WORD8 *pi1_ref_idx0 = pi1_ref_idx[0], *pi1_ref_idx1 = pi1_ref_idx[1];
  283|   139k|            UWORD32 **ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst;
  284|   139k|            WORD32 i4_mb_mode_svc;
  285|   139k|            UWORD8 u1_motion_pred_flag_l0 = ps_svc_cur_mb_info->au1_motion_pred_flag[0];
  286|   139k|            UWORD8 u1_motion_pred_flag_l1 = ps_svc_cur_mb_info->au1_motion_pred_flag[1];
  287|       |
  288|       |            /* MB Level initialisations */
  289|   139k|            ps_dec->u4_num_pmbair = i >> u1_mbaff;
  290|   139k|            ps_dec->u4_mb_idx_mv = i;
  291|       |
  292|   139k|            i4_mb_mode_svc = isvcd_interlyr_motion_mode_pred(
  293|   139k|                ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info, ps_mb_part_info, ps_part);
  294|       |
  295|   139k|            if((-1 == i4_mb_mode_svc) || (SVC_INTER_MB == i4_mb_mode_svc))
  ------------------
  |  |  114|  38.7k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (295:16): [True: 100k, False: 38.7k]
  |  Branch (295:42): [True: 35.5k, False: 3.20k]
  ------------------
  296|   136k|            {
  297|   136k|                ps_mv_ntop_start =
  298|   136k|                    ps_mv_nmb_start - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
  299|       |
  300|   136k|                u1_num_part = ps_mb_part_info->u1_num_part;
  301|   136k|                ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1;
  302|   136k|                u1_direct_mode_width = (1 == ps_mb_part_info->u1_num_part) ? 16 : 8;
  ------------------
  |  Branch (302:40): [True: 109k, False: 26.3k]
  ------------------
  303|       |
  304|   136k|                ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx;
  305|   136k|                ps_cur_mb_info->u1_num_pred_parts = 0;
  306|       |
  307|       |                /****************************************************/
  308|       |                /* weighted u4_ofst pointer calculations, this loop  */
  309|       |                /* runs maximum 4 times, even in direct cases       */
  310|       |                /****************************************************/
  311|   136k|                u1_scale_ref = u1_mbaff & ps_cur_mb_info->u1_mb_field_decodingflag;
  312|   136k|                u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb;
  313|   136k|                if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
  ------------------
  |  Branch (313:20): [True: 83.2k, False: 52.8k]
  ------------------
  314|  83.2k|                {
  315|  83.2k|                    u1_num_ref = MIN(u1_num_part, 4);
  ------------------
  |  |   61|  83.2k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 74.1k, False: 9.08k]
  |  |  ------------------
  ------------------
  316|  83.2k|                    if(PART_DIRECT_16x16 != ps_part->u1_is_direct)
  ------------------
  |  |  572|  83.2k|#define PART_DIRECT_16x16              2
  ------------------
  |  Branch (316:24): [True: 41.3k, False: 41.8k]
  ------------------
  317|  41.3k|                    {
  318|   118k|                        for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++)
  ------------------
  |  Branch (318:44): [True: 77.0k, False: 41.3k]
  ------------------
  319|  77.0k|                        {
  320|  77.0k|                            i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
  ------------------
  |  |   60|  77.0k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 11.9k, False: 65.1k]
  |  |  ------------------
  ------------------
  321|  77.0k|                            if(u1_scale_ref) i1_ref_idx >>= 1;
  ------------------
  |  Branch (321:32): [True: 0, False: 77.0k]
  ------------------
  322|  77.0k|                            i1_ref_idx *= ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
  323|  77.0k|                            if(u1_scale_ref)
  ------------------
  |  Branch (323:32): [True: 0, False: 77.0k]
  ------------------
  324|      0|                                i1_ref_idx += (MAX(pi1_ref_idx1[u1_blk_no], 0) >> 1);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  325|  77.0k|                            else
  326|  77.0k|                                i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
  ------------------
  |  |   60|  77.0k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 1.48k, False: 75.5k]
  |  |  ------------------
  ------------------
  327|  77.0k|                            pu4_wt_offst = (UWORD32 *) &ps_dec->pu4_wt_ofsts[2 * X3(i1_ref_idx)];
  ------------------
  |  |   92|  77.0k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  328|       |
  329|  77.0k|                            if(pi1_ref_idx0[u1_blk_no] < 0) pu4_wt_offst += 1;
  ------------------
  |  Branch (329:32): [True: 20.7k, False: 56.3k]
  ------------------
  330|       |
  331|  77.0k|                            ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
  332|  77.0k|                            if(u1_scale_ref && (ps_dec->ps_cur_pps->u1_wted_bipred_idc == 2))
  ------------------
  |  Branch (332:32): [True: 0, False: 77.0k]
  |  Branch (332:48): [True: 0, False: 0]
  ------------------
  333|      0|                            {
  334|      0|                                i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  335|      0|                                i1_ref_idx *=
  336|      0|                                    (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] << 1);
  337|      0|                                i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  338|      0|                                if(u4_bot_mb)
  ------------------
  |  Branch (338:36): [True: 0, False: 0]
  ------------------
  339|      0|                                {
  340|      0|                                    i1_ref_idx +=
  341|      0|                                        (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] << 1) *
  342|      0|                                        (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] << 1);
  343|      0|                                }
  344|      0|                                pu4_wt_offst =
  345|      0|                                    (UWORD32 *) &ps_dec->pu4_mbaff_wt_mat[2 * X3(i1_ref_idx)];
  ------------------
  |  |   92|      0|#define X3(a)   (((a) << 1) + (a))
  ------------------
  346|      0|                                ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
  347|      0|                            }
  348|  77.0k|                        }
  349|  41.3k|                    }
  350|  83.2k|                }
  351|       |
  352|       |                /**************************************************/
  353|       |                /* Loop on Partitions                             */
  354|       |                /* direct mode is reflected as a single partition */
  355|       |                /**************************************************/
  356|   327k|                for(j = 0; j < u1_num_part; j++, ps_part++)
  ------------------
  |  Branch (356:28): [True: 192k, False: 135k]
  ------------------
  357|   192k|                {
  358|   192k|                    u1_sub_mb_num = ps_part->u1_sub_mb_num;
  359|   192k|                    ps_dec->u1_sub_mb_num = u1_sub_mb_num;
  360|       |
  361|   192k|                    if(PART_NOT_DIRECT != ps_part->u1_is_direct)
  ------------------
  |  |  570|   192k|#define PART_NOT_DIRECT                0
  ------------------
  |  Branch (361:24): [True: 78.7k, False: 113k]
  ------------------
  362|  78.7k|                    {
  363|       |                        /**************************************************/
  364|       |                        /* Direct Mode, Call DecodeSpatial/TemporalDirect */
  365|       |                        /* only (those will in turn call FormMbPartInfo)  */
  366|       |                        /**************************************************/
  367|  78.7k|                        ret = isvcd_decode_spatial_direct(ps_dec, u1_direct_mode_width,
  368|  78.7k|                                                          ps_cur_mb_info, i);
  369|  78.7k|                        if(ret != OK) return ret;
  ------------------
  |  |  114|  78.7k|#define OK        0
  ------------------
  |  Branch (369:28): [True: 0, False: 78.7k]
  ------------------
  370|  78.7k|                        ps_cur_deblk_mb->u1_mb_type |= (ps_dec->u1_currB_type << 1);
  371|  78.7k|                    }
  372|   113k|                    else
  373|   113k|                    {
  374|   113k|                        mv_pred_t s_mvPred = {0};
  375|       |                        /**************************************************/
  376|       |                        /* Non Direct Mode, Call Motion Vector Predictor  */
  377|       |                        /* and FormMbpartInfo                             */
  378|       |                        /**************************************************/
  379|   113k|                        u1_sub_mb_x = u1_sub_mb_num & 0x03;
  380|   113k|                        u1_sub_mb_y = u1_sub_mb_num >> 2;
  381|   113k|                        u1_blk_no = (u1_num_part < 4)
  ------------------
  |  Branch (381:37): [True: 65.9k, False: 47.3k]
  ------------------
  382|   113k|                                        ? j
  383|   113k|                                        : (((u1_sub_mb_y >> 1) << 1) + (u1_sub_mb_x >> 1));
  384|       |
  385|   113k|                        ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x;
  386|   113k|                        ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num;
  387|       |
  388|       |                        /* Populate the colpic info and reference frames */
  389|   113k|                        s_mvPred.i1_ref_frame[0] = pi1_ref_idx0[u1_blk_no];
  390|   113k|                        s_mvPred.i1_ref_frame[1] = pi1_ref_idx1[u1_blk_no];
  391|   113k|                        u1_pred_mode = ps_part->u1_pred_mode;
  392|   113k|                        u1_wd = ps_part->u1_partwidth;
  393|   113k|                        u1_ht = ps_part->u1_partheight;
  394|       |
  395|   113k|                        if(1 != ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (395:28): [True: 51.5k, False: 61.7k]
  ------------------
  396|  51.5k|                        {
  397|  51.5k|                            u1_lx_start = 0;
  398|  51.5k|                            u1_lxend = 2;
  399|  51.5k|                            if(PRED_L0 == u1_pred_mode)
  ------------------
  |  |  483|  51.5k|#define PRED_L0   1
  ------------------
  |  Branch (399:32): [True: 21.7k, False: 29.8k]
  ------------------
  400|  21.7k|                            {
  401|  21.7k|                                s_mvPred.i2_mv[2] = 0;
  402|  21.7k|                                s_mvPred.i2_mv[3] = 0;
  403|  21.7k|                                if(0 == (u1_motion_pred_flag_l0 & (1 << u1_blk_no)))
  ------------------
  |  Branch (403:36): [True: 13.5k, False: 8.24k]
  ------------------
  404|  13.5k|                                {
  405|  13.5k|                                    u1_lxend = 1;
  406|  13.5k|                                }
  407|  8.24k|                                else
  408|  8.24k|                                {
  409|  8.24k|                                    u1_lxend = 0;
  410|  8.24k|                                }
  411|  21.7k|                            }
  412|  29.8k|                            else if(PRED_L1 == u1_pred_mode)
  ------------------
  |  |  484|  29.8k|#define PRED_L1   2
  ------------------
  |  Branch (412:37): [True: 20.7k, False: 9.07k]
  ------------------
  413|  20.7k|                            {
  414|  20.7k|                                s_mvPred.i2_mv[0] = 0;
  415|  20.7k|                                s_mvPred.i2_mv[1] = 0;
  416|  20.7k|                                if(0 == (u1_motion_pred_flag_l1 & (1 << u1_blk_no)))
  ------------------
  |  Branch (416:36): [True: 16.5k, False: 4.20k]
  ------------------
  417|  16.5k|                                {
  418|  16.5k|                                    u1_lx_start = 1;
  419|  16.5k|                                }
  420|  4.20k|                                else
  421|  4.20k|                                {
  422|  4.20k|                                    u1_lx_start = 2;
  423|  4.20k|                                }
  424|  20.7k|                            }
  425|  9.07k|                            else  // Bi Pred
  426|  9.07k|                            {
  427|  9.07k|                                if(0 == (u1_motion_pred_flag_l0 & (1 << u1_blk_no)))
  ------------------
  |  Branch (427:36): [True: 7.54k, False: 1.52k]
  ------------------
  428|  7.54k|                                {
  429|  7.54k|                                    u1_lxend = 1;
  430|  7.54k|                                }
  431|  9.07k|                                if(0 == (u1_motion_pred_flag_l1 & (1 << u1_blk_no)))
  ------------------
  |  Branch (431:36): [True: 2.50k, False: 6.56k]
  ------------------
  432|  2.50k|                                {
  433|  2.50k|                                    u1_lx_start = 1;
  434|  2.50k|                                }
  435|  9.07k|                                if((0 != (u1_motion_pred_flag_l0 & (1 << u1_blk_no))) &&
  ------------------
  |  Branch (435:36): [True: 1.52k, False: 7.54k]
  ------------------
  436|  1.52k|                                   (0 != (u1_motion_pred_flag_l1 & (1 << u1_blk_no))))
  ------------------
  |  Branch (436:36): [True: 1.14k, False: 382]
  ------------------
  437|  1.14k|                                {
  438|  1.14k|                                    u1_lx_start = 0;
  439|  1.14k|                                    u1_lxend = 0;
  440|  1.14k|                                }
  441|  9.07k|                                if((0 == (u1_motion_pred_flag_l0 & (1 << u1_blk_no))) &&
  ------------------
  |  Branch (441:36): [True: 7.54k, False: 1.52k]
  ------------------
  442|  7.54k|                                   (0 == (u1_motion_pred_flag_l1 & (1 << u1_blk_no))))
  ------------------
  |  Branch (442:36): [True: 2.12k, False: 5.42k]
  ------------------
  443|  2.12k|                                {
  444|  2.12k|                                    u1_lx_start = 0;
  445|  2.12k|                                    u1_lxend = 2;
  446|  2.12k|                                }
  447|  9.07k|                            }
  448|  51.5k|                            ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop,
  449|  51.5k|                                              &s_mvPred, u1_sub_mb_num, u1_wd, u1_lx_start,
  450|  51.5k|                                              u1_lxend, ps_cur_mb_info->u1_mb_mc_mode);
  451|  51.5k|                        }
  452|       |
  453|       |                        /* for generic case based on pred mode derived / signalled */
  454|   113k|                        u1_lx_start = 0;
  455|   113k|                        u1_lxend = 2;
  456|   113k|                        if(PRED_L0 == u1_pred_mode)
  ------------------
  |  |  483|   113k|#define PRED_L0   1
  ------------------
  |  Branch (456:28): [True: 60.3k, False: 52.9k]
  ------------------
  457|  60.3k|                        {
  458|  60.3k|                            s_mvPred.i2_mv[2] = 0;
  459|  60.3k|                            s_mvPred.i2_mv[3] = 0;
  460|  60.3k|                            u1_lxend = 1;
  461|  60.3k|                        }
  462|   113k|                        if(PRED_L1 == u1_pred_mode)
  ------------------
  |  |  484|   113k|#define PRED_L1   2
  ------------------
  |  Branch (462:28): [True: 29.2k, False: 84.0k]
  ------------------
  463|  29.2k|                        {
  464|  29.2k|                            s_mvPred.i2_mv[0] = 0;
  465|  29.2k|                            s_mvPred.i2_mv[1] = 0;
  466|  29.2k|                            u1_lx_start = 1;
  467|  29.2k|                        }
  468|       |
  469|       |                        /**********************************************************/
  470|       |                        /* Loop on number of predictors, 1 Each for Forw Backw    */
  471|       |                        /* Loop 2 times for BiDirect mode                         */
  472|       |                        /**********************************************************/
  473|   249k|                        for(u1_lx = u1_lx_start; u1_lx < u1_lxend; u1_lx++)
  ------------------
  |  Branch (473:50): [True: 136k, False: 112k]
  ------------------
  474|   136k|                        {
  475|   136k|                            UWORD8 u1_motion_pred_flag =
  476|   136k|                                u1_lx ? u1_motion_pred_flag_l1 : u1_motion_pred_flag_l0;
  ------------------
  |  Branch (476:33): [True: 52.9k, False: 84.0k]
  ------------------
  477|       |
  478|   136k|                            if((0 != (u1_motion_pred_flag & (1 << u1_blk_no))) ||
  ------------------
  |  Branch (478:32): [True: 20.5k, False: 116k]
  ------------------
  479|   116k|                               (ps_svc_cur_mb_info->u1_base_mode_flag))
  ------------------
  |  Branch (479:32): [True: 76.3k, False: 40.1k]
  ------------------
  480|  96.8k|                            {
  481|  96.8k|                                isvcd_retrive_infer_mode_mv(ps_svc_lyr_dec, &s_mvPred, u1_lx,
  482|  96.8k|                                                            u1_sub_mb_num);
  483|  96.8k|                            }
  484|       |                            /********************************************************/
  485|       |                            /* Predict Mv                                           */
  486|       |                            /* Add Mv Residuals and store back                      */
  487|       |                            /********************************************************/
  488|   136k|                            u1_tmp_lx = (u1_lx << 1);
  489|   136k|                            i1_ref_idx = s_mvPred.i1_ref_frame[u1_lx];
  490|       |                            /********************************************************************/
  491|       |                            /* If reference index is inferred from the base layer and it is     */
  492|       |                            /* exceeding the number of active reference in the current layer.   */
  493|       |                            /* Then reference index is clipped to the max in the current layer  */
  494|       |                            /********************************************************************/
  495|   136k|                            if(ps_svc_cur_mb_info->u1_base_mode_flag == 1)
  ------------------
  |  Branch (495:32): [True: 76.3k, False: 60.6k]
  ------------------
  496|  76.3k|                            {
  497|  76.3k|                                if(i1_ref_idx > (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[u1_lx] - 1))
  ------------------
  |  Branch (497:36): [True: 10.6k, False: 65.6k]
  ------------------
  498|  10.6k|                                {
  499|  10.6k|                                    i1_ref_idx = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[u1_lx] - 1;
  500|  10.6k|                                }
  501|  76.3k|                            }
  502|   136k|                            if(0 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (502:32): [True: 60.6k, False: 76.3k]
  ------------------
  503|  60.6k|                            {
  504|  60.6k|                                i2_mv_x = ps_mv_nmb->i2_mv[u1_tmp_lx];
  505|  60.6k|                                i2_mv_y = ps_mv_nmb->i2_mv[u1_tmp_lx + 1];
  506|       |
  507|  60.6k|                                i2_mv_x += s_mvPred.i2_mv[u1_tmp_lx];
  508|  60.6k|                                i2_mv_y += s_mvPred.i2_mv[u1_tmp_lx + 1];
  509|       |
  510|  60.6k|                                s_mvPred.i2_mv[u1_tmp_lx] = i2_mv_x;
  511|  60.6k|                                s_mvPred.i2_mv[u1_tmp_lx + 1] = i2_mv_y;
  512|  60.6k|                            }
  513|  76.3k|                            else
  514|  76.3k|                            {
  515|  76.3k|                                i2_mv_x = s_mvPred.i2_mv[u1_tmp_lx];
  516|  76.3k|                                i2_mv_y = s_mvPred.i2_mv[u1_tmp_lx + 1];
  517|  76.3k|                            }
  518|       |
  519|       |                            /********************************************************/
  520|       |                            /* Transfer setup call                                  */
  521|       |                            /* convert RefIdx if it is MbAff                        */
  522|       |                            /* Pass Weight Offset and refFrame                      */
  523|       |                            /********************************************************/
  524|   136k|                            i1_ref_idx1 = i1_ref_idx >> u1_scale_ref;
  525|       |
  526|   136k|                            if(-1 == i1_ref_idx1) return NOT_OK;
  ------------------
  |  |  116|    317|#define NOT_OK    -1
  ------------------
  |  Branch (526:32): [True: 317, False: 136k]
  ------------------
  527|   136k|                            if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb))
  ------------------
  |  Branch (527:32): [True: 0, False: 136k]
  |  Branch (527:48): [True: 0, False: 0]
  ------------------
  528|      0|                                i1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  529|   136k|                            ps_ref_frame = ps_dec->ps_ref_pic_buf_lx[u1_lx][i1_ref_idx1];
  530|       |
  531|       |                            /* Storing Colocated-Zero u4_flag */
  532|   136k|                            if(u1_lx == u1_lx_start)
  ------------------
  |  Branch (532:32): [True: 113k, False: 23.6k]
  ------------------
  533|   113k|                            {
  534|       |                                /* Fill colocated info in MvPred structure */
  535|   113k|                                s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
  536|   113k|                                s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
  537|       |
  538|       |                                /* Calculating colocated zero information */
  539|   113k|                                u1_colz =
  540|   113k|                                    (u1_field << 1) | ((i1_ref_idx == 0) && (ABS(i2_mv_x) <= 1) &&
  ------------------
  |  |  100|  97.1k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 19.2k, False: 77.8k]
  |  |  ------------------
  ------------------
  |  Branch (540:56): [True: 97.1k, False: 15.9k]
  |  Branch (540:77): [True: 60.9k, False: 36.1k]
  ------------------
  541|  60.9k|                                                       (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|  60.9k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 5.09k, False: 55.8k]
  |  |  ------------------
  ------------------
  |  Branch (541:56): [True: 50.6k, False: 10.3k]
  ------------------
  542|   113k|                                u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no];
  543|   113k|                            }
  544|       |
  545|   136k|                            pu4_wt_offst = ppu4_wt_ofst[u1_blk_no];
  546|   136k|                            {
  547|   136k|                                pred_info_pkd_t *ps_pred_pkd;
  548|   136k|                                WORD16 i2_mv[2];
  549|       |
  550|   136k|                                i2_mv[0] = i2_mv_x;
  551|   136k|                                i2_mv[1] = i2_mv_y;
  552|       |
  553|   136k|                                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  554|   136k|                                ih264d_fill_pred_info(i2_mv, u1_wd, u1_ht, u1_sub_mb_num,
  555|   136k|                                                      u1_pred_mode, ps_pred_pkd,
  556|   136k|                                                      ps_ref_frame->u1_pic_buf_id, i1_ref_idx,
  557|   136k|                                                      pu4_wt_offst, ps_ref_frame->u1_pic_type);
  558|   136k|                                ps_dec->u4_pred_info_pkd_idx++;
  559|   136k|                                ps_cur_mb_info->u1_num_pred_parts++;
  560|   136k|                            }
  561|   136k|                        }
  562|   112k|                        if(ps_mv_nmb)
  ------------------
  |  Branch (562:28): [True: 112k, False: 0]
  ------------------
  563|   112k|                        {
  564|   112k|                            ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb, u1_sub_mb_num, u1_colz,
  565|   112k|                                               u1_ht, u1_wd);
  566|   112k|                        }
  567|      0|                        else
  568|      0|                        {
  569|      0|                            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  570|      0|                        }
  571|   112k|                    }
  572|   192k|                }
  573|       |                /* to take care of 16 parttitions increment for base mode flag case*/
  574|   135k|                if(1 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (574:20): [True: 35.5k, False: 100k]
  ------------------
  575|  35.5k|                {
  576|  35.5k|                    ps_part += (MAX_NUM_MB_PART - u1_num_part);
  ------------------
  |  |   62|  35.5k|#define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   59|  35.5k|#define NUM_MB_PARTS 4
  |  |  ------------------
  |  |               #define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   60|  35.5k|#define NUM_SUB_MB_PARTS 4
  |  |  ------------------
  ------------------
  577|  35.5k|                }
  578|   135k|            }
  579|  3.20k|            else
  580|  3.20k|            {
  581|       |                /* Set zero values in case of Intra Mbs */
  582|  3.20k|                mv_pred_t s_mvPred = {{0, 0, 0, 0}, {-1, -1}, 0, 0};
  583|       |                /* to take care of 16 parttitions increment for base mode flag case*/
  584|  3.20k|                if(1 != ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (584:20): [True: 0, False: 3.20k]
  ------------------
  585|      0|                {
  586|      0|                    return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  587|      0|                }
  588|       |
  589|  3.20k|                ps_cur_deblk_mb->u1_mb_type |= D_INTRA_IBL;
  ------------------
  |  |   72|  3.20k|#define D_INTRA_IBL 16
  ------------------
  590|  3.20k|                if((ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER) &&
  ------------------
  |  |  110|  3.20k|#define TARGET_LAYER 2
  ------------------
  |  Branch (590:20): [True: 0, False: 3.20k]
  ------------------
  591|      0|                   (DBLK_ENABLED == ps_dec->ps_cur_slice->u1_disable_dblk_filter_idc))
  ------------------
  |  |  549|      0|#define DBLK_ENABLED                  0
  ------------------
  |  Branch (591:20): [True: 0, False: 0]
  ------------------
  592|      0|                {
  593|      0|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
  ------------------
  |  |   69|      0|#define MB_ENABLE_FILTERING           0x00
  ------------------
  594|      0|                }
  595|       |
  596|  3.20k|                ps_part += (MAX_NUM_MB_PART);
  ------------------
  |  |   62|  3.20k|#define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   59|  3.20k|#define NUM_MB_PARTS 4
  |  |  ------------------
  |  |               #define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   60|  3.20k|#define NUM_SUB_MB_PARTS 4
  |  |  ------------------
  ------------------
  597|       |                /* Storing colocated zero information */
  598|  3.20k|                if(ps_mv_nmb_start)
  ------------------
  |  Branch (598:20): [True: 3.20k, False: 0]
  ------------------
  599|  3.20k|                {
  600|  3.20k|                    ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
  601|  3.20k|                                       (UWORD8) (u1_field << 1), 4, 4);
  602|  3.20k|                }
  603|      0|                else
  604|      0|                {
  605|      0|                    return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  606|      0|                }
  607|  3.20k|            }
  608|   139k|        }
  609|    492|        else
  610|    492|        {
  611|       |            /* Set zero values in case of Intra Mbs */
  612|    492|            mv_pred_t s_mvPred = {{0, 0, 0, 0}, {-1, -1}, 0, 0};
  613|       |            /* Storing colocated zero information */
  614|    492|            if(ps_mv_nmb_start)
  ------------------
  |  Branch (614:16): [True: 492, False: 0]
  ------------------
  615|    492|            {
  616|    492|                ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0, (UWORD8) (u1_field << 1),
  617|    492|                                   4, 4);
  618|    492|            }
  619|      0|            else
  620|      0|            {
  621|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  622|      0|            }
  623|    492|        }
  624|   139k|    }
  625|       |
  626|  41.8k|    return OK;
  ------------------
  |  |  114|  41.8k|#define OK        0
  ------------------
  627|  42.1k|}
isvcd_parse_bmb_cavlc:
  643|  24.9k|{
  644|  24.9k|    UWORD32 u4_cbp = 0;
  645|  24.9k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  646|  24.9k|    deblk_mb_t *ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u4_mb_num;
  647|  24.9k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
  648|  24.9k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  649|  24.9k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  650|  24.9k|    const UWORD8 *puc_mb_mc_mode = (const UWORD8 *) gau1_ih264d_mb_mc_mode;
  651|  24.9k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  652|  24.9k|    WORD32 ret;
  653|  24.9k|    UWORD8 u1_Bdirect_tranform_read = 1;
  654|  24.9k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
  655|       |
  656|  24.9k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
  657|  24.9k|    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1;
  658|  24.9k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
  659|  24.9k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  660|  24.9k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  661|  24.9k|    ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type];
  662|  24.9k|    ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
  ------------------
  |  |  384|  24.9k|#define D_B_SLICE         4
  ------------------
  663|  24.9k|    if(u1_mb_type != B_DIRECT)
  ------------------
  |  |  482|  24.9k|#define B_DIRECT  0
  ------------------
  |  Branch (663:8): [True: 14.9k, False: 10.0k]
  ------------------
  664|  14.9k|    {
  665|  14.9k|        ret = isvcd_parse_bmb_non_direct_cavlc(ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info,
  666|  14.9k|                                               u4_mb_num, u4_num_mbsNby2);
  667|  14.9k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  14.9k|#define OK        0
  ------------------
  |  Branch (667:12): [True: 423, False: 14.4k]
  ------------------
  668|  14.9k|    }
  669|  10.0k|    else
  670|  10.0k|    {
  671|       |        /************ STORING PARTITION INFO ***********/
  672|  10.0k|        parse_part_params_t *ps_part_info;
  673|  10.0k|        ps_part_info = ps_dec->ps_part;
  674|  10.0k|        ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  10.0k|#define PART_DIRECT_16x16              2
  ------------------
  675|  10.0k|        ps_part_info->u1_sub_mb_num = 0;
  676|  10.0k|        ps_dec->ps_part++;
  677|       |        /* check whether transform8x8 u4_flag to be read or not */
  678|  10.0k|        u1_Bdirect_tranform_read = ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
  679|  10.0k|    }
  680|       |
  681|  24.5k|    if(ps_svc_slice_params->u1_adaptive_residual_prediction_flag &&
  ------------------
  |  Branch (681:8): [True: 14.7k, False: 9.74k]
  ------------------
  682|  14.7k|       ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (682:8): [True: 14.0k, False: 693]
  ------------------
  683|  14.0k|    {
  684|  14.0k|        ps_svc_cur_mb_info->u1_residual_prediction_flag = ih264d_get_bit_h264(ps_bitstrm);
  685|  14.0k|        COPYTHECONTEXT("SVC ext: u1_residual_prediction_flag",
  686|  14.0k|                       ps_svc_cur_mb_info->u1_residual_prediction_flag);
  687|  14.0k|    }
  688|  10.4k|    else
  689|  10.4k|    {
  690|       |        /*residual flag inference code */
  691|  10.4k|        if(1 == ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (691:12): [True: 9.69k, False: 750]
  ------------------
  692|  9.69k|           1 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (692:12): [True: 0, False: 9.69k]
  ------------------
  693|      0|        {
  694|      0|            ps_svc_cur_mb_info->u1_residual_prediction_flag =
  695|      0|                ps_svc_slice_params->u1_default_residual_prediction_flag;
  696|      0|        }
  697|  10.4k|        else
  698|  10.4k|        {
  699|  10.4k|            ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
  700|  10.4k|        }
  701|  10.4k|    }
  702|       |
  703|  24.5k|    if(ps_svc_slice_params->u1_scan_idx_end >= ps_svc_slice_params->u1_scan_idx_start)
  ------------------
  |  Branch (703:8): [True: 24.5k, False: 0]
  ------------------
  704|  24.5k|    {
  705|       |        /* Read the Coded block pattern */
  706|  24.5k|        const UWORD8 *puc_CbpInter = gau1_ih264d_cbp_inter;
  707|       |        // Inlined ih264d_uev
  708|  24.5k|        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  709|  24.5k|        UWORD32 u4_word, u4_ldz;
  710|       |
  711|       |        /***************************************************************/
  712|       |        /* Find leading zeros in next 32 bits                          */
  713|       |        /***************************************************************/
  714|  24.5k|        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  24.5k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  24.5k|{                                                                           \
  |  |  152|  24.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  24.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  24.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  24.5k|                                                                            \
  |  |  156|  24.5k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  24.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 23.8k, False: 656]
  |  |  ------------------
  |  |  158|  24.5k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  23.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  24.5k|}
  ------------------
  715|  24.5k|        u4_ldz = CLZ(u4_word);
  716|       |        /* Flush the ps_bitstrm */
  717|  24.5k|        u4_bitstream_offset += (u4_ldz + 1);
  718|       |        /* Read the suffix from the ps_bitstrm */
  719|  24.5k|        u4_word = 0;
  720|  24.5k|        if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  8.70k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  8.70k|{                                                                           \
  |  |  122|  8.70k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  8.70k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  8.70k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  8.70k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  8.70k|                                                                            \
  |  |  127|  8.70k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 8.22k, False: 484]
  |  |  ------------------
  |  |  128|  8.70k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  8.22k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  8.70k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  8.70k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  8.70k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  8.70k|}                                                                           \
  ------------------
  |  Branch (720:12): [True: 8.70k, False: 15.8k]
  ------------------
  721|  24.5k|        *pu4_bitstrm_ofst = u4_bitstream_offset;
  722|  24.5k|        u4_cbp = ((1 << u4_ldz) + u4_word - 1);
  723|       |        // Inlined ih264d_uev
  724|  24.5k|        if(u4_cbp > 47) return ERROR_CBP;
  ------------------
  |  Branch (724:12): [True: 521, False: 24.0k]
  ------------------
  725|  24.0k|        u4_cbp = puc_CbpInter[u4_cbp];
  726|       |
  727|  24.0k|        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & (0xf)) &&
  ------------------
  |  Branch (727:12): [True: 7.61k, False: 16.4k]
  |  Branch (727:64): [True: 2.00k, False: 5.60k]
  ------------------
  728|  2.00k|           (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag) && (u1_Bdirect_tranform_read))
  ------------------
  |  Branch (728:12): [True: 1.37k, False: 636]
  |  Branch (728:73): [True: 879, False: 494]
  ------------------
  729|    879|        {
  730|    879|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
  731|    879|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  732|    879|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  733|    879|        }
  734|       |
  735|  24.0k|        COPYTHECONTEXT("coded_block_pattern", u4_cbp);
  736|  24.0k|        ps_cur_mb_info->u1_cbp = u4_cbp;
  737|  24.0k|    }
  738|       |
  739|       |    /* Read mb_qp_delta */
  740|  24.0k|    if(u4_cbp)
  ------------------
  |  Branch (740:8): [True: 8.18k, False: 15.8k]
  ------------------
  741|  8.18k|    {
  742|  8.18k|        WORD32 i_temp;
  743|       |        // inlining ih264d_sev
  744|  8.18k|        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  745|  8.18k|        UWORD32 u4_word, u4_ldz, u4_abs_val;
  746|       |
  747|       |        /***************************************************************/
  748|       |        /* Find leading zeros in next 32 bits                          */
  749|       |        /***************************************************************/
  750|  8.18k|        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  8.18k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  8.18k|{                                                                           \
  |  |  152|  8.18k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  8.18k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  8.18k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  8.18k|                                                                            \
  |  |  156|  8.18k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  8.18k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 8.02k, False: 159]
  |  |  ------------------
  |  |  158|  8.18k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  8.02k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  8.18k|}
  ------------------
  751|  8.18k|        u4_ldz = CLZ(u4_word);
  752|       |
  753|       |        /* Flush the ps_bitstrm */
  754|  8.18k|        u4_bitstream_offset += (u4_ldz + 1);
  755|       |
  756|       |        /* Read the suffix from the ps_bitstrm */
  757|  8.18k|        u4_word = 0;
  758|  8.18k|        if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  3.18k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  3.18k|{                                                                           \
  |  |  122|  3.18k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  3.18k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  3.18k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  3.18k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  3.18k|                                                                            \
  |  |  127|  3.18k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.88k, False: 296]
  |  |  ------------------
  |  |  128|  3.18k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.88k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  3.18k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  3.18k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  3.18k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  3.18k|}                                                                           \
  ------------------
  |  Branch (758:12): [True: 3.18k, False: 5.00k]
  ------------------
  759|       |
  760|  8.18k|        *pu4_bitstrm_ofst = u4_bitstream_offset;
  761|  8.18k|        u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  762|       |
  763|  8.18k|        if(u4_word & 0x1)
  ------------------
  |  Branch (763:12): [True: 1.35k, False: 6.82k]
  ------------------
  764|  1.35k|            i_temp = (-(WORD32) u4_abs_val);
  765|  6.82k|        else
  766|  6.82k|            i_temp = (u4_abs_val);
  767|       |
  768|  8.18k|        if(i_temp < -26 || i_temp > 25) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  Branch (768:12): [True: 107, False: 8.08k]
  |  Branch (768:28): [True: 173, False: 7.90k]
  ------------------
  769|       |        // inlinined ih264d_sev
  770|  7.90k|        COPYTHECONTEXT("mb_qp_delta", i_temp);
  771|  7.90k|        if(i_temp)
  ------------------
  |  Branch (771:12): [True: 2.90k, False: 5.00k]
  ------------------
  772|  2.90k|        {
  773|  2.90k|            ret = ih264d_update_qp(ps_dec, (WORD8) i_temp);
  774|  2.90k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  2.90k|#define OK        0
  ------------------
  |  Branch (774:16): [True: 0, False: 2.90k]
  ------------------
  775|  2.90k|        }
  776|       |
  777|       |        /*SVC residual from start to end idx*/
  778|  7.90k|        ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
  779|  7.90k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  7.90k|#define OK        0
  ------------------
  |  Branch (779:12): [True: 185, False: 7.72k]
  ------------------
  780|  7.72k|        if(EXCEED_OFFSET(ps_bitstrm)) return ERROR_EOB_TERMINATE_T;
  ------------------
  |  |   93|  7.72k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 153, False: 7.56k]
  |  |  ------------------
  ------------------
  781|  7.72k|    }
  782|  15.8k|    else
  783|  15.8k|    {
  784|  15.8k|        ps_dec->i1_prev_mb_qp_delta = 0;
  785|  15.8k|        ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|  15.8k|#define CAVLC  0
  ------------------
  786|  15.8k|    }
  787|       |
  788|  23.4k|    return OK;
  ------------------
  |  |  114|  23.4k|#define OK        0
  ------------------
  789|  24.0k|}
isvcd_parse_bmb_non_direct_cabac:
  808|  13.8k|{
  809|       |    /* Loads from ps_dec */
  810|  13.8k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  811|  13.8k|    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
  812|  13.8k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
  813|  13.8k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
  814|  13.8k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data + u4_num_mbsNby2;
  815|       |
  816|       |    /* table pointer loads */
  817|  13.8k|    const UWORD8 *pu1_sub_mb_pred_modes = (UWORD8 *) (gau1_ih264d_submb_pred_modes) + 4;
  818|  13.8k|    const UWORD8(*pu1_mb_pred_modes)[32] = (const UWORD8(*)[32]) gau1_ih264d_mb_pred_modes;
  819|  13.8k|    const UWORD8 *pu1_num_mb_part = (const UWORD8 *) gau1_ih264d_num_mb_part;
  820|  13.8k|    const UWORD8 *pu1_sub_mb_mc_mode = (UWORD8 *) (gau1_ih264d_submb_mc_mode) + 4;
  821|       |
  822|  13.8k|    const UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
  823|  13.8k|    UWORD8 *pu1_col_info = ps_parse_mb_data->u1_col_info;
  824|  13.8k|    WORD8 *pi1_ref_idx_l0 = &ps_parse_mb_data->i1_ref_idx[0][0];
  825|  13.8k|    WORD8 *pi1_ref_idx_l1 = &ps_parse_mb_data->i1_ref_idx[1][0];
  826|  13.8k|    UWORD8 u1_dec_ref_l0, u1_dec_ref_l1;
  827|       |
  828|  13.8k|    UWORD8 u1_num_mb_part, u1_mb_mc_mode, u1_sub_mb, u1_mbpred_mode = 5 + u1_mb_type;
  829|  13.8k|    UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0;
  830|  13.8k|    WORD32 ret;
  831|  13.8k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
  832|  13.8k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
  833|       |
  834|  13.8k|    p_curr_ctxt->u1_mb_type = CAB_NON_BD16x16;
  ------------------
  |  |  397|  13.8k|#define CAB_NON_BD16x16   0x05 /* 0000 0101 */
  ------------------
  835|  13.8k|    u1_sub_mb = !(u1_mb_type ^ B_8x8);
  ------------------
  |  |  480|  13.8k|#define B_8x8    22
  ------------------
  836|       |
  837|  13.8k|    {
  838|  13.8k|        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  839|  13.8k|        UWORD8 *pu1_num_ref_idx_lx_active = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
  840|  13.8k|        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  841|  13.8k|        UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
  842|  13.8k|        u1_dec_ref_l0 = (pu1_num_ref_idx_lx_active[0] << u1_mbaff_field) - 1;
  843|  13.8k|        u1_dec_ref_l1 = (pu1_num_ref_idx_lx_active[1] << u1_mbaff_field) - 1;
  844|  13.8k|    }
  845|       |
  846|  13.8k|    if(u1_sub_mb)
  ------------------
  |  Branch (846:8): [True: 3.75k, False: 10.0k]
  ------------------
  847|  3.75k|    {
  848|  3.75k|        const UWORD8 u1_colz = ((PRED_8x8) << 6);
  ------------------
  |  |  453|  3.75k|#define PRED_8x8    3
  ------------------
  849|  3.75k|        UWORD8 uc_i;
  850|  3.75k|        u1_mb_mc_mode = 0;
  851|  3.75k|        u1_num_mb_part = 4;
  852|       |        /* Reading the subMB type */
  853|  18.7k|        for(uc_i = 0; uc_i < 4; uc_i++)
  ------------------
  |  Branch (853:23): [True: 15.0k, False: 3.75k]
  ------------------
  854|  15.0k|        {
  855|  15.0k|            UWORD8 u1_sub_mb_mode, u1_subMbPredModes;
  856|  15.0k|            u1_sub_mb_mode =
  857|  15.0k|                ih264d_parse_submb_type_cabac(1, ps_cab_env, ps_bitstrm, ps_dec->p_sub_mb_type_t);
  858|       |
  859|  15.0k|            if(u1_sub_mb_mode > 12) return ERROR_SUB_MB_TYPE;
  ------------------
  |  Branch (859:16): [True: 0, False: 15.0k]
  ------------------
  860|       |
  861|  15.0k|            u1_subMbPredModes = pu1_sub_mb_pred_modes[u1_sub_mb_mode];
  862|  15.0k|            u4_mb_mc_mode = (u4_mb_mc_mode << 8) | pu1_sub_mb_mc_mode[u1_sub_mb_mode];
  863|  15.0k|            u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredModes;
  864|  15.0k|            *pi1_ref_idx_l0++ = (u1_subMbPredModes & PRED_L0) ? u1_dec_ref_l0 : -1;
  ------------------
  |  |  483|  15.0k|#define PRED_L0   1
  ------------------
  |  Branch (864:33): [True: 8.49k, False: 6.51k]
  ------------------
  865|  15.0k|            *pi1_ref_idx_l1++ = (u1_subMbPredModes & PRED_L1) ? u1_dec_ref_l1 : -1;
  ------------------
  |  |  484|  15.0k|#define PRED_L1   2
  ------------------
  |  Branch (865:33): [True: 10.7k, False: 4.28k]
  ------------------
  866|  15.0k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
  867|       |            /* Storing collocated Mb and SubMb mode information */
  868|  15.0k|            *pu1_col_info++ = (u1_colz | (pu1_sub_mb_mc_mode[u1_sub_mb_mode] << 4));
  869|  15.0k|            if(u1_sub_mb_mode != B_DIRECT_8x8)
  ------------------
  |  |  465|  15.0k|#define B_DIRECT_8x8    0
  ------------------
  |  Branch (869:16): [True: 12.5k, False: 2.49k]
  ------------------
  870|  12.5k|            {
  871|  12.5k|                if(u1_sub_mb_mode > B_BI_8x8)
  ------------------
  |  |  468|  12.5k|#define B_BI_8x8        3
  ------------------
  |  Branch (871:20): [True: 3.81k, False: 8.69k]
  ------------------
  872|  3.81k|                {
  873|  3.81k|                    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
  874|  3.81k|                }
  875|  12.5k|            }
  876|  2.49k|            else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
  ------------------
  |  Branch (876:21): [True: 1.12k, False: 1.37k]
  ------------------
  877|  1.12k|            {
  878|  1.12k|                ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
  879|  1.12k|            }
  880|  15.0k|        }
  881|  3.75k|        pi1_ref_idx_l0 -= 4;
  882|  3.75k|        pi1_ref_idx_l1 -= 4;
  883|  3.75k|    }
  884|  10.0k|    else
  885|  10.0k|    {
  886|  10.0k|        UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mbpred_mode];
  887|  10.0k|        UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mbpred_mode];
  888|  10.0k|        u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
  889|  10.0k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];
  890|       |        /* Storing collocated Mb and SubMb mode information */
  891|  10.0k|        *pu1_col_info++ = (u1_mb_mc_mode << 6);
  892|  10.0k|        if(u1_mb_mc_mode) *pu1_col_info++ = (u1_mb_mc_mode << 6);
  ------------------
  |  Branch (892:12): [True: 3.50k, False: 6.57k]
  ------------------
  893|  10.0k|        u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
  894|  10.0k|        u4_mb_mc_mode <<= 16;
  895|  10.0k|        u4_mb_pred_mode = ((u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1) << 16;
  896|       |
  897|  10.0k|        *pi1_ref_idx_l0++ = (u1_mb_pred_mode_part0 & PRED_L0) ? u1_dec_ref_l0 : -1;
  ------------------
  |  |  483|  10.0k|#define PRED_L0   1
  ------------------
  |  Branch (897:29): [True: 3.22k, False: 6.84k]
  ------------------
  898|  10.0k|        *pi1_ref_idx_l0-- = (u1_mb_pred_mode_part1 & PRED_L0) ? u1_dec_ref_l0 : -1;
  ------------------
  |  |  483|  10.0k|#define PRED_L0   1
  ------------------
  |  Branch (898:29): [True: 9.35k, False: 721]
  ------------------
  899|  10.0k|        *pi1_ref_idx_l1++ = (u1_mb_pred_mode_part0 & PRED_L1) ? u1_dec_ref_l1 : -1;
  ------------------
  |  |  484|  10.0k|#define PRED_L1   2
  ------------------
  |  Branch (899:29): [True: 7.57k, False: 2.49k]
  ------------------
  900|  10.0k|        *pi1_ref_idx_l1-- = (u1_mb_pred_mode_part1 & PRED_L1) ? u1_dec_ref_l1 : -1;
  ------------------
  |  |  484|  10.0k|#define PRED_L1   2
  ------------------
  |  Branch (900:29): [True: 8.21k, False: 1.86k]
  ------------------
  901|  10.0k|    }
  902|       |
  903|       |    /*Adding SVC extension code to get Motion Prediction Flags*/
  904|  13.8k|    {
  905|  13.8k|        UWORD8 uc_i, u1_mvp_l1, u1_mvp_l0;
  906|  13.8k|        UWORD8 *pu1_motion_pred_flag_l0;
  907|  13.8k|        UWORD8 *pu1_motion_pred_flag_l1;
  908|  13.8k|        WORD8 *pi1_ref_idx;
  909|  13.8k|        WORD8 *pi1_lft_cxt = ps_dec->pi1_left_ref_idx_ctxt_inc;
  910|  13.8k|        WORD8 *pi1_top_cxt = p_curr_ctxt->i1_ref_idx;
  911|       |
  912|  13.8k|        pu1_motion_pred_flag_l0 = &ps_svc_cur_mb_info->au1_motion_pred_flag[0];
  913|  13.8k|        *pu1_motion_pred_flag_l0 = 0;
  914|  13.8k|        pu1_motion_pred_flag_l1 = &ps_svc_cur_mb_info->au1_motion_pred_flag[1];
  915|  13.8k|        *pu1_motion_pred_flag_l1 = 0;
  916|       |
  917|  13.8k|        if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (917:12): [True: 13.5k, False: 244]
  ------------------
  918|  13.5k|           ps_svc_slice_params->u1_adaptive_motion_prediction_flag)
  ------------------
  |  Branch (918:12): [True: 11.3k, False: 2.18k]
  ------------------
  919|  11.3k|        {
  920|  11.3k|            pi1_ref_idx = pi1_ref_idx_l0;
  921|  36.2k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++, pi1_ref_idx++)
  ------------------
  |  Branch (921:27): [True: 24.9k, False: 11.3k]
  ------------------
  922|  24.9k|            {
  923|  24.9k|                if(*pi1_ref_idx > 0)
  ------------------
  |  Branch (923:20): [True: 2.22k, False: 22.6k]
  ------------------
  924|  2.22k|                {
  925|  2.22k|                    u1_mvp_l0 = ih264d_decode_bin(0, ps_svc_lyr_dec->ps_motion_prediction_flag_l0,
  926|  2.22k|                                                  ps_bitstrm, ps_cab_env);
  927|  2.22k|                    COPYTHECONTEXT("SVC ext: u1_motion_prediction_flag_l0", u1_mvp_l0);
  928|       |
  929|  2.22k|                    *pu1_motion_pred_flag_l0 |= (u1_mvp_l0 << uc_i);
  930|  2.22k|                    if((u1_mvp_l0 & 0x01))
  ------------------
  |  Branch (930:24): [True: 933, False: 1.29k]
  ------------------
  931|    933|                    {
  932|    933|                        *pi1_ref_idx = -1;
  933|    933|                    }
  934|  2.22k|                }
  935|  24.9k|            }
  936|  11.3k|            pi1_ref_idx = pi1_ref_idx_l1;
  937|  36.2k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++, pi1_ref_idx++)
  ------------------
  |  Branch (937:27): [True: 24.9k, False: 11.3k]
  ------------------
  938|  24.9k|            {
  939|  24.9k|                if(*pi1_ref_idx > 0)
  ------------------
  |  Branch (939:20): [True: 14.0k, False: 10.8k]
  ------------------
  940|  14.0k|                {
  941|  14.0k|                    u1_mvp_l1 = ih264d_decode_bin(0, ps_svc_lyr_dec->ps_motion_prediction_flag_l1,
  942|  14.0k|                                                  ps_bitstrm, ps_cab_env);
  943|  14.0k|                    COPYTHECONTEXT("SVC ext: u1_motion_prediction_flag_l1", u1_mvp_l1);
  944|       |
  945|  14.0k|                    *pu1_motion_pred_flag_l1 |= (u1_mvp_l1 << uc_i);
  946|       |
  947|  14.0k|                    if((u1_mvp_l1 & 0x01))
  ------------------
  |  Branch (947:24): [True: 8.86k, False: 5.17k]
  ------------------
  948|  8.86k|                    {
  949|  8.86k|                        *pi1_ref_idx = -1;
  950|  8.86k|                    }
  951|  14.0k|                }
  952|  24.9k|            }
  953|  11.3k|        }
  954|  2.43k|        else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (954:17): [True: 2.18k, False: 244]
  ------------------
  955|  2.18k|        {
  956|  2.18k|            *pu1_motion_pred_flag_l0 = ps_svc_slice_params->u1_default_motion_prediction_flag
  ------------------
  |  Branch (956:40): [True: 483, False: 1.70k]
  ------------------
  957|  2.18k|                                           ? ((1 << u1_num_mb_part) - 1)
  958|  2.18k|                                           : 0;
  959|  2.18k|            *pu1_motion_pred_flag_l1 = ps_svc_slice_params->u1_default_motion_prediction_flag
  ------------------
  |  Branch (959:40): [True: 483, False: 1.70k]
  ------------------
  960|  2.18k|                                           ? ((1 << u1_num_mb_part) - 1)
  961|  2.18k|                                           : 0;
  962|  2.18k|            if(ps_svc_slice_params->u1_default_motion_prediction_flag)
  ------------------
  |  Branch (962:16): [True: 483, False: 1.70k]
  ------------------
  963|    483|            {
  964|    483|                pi1_ref_idx_l0[0] = -1;
  965|    483|                pi1_ref_idx_l0[1] = -1;
  966|    483|                pi1_ref_idx_l0[2] = -1;
  967|    483|                pi1_ref_idx_l0[3] = -1;
  968|       |
  969|    483|                pi1_ref_idx_l1[0] = -1;
  970|    483|                pi1_ref_idx_l1[1] = -1;
  971|    483|                pi1_ref_idx_l1[2] = -1;
  972|    483|                pi1_ref_idx_l1[3] = -1;
  973|    483|            }
  974|  2.18k|        }
  975|       |
  976|  13.8k|        ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0, u1_dec_ref_l0, u1_mb_mc_mode,
  977|  13.8k|                                         pi1_ref_idx_l0, pi1_lft_cxt, pi1_top_cxt, ps_cab_env,
  978|  13.8k|                                         ps_bitstrm, ps_dec->p_ref_idx_t);
  979|       |
  980|  13.8k|        if(ret != OK)
  ------------------
  |  |  114|  13.8k|#define OK        0
  ------------------
  |  Branch (980:12): [True: 107, False: 13.7k]
  ------------------
  981|    107|        {
  982|    107|            return ret;
  983|    107|        }
  984|  13.7k|        ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 2, u1_dec_ref_l1, u1_mb_mc_mode,
  985|  13.7k|                                         pi1_ref_idx_l1, pi1_lft_cxt, pi1_top_cxt, ps_cab_env,
  986|  13.7k|                                         ps_bitstrm, ps_dec->p_ref_idx_t);
  987|       |
  988|  13.7k|        if(ret != OK)
  ------------------
  |  |  114|  13.7k|#define OK        0
  ------------------
  |  Branch (988:12): [True: 74, False: 13.6k]
  ------------------
  989|     74|        {
  990|     74|            return ret;
  991|     74|        }
  992|  13.7k|    }
  993|       |    /* Read MotionVectors */
  994|  13.6k|    {
  995|  13.6k|        const UWORD8 *pu1_top_left_sub_mb_indx;
  996|  13.6k|        UWORD8 uc_j, uc_lx;
  997|  13.6k|        UWORD8 u1_mb_part_wd, u1_mb_part_ht;
  998|       |
  999|  13.6k|        const UWORD8 *pu1_sub_mb_indx_mod =
 1000|  13.6k|            (const UWORD8 *) gau1_ih264d_submb_indx_mod + (u1_sub_mb * 6);
 1001|  13.6k|        const UWORD8 *pu1_sub_mb_partw = (const UWORD8 *) gau1_ih264d_submb_partw;
 1002|  13.6k|        const UWORD8 *pu1_sub_mb_parth = (const UWORD8 *) gau1_ih264d_submb_parth;
 1003|  13.6k|        const UWORD8 *pu1_num_sub_mb_part = (const UWORD8 *) gau1_ih264d_num_submb_part;
 1004|  13.6k|        const UWORD8 *pu1_mb_partw = (const UWORD8 *) gau1_ih264d_mb_partw;
 1005|  13.6k|        const UWORD8 *pu1_mb_parth = (const UWORD8 *) gau1_ih264d_mb_parth;
 1006|       |
 1007|  13.6k|        UWORD8 u1_p_idx = 0;
 1008|  13.6k|        UWORD8 u1_num_submb_part;
 1009|  13.6k|        parse_part_params_t *ps_part;
 1010|  13.6k|        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
 1011|  13.6k|        ps_part = ps_dec->ps_part;
 1012|       |
 1013|       |        /* Default initialization for non subMb case */
 1014|  13.6k|        u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
 1015|  13.6k|        u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
 1016|  13.6k|        u1_num_submb_part = 1;
 1017|       |
 1018|       |        /* Decoding the MV for the subMB */
 1019|  40.9k|        for(uc_lx = 0; uc_lx < 2; uc_lx++)
  ------------------
  |  Branch (1019:24): [True: 27.2k, False: 13.6k]
  ------------------
 1020|  27.2k|        {
 1021|  27.2k|            UWORD8 u1_sub_mb_num = 0;
 1022|  27.2k|            UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
 1023|  27.2k|            UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
 1024|  27.2k|            UWORD8 u1_mb_mc_mode_1, u1_pred_mode, uc_i;
 1025|  27.2k|            UWORD16 u2_sub_mb_num = 0x028A;
 1026|  27.2k|            UWORD8 u1_b2 = uc_lx << 1;
 1027|  27.2k|            u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  484|  13.6k|#define PRED_L1   2
  ------------------
                          u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  483|  40.9k|#define PRED_L0   1
  ------------------
  |  Branch (1027:28): [True: 13.6k, False: 13.6k]
  ------------------
 1028|       |            /* Default for Cabac */
 1029|  27.2k|            pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
 1030|  83.7k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (1030:27): [True: 56.4k, False: 27.2k]
  ------------------
 1031|  56.4k|            {
 1032|  56.4k|                WORD8 i1_pred = (UWORD8) (u4_mb_pred_mode_tmp >> 24);
 1033|  56.4k|                u1_mb_mc_mode_1 = (UWORD8) (u4_mb_mc_mode_tmp >> 24);
 1034|  56.4k|                u4_mb_pred_mode_tmp <<= 8;
 1035|  56.4k|                u4_mb_mc_mode_tmp <<= 8;
 1036|       |
 1037|       |                /* subMb prediction mode */
 1038|  56.4k|                if(u1_sub_mb)
  ------------------
  |  Branch (1038:20): [True: 29.9k, False: 26.4k]
  ------------------
 1039|  29.9k|                {
 1040|  29.9k|                    u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode_1];
 1041|  29.9k|                    u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode_1];
 1042|  29.9k|                    u1_sub_mb_num = u2_sub_mb_num >> 12;
 1043|  29.9k|                    pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode_1 << 1);
 1044|  29.9k|                    u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode_1];
 1045|  29.9k|                    u2_sub_mb_num = u2_sub_mb_num << 4;
 1046|  29.9k|                }
 1047|       |
 1048|   122k|                for(uc_j = 0; uc_j < u1_num_submb_part; uc_j++, pu1_top_left_sub_mb_indx++)
  ------------------
  |  Branch (1048:31): [True: 65.9k, False: 56.4k]
  ------------------
 1049|  65.9k|                {
 1050|  65.9k|                    mv_pred_t *ps_mv;
 1051|  65.9k|                    u1_sub_mb_num = u1_sub_mb_num + *pu1_top_left_sub_mb_indx;
 1052|  65.9k|                    ps_mv = ps_mv_start + u1_sub_mb_num;
 1053|       |
 1054|       |                    /* Storing Info for partitions, writing only once */
 1055|  65.9k|                    if(uc_lx)
  ------------------
  |  Branch (1055:24): [True: 32.9k, False: 32.9k]
  ------------------
 1056|  32.9k|                    {
 1057|  32.9k|                        ps_part->u1_is_direct = (!i1_pred);
 1058|  32.9k|                        ps_part->u1_pred_mode = i1_pred;
 1059|  32.9k|                        ps_part->u1_sub_mb_num = u1_sub_mb_num;
 1060|  32.9k|                        ps_part->u1_partheight = u1_mb_part_ht;
 1061|  32.9k|                        ps_part->u1_partwidth = u1_mb_part_wd;
 1062|       |
 1063|       |                        /* Increment partition Index */
 1064|  32.9k|                        u1_p_idx++;
 1065|  32.9k|                        ps_part++;
 1066|  32.9k|                    }
 1067|       |
 1068|  65.9k|                    ih264d_get_mvd_cabac(u1_sub_mb_num, u1_b2, u1_mb_part_wd, u1_mb_part_ht,
 1069|  65.9k|                                         (UWORD8) (i1_pred & u1_pred_mode), ps_dec, ps_mv);
 1070|  65.9k|                }
 1071|  56.4k|            }
 1072|  27.2k|        }
 1073|       |        /* write back to the scratch partition info */
 1074|  13.6k|        ps_dec->ps_part = ps_part;
 1075|  13.6k|        ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;
  ------------------
  |  Branch (1075:41): [True: 3.74k, False: 9.89k]
  ------------------
 1076|  13.6k|    }
 1077|       |
 1078|  13.6k|    return OK;
  ------------------
  |  |  114|  13.6k|#define OK        0
  ------------------
 1079|  13.7k|}
isvcd_parse_bmb_non_direct_cavlc:
 1097|  14.9k|{
 1098|  14.9k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1099|  14.9k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1100|  14.9k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1101|  14.9k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 1102|  14.9k|    UWORD8 *pu1_sub_mb_pred_modes = (UWORD8 *) (gau1_ih264d_submb_pred_modes) + 4;
 1103|  14.9k|    const UWORD8(*pu1_mb_pred_modes)[32] = (const UWORD8(*)[32]) gau1_ih264d_mb_pred_modes;
 1104|  14.9k|    const UWORD8 *pu1_num_mb_part = (const UWORD8 *) gau1_ih264d_num_mb_part;
 1105|  14.9k|    const UWORD8 *pu1_sub_mb_mc_mode = (const UWORD8 *) (gau1_ih264d_submb_mc_mode) + 4;
 1106|  14.9k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data + u4_num_mbsNby2;
 1107|  14.9k|    UWORD8 *pu1_col_info = ps_parse_mb_data->u1_col_info;
 1108|  14.9k|    WORD8(*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] = ps_parse_mb_data->i1_ref_idx;
 1109|  14.9k|    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
 1110|  14.9k|    UWORD8 u1_mb_mc_mode, u1_num_mb_part, u1_sub_mb = !(u1_mb_type ^ B_8x8);
  ------------------
  |  |  480|  14.9k|#define B_8x8    22
  ------------------
 1111|  14.9k|    UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0;
 1112|  14.9k|    WORD32 ret = OK;
  ------------------
  |  |  114|  14.9k|#define OK        0
  ------------------
 1113|  14.9k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1114|  14.9k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1115|       |
 1116|  14.9k|    if(u1_sub_mb)
  ------------------
  |  Branch (1116:8): [True: 1.39k, False: 13.5k]
  ------------------
 1117|  1.39k|    {
 1118|  1.39k|        UWORD8 uc_i;
 1119|  1.39k|        u1_mb_mc_mode = 0;
 1120|  1.39k|        u1_num_mb_part = 4;
 1121|       |        /* Reading the subMB type */
 1122|  6.49k|        for(uc_i = 0; uc_i < 4; uc_i++)
  ------------------
  |  Branch (1122:23): [True: 5.31k, False: 1.17k]
  ------------------
 1123|  5.31k|        {
 1124|  5.31k|            UWORD32 ui_sub_mb_mode;
 1125|       |            // Inlined ih264d_uev
 1126|  5.31k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 1127|  5.31k|            UWORD32 u4_word, u4_ldz;
 1128|       |
 1129|       |            /***************************************************************/
 1130|       |            /* Find leading zeros in next 32 bits                          */
 1131|       |            /***************************************************************/
 1132|  5.31k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  5.31k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  5.31k|{                                                                           \
  |  |  152|  5.31k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  5.31k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  5.31k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  5.31k|                                                                            \
  |  |  156|  5.31k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  5.31k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 4.84k, False: 468]
  |  |  ------------------
  |  |  158|  5.31k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  4.84k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  5.31k|}
  ------------------
 1133|  5.31k|            u4_ldz = CLZ(u4_word);
 1134|       |            /* Flush the ps_bitstrm */
 1135|  5.31k|            u4_bitstream_offset += (u4_ldz + 1);
 1136|       |            /* Read the suffix from the ps_bitstrm */
 1137|  5.31k|            u4_word = 0;
 1138|  5.31k|            if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  3.03k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  3.03k|{                                                                           \
  |  |  122|  3.03k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  3.03k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  3.03k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  3.03k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  3.03k|                                                                            \
  |  |  127|  3.03k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.56k, False: 466]
  |  |  ------------------
  |  |  128|  3.03k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.56k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  3.03k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  3.03k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  3.03k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  3.03k|}                                                                           \
  ------------------
  |  Branch (1138:16): [True: 3.03k, False: 2.28k]
  ------------------
 1139|  5.31k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
 1140|  5.31k|            ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1);
 1141|       |            // Inlined ih264d_uev
 1142|  5.31k|            if(ui_sub_mb_mode > 12)
  ------------------
  |  Branch (1142:16): [True: 217, False: 5.09k]
  ------------------
 1143|    217|                return ERROR_SUB_MB_TYPE;
 1144|  5.09k|            else
 1145|  5.09k|            {
 1146|  5.09k|                UWORD8 u1_subMbPredMode = pu1_sub_mb_pred_modes[ui_sub_mb_mode];
 1147|  5.09k|                u4_mb_mc_mode = (u4_mb_mc_mode << 8) | pu1_sub_mb_mc_mode[ui_sub_mb_mode];
 1148|  5.09k|                u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredMode;
 1149|  5.09k|                pi1_ref_idx[0][uc_i] = ((u1_subMbPredMode & PRED_L0) - 1) >> 1;
  ------------------
  |  |  483|  5.09k|#define PRED_L0   1
  ------------------
 1150|  5.09k|                pi1_ref_idx[1][uc_i] = ((u1_subMbPredMode & PRED_L1) - 1) >> 1;
  ------------------
  |  |  484|  5.09k|#define PRED_L1   2
  ------------------
 1151|  5.09k|                COPYTHECONTEXT("sub_mb_type", u1_subMbPredMode);
 1152|  5.09k|            }
 1153|       |            /* Storing collocated Mb and SubMb mode information */
 1154|  5.09k|            *pu1_col_info++ = ((PRED_8x8) << 6) | ((pu1_sub_mb_mc_mode[ui_sub_mb_mode] << 4));
  ------------------
  |  |  453|  5.09k|#define PRED_8x8    3
  ------------------
 1155|  5.09k|            if(ui_sub_mb_mode != B_DIRECT_8x8)
  ------------------
  |  |  465|  5.09k|#define B_DIRECT_8x8    0
  ------------------
  |  Branch (1155:16): [True: 2.81k, False: 2.28k]
  ------------------
 1156|  2.81k|            {
 1157|  2.81k|                if(ui_sub_mb_mode > B_BI_8x8)
  ------------------
  |  |  468|  2.81k|#define B_BI_8x8        3
  ------------------
  |  Branch (1157:20): [True: 1.02k, False: 1.78k]
  ------------------
 1158|  1.02k|                {
 1159|  1.02k|                    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
 1160|  1.02k|                }
 1161|  2.81k|            }
 1162|  2.28k|            else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
  ------------------
  |  Branch (1162:21): [True: 1.48k, False: 795]
  ------------------
 1163|  1.48k|            {
 1164|  1.48k|                ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
 1165|  1.48k|            }
 1166|  5.09k|        }
 1167|  1.39k|    }
 1168|  13.5k|    else
 1169|  13.5k|    {
 1170|  13.5k|        UWORD8 u1_mb_pred_mode_idx = 5 + u1_mb_type;
 1171|  13.5k|        UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mb_pred_mode_idx];
 1172|  13.5k|        UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mb_pred_mode_idx];
 1173|  13.5k|        u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
 1174|  13.5k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];
 1175|       |
 1176|  13.5k|        pi1_ref_idx[0][0] = ((u1_mb_pred_mode_part0 & PRED_L0) - 1) >> 1;
  ------------------
  |  |  483|  13.5k|#define PRED_L0   1
  ------------------
 1177|  13.5k|        pi1_ref_idx[1][0] = ((u1_mb_pred_mode_part0 & PRED_L1) - 1) >> 1;
  ------------------
  |  |  484|  13.5k|#define PRED_L1   2
  ------------------
 1178|  13.5k|        pi1_ref_idx[0][1] = ((u1_mb_pred_mode_part1 & PRED_L0) - 1) >> 1;
  ------------------
  |  |  483|  13.5k|#define PRED_L0   1
  ------------------
 1179|  13.5k|        pi1_ref_idx[1][1] = ((u1_mb_pred_mode_part1 & PRED_L1) - 1) >> 1;
  ------------------
  |  |  484|  13.5k|#define PRED_L1   2
  ------------------
 1180|       |
 1181|  13.5k|        u4_mb_pred_mode = (u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1;
 1182|  13.5k|        u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
 1183|  13.5k|        u4_mb_mc_mode <<= 16;
 1184|  13.5k|        u4_mb_pred_mode <<= 16;
 1185|       |
 1186|       |        /* Storing collocated Mb and SubMb mode information */
 1187|  13.5k|        *pu1_col_info++ = (u1_mb_mc_mode << 6);
 1188|  13.5k|        if(u1_mb_mc_mode) *pu1_col_info++ = (u1_mb_mc_mode << 6);
  ------------------
  |  Branch (1188:12): [True: 6.94k, False: 6.57k]
  ------------------
 1189|  13.5k|    }
 1190|       |
 1191|  14.6k|    {
 1192|  14.6k|        UWORD8 uc_i;
 1193|  14.6k|        UWORD8 *pu1_motion_pred_flag_l0;
 1194|  14.6k|        UWORD8 *pu1_motion_pred_flag_l1;
 1195|  14.6k|        UWORD8 u1_mvp_l1;
 1196|  14.6k|        UWORD8 u1_mvp_l0;
 1197|       |
 1198|  14.6k|        pu1_motion_pred_flag_l0 = &ps_svc_cur_mb_info->au1_motion_pred_flag[0];
 1199|  14.6k|        *pu1_motion_pred_flag_l0 = 0;
 1200|  14.6k|        pu1_motion_pred_flag_l1 = &ps_svc_cur_mb_info->au1_motion_pred_flag[1];
 1201|  14.6k|        *pu1_motion_pred_flag_l1 = 0;
 1202|       |
 1203|  14.6k|        if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (1203:12): [True: 14.6k, False: 93]
  ------------------
 1204|  14.6k|           ps_svc_slice_params->u1_adaptive_motion_prediction_flag)
  ------------------
  |  Branch (1204:12): [True: 5.66k, False: 8.94k]
  ------------------
 1205|  5.66k|        {
 1206|  16.5k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (1206:27): [True: 10.8k, False: 5.66k]
  ------------------
 1207|  10.8k|            {
 1208|  10.8k|                if(pi1_ref_idx[0][uc_i] > -1)
  ------------------
  |  Branch (1208:20): [True: 7.87k, False: 2.99k]
  ------------------
 1209|  7.87k|                {
 1210|  7.87k|                    u1_mvp_l0 = ih264d_get_bit_h264(ps_bitstrm);
 1211|  7.87k|                    COPYTHECONTEXT("SVC ext: ps_motion_prediction_flag_l0", u1_mvp_l0);
 1212|  7.87k|                    *pu1_motion_pred_flag_l0 |= (u1_mvp_l0 << uc_i);
 1213|  7.87k|                }
 1214|  10.8k|            }
 1215|       |
 1216|  16.5k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (1216:27): [True: 10.8k, False: 5.66k]
  ------------------
 1217|  10.8k|            {
 1218|  10.8k|                if(pi1_ref_idx[1][uc_i] > -1)
  ------------------
  |  Branch (1218:20): [True: 1.89k, False: 8.97k]
  ------------------
 1219|  1.89k|                {
 1220|  1.89k|                    u1_mvp_l1 = ih264d_get_bit_h264(ps_bitstrm);
 1221|  1.89k|                    COPYTHECONTEXT("SVC ext: ps_motion_prediction_flag_l1", u1_mvp_l1);
 1222|  1.89k|                    *pu1_motion_pred_flag_l1 |= (u1_mvp_l1 << uc_i);
 1223|  1.89k|                }
 1224|  10.8k|            }
 1225|  5.66k|        }
 1226|  9.03k|        else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1226:17): [True: 8.94k, False: 93]
  ------------------
 1227|  8.94k|        {
 1228|  8.94k|            *pu1_motion_pred_flag_l0 = ps_svc_slice_params->u1_default_motion_prediction_flag
  ------------------
  |  Branch (1228:40): [True: 2.17k, False: 6.76k]
  ------------------
 1229|  8.94k|                                           ? ((1 << u1_num_mb_part) - 1)
 1230|  8.94k|                                           : 0;
 1231|  8.94k|            *pu1_motion_pred_flag_l1 = ps_svc_slice_params->u1_default_motion_prediction_flag
  ------------------
  |  Branch (1231:40): [True: 2.17k, False: 6.76k]
  ------------------
 1232|  8.94k|                                           ? ((1 << u1_num_mb_part) - 1)
 1233|  8.94k|                                           : 0;
 1234|  8.94k|        }
 1235|       |
 1236|  14.6k|        {
 1237|  14.6k|            UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 1238|  14.6k|            UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
 1239|  14.6k|            UWORD8 *pu1_num_ref_idx_lx_active = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
 1240|  14.6k|            const UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
 1241|  14.6k|            UWORD8 u4_num_ref_idx_lx_active;
 1242|       |
 1243|  14.6k|            u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[0] << u1_mbaff_field) - 1;
 1244|       |
 1245|  14.6k|            if(u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (1245:16): [True: 9.95k, False: 4.73k]
  ------------------
 1246|  9.95k|            {
 1247|  9.95k|                if(1 == u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (1247:20): [True: 1.77k, False: 8.18k]
  ------------------
 1248|  1.77k|                    isvcd_parse_bmb_ref_index_cavlc_range1(u1_num_mb_part, ps_bitstrm,
 1249|  1.77k|                                                           pi1_ref_idx[0], u4_num_ref_idx_lx_active,
 1250|  1.77k|                                                           pu1_motion_pred_flag_l0);
 1251|  8.18k|                else
 1252|  8.18k|                {
 1253|  8.18k|                    isvcd_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm, pi1_ref_idx[0],
 1254|  8.18k|                                                    u4_num_ref_idx_lx_active,
 1255|  8.18k|                                                    pu1_motion_pred_flag_l0);
 1256|  8.18k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  8.18k|#define OK        0
  ------------------
  |  Branch (1256:24): [True: 0, False: 8.18k]
  ------------------
 1257|  8.18k|                }
 1258|  9.95k|            }
 1259|       |
 1260|  14.6k|            u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[1] << u1_mbaff_field) - 1;
 1261|       |
 1262|  14.6k|            if(u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (1262:16): [True: 10.4k, False: 4.23k]
  ------------------
 1263|  10.4k|            {
 1264|  10.4k|                if(1 == u4_num_ref_idx_lx_active)
  ------------------
  |  Branch (1264:20): [True: 4.40k, False: 6.05k]
  ------------------
 1265|  4.40k|                    isvcd_parse_bmb_ref_index_cavlc_range1(u1_num_mb_part, ps_bitstrm,
 1266|  4.40k|                                                           pi1_ref_idx[1], u4_num_ref_idx_lx_active,
 1267|  4.40k|                                                           pu1_motion_pred_flag_l1);
 1268|  6.05k|                else
 1269|  6.05k|                {
 1270|  6.05k|                    ret = isvcd_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm,
 1271|  6.05k|                                                          pi1_ref_idx[1], u4_num_ref_idx_lx_active,
 1272|  6.05k|                                                          pu1_motion_pred_flag_l1);
 1273|  6.05k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  6.05k|#define OK        0
  ------------------
  |  Branch (1273:24): [True: 206, False: 5.84k]
  ------------------
 1274|  6.05k|                }
 1275|  10.4k|            }
 1276|  14.6k|        }
 1277|  14.6k|    }
 1278|       |    /* Read MotionVectors */
 1279|  14.4k|    {
 1280|  14.4k|        const UWORD8 *pu1_top_left_sub_mb_indx;
 1281|  14.4k|        const UWORD8 *pu1_sub_mb_indx_mod =
 1282|  14.4k|            (const UWORD8 *) (gau1_ih264d_submb_indx_mod) + (u1_sub_mb * 6);
 1283|  14.4k|        const UWORD8 *pu1_sub_mb_partw = (const UWORD8 *) gau1_ih264d_submb_partw;
 1284|  14.4k|        const UWORD8 *pu1_sub_mb_parth = (const UWORD8 *) gau1_ih264d_submb_parth;
 1285|  14.4k|        const UWORD8 *pu1_num_sub_mb_part = (const UWORD8 *) gau1_ih264d_num_submb_part;
 1286|  14.4k|        const UWORD8 *pu1_mb_partw = (const UWORD8 *) gau1_ih264d_mb_partw;
 1287|  14.4k|        const UWORD8 *pu1_mb_parth = (const UWORD8 *) gau1_ih264d_mb_parth;
 1288|  14.4k|        UWORD8 u1_p_idx = 0, u1_num_submb_part, uc_lx;
 1289|  14.4k|        parse_part_params_t *ps_part;
 1290|  14.4k|        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
 1291|  14.4k|        UWORD8 u1_mb_part_wd, u1_mb_part_ht;
 1292|       |
 1293|  14.4k|        ps_part = ps_dec->ps_part;
 1294|       |        /* Default Initialization for Non subMb Case Mode */
 1295|  14.4k|        u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
 1296|  14.4k|        u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
 1297|  14.4k|        u1_num_submb_part = 1;
 1298|       |
 1299|       |        /* Decoding the MV for the subMB */
 1300|  43.4k|        for(uc_lx = 0; uc_lx < 2; uc_lx++)
  ------------------
  |  Branch (1300:24): [True: 28.9k, False: 14.4k]
  ------------------
 1301|  28.9k|        {
 1302|  28.9k|            UWORD8 u1_sub_mb_num = 0, u1_pred_mode, uc_i;
 1303|  28.9k|            UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
 1304|  28.9k|            UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
 1305|  28.9k|            UWORD16 u2_sub_mb_num = 0x028A;  // for sub mb case
 1306|  28.9k|            UWORD8 u1_b2 = uc_lx << 1;
 1307|  28.9k|            u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  484|  14.4k|#define PRED_L1   2
  ------------------
                          u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
  ------------------
  |  |  483|  43.4k|#define PRED_L0   1
  ------------------
  |  Branch (1307:28): [True: 14.4k, False: 14.4k]
  ------------------
 1308|  28.9k|            pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
 1309|       |
 1310|  78.5k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (1310:27): [True: 49.6k, False: 28.9k]
  ------------------
 1311|  49.6k|            {
 1312|  49.6k|                UWORD8 u1_mb_mc_mode, uc_j;
 1313|  49.6k|                UWORD8 i1_pred = u4_mb_pred_mode_tmp >> 24;
 1314|  49.6k|                u1_mb_mc_mode = u4_mb_mc_mode_tmp >> 24;
 1315|  49.6k|                u4_mb_pred_mode_tmp <<= 8;
 1316|  49.6k|                u4_mb_mc_mode_tmp <<= 8;
 1317|       |                /* subMb prediction mode */
 1318|  49.6k|                if(u1_sub_mb)
  ------------------
  |  Branch (1318:20): [True: 9.41k, False: 40.1k]
  ------------------
 1319|  9.41k|                {
 1320|  9.41k|                    u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode];
 1321|  9.41k|                    u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode];
 1322|  9.41k|                    u1_sub_mb_num = u2_sub_mb_num >> 12;
 1323|  9.41k|                    u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode];
 1324|  9.41k|                    pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
 1325|  9.41k|                    u2_sub_mb_num <<= 4;
 1326|  9.41k|                }
 1327|   103k|                for(uc_j = 0; uc_j < u1_num_submb_part; uc_j++, pu1_top_left_sub_mb_indx++)
  ------------------
  |  Branch (1327:31): [True: 54.1k, False: 49.6k]
  ------------------
 1328|  54.1k|                {
 1329|  54.1k|                    mv_pred_t *ps_mv;
 1330|  54.1k|                    u1_sub_mb_num = u1_sub_mb_num + *pu1_top_left_sub_mb_indx;
 1331|  54.1k|                    ps_mv = ps_mv_start + u1_sub_mb_num;
 1332|       |
 1333|       |                    /* Storing Info for partitions, writing only once */
 1334|  54.1k|                    if(uc_lx)
  ------------------
  |  Branch (1334:24): [True: 27.0k, False: 27.0k]
  ------------------
 1335|  27.0k|                    {
 1336|  27.0k|                        ps_part->u1_is_direct = (!i1_pred);
 1337|  27.0k|                        ps_part->u1_pred_mode = i1_pred;
 1338|  27.0k|                        ps_part->u1_sub_mb_num = u1_sub_mb_num;
 1339|  27.0k|                        ps_part->u1_partheight = u1_mb_part_ht;
 1340|  27.0k|                        ps_part->u1_partwidth = u1_mb_part_wd;
 1341|       |                        /* Increment partition Index */
 1342|  27.0k|                        u1_p_idx++;
 1343|  27.0k|                        ps_part++;
 1344|  27.0k|                    }
 1345|       |
 1346|  54.1k|                    if(i1_pred & u1_pred_mode)
  ------------------
  |  Branch (1346:24): [True: 26.2k, False: 27.8k]
  ------------------
 1347|  26.2k|                    {
 1348|  26.2k|                        WORD16 i2_mvx, i2_mvy;
 1349|       |                        // inlining ih264d_sev
 1350|  26.2k|                        {
 1351|  26.2k|                            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 1352|  26.2k|                            UWORD32 u4_word, u4_ldz, u4_abs_val;
 1353|       |
 1354|       |                            /***************************************************************/
 1355|       |                            /* Find leading zeros in next 32 bits                          */
 1356|       |                            /***************************************************************/
 1357|  26.2k|                            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  26.2k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  26.2k|{                                                                           \
  |  |  152|  26.2k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  26.2k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  26.2k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  26.2k|                                                                            \
  |  |  156|  26.2k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  26.2k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 25.5k, False: 755]
  |  |  ------------------
  |  |  158|  26.2k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  25.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  26.2k|}
  ------------------
 1358|  26.2k|                            u4_ldz = CLZ(u4_word);
 1359|       |
 1360|       |                            /* Flush the ps_bitstrm */
 1361|  26.2k|                            u4_bitstream_offset += (u4_ldz + 1);
 1362|       |
 1363|       |                            /* Read the suffix from the ps_bitstrm */
 1364|  26.2k|                            u4_word = 0;
 1365|  26.2k|                            if(u4_ldz)
  ------------------
  |  Branch (1365:32): [True: 11.6k, False: 14.5k]
  ------------------
 1366|  11.6k|                                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  11.6k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  11.6k|{                                                                           \
  |  |  122|  11.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  11.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  11.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  11.6k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  11.6k|                                                                            \
  |  |  127|  11.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 11.1k, False: 571]
  |  |  ------------------
  |  |  128|  11.6k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  11.1k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  11.6k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  11.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  11.6k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  11.6k|}                                                                           \
  ------------------
 1367|       |
 1368|  26.2k|                            *pu4_bitstrm_ofst = u4_bitstream_offset;
 1369|  26.2k|                            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
 1370|       |
 1371|  26.2k|                            if(u4_word & 0x1)
  ------------------
  |  Branch (1371:32): [True: 8.95k, False: 17.3k]
  ------------------
 1372|  8.95k|                                i2_mvx = (-(WORD32) u4_abs_val);
 1373|  17.3k|                            else
 1374|  17.3k|                                i2_mvx = (u4_abs_val);
 1375|  26.2k|                        }
 1376|       |                        // inlinined ih264d_sev
 1377|  26.2k|                        {
 1378|  26.2k|                            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 1379|  26.2k|                            UWORD32 u4_word, u4_ldz, u4_abs_val;
 1380|       |
 1381|       |                            /***************************************************************/
 1382|       |                            /* Find leading zeros in next 32 bits                          */
 1383|       |                            /***************************************************************/
 1384|  26.2k|                            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  26.2k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  26.2k|{                                                                           \
  |  |  152|  26.2k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  26.2k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  26.2k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  26.2k|                                                                            \
  |  |  156|  26.2k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  26.2k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 25.0k, False: 1.24k]
  |  |  ------------------
  |  |  158|  26.2k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  25.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  26.2k|}
  ------------------
 1385|  26.2k|                            u4_ldz = CLZ(u4_word);
 1386|       |
 1387|       |                            /* Flush the ps_bitstrm */
 1388|  26.2k|                            u4_bitstream_offset += (u4_ldz + 1);
 1389|       |
 1390|       |                            /* Read the suffix from the ps_bitstrm */
 1391|  26.2k|                            u4_word = 0;
 1392|  26.2k|                            if(u4_ldz)
  ------------------
  |  Branch (1392:32): [True: 15.7k, False: 10.5k]
  ------------------
 1393|  15.7k|                                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  15.7k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  15.7k|{                                                                           \
  |  |  122|  15.7k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  15.7k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  15.7k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  15.7k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  15.7k|                                                                            \
  |  |  127|  15.7k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 14.9k, False: 799]
  |  |  ------------------
  |  |  128|  15.7k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  14.9k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  15.7k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  15.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  15.7k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  15.7k|}                                                                           \
  ------------------
 1394|       |
 1395|  26.2k|                            *pu4_bitstrm_ofst = u4_bitstream_offset;
 1396|  26.2k|                            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
 1397|       |
 1398|  26.2k|                            if(u4_word & 0x1)
  ------------------
  |  Branch (1398:32): [True: 6.44k, False: 19.8k]
  ------------------
 1399|  6.44k|                                i2_mvy = (-(WORD32) u4_abs_val);
 1400|  19.8k|                            else
 1401|  19.8k|                                i2_mvy = (u4_abs_val);
 1402|  26.2k|                        }
 1403|       |                        // inlinined ih264d_sev
 1404|       |                        /* Storing Mv residuals */
 1405|  26.2k|                        ps_mv->i2_mv[u1_b2] = i2_mvx;
 1406|  26.2k|                        ps_mv->i2_mv[u1_b2 + 1] = i2_mvy;
 1407|  26.2k|                    }
 1408|  54.1k|                }
 1409|  49.6k|            }
 1410|  28.9k|        }
 1411|       |        /* write back to the scratch partition info */
 1412|  14.4k|        ps_dec->ps_part = ps_part;
 1413|  14.4k|        ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;
  ------------------
  |  Branch (1413:41): [True: 1.17k, False: 13.3k]
  ------------------
 1414|  14.4k|    }
 1415|  14.4k|    return OK;
  ------------------
  |  |  114|  14.4k|#define OK        0
  ------------------
 1416|  14.6k|}
isvcd_parse_ebslice:
 1431|  13.6k|{
 1432|  13.6k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1433|  13.6k|    WORD32 i_status = OK;
  ------------------
  |  |  114|  13.6k|#define OK        0
  ------------------
 1434|  13.6k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1435|  13.6k|    dec_slice_params_t *ps_slice = ps_dec->ps_cur_slice;
 1436|  13.6k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1437|  13.6k|    dec_svc_seq_params_t *ps_subset_seq;
 1438|  13.6k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1439|  13.6k|    dec_subset_seq_params_t *ps_sps_svc_ext = NULL;
 1440|  13.6k|    dec_nal_unit_svc_ext_params_t *ps_nal_svc_ext = NULL;
 1441|  13.6k|    UWORD8 u1_ref_idx_re_flag_lx;
 1442|  13.6k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1443|  13.6k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 1444|  13.6k|    UWORD64 u8_ref_idx_l0, u8_ref_idx_l1;
 1445|  13.6k|    UWORD32 u4_temp;
 1446|  13.6k|    WORD32 i_temp;
 1447|  13.6k|    WORD32 ret;
 1448|       |
 1449|  13.6k|    ps_nal_svc_ext = ps_svc_lyr_dec->ps_nal_svc_ext;
 1450|  13.6k|    ps_subset_seq = ps_svc_lyr_dec->ps_cur_subset_sps;
 1451|  13.6k|    ps_sps_svc_ext = &ps_subset_seq->s_sps_svc_ext;
 1452|  13.6k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1453|       |
 1454|       |    /*--------------------------------------------------------------------*/
 1455|       |    /* Read remaining contents of the slice header                        */
 1456|       |    /*--------------------------------------------------------------------*/
 1457|  13.6k|    {
 1458|  13.6k|        WORD8 *pi1_buf;
 1459|  13.6k|        WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv;
 1460|  13.6k|        WORD32 *pi4_mv = (WORD32 *) pi2_mv;
 1461|  13.6k|        WORD16 *pi16_refFrame;
 1462|  13.6k|        pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame;
 1463|  13.6k|        pi16_refFrame = (WORD16 *) pi1_buf;
 1464|  13.6k|        *pi4_mv = 0;
 1465|  13.6k|        *(pi4_mv + 1) = 0;
 1466|  13.6k|        *pi16_refFrame = OUT_OF_RANGE_REF;
  ------------------
  |  |   45|  13.6k|#define OUT_OF_RANGE_REF  -1
  ------------------
 1467|  13.6k|        ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8) -1;
 1468|  13.6k|        ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8) -1;
 1469|  13.6k|    }
 1470|       |
 1471|  13.6k|    if(0 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id)
  ------------------
  |  Branch (1471:8): [True: 13.6k, False: 0]
  ------------------
 1472|  13.6k|    {
 1473|  13.6k|        ps_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264(ps_bitstrm);
 1474|  13.6k|        COPYTHECONTEXT("Slice Header SVC ext: num_ref_idx_override_flag",
 1475|  13.6k|                       ps_slice->u1_num_ref_idx_active_override_flag);
 1476|       |
 1477|  13.6k|        u8_ref_idx_l0 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0];
 1478|  13.6k|        u8_ref_idx_l1 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[1];
 1479|  13.6k|        if(ps_slice->u1_num_ref_idx_active_override_flag)
  ------------------
  |  Branch (1479:12): [True: 7.89k, False: 5.80k]
  ------------------
 1480|  7.89k|        {
 1481|  7.89k|            u8_ref_idx_l0 = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1482|  7.89k|            COPYTHECONTEXT("Slice Header SVC ext: num_ref_idx_l0_active_minus1", u8_ref_idx_l0 - 1);
 1483|       |
 1484|  7.89k|            u8_ref_idx_l1 = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1485|  7.89k|            COPYTHECONTEXT("Slice Header SVC ext: num_ref_idx_l1_active_minus1", u8_ref_idx_l1 - 1);
 1486|  7.89k|        }
 1487|       |
 1488|  13.6k|        {
 1489|  13.6k|            UWORD8 u1_max_ref_idx = H264_MAX_REF_PICS;
  ------------------
  |  |  534|  13.6k|#define H264_MAX_REF_PICS         16
  ------------------
 1490|  13.6k|            if(ps_slice->u1_field_pic_flag)
  ------------------
  |  Branch (1490:16): [True: 0, False: 13.6k]
  ------------------
 1491|      0|            {
 1492|      0|                u1_max_ref_idx = H264_MAX_REF_PICS << 1;
  ------------------
  |  |  534|      0|#define H264_MAX_REF_PICS         16
  ------------------
 1493|      0|            }
 1494|  13.6k|            if((u8_ref_idx_l0 >= u1_max_ref_idx) || (u8_ref_idx_l1 >= u1_max_ref_idx))
  ------------------
  |  Branch (1494:16): [True: 216, False: 13.4k]
  |  Branch (1494:53): [True: 178, False: 13.2k]
  ------------------
 1495|    394|            {
 1496|    394|                return ERROR_NUM_REF;
 1497|    394|            }
 1498|  13.2k|            ps_slice->u1_num_ref_idx_lx_active[0] = (UWORD8) u8_ref_idx_l0;
 1499|  13.2k|            ps_slice->u1_num_ref_idx_lx_active[1] = (UWORD8) u8_ref_idx_l1;
 1500|  13.2k|        }
 1501|       |
 1502|      0|        ih264d_init_ref_idx_lx_b(ps_dec);
 1503|       |        /* Store the value for future slices in the same picture */
 1504|  13.2k|        ps_dec->u1_num_ref_idx_lx_active_prev = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
 1505|       |
 1506|  13.2k|        u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm);
 1507|  13.2k|        COPYTHECONTEXT("Slice Header SVC ext: ref_pic_list_reordering_flag_l0",
 1508|  13.2k|                       u1_ref_idx_re_flag_lx);
 1509|       |
 1510|       |        /* Modified temporarily */
 1511|  13.2k|        if(u1_ref_idx_re_flag_lx)
  ------------------
  |  Branch (1511:12): [True: 4.63k, False: 8.66k]
  ------------------
 1512|  4.63k|        {
 1513|  4.63k|            WORD8 ret;
 1514|  4.63k|            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
 1515|  4.63k|            ret = ih264d_ref_idx_reordering(ps_dec, 0);
 1516|  4.63k|            if(ret == -1) return ERROR_REFIDX_ORDER_T;
  ------------------
  |  Branch (1516:16): [True: 0, False: 4.63k]
  ------------------
 1517|  4.63k|        }
 1518|  8.66k|        else
 1519|  8.66k|            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_init_dpb[0];
 1520|       |
 1521|  13.2k|        u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm);
 1522|  13.2k|        COPYTHECONTEXT("Slice Header SVC ext: ref_pic_list_reordering_flag_l1",
 1523|  13.2k|                       u1_ref_idx_re_flag_lx);
 1524|       |
 1525|       |        /* Modified temporarily */
 1526|  13.2k|        if(u1_ref_idx_re_flag_lx)
  ------------------
  |  Branch (1526:12): [True: 5.43k, False: 7.86k]
  ------------------
 1527|  5.43k|        {
 1528|  5.43k|            WORD8 ret;
 1529|  5.43k|            ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_mod_dpb[1];
 1530|  5.43k|            ret = ih264d_ref_idx_reordering(ps_dec, 1);
 1531|  5.43k|            if(ret == -1) return ERROR_REFIDX_ORDER_T;
  ------------------
  |  Branch (1531:16): [True: 0, False: 5.43k]
  ------------------
 1532|  5.43k|        }
 1533|  7.86k|        else
 1534|  7.86k|            ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_init_dpb[1];
 1535|       |
 1536|       |        /* Create refIdx to POC mapping */
 1537|  13.2k|        {
 1538|  13.2k|            void **ppv_map_ref_idx_to_poc_lx;
 1539|  13.2k|            WORD8 idx;
 1540|  13.2k|            struct pic_buffer_t *ps_pic;
 1541|       |
 1542|  13.2k|            ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0;
  ------------------
  |  |   89|  13.2k|#define FRM_LIST_L0             0                                               //0
  ------------------
 1543|  13.2k|            ppv_map_ref_idx_to_poc_lx[0] = 0;
 1544|  13.2k|            ppv_map_ref_idx_to_poc_lx++;
 1545|  41.6k|            for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (1545:26): [True: 28.3k, False: 13.2k]
  ------------------
 1546|  28.3k|            {
 1547|  28.3k|                ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
 1548|  28.3k|                ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1);
 1549|  28.3k|            }
 1550|       |
 1551|  13.2k|            ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1;
  ------------------
  |  |   90|  13.2k|#define FRM_LIST_L1             1 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L0 + POC_LIST_L0_TO_L1_DIFF        //0+33                  //(1 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|  13.2k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|  13.2k|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1552|       |
 1553|  13.2k|            ppv_map_ref_idx_to_poc_lx[0] = 0;
 1554|  13.2k|            ppv_map_ref_idx_to_poc_lx++;
 1555|  49.2k|            for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; idx++)
  ------------------
  |  Branch (1555:26): [True: 35.9k, False: 13.2k]
  ------------------
 1556|  35.9k|            {
 1557|  35.9k|                ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx];
 1558|  35.9k|                ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1);
 1559|  35.9k|            }
 1560|       |
 1561|  13.2k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1561:16): [True: 0, False: 13.2k]
  ------------------
 1562|      0|            {
 1563|      0|                void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b;
 1564|       |
 1565|      0|                ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L0;
  ------------------
  |  |   91|      0|#define TOP_LIST_FLD_L0         2 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L1 + POC_LIST_L0_TO_L1_DIFF        //0+33+33                   //(2 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1566|      0|                ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L0;
  ------------------
  |  |   93|      0|#define BOT_LIST_FLD_L0         4 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L1 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1567|       |
 1568|      0|                ppv_map_ref_idx_to_poc_lx_t[0] = 0;
 1569|      0|                ppv_map_ref_idx_to_poc_lx_t++;
 1570|      0|                ppv_map_ref_idx_to_poc_lx_b[0] = 0;
 1571|      0|                ppv_map_ref_idx_to_poc_lx_b++;
 1572|      0|                for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (1572:30): [True: 0, False: 0]
  ------------------
 1573|      0|                {
 1574|      0|                    ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
 1575|      0|                    ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1);
 1576|      0|                    ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1);
 1577|       |
 1578|      0|                    ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1;
 1579|      0|                    ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1;
 1580|       |
 1581|      0|                    ppv_map_ref_idx_to_poc_lx_t += 2;
 1582|      0|                    ppv_map_ref_idx_to_poc_lx_b += 2;
 1583|      0|                }
 1584|       |
 1585|      0|                ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L1;
  ------------------
  |  |   92|      0|#define TOP_LIST_FLD_L1         3 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17                //(3 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1586|      0|                ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L1;
  ------------------
  |  |   94|      0|#define BOT_LIST_FLD_L1         5 * POC_LIST_L0_TO_L1_DIFF//BOT_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1587|       |
 1588|      0|                ppv_map_ref_idx_to_poc_lx_t[0] = 0;
 1589|      0|                ppv_map_ref_idx_to_poc_lx_t++;
 1590|      0|                ppv_map_ref_idx_to_poc_lx_b[0] = 0;
 1591|      0|                ppv_map_ref_idx_to_poc_lx_b++;
 1592|      0|                for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; idx++)
  ------------------
  |  Branch (1592:30): [True: 0, False: 0]
  ------------------
 1593|      0|                {
 1594|      0|                    UWORD8 u1_tmp_idx = idx << 1;
 1595|      0|                    ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx];
 1596|      0|                    ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx] = (ps_pic->pu1_buf1);
 1597|      0|                    ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx + 1] = (ps_pic->pu1_buf1);
 1598|       |
 1599|      0|                    ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx] = (ps_pic->pu1_buf1) + 1;
 1600|      0|                    ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx + 1] = (ps_pic->pu1_buf1) + 1;
 1601|      0|                }
 1602|      0|            }
 1603|       |
 1604|       |            /* BS is moved post recon gen in SVC ext*/
 1605|  13.2k|            if(ps_dec->u4_num_cores >= 2)
  ------------------
  |  Branch (1605:16): [True: 8.13k, False: 5.16k]
  ------------------
 1606|  8.13k|            {
 1607|  8.13k|                WORD32 num_entries;
 1608|  8.13k|                WORD32 size;
 1609|  8.13k|                num_entries = MAX_FRAMES;
  ------------------
  |  |  600|  8.13k|#define MAX_FRAMES              16
  ------------------
 1610|  8.13k|                if((1 >= ps_dec->ps_cur_sps->u1_num_ref_frames) && (0 == ps_dec->i4_display_delay))
  ------------------
  |  Branch (1610:20): [True: 7.40k, False: 730]
  |  Branch (1610:68): [True: 0, False: 7.40k]
  ------------------
 1611|      0|                {
 1612|      0|                    num_entries = 1;
 1613|      0|                }
 1614|       |
 1615|  8.13k|                num_entries = ((2 * num_entries) + 1);
 1616|  8.13k|                num_entries *= 2;
 1617|       |
 1618|  8.13k|                size = num_entries * sizeof(void *);
 1619|  8.13k|                size += PAD_MAP_IDX_POC * sizeof(void *);
  ------------------
  |  |  100|  8.13k|#define PAD_MAP_IDX_POC             (1)
  ------------------
 1620|       |
 1621|  8.13k|                memcpy((void *) ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc,
 1622|  8.13k|                       ps_dec->ppv_map_ref_idx_to_poc, size);
 1623|  8.13k|            }
 1624|  13.2k|        }
 1625|       |
 1626|  13.2k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag &&
  ------------------
  |  Branch (1626:12): [True: 0, False: 13.2k]
  ------------------
 1627|      0|           (ps_dec->ps_cur_slice->u1_field_pic_flag == 0))
  ------------------
  |  Branch (1627:12): [True: 0, False: 0]
  ------------------
 1628|      0|        {
 1629|      0|            ih264d_convert_frm_mbaff_list(ps_dec);
 1630|      0|        }
 1631|       |
 1632|  13.2k|        if(ps_pps->u1_wted_bipred_idc == 1)
  ------------------
  |  Branch (1632:12): [True: 4.77k, False: 8.52k]
  ------------------
 1633|  4.77k|        {
 1634|  4.77k|            if(!ps_nal_svc_ext->u1_no_inter_layer_pred_flag)
  ------------------
  |  Branch (1634:16): [True: 4.77k, False: 0]
  ------------------
 1635|  4.77k|            {
 1636|  4.77k|                ps_svc_slice_params->u1_base_pred_weight_table_flag =
 1637|  4.77k|                    ih264d_get_bit_h264(ps_bitstrm);
 1638|  4.77k|                COPYTHECONTEXT("Slice Header SVC ext: u1_base_pred_weight_table_flag",
 1639|  4.77k|                               ps_svc_slice_params->u1_base_pred_weight_table_flag);
 1640|  4.77k|            }
 1641|       |
 1642|  4.77k|            if(ps_nal_svc_ext->u1_no_inter_layer_pred_flag ||
  ------------------
  |  Branch (1642:16): [True: 0, False: 4.77k]
  ------------------
 1643|  4.77k|               !ps_svc_slice_params->u1_base_pred_weight_table_flag)
  ------------------
  |  Branch (1643:16): [True: 2.40k, False: 2.37k]
  ------------------
 1644|  2.40k|            {
 1645|  2.40k|                ih264d_parse_pred_weight_table(ps_slice, ps_bitstrm);
 1646|       |
 1647|  2.40k|                ih264d_form_pred_weight_matrix(ps_dec);
 1648|  2.40k|                ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
 1649|  2.40k|            }
 1650|  4.77k|        }
 1651|  8.52k|        else if(ps_pps->u1_wted_bipred_idc == 2)
  ------------------
  |  Branch (1651:17): [True: 3.20k, False: 5.31k]
  ------------------
 1652|  3.20k|        {
 1653|       |            /* Implicit Weighted prediction */
 1654|  3.20k|            ps_slice->u2_log2Y_crwd = 0x0505;
 1655|  3.20k|            ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
 1656|  3.20k|            ih264d_get_implicit_weights(ps_dec);
 1657|  3.20k|        }
 1658|  5.31k|        else
 1659|  5.31k|            ps_dec->ps_cur_slice->u2_log2Y_crwd = 0;
 1660|       |
 1661|  13.2k|        ps_dec->ps_parse_cur_slice->u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd;
 1662|       |
 1663|       |        /* G050 */
 1664|  13.2k|        if(ps_slice->u1_nal_ref_idc != 0)
  ------------------
  |  Branch (1664:12): [True: 8.36k, False: 4.93k]
  ------------------
 1665|  8.36k|        {
 1666|  8.36k|            if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (1666:16): [True: 8.28k, False: 76]
  ------------------
 1667|  8.28k|            {
 1668|  8.28k|                dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1669|  8.28k|                dec_seq_params_t *ps_sps_tmp = ps_pps->ps_sps;
 1670|  8.28k|                UWORD8 u1_nal_unit_type_tmp = ps_dec->u1_nal_unit_type;
 1671|       |
 1672|  8.28k|                ps_pps->ps_sps = ps_dec->ps_cur_sps;
 1673|  8.28k|                if(ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag)
  ------------------
  |  Branch (1673:20): [True: 8.04k, False: 242]
  ------------------
 1674|  8.04k|                    ps_dec->u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  8.04k|#define IDR_SLICE_NAL                   5
  ------------------
 1675|       |
 1676|  8.28k|                i_temp = ih264d_read_mmco_commands(ps_dec);
 1677|       |
 1678|  8.28k|                ps_pps->ps_sps = ps_sps_tmp;
 1679|  8.28k|                ps_dec->u1_nal_unit_type = u1_nal_unit_type_tmp;
 1680|       |
 1681|  8.28k|                if(i_temp < 0)
  ------------------
  |  Branch (1681:20): [True: 69, False: 8.21k]
  ------------------
 1682|     69|                {
 1683|     69|                    return ERROR_DBP_MANAGER_T;
 1684|     69|                }
 1685|  8.21k|                ps_dec->u4_bitoffset = i_temp;
 1686|  8.21k|            }
 1687|     76|            else
 1688|     76|                ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
 1689|       |
 1690|  8.29k|            if(!ps_sps_svc_ext->u1_slice_header_restriction_flag)
  ------------------
  |  Branch (1690:16): [True: 7.14k, False: 1.14k]
  ------------------
 1691|  7.14k|            {
 1692|  7.14k|                ps_svc_slice_params->u1_store_ref_base_pic_flag = ih264d_get_bit_h264(ps_bitstrm);
 1693|  7.14k|                COPYTHECONTEXT("SPS_SVC_EXT: u1_store_ref_base_pic_flag",
 1694|  7.14k|                               ps_svc_slice_params->u1_store_ref_base_pic_flag);
 1695|       |
 1696|  7.14k|                if(0 != ps_svc_slice_params->u1_store_ref_base_pic_flag)
  ------------------
  |  Branch (1696:20): [True: 686, False: 6.46k]
  ------------------
 1697|    686|                {
 1698|    686|                    return NOT_OK;
  ------------------
  |  |  116|    686|#define NOT_OK    -1
  ------------------
 1699|    686|                }
 1700|  6.46k|                if(((1 == ps_nal_svc_ext->u1_use_ref_base_pic_flag) ||
  ------------------
  |  Branch (1700:21): [True: 0, False: 6.46k]
  ------------------
 1701|  6.46k|                    (1 == ps_svc_slice_params->u1_store_ref_base_pic_flag)) &&
  ------------------
  |  Branch (1701:21): [True: 0, False: 6.46k]
  ------------------
 1702|      0|                   (!ps_nal_svc_ext->u1_idr_flag))
  ------------------
  |  Branch (1702:20): [True: 0, False: 0]
  ------------------
 1703|      0|                {
 1704|      0|                    i_status = isvcd_dec_ref_base_pic_marking(
 1705|      0|                        &ps_svc_slice_params->s_ref_base_pic_marking_svc_ext, ps_bitstrm);
 1706|      0|                    if(i_status != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1706:24): [True: 0, False: 0]
  ------------------
 1707|      0|                    {
 1708|      0|                        return i_status;
 1709|      0|                    }
 1710|      0|                }
 1711|  6.46k|            }
 1712|  8.29k|        }
 1713|  13.2k|    }
 1714|       |    /* G050 */
 1715|       |    /*Code is present in standard but omitted in the reference code*/
 1716|  12.5k|    if(ps_pps->u1_entropy_coding_mode == CABAC)
  ------------------
  |  |  339|  12.5k|#define CABAC  1
  ------------------
  |  Branch (1716:8): [True: 2.70k, False: 9.84k]
  ------------------
 1717|  2.70k|    {
 1718|  2.70k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1719|  2.70k|        if(u4_temp > MAX_CABAC_INIT_IDC)
  ------------------
  |  |  537|  2.70k|#define MAX_CABAC_INIT_IDC        2
  ------------------
  |  Branch (1719:12): [True: 129, False: 2.57k]
  ------------------
 1720|    129|        {
 1721|    129|            return ERROR_INV_SLICE_HDR_T;
 1722|    129|        }
 1723|  2.57k|        ps_slice->u1_cabac_init_idc = u4_temp;
 1724|  2.57k|        COPYTHECONTEXT("Slice Header SVC ext: cabac_init_idc", ps_slice->u1_cabac_init_idc);
 1725|  2.57k|    }
 1726|       |
 1727|  12.4k|    {
 1728|       |        /* Read slice_qp_delta */
 1729|  12.4k|        WORD64 i8_temp =
 1730|  12.4k|            (WORD64) ps_pps->u1_pic_init_qp + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1731|  12.4k|        if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  629|  12.4k|#define MIN_H264_QP 0
  ------------------
                      if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  634|  12.0k|#define MAX_H264_QP 51
  ------------------
  |  Branch (1731:12): [True: 349, False: 12.0k]
  |  Branch (1731:39): [True: 1.41k, False: 10.6k]
  ------------------
 1732|  1.76k|        {
 1733|  1.76k|            return ERROR_INV_RANGE_QP_T;
 1734|  1.76k|        }
 1735|  10.6k|        ps_slice->u1_slice_qp = (UWORD8) i8_temp;
 1736|  10.6k|        COPYTHECONTEXT("Slice Header SVC ext: slice_qp_delta",
 1737|  10.6k|                       (WORD8) (ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp));
 1738|  10.6k|    }
 1739|  10.6k|    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
  ------------------
  |  Branch (1739:8): [True: 7.18k, False: 3.47k]
  ------------------
 1740|  7.18k|    {
 1741|  7.18k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1742|  7.18k|        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
  ------------------
  |  |  547|  7.18k|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (1742:12): [True: 369, False: 6.81k]
  ------------------
 1743|    369|        {
 1744|    369|            return ERROR_INV_SLICE_HDR_T;
 1745|    369|        }
 1746|  6.81k|        COPYTHECONTEXT("Slice Header SVC ext: disable_deblocking_filter_idc", u4_temp);
 1747|  6.81k|        ps_slice->u1_disable_dblk_filter_idc = u4_temp;
 1748|  6.81k|        if(u4_temp != 1)
  ------------------
  |  Branch (1748:12): [True: 6.15k, False: 655]
  ------------------
 1749|  6.15k|        {
 1750|  6.15k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 1751|  6.15k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  6.15k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  5.76k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (1751:16): [True: 391, False: 5.76k]
  |  Branch (1751:47): [True: 125, False: 5.64k]
  ------------------
 1752|    516|            {
 1753|    516|                return ERROR_INV_SLICE_HDR_T;
 1754|    516|            }
 1755|  5.64k|            ps_slice->i1_slice_alpha_c0_offset = i_temp;
 1756|  5.64k|            COPYTHECONTEXT("Slice Header SVC ext: slice_alpha_c0_offset_div2",
 1757|  5.64k|                           ps_slice->i1_slice_alpha_c0_offset >> 1);
 1758|       |
 1759|  5.64k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 1760|  5.64k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  5.64k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  5.42k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (1760:16): [True: 215, False: 5.42k]
  |  Branch (1760:47): [True: 162, False: 5.26k]
  ------------------
 1761|    377|            {
 1762|    377|                return ERROR_INV_SLICE_HDR_T;
 1763|    377|            }
 1764|  5.26k|            ps_slice->i1_slice_beta_offset = i_temp;
 1765|  5.26k|            COPYTHECONTEXT("Slice Header SVC ext: slice_beta_offset_div2",
 1766|  5.26k|                           ps_slice->i1_slice_beta_offset >> 1);
 1767|  5.26k|        }
 1768|    655|        else
 1769|    655|        {
 1770|    655|            ps_slice->i1_slice_alpha_c0_offset = 0;
 1771|    655|            ps_slice->i1_slice_beta_offset = 0;
 1772|    655|        }
 1773|  6.81k|    }
 1774|  3.47k|    else
 1775|  3.47k|    {
 1776|  3.47k|        ps_slice->u1_disable_dblk_filter_idc = 0;
 1777|  3.47k|        ps_slice->i1_slice_alpha_c0_offset = 0;
 1778|  3.47k|        ps_slice->i1_slice_beta_offset = 0;
 1779|  3.47k|    }
 1780|       |
 1781|       |    /* add the remaining part of the code for svc extension from reference */
 1782|  9.38k|    ret = isvcd_set_default_slice_header_ext(ps_svc_lyr_dec);
 1783|  9.38k|    if(ret != OK)
  ------------------
  |  |  114|  9.38k|#define OK        0
  ------------------
  |  Branch (1783:8): [True: 0, False: 9.38k]
  ------------------
 1784|      0|    {
 1785|      0|        return ERROR_INV_SLICE_HDR_T;
 1786|      0|    }
 1787|       |
 1788|  9.38k|    ret = isvcd_parse_slice_header(ps_svc_lyr_dec);
 1789|  9.38k|    if(ret != OK)
  ------------------
  |  |  114|  9.38k|#define OK        0
  ------------------
  |  Branch (1789:8): [True: 536, False: 8.85k]
  ------------------
 1790|    536|    {
 1791|    536|        return ERROR_INV_SLICE_HDR_T;
 1792|    536|    }
 1793|       |
 1794|  8.85k|    ps_dec->u1_slice_header_done = 2;
 1795|  8.85k|    if(!ps_svc_slice_params->u1_slice_skip_flag)
  ------------------
  |  Branch (1795:8): [True: 8.20k, False: 651]
  ------------------
 1796|  8.20k|    {
 1797|  8.20k|        if(ps_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (1797:12): [True: 2.42k, False: 5.77k]
  ------------------
 1798|  2.42k|        {
 1799|  2.42k|            SWITCHOFFTRACE;
 1800|  2.42k|            SWITCHONTRACECABAC;
 1801|  2.42k|            ps_svc_lyr_dec->pf_parse_inter_slice_svc_ext =
 1802|  2.42k|                isvcd_parse_inter_slice_data_cabac_enh_lyr;
 1803|  2.42k|            ps_svc_lyr_dec->pf_parse_inter_mb_svc_ext = isvcd_parse_bmb_cabac;
 1804|  2.42k|            isvcd_init_cabac_contexts(B_SLICE, ps_dec);
  ------------------
  |  |  369|  2.42k|#define B_SLICE  1
  ------------------
 1805|       |
 1806|  2.42k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1806:16): [True: 0, False: 2.42k]
  ------------------
 1807|      0|                ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
 1808|  2.42k|            else
 1809|  2.42k|                ps_dec->pf_get_mb_info = isvcd_get_mb_info_cabac_nonmbaff;
 1810|  2.42k|        }
 1811|  5.77k|        else
 1812|  5.77k|        {
 1813|  5.77k|            SWITCHONTRACE;
 1814|  5.77k|            SWITCHOFFTRACECABAC;
 1815|  5.77k|            ps_svc_lyr_dec->pf_parse_inter_slice_svc_ext =
 1816|  5.77k|                isvcd_parse_inter_slice_data_cavlc_enh_lyr;
 1817|  5.77k|            ps_svc_lyr_dec->pf_parse_inter_mb_svc_ext = isvcd_parse_bmb_cavlc;
 1818|  5.77k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1818:16): [True: 0, False: 5.77k]
  ------------------
 1819|      0|                ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
 1820|  5.77k|            else
 1821|  5.77k|                ps_dec->pf_get_mb_info = isvcd_get_mb_info_cavlc_nonmbaff;
 1822|  5.77k|        }
 1823|  8.20k|    }
 1824|    651|    else
 1825|    651|    {
 1826|    651|        return ERROR_FEATURE_UNAVAIL;
 1827|    651|    }
 1828|       |
 1829|  8.20k|    ret = ih264d_cal_col_pic(ps_dec);
 1830|  8.20k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  8.20k|#define OK        0
  ------------------
  |  Branch (1830:8): [True: 0, False: 8.20k]
  ------------------
 1831|  8.20k|    ps_dec->u1_B = 1;
 1832|  8.20k|    ps_dec->pf_mvpred_ref_tfr_nby2mb = isvcd_mv_pred_ref_tfr_nby2_ebmb;
 1833|  8.20k|    ret = ps_svc_lyr_dec->pf_parse_inter_slice_svc_ext(ps_svc_lyr_dec, ps_slice,
 1834|  8.20k|                                                       u2_first_mb_in_slice);
 1835|  8.20k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  8.20k|#define OK        0
  ------------------
  |  Branch (1835:8): [True: 4.25k, False: 3.94k]
  ------------------
 1836|       |
 1837|  3.94k|    return OK;
  ------------------
  |  |  114|  3.94k|#define OK        0
  ------------------
 1838|  8.20k|}
isvcd_parse_bslice:
 1853|  30.4k|{
 1854|  30.4k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1855|  30.4k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1856|  30.4k|    dec_slice_params_t *ps_slice = ps_dec->ps_cur_slice;
 1857|  30.4k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1858|  30.4k|    UWORD8 u1_ref_idx_re_flag_lx;
 1859|  30.4k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1860|  30.4k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 1861|  30.4k|    UWORD64 u8_ref_idx_l0, u8_ref_idx_l1;
 1862|  30.4k|    UWORD32 u4_temp;
 1863|  30.4k|    WORD32 i_temp;
 1864|  30.4k|    WORD32 ret;
 1865|       |
 1866|       |    /*--------------------------------------------------------------------*/
 1867|       |    /* Read remaining contents of the slice header                        */
 1868|       |    /*--------------------------------------------------------------------*/
 1869|  30.4k|    {
 1870|  30.4k|        WORD8 *pi1_buf;
 1871|  30.4k|        WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv;
 1872|  30.4k|        WORD32 *pi4_mv = (WORD32 *) pi2_mv;
 1873|  30.4k|        WORD16 *pi16_refFrame;
 1874|  30.4k|        pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame;
 1875|  30.4k|        pi16_refFrame = (WORD16 *) pi1_buf;
 1876|  30.4k|        *pi4_mv = 0;
 1877|  30.4k|        *(pi4_mv + 1) = 0;
 1878|  30.4k|        *pi16_refFrame = OUT_OF_RANGE_REF;
  ------------------
  |  |   45|  30.4k|#define OUT_OF_RANGE_REF  -1
  ------------------
 1879|  30.4k|        ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8) -1;
 1880|  30.4k|        ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8) -1;
 1881|  30.4k|    }
 1882|       |
 1883|  30.4k|    ps_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264(ps_bitstrm);
 1884|  30.4k|    COPYTHECONTEXT("SH: num_ref_idx_override_flag", ps_slice->u1_num_ref_idx_active_override_flag);
 1885|       |
 1886|  30.4k|    u8_ref_idx_l0 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0];
 1887|  30.4k|    u8_ref_idx_l1 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[1];
 1888|  30.4k|    if(ps_slice->u1_num_ref_idx_active_override_flag)
  ------------------
  |  Branch (1888:8): [True: 17.6k, False: 12.8k]
  ------------------
 1889|  17.6k|    {
 1890|  17.6k|        u8_ref_idx_l0 = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1891|  17.6k|        COPYTHECONTEXT("SH: num_ref_idx_l0_active_minus1", u8_ref_idx_l0 - 1);
 1892|       |
 1893|  17.6k|        u8_ref_idx_l1 = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1894|  17.6k|        COPYTHECONTEXT("SH: num_ref_idx_l1_active_minus1", u8_ref_idx_l1 - 1);
 1895|  17.6k|    }
 1896|       |
 1897|  30.4k|    {
 1898|  30.4k|        UWORD8 u1_max_ref_idx = H264_MAX_REF_PICS;
  ------------------
  |  |  534|  30.4k|#define H264_MAX_REF_PICS         16
  ------------------
 1899|  30.4k|        if(ps_slice->u1_field_pic_flag)
  ------------------
  |  Branch (1899:12): [True: 0, False: 30.4k]
  ------------------
 1900|      0|        {
 1901|      0|            u1_max_ref_idx = H264_MAX_REF_PICS << 1;
  ------------------
  |  |  534|      0|#define H264_MAX_REF_PICS         16
  ------------------
 1902|      0|        }
 1903|  30.4k|        if((u8_ref_idx_l0 >= u1_max_ref_idx) || (u8_ref_idx_l1 >= u1_max_ref_idx))
  ------------------
  |  Branch (1903:12): [True: 590, False: 29.8k]
  |  Branch (1903:49): [True: 703, False: 29.1k]
  ------------------
 1904|  1.29k|        {
 1905|  1.29k|            return ERROR_NUM_REF;
 1906|  1.29k|        }
 1907|  29.1k|        ps_slice->u1_num_ref_idx_lx_active[0] = (UWORD8) u8_ref_idx_l0;
 1908|  29.1k|        ps_slice->u1_num_ref_idx_lx_active[1] = (UWORD8) u8_ref_idx_l1;
 1909|  29.1k|    }
 1910|       |
 1911|      0|    ih264d_init_ref_idx_lx_b(ps_dec);
 1912|       |    /* Store the value for future slices in the same picture */
 1913|  29.1k|    ps_dec->u1_num_ref_idx_lx_active_prev = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
 1914|       |
 1915|  29.1k|    u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm);
 1916|  29.1k|    COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l0", u1_ref_idx_re_flag_lx);
 1917|       |
 1918|       |    /* Modified temporarily */
 1919|  29.1k|    if(u1_ref_idx_re_flag_lx)
  ------------------
  |  Branch (1919:8): [True: 12.3k, False: 16.7k]
  ------------------
 1920|  12.3k|    {
 1921|  12.3k|        WORD8 ret;
 1922|  12.3k|        ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
 1923|  12.3k|        ret = ih264d_ref_idx_reordering(ps_dec, 0);
 1924|  12.3k|        if(ret == -1) return ERROR_REFIDX_ORDER_T;
  ------------------
  |  Branch (1924:12): [True: 0, False: 12.3k]
  ------------------
 1925|  12.3k|    }
 1926|  16.7k|    else
 1927|  16.7k|        ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_init_dpb[0];
 1928|       |
 1929|  29.1k|    u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm);
 1930|  29.1k|    COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l1", u1_ref_idx_re_flag_lx);
 1931|       |
 1932|       |    /* Modified temporarily */
 1933|  29.1k|    if(u1_ref_idx_re_flag_lx)
  ------------------
  |  Branch (1933:8): [True: 11.1k, False: 17.9k]
  ------------------
 1934|  11.1k|    {
 1935|  11.1k|        WORD8 ret;
 1936|  11.1k|        ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_mod_dpb[1];
 1937|  11.1k|        ret = ih264d_ref_idx_reordering(ps_dec, 1);
 1938|  11.1k|        if(ret == -1) return ERROR_REFIDX_ORDER_T;
  ------------------
  |  Branch (1938:12): [True: 0, False: 11.1k]
  ------------------
 1939|  11.1k|    }
 1940|  17.9k|    else
 1941|  17.9k|        ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_init_dpb[1];
 1942|       |
 1943|       |    /* Create refIdx to POC mapping */
 1944|  29.1k|    {
 1945|  29.1k|        void **ppv_map_ref_idx_to_poc_lx;
 1946|  29.1k|        WORD8 idx;
 1947|  29.1k|        struct pic_buffer_t *ps_pic;
 1948|       |
 1949|  29.1k|        ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0;
  ------------------
  |  |   89|  29.1k|#define FRM_LIST_L0             0                                               //0
  ------------------
 1950|  29.1k|        ppv_map_ref_idx_to_poc_lx[0] = 0;
 1951|  29.1k|        ppv_map_ref_idx_to_poc_lx++;
 1952|  98.0k|        for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (1952:22): [True: 68.9k, False: 29.1k]
  ------------------
 1953|  68.9k|        {
 1954|  68.9k|            ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
 1955|  68.9k|            ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1);
 1956|  68.9k|        }
 1957|       |
 1958|  29.1k|        ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1;
  ------------------
  |  |   90|  29.1k|#define FRM_LIST_L1             1 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L0 + POC_LIST_L0_TO_L1_DIFF        //0+33                  //(1 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|  29.1k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|  29.1k|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1959|       |
 1960|  29.1k|        ppv_map_ref_idx_to_poc_lx[0] = 0;
 1961|  29.1k|        ppv_map_ref_idx_to_poc_lx++;
 1962|  86.3k|        for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; idx++)
  ------------------
  |  Branch (1962:22): [True: 57.1k, False: 29.1k]
  ------------------
 1963|  57.1k|        {
 1964|  57.1k|            ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx];
 1965|  57.1k|            ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1);
 1966|  57.1k|        }
 1967|       |
 1968|  29.1k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1968:12): [True: 0, False: 29.1k]
  ------------------
 1969|      0|        {
 1970|      0|            void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b;
 1971|       |
 1972|      0|            ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L0;
  ------------------
  |  |   91|      0|#define TOP_LIST_FLD_L0         2 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L1 + POC_LIST_L0_TO_L1_DIFF        //0+33+33                   //(2 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1973|      0|            ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L0;
  ------------------
  |  |   93|      0|#define BOT_LIST_FLD_L0         4 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L1 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1974|       |
 1975|      0|            ppv_map_ref_idx_to_poc_lx_t[0] = 0;
 1976|      0|            ppv_map_ref_idx_to_poc_lx_t++;
 1977|      0|            ppv_map_ref_idx_to_poc_lx_b[0] = 0;
 1978|      0|            ppv_map_ref_idx_to_poc_lx_b++;
 1979|      0|            for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (1979:26): [True: 0, False: 0]
  ------------------
 1980|      0|            {
 1981|      0|                ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
 1982|      0|                ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1);
 1983|      0|                ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1);
 1984|       |
 1985|      0|                ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1;
 1986|      0|                ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1;
 1987|       |
 1988|      0|                ppv_map_ref_idx_to_poc_lx_t += 2;
 1989|      0|                ppv_map_ref_idx_to_poc_lx_b += 2;
 1990|      0|            }
 1991|       |
 1992|      0|            ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L1;
  ------------------
  |  |   92|      0|#define TOP_LIST_FLD_L1         3 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17                //(3 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1993|      0|            ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L1;
  ------------------
  |  |   94|      0|#define BOT_LIST_FLD_L1         5 * POC_LIST_L0_TO_L1_DIFF//BOT_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 1994|       |
 1995|      0|            ppv_map_ref_idx_to_poc_lx_t[0] = 0;
 1996|      0|            ppv_map_ref_idx_to_poc_lx_t++;
 1997|      0|            ppv_map_ref_idx_to_poc_lx_b[0] = 0;
 1998|      0|            ppv_map_ref_idx_to_poc_lx_b++;
 1999|      0|            for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; idx++)
  ------------------
  |  Branch (1999:26): [True: 0, False: 0]
  ------------------
 2000|      0|            {
 2001|      0|                UWORD8 u1_tmp_idx = idx << 1;
 2002|      0|                ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx];
 2003|      0|                ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx] = (ps_pic->pu1_buf1);
 2004|      0|                ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx + 1] = (ps_pic->pu1_buf1);
 2005|       |
 2006|      0|                ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx] = (ps_pic->pu1_buf1) + 1;
 2007|      0|                ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx + 1] = (ps_pic->pu1_buf1) + 1;
 2008|      0|            }
 2009|      0|        }
 2010|  29.1k|    }
 2011|       |
 2012|  29.1k|    if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag && (ps_dec->ps_cur_slice->u1_field_pic_flag == 0))
  ------------------
  |  Branch (2012:8): [True: 0, False: 29.1k]
  |  Branch (2012:53): [True: 0, False: 0]
  ------------------
 2013|      0|    {
 2014|      0|        ih264d_convert_frm_mbaff_list(ps_dec);
 2015|      0|    }
 2016|       |
 2017|  29.1k|    if(ps_pps->u1_wted_bipred_idc == 1)
  ------------------
  |  Branch (2017:8): [True: 3.38k, False: 25.7k]
  ------------------
 2018|  3.38k|    {
 2019|  3.38k|        ret = ih264d_parse_pred_weight_table(ps_slice, ps_bitstrm);
 2020|  3.38k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  3.38k|#define OK        0
  ------------------
  |  Branch (2020:12): [True: 2.00k, False: 1.38k]
  ------------------
 2021|  1.38k|        ih264d_form_pred_weight_matrix(ps_dec);
 2022|  1.38k|        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
 2023|  1.38k|    }
 2024|  25.7k|    else if(ps_pps->u1_wted_bipred_idc == 2)
  ------------------
  |  Branch (2024:13): [True: 13.3k, False: 12.4k]
  ------------------
 2025|  13.3k|    {
 2026|       |        /* Implicit Weighted prediction */
 2027|  13.3k|        ps_slice->u2_log2Y_crwd = 0x0505;
 2028|  13.3k|        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
 2029|  13.3k|        ih264d_get_implicit_weights(ps_dec);
 2030|  13.3k|    }
 2031|  12.4k|    else
 2032|  12.4k|        ps_dec->ps_cur_slice->u2_log2Y_crwd = 0;
 2033|       |
 2034|  27.1k|    ps_dec->ps_parse_cur_slice->u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd;
 2035|       |
 2036|       |    /* G050 */
 2037|  27.1k|    if(ps_slice->u1_nal_ref_idc != 0)
  ------------------
  |  Branch (2037:8): [True: 18.1k, False: 8.95k]
  ------------------
 2038|  18.1k|    {
 2039|  18.1k|        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (2039:12): [True: 18.0k, False: 167]
  ------------------
 2040|  18.0k|        {
 2041|  18.0k|            dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 2042|  18.0k|            dec_seq_params_t *ps_sps_tmp = ps_pps->ps_sps;
 2043|  18.0k|            UWORD8 u1_nal_unit_type_tmp = ps_dec->u1_nal_unit_type;
 2044|       |
 2045|  18.0k|            ps_pps->ps_sps = ps_dec->ps_cur_sps;
 2046|  18.0k|            if(ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag)
  ------------------
  |  Branch (2046:16): [True: 12.2k, False: 5.82k]
  ------------------
 2047|  12.2k|                ps_dec->u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  12.2k|#define IDR_SLICE_NAL                   5
  ------------------
 2048|       |
 2049|  18.0k|            i_temp = ih264d_read_mmco_commands(ps_dec);
 2050|       |
 2051|  18.0k|            ps_pps->ps_sps = ps_sps_tmp;
 2052|  18.0k|            ps_dec->u1_nal_unit_type = u1_nal_unit_type_tmp;
 2053|       |
 2054|  18.0k|            if(i_temp < 0)
  ------------------
  |  Branch (2054:16): [True: 172, False: 17.8k]
  ------------------
 2055|    172|            {
 2056|    172|                return ERROR_DBP_MANAGER_T;
 2057|    172|            }
 2058|  17.8k|            ps_dec->u4_bitoffset = i_temp;
 2059|  17.8k|        }
 2060|    167|        else
 2061|    167|            ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
 2062|  18.1k|    }
 2063|       |    /* G050 */
 2064|       |
 2065|  26.9k|    if(ps_pps->u1_entropy_coding_mode == CABAC)
  ------------------
  |  |  339|  26.9k|#define CABAC  1
  ------------------
  |  Branch (2065:8): [True: 10.5k, False: 16.3k]
  ------------------
 2066|  10.5k|    {
 2067|  10.5k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2068|  10.5k|        if(u4_temp > MAX_CABAC_INIT_IDC)
  ------------------
  |  |  537|  10.5k|#define MAX_CABAC_INIT_IDC        2
  ------------------
  |  Branch (2068:12): [True: 979, False: 9.60k]
  ------------------
 2069|    979|        {
 2070|    979|            return ERROR_INV_SLICE_HDR_T;
 2071|    979|        }
 2072|  9.60k|        ps_slice->u1_cabac_init_idc = u4_temp;
 2073|  9.60k|        COPYTHECONTEXT("SH: cabac_init_idc", ps_slice->u1_cabac_init_idc);
 2074|  9.60k|    }
 2075|  26.0k|    {
 2076|       |        /* Read slice_qp_delta */
 2077|  26.0k|        WORD64 i8_temp =
 2078|  26.0k|            (WORD64) ps_pps->u1_pic_init_qp + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2079|  26.0k|        if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  629|  26.0k|#define MIN_H264_QP 0
  ------------------
                      if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  634|  25.3k|#define MAX_H264_QP 51
  ------------------
  |  Branch (2079:12): [True: 675, False: 25.3k]
  |  Branch (2079:39): [True: 1.86k, False: 23.4k]
  ------------------
 2080|  2.53k|        {
 2081|  2.53k|            return ERROR_INV_RANGE_QP_T;
 2082|  2.53k|        }
 2083|  23.4k|        ps_slice->u1_slice_qp = (UWORD8) i8_temp;
 2084|  23.4k|        COPYTHECONTEXT("SH: slice_qp_delta",
 2085|  23.4k|                       (WORD8) (ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp));
 2086|  23.4k|    }
 2087|  23.4k|    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
  ------------------
  |  Branch (2087:8): [True: 12.5k, False: 10.9k]
  ------------------
 2088|  12.5k|    {
 2089|  12.5k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2090|  12.5k|        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
  ------------------
  |  |  547|  12.5k|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (2090:12): [True: 734, False: 11.8k]
  ------------------
 2091|    734|        {
 2092|    734|            return ERROR_INV_SLICE_HDR_T;
 2093|    734|        }
 2094|  11.8k|        COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
 2095|  11.8k|        ps_slice->u1_disable_dblk_filter_idc = u4_temp;
 2096|  11.8k|        if(u4_temp != 1)
  ------------------
  |  Branch (2096:12): [True: 9.81k, False: 2.00k]
  ------------------
 2097|  9.81k|        {
 2098|  9.81k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 2099|  9.81k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  9.81k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  9.59k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (2099:16): [True: 218, False: 9.59k]
  |  Branch (2099:47): [True: 165, False: 9.43k]
  ------------------
 2100|    383|            {
 2101|    383|                return ERROR_INV_SLICE_HDR_T;
 2102|    383|            }
 2103|  9.43k|            ps_slice->i1_slice_alpha_c0_offset = i_temp;
 2104|  9.43k|            COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
 2105|  9.43k|                           ps_slice->i1_slice_alpha_c0_offset >> 1);
 2106|       |
 2107|  9.43k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 2108|  9.43k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  9.43k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  9.25k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (2108:16): [True: 173, False: 9.25k]
  |  Branch (2108:47): [True: 182, False: 9.07k]
  ------------------
 2109|    355|            {
 2110|    355|                return ERROR_INV_SLICE_HDR_T;
 2111|    355|            }
 2112|  9.07k|            ps_slice->i1_slice_beta_offset = i_temp;
 2113|  9.07k|            COPYTHECONTEXT("SH: slice_beta_offset_div2", ps_slice->i1_slice_beta_offset >> 1);
 2114|  9.07k|        }
 2115|  2.00k|        else
 2116|  2.00k|        {
 2117|  2.00k|            ps_slice->i1_slice_alpha_c0_offset = 0;
 2118|  2.00k|            ps_slice->i1_slice_beta_offset = 0;
 2119|  2.00k|        }
 2120|  11.8k|    }
 2121|  10.9k|    else
 2122|  10.9k|    {
 2123|  10.9k|        ps_slice->u1_disable_dblk_filter_idc = 0;
 2124|  10.9k|        ps_slice->i1_slice_alpha_c0_offset = 0;
 2125|  10.9k|        ps_slice->i1_slice_beta_offset = 0;
 2126|  10.9k|    }
 2127|       |
 2128|  21.9k|    ps_dec->u1_slice_header_done = 2;
 2129|       |
 2130|  21.9k|    if(ps_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (2130:8): [True: 9.46k, False: 12.5k]
  ------------------
 2131|  9.46k|    {
 2132|  9.46k|        SWITCHOFFTRACE;
 2133|  9.46k|        SWITCHONTRACECABAC;
 2134|  9.46k|        ps_svc_lyr_dec->pf_parse_svc_inter_slice = isvcd_parse_inter_slice_data_cabac;
 2135|  9.46k|        ps_dec->pf_parse_inter_mb = ih264d_parse_bmb_cabac;
 2136|  9.46k|        ih264d_init_cabac_contexts(B_SLICE, ps_dec);
  ------------------
  |  |  369|  9.46k|#define B_SLICE  1
  ------------------
 2137|       |
 2138|  9.46k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (2138:12): [True: 0, False: 9.46k]
  ------------------
 2139|      0|            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
 2140|  9.46k|        else
 2141|  9.46k|            ps_dec->pf_get_mb_info = isvcd_get_mb_info_cabac_nonmbaff;
 2142|  9.46k|    }
 2143|  12.5k|    else
 2144|  12.5k|    {
 2145|  12.5k|        SWITCHONTRACE;
 2146|  12.5k|        SWITCHOFFTRACECABAC;
 2147|  12.5k|        ps_svc_lyr_dec->pf_parse_svc_inter_slice = isvcd_parse_inter_slice_data_cavlc;
 2148|  12.5k|        ps_dec->pf_parse_inter_mb = ih264d_parse_bmb_cavlc;
 2149|  12.5k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (2149:12): [True: 0, False: 12.5k]
  ------------------
 2150|      0|            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
 2151|  12.5k|        else
 2152|  12.5k|            ps_dec->pf_get_mb_info = isvcd_get_mb_info_cavlc_nonmbaff;
 2153|  12.5k|    }
 2154|       |
 2155|  21.9k|    ret = ih264d_cal_col_pic(ps_dec);
 2156|  21.9k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  21.9k|#define OK        0
  ------------------
  |  Branch (2156:8): [True: 0, False: 21.9k]
  ------------------
 2157|  21.9k|    ps_dec->u1_B = 1;
 2158|  21.9k|    ps_dec->pf_mvpred_ref_tfr_nby2mb = ih264d_mv_pred_ref_tfr_nby2_bmb;
 2159|  21.9k|    ret = ps_svc_lyr_dec->pf_parse_svc_inter_slice(ps_svc_lyr_dec, ps_slice, u2_first_mb_in_slice);
 2160|  21.9k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  21.9k|#define OK        0
  ------------------
  |  Branch (2160:8): [True: 7.36k, False: 14.6k]
  ------------------
 2161|       |
 2162|  14.6k|    return OK;
  ------------------
  |  |  114|  14.6k|#define OK        0
  ------------------
 2163|  21.9k|}

isvcd_parse_islice_data_cabac:
  108|  3.67k|{
  109|  3.67k|    UWORD8 uc_more_data_flag;
  110|  3.67k|    UWORD32 u4_num_mbs, u4_mb_idx;
  111|  3.67k|    dec_mb_info_t *ps_cur_mb_info;
  112|  3.67k|    deblk_mb_t *ps_cur_deblk_mb;
  113|  3.67k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  114|  3.67k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
  115|  3.67k|    UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
  116|  3.67k|    WORD32 i4_cur_mb_addr;
  117|  3.67k|    UWORD8 u1_mbaff;
  118|  3.67k|    UWORD32 u4_num_mbs_next, u4_end_of_row, u4_tfr_n_mb;
  119|  3.67k|    WORD32 ret = OK;
  ------------------
  |  |  114|  3.67k|#define OK        0
  ------------------
  120|       |
  121|  3.67k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
  122|  3.67k|    ih264d_update_qp(ps_dec, 0);
  123|  3.67k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
  124|       |
  125|  3.67k|    if(ps_bitstrm->u4_ofst & 0x07)
  ------------------
  |  Branch (125:8): [True: 2.99k, False: 680]
  ------------------
  126|  2.99k|    {
  127|  2.99k|        ps_bitstrm->u4_ofst += 8;
  128|  2.99k|        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
  129|  2.99k|    }
  130|  3.67k|    ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
  131|  3.67k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  3.67k|#define OK        0
  ------------------
  |  Branch (131:8): [True: 226, False: 3.45k]
  ------------------
  132|  3.45k|    ih264d_init_cabac_contexts(I_SLICE, ps_dec);
  ------------------
  |  |  370|  3.45k|#define I_SLICE  2
  ------------------
  133|       |
  134|  3.45k|    ps_dec->i1_prev_mb_qp_delta = 0;
  135|       |
  136|       |    /* initializations */
  137|  3.45k|    u4_mb_idx = ps_dec->u4_mb_idx;
  138|  3.45k|    u4_num_mbs = u4_mb_idx;
  139|  3.45k|    uc_more_data_flag = 1;
  140|  3.45k|    i4_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
  141|  3.45k|    do
  142|  73.9k|    {
  143|  73.9k|        UWORD16 u2_mbx;
  144|  73.9k|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
  145|       |
  146|  73.9k|        if(i4_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (146:12): [True: 128, False: 73.7k]
  ------------------
  147|    128|        {
  148|    128|            break;
  149|    128|        }
  150|       |
  151|  73.7k|        {
  152|  73.7k|            UWORD8 u1_mb_type;
  153|  73.7k|            ps_cur_mb_info = ps_dec->ps_nmb_info + u4_num_mbs;
  154|  73.7k|            ps_dec->u4_num_mbs_cur_nmb = u4_num_mbs;
  155|  73.7k|            ps_dec->u4_num_pmbair = (u4_num_mbs >> u1_mbaff);
  156|  73.7k|            ps_cur_mb_info->u1_end_of_slice = 0;
  157|       |
  158|       |            /***************************************************************/
  159|       |            /* Get the required information for decoding of MB                  */
  160|       |            /* mb_x, mb_y , neighbour availablity,                              */
  161|       |            /***************************************************************/
  162|  73.7k|            ps_dec->pf_get_mb_info(ps_dec, i4_cur_mb_addr, ps_cur_mb_info, 0);
  163|  73.7k|            u2_mbx = ps_dec->u2_mbx;
  164|       |
  165|       |            /*********************************************************************/
  166|       |            /* initialize u1_tran_form8x8 to zero to aviod uninitialized accesses */
  167|       |            /*********************************************************************/
  168|  73.7k|            ps_cur_mb_info->u1_tran_form8x8 = 0;
  169|  73.7k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  170|       |
  171|       |            /***************************************************************/
  172|       |            /* Set the deblocking parameters for this MB                   */
  173|       |            /***************************************************************/
  174|  73.7k|            ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u4_num_mbs;
  175|  73.7k|            if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (175:16): [True: 73.7k, False: 0]
  ------------------
  176|  73.7k|                ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
  177|  73.7k|                                                 ps_dec->u1_mb_ngbr_availablity,
  178|  73.7k|                                                 ps_dec->u1_cur_mb_fld_dec_flag);
  179|       |
  180|  73.7k|            ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_MB;
  ------------------
  |  |  382|  73.7k|#define D_INTRA_MB        1
  ------------------
  181|       |
  182|       |            /* Macroblock Layer Begins */
  183|       |            /* Decode the u1_mb_type */
  184|  73.7k|            u1_mb_type = ih264d_parse_mb_type_intra_cabac(0, ps_dec);
  185|  73.7k|            if(u1_mb_type > 25) return ERROR_MB_TYPE;
  ------------------
  |  Branch (185:16): [True: 0, False: 73.7k]
  ------------------
  186|  73.7k|            ps_cur_mb_info->u1_mb_type = u1_mb_type;
  187|  73.7k|            COPYTHECONTEXT("u1_mb_type", u1_mb_type);
  188|       |
  189|       |            /* Parse Macroblock Data */
  190|  73.7k|            if(25 == u1_mb_type)
  ------------------
  |  Branch (190:16): [True: 827, False: 72.9k]
  ------------------
  191|    827|            {
  192|       |                /* I_PCM_MB */
  193|    827|                ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|    827|#define I_PCM_MB    6
  ------------------
  194|    827|                ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u4_num_mbs);
  195|    827|                if(ret != OK) return ret;
  ------------------
  |  |  114|    827|#define OK        0
  ------------------
  |  Branch (195:20): [True: 445, False: 382]
  ------------------
  196|    382|                ps_cur_deblk_mb->u1_mb_qp = 0;
  197|    382|            }
  198|  72.9k|            else
  199|  72.9k|            {
  200|  72.9k|                ret = ih264d_parse_imb_cabac(ps_dec, ps_cur_mb_info, u1_mb_type);
  201|  72.9k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  72.9k|#define OK        0
  ------------------
  |  Branch (201:20): [True: 2.28k, False: 70.6k]
  ------------------
  202|  70.6k|                ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
  203|  70.6k|            }
  204|       |
  205|  71.0k|            if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (205:16): [True: 0, False: 71.0k]
  ------------------
  206|      0|            {
  207|      0|                ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
  208|      0|                                            ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
  209|      0|            }
  210|  71.0k|            if(u1_mbaff)
  ------------------
  |  Branch (210:16): [True: 0, False: 71.0k]
  ------------------
  211|      0|            {
  212|      0|                ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
  213|      0|            }
  214|       |
  215|  71.0k|            if(ps_cur_mb_info->u1_topmb && u1_mbaff)
  ------------------
  |  Branch (215:16): [True: 71.0k, False: 0]
  |  Branch (215:44): [True: 0, False: 71.0k]
  ------------------
  216|      0|                uc_more_data_flag = 1;
  217|  71.0k|            else
  218|  71.0k|            {
  219|  71.0k|                uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env, ps_bitstrm);
  220|  71.0k|                uc_more_data_flag = !uc_more_data_flag;
  221|  71.0k|                COPYTHECONTEXT("Decode Sliceterm", !uc_more_data_flag);
  222|  71.0k|            }
  223|       |
  224|  71.0k|            if(u1_mbaff)
  ------------------
  |  Branch (224:16): [True: 0, False: 71.0k]
  ------------------
  225|      0|            {
  226|      0|                if(!uc_more_data_flag && (0 == (i4_cur_mb_addr & 1)))
  ------------------
  |  Branch (226:20): [True: 0, False: 0]
  |  Branch (226:42): [True: 0, False: 0]
  ------------------
  227|      0|                {
  228|      0|                    return ERROR_EOB_FLUSHBITS_T;
  229|      0|                }
  230|      0|            }
  231|       |            /* Next macroblock information */
  232|  71.0k|            i4_cur_mb_addr++;
  233|       |            /* Store the colocated information */
  234|  71.0k|            {
  235|  71.0k|                mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u4_num_mbs << 4);
  236|  71.0k|                mv_pred_t s_mvPred = {{0, 0, 0, 0}, {-1, -1}, 0, 0};
  237|  71.0k|                ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
  238|  71.0k|                                   (UWORD8) (ps_dec->u1_cur_mb_fld_dec_flag << 1), 4, 4);
  239|  71.0k|            }
  240|       |            /*if num _cores is set to 3,compute bs will be done in another thread*/
  241|  71.0k|            if(ps_dec->u4_num_cores < 3)
  ------------------
  |  Branch (241:16): [True: 71.0k, False: 0]
  ------------------
  242|  71.0k|            {
  243|  71.0k|                if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (243:20): [True: 71.0k, False: 0]
  ------------------
  244|  71.0k|                    ps_svc_lyr_dec->pf_svc_compute_bs(ps_svc_lyr_dec, ps_cur_mb_info,
  245|  71.0k|                                                      (UWORD16) (u4_num_mbs >> u1_mbaff));
  246|  71.0k|            }
  247|  71.0k|            u4_num_mbs++;
  248|  71.0k|        }
  249|       |
  250|       |        /****************************************************************/
  251|       |        /* Check for End Of Row                                         */
  252|       |        /****************************************************************/
  253|      0|        u4_num_mbs_next = i2_pic_wdin_mbs - u2_mbx - 1;
  254|  71.0k|        u4_end_of_row = (!u4_num_mbs_next) && (!(u1_mbaff && (u4_num_mbs & 0x01)));
  ------------------
  |  Branch (254:25): [True: 10.2k, False: 60.7k]
  |  Branch (254:50): [True: 0, False: 10.2k]
  |  Branch (254:62): [True: 0, False: 0]
  ------------------
  255|  71.0k|        u4_tfr_n_mb =
  256|  71.0k|            (u4_num_mbs == ps_dec->u4_recon_mb_grp) || u4_end_of_row || (!uc_more_data_flag);
  ------------------
  |  Branch (256:13): [True: 9.67k, False: 61.3k]
  |  Branch (256:56): [True: 575, False: 60.7k]
  |  Branch (256:73): [True: 522, False: 60.2k]
  ------------------
  257|  71.0k|        ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag);
  258|       |
  259|  71.0k|        if(u4_tfr_n_mb || (!uc_more_data_flag))
  ------------------
  |  Branch (259:12): [True: 10.7k, False: 60.2k]
  |  Branch (259:27): [True: 0, False: 60.2k]
  ------------------
  260|  10.7k|        {
  261|  10.7k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (261:16): [True: 4.41k, False: 6.35k]
  ------------------
  262|  4.41k|            {
  263|  4.41k|                ih264d_parse_tfr_nmb(ps_dec, u4_mb_idx, u4_num_mbs, u4_num_mbs_next, u4_tfr_n_mb,
  264|  4.41k|                                     u4_end_of_row);
  265|  4.41k|                ps_dec->ps_nmb_info += u4_num_mbs;
  266|  4.41k|                ps_svc_lyr_dec->ps_svc_nmb_info += u4_num_mbs;
  267|  4.41k|            }
  268|  6.35k|            else
  269|  6.35k|            {
  270|  6.35k|                if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|  6.35k|#define TARGET_LAYER 2
  ------------------
  |  Branch (270:20): [True: 4.34k, False: 2.01k]
  ------------------
  271|  4.34k|                {
  272|  4.34k|                    ih264d_decode_recon_tfr_nmb(ps_dec, u4_mb_idx, u4_num_mbs, u4_num_mbs_next,
  273|  4.34k|                                                u4_tfr_n_mb, u4_end_of_row);
  274|  4.34k|                }
  275|  2.01k|                else
  276|  2.01k|                {
  277|  2.01k|                    isvcd_decode_recon_tfr_nmb_base_lyr(ps_svc_lyr_dec, u4_mb_idx, u4_num_mbs,
  278|  2.01k|                                                        u4_num_mbs_next, u4_tfr_n_mb,
  279|  2.01k|                                                        u4_end_of_row);
  280|  2.01k|                }
  281|  6.35k|            }
  282|  10.7k|            ps_dec->u4_total_mbs_coded += u4_num_mbs;
  283|  10.7k|            if(u4_tfr_n_mb) u4_num_mbs = 0;
  ------------------
  |  Branch (283:16): [True: 10.7k, False: 0]
  ------------------
  284|  10.7k|            u4_mb_idx = u4_num_mbs;
  285|  10.7k|            ps_dec->u4_mb_idx = u4_num_mbs;
  286|  10.7k|        }
  287|  71.0k|    } while(uc_more_data_flag);
  ------------------
  |  Branch (287:13): [True: 70.4k, False: 594]
  ------------------
  288|       |
  289|    722|    ps_dec->u4_num_mbs_cur_nmb = 0;
  290|    722|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i4_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
  291|       |
  292|    722|    return ret;
  293|  3.45k|}
isvcd_parse_islice_data_cavlc:
  322|  2.12k|{
  323|  2.12k|    UWORD8 uc_more_data_flag;
  324|  2.12k|    UWORD32 u4_num_mbs, u4_mb_idx;
  325|  2.12k|    dec_mb_info_t *ps_cur_mb_info;
  326|  2.12k|    deblk_mb_t *ps_cur_deblk_mb;
  327|  2.12k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  328|  2.12k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
  329|  2.12k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  330|  2.12k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  331|  2.12k|    UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
  332|  2.12k|    WORD32 i4_cur_mb_addr;
  333|  2.12k|    UWORD8 u1_mbaff;
  334|  2.12k|    UWORD32 u4_num_mbs_next, u4_end_of_row, u4_tfr_n_mb;
  335|  2.12k|    WORD32 ret = OK;
  ------------------
  |  |  114|  2.12k|#define OK        0
  ------------------
  336|       |
  337|  2.12k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
  338|  2.12k|    ih264d_update_qp(ps_dec, 0);
  339|  2.12k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
  340|       |
  341|       |    /* initializations */
  342|  2.12k|    u4_mb_idx = ps_dec->u4_mb_idx;
  343|  2.12k|    u4_num_mbs = u4_mb_idx;
  344|       |
  345|  2.12k|    uc_more_data_flag = 1;
  346|  2.12k|    i4_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
  347|  2.12k|    do
  348|  29.8k|    {
  349|  29.8k|        UWORD8 u1_mb_type;
  350|  29.8k|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
  351|  29.8k|        if(i4_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (351:12): [True: 92, False: 29.7k]
  ------------------
  352|     92|        {
  353|     92|            break;
  354|     92|        }
  355|       |
  356|  29.7k|        ps_cur_mb_info = ps_dec->ps_nmb_info + u4_num_mbs;
  357|  29.7k|        ps_dec->u4_num_mbs_cur_nmb = u4_num_mbs;
  358|  29.7k|        ps_dec->u4_num_pmbair = (u4_num_mbs >> u1_mbaff);
  359|  29.7k|        ps_cur_mb_info->u1_end_of_slice = 0;
  360|       |
  361|       |        /***************************************************************/
  362|       |        /* Get the required information for decoding of MB             */
  363|       |        /* mb_x, mb_y , neighbour availablity,                         */
  364|       |        /***************************************************************/
  365|  29.7k|        ps_dec->pf_get_mb_info(ps_dec, i4_cur_mb_addr, ps_cur_mb_info, 0);
  366|       |
  367|       |        /***************************************************************/
  368|       |        /* Set the deblocking parameters for this MB                   */
  369|       |        /***************************************************************/
  370|  29.7k|        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u4_num_mbs;
  371|  29.7k|        if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (371:12): [True: 29.7k, False: 0]
  ------------------
  372|  29.7k|            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
  373|  29.7k|                                             ps_dec->u1_mb_ngbr_availablity,
  374|  29.7k|                                             ps_dec->u1_cur_mb_fld_dec_flag);
  375|       |
  376|  29.7k|        ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_MB;
  ------------------
  |  |  382|  29.7k|#define D_INTRA_MB        1
  ------------------
  377|       |
  378|       |        /**************************************************************/
  379|       |        /* Macroblock Layer Begins, Decode the u1_mb_type             */
  380|       |        /**************************************************************/
  381|       |        /* Inlined ih264d_uev */
  382|  29.7k|        {
  383|  29.7k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  384|  29.7k|            UWORD32 u4_word, u4_ldz, u4_temp;
  385|       |
  386|       |            /***************************************************************/
  387|       |            /* Find leading zeros in next 32 bits                          */
  388|       |            /***************************************************************/
  389|  29.7k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  29.7k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  29.7k|{                                                                           \
  |  |  152|  29.7k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  29.7k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  29.7k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  29.7k|                                                                            \
  |  |  156|  29.7k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  29.7k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 29.0k, False: 682]
  |  |  ------------------
  |  |  158|  29.7k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  29.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  29.7k|}
  ------------------
  390|  29.7k|            u4_ldz = CLZ(u4_word);
  391|       |            /* Flush the ps_bitstrm */
  392|  29.7k|            u4_bitstream_offset += (u4_ldz + 1);
  393|       |            /* Read the suffix from the ps_bitstrm */
  394|  29.7k|            u4_word = 0;
  395|  29.7k|            if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  19.0k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  19.0k|{                                                                           \
  |  |  122|  19.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  19.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  19.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  19.0k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  19.0k|                                                                            \
  |  |  127|  19.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 18.6k, False: 380]
  |  |  ------------------
  |  |  128|  19.0k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  18.6k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  19.0k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  19.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  19.0k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  19.0k|}                                                                           \
  ------------------
  |  Branch (395:16): [True: 19.0k, False: 10.6k]
  ------------------
  396|  29.7k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  397|  29.7k|            u4_temp = ((1 << u4_ldz) + u4_word - 1);
  398|  29.7k|            if(u4_temp > 25) return ERROR_MB_TYPE;
  ------------------
  |  Branch (398:16): [True: 140, False: 29.6k]
  ------------------
  399|  29.6k|            u1_mb_type = u4_temp;
  400|  29.6k|        }
  401|       |        /* Inlined ih264d_uev */
  402|      0|        ps_cur_mb_info->u1_mb_type = u1_mb_type;
  403|  29.6k|        COPYTHECONTEXT("u1_mb_type", u1_mb_type);
  404|       |
  405|       |        /**************************************************************/
  406|       |        /* Parse Macroblock data                                      */
  407|       |        /**************************************************************/
  408|  29.6k|        if(25 == u1_mb_type)
  ------------------
  |  Branch (408:12): [True: 110, False: 29.4k]
  ------------------
  409|    110|        {
  410|       |            /* I_PCM_MB */
  411|    110|            ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|    110|#define I_PCM_MB    6
  ------------------
  412|    110|            ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u4_num_mbs);
  413|    110|            if(ret != OK) return ret;
  ------------------
  |  |  114|    110|#define OK        0
  ------------------
  |  Branch (413:16): [True: 0, False: 110]
  ------------------
  414|    110|            ps_cur_deblk_mb->u1_mb_qp = 0;
  415|    110|        }
  416|  29.4k|        else
  417|  29.4k|        {
  418|  29.4k|            ret = ih264d_parse_imb_cavlc(ps_dec, ps_cur_mb_info, u4_num_mbs, u1_mb_type);
  419|  29.4k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  29.4k|#define OK        0
  ------------------
  |  Branch (419:16): [True: 1.70k, False: 27.7k]
  ------------------
  420|  27.7k|            ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
  421|  27.7k|        }
  422|  27.9k|        if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (422:12): [True: 0, False: 27.9k]
  ------------------
  423|      0|        {
  424|      0|            ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
  425|      0|                                        ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
  426|      0|        }
  427|  27.9k|        uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
  ------------------
  |  |   97|  27.9k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|  27.9k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  ------------------
  428|  27.9k|        if(u1_mbaff)
  ------------------
  |  Branch (428:12): [True: 0, False: 27.9k]
  ------------------
  429|      0|        {
  430|      0|            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
  431|      0|            if(!uc_more_data_flag && (0 == (i4_cur_mb_addr & 1)))
  ------------------
  |  Branch (431:16): [True: 0, False: 0]
  |  Branch (431:38): [True: 0, False: 0]
  ------------------
  432|      0|            {
  433|      0|                return ERROR_EOB_FLUSHBITS_T;
  434|      0|            }
  435|      0|        }
  436|       |        /**************************************************************/
  437|       |        /* Get next Macroblock address                                */
  438|       |        /**************************************************************/
  439|  27.9k|        i4_cur_mb_addr++;
  440|       |        /* Store the colocated information */
  441|  27.9k|        {
  442|  27.9k|            mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u4_num_mbs << 4);
  443|  27.9k|            mv_pred_t s_mvPred = {{0, 0, 0, 0}, {-1, -1}, 0, 0};
  444|  27.9k|            ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
  445|  27.9k|                               (UWORD8) (ps_dec->u1_cur_mb_fld_dec_flag << 1), 4, 4);
  446|  27.9k|        }
  447|       |
  448|       |        /*if num _cores is set to 3,compute bs will be done in another thread*/
  449|  27.9k|        if(ps_dec->u4_num_cores < 3)
  ------------------
  |  Branch (449:12): [True: 27.9k, False: 0]
  ------------------
  450|  27.9k|        {
  451|  27.9k|            if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (451:16): [True: 27.9k, False: 0]
  ------------------
  452|  27.9k|                ps_svc_lyr_dec->pf_svc_compute_bs(ps_svc_lyr_dec, ps_cur_mb_info,
  453|  27.9k|                                                  (UWORD16) (u4_num_mbs >> u1_mbaff));
  454|  27.9k|        }
  455|  27.9k|        u4_num_mbs++;
  456|       |
  457|       |        /****************************************************************/
  458|       |        /* Check for End Of Row                                         */
  459|       |        /****************************************************************/
  460|  27.9k|        u4_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
  461|  27.9k|        u4_end_of_row = (!u4_num_mbs_next) && (!(u1_mbaff && (u4_num_mbs & 0x01)));
  ------------------
  |  Branch (461:25): [True: 5.27k, False: 22.6k]
  |  Branch (461:50): [True: 0, False: 5.27k]
  |  Branch (461:62): [True: 0, False: 0]
  ------------------
  462|  27.9k|        u4_tfr_n_mb =
  463|  27.9k|            (u4_num_mbs == ps_dec->u4_recon_mb_grp) || u4_end_of_row || (!uc_more_data_flag);
  ------------------
  |  Branch (463:13): [True: 5.19k, False: 22.7k]
  |  Branch (463:56): [True: 82, False: 22.6k]
  |  Branch (463:73): [True: 166, False: 22.4k]
  ------------------
  464|  27.9k|        ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag);
  465|       |
  466|  27.9k|        if(u4_tfr_n_mb || (!uc_more_data_flag))
  ------------------
  |  Branch (466:12): [True: 5.44k, False: 22.4k]
  |  Branch (466:27): [True: 0, False: 22.4k]
  ------------------
  467|  5.44k|        {
  468|  5.44k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (468:16): [True: 1.89k, False: 3.55k]
  ------------------
  469|  1.89k|            {
  470|  1.89k|                ih264d_parse_tfr_nmb(ps_dec, u4_mb_idx, u4_num_mbs, u4_num_mbs_next, u4_tfr_n_mb,
  471|  1.89k|                                     u4_end_of_row);
  472|  1.89k|                ps_dec->ps_nmb_info += u4_num_mbs;
  473|  1.89k|                ps_svc_lyr_dec->ps_svc_nmb_info += u4_num_mbs;
  474|  1.89k|            }
  475|  3.55k|            else
  476|  3.55k|            {
  477|  3.55k|                if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|  3.55k|#define TARGET_LAYER 2
  ------------------
  |  Branch (477:20): [True: 3.32k, False: 224]
  ------------------
  478|  3.32k|                {
  479|  3.32k|                    ih264d_decode_recon_tfr_nmb(ps_dec, u4_mb_idx, u4_num_mbs, u4_num_mbs_next,
  480|  3.32k|                                                u4_tfr_n_mb, u4_end_of_row);
  481|  3.32k|                }
  482|    224|                else
  483|    224|                {
  484|    224|                    isvcd_decode_recon_tfr_nmb_base_lyr(ps_svc_lyr_dec, u4_mb_idx, u4_num_mbs,
  485|    224|                                                        u4_num_mbs_next, u4_tfr_n_mb,
  486|    224|                                                        u4_end_of_row);
  487|    224|                }
  488|  3.55k|            }
  489|  5.44k|            ps_dec->u4_total_mbs_coded += u4_num_mbs;
  490|  5.44k|            if(u4_tfr_n_mb) u4_num_mbs = 0;
  ------------------
  |  Branch (490:16): [True: 5.44k, False: 0]
  ------------------
  491|  5.44k|            u4_mb_idx = u4_num_mbs;
  492|  5.44k|            ps_dec->u4_mb_idx = u4_num_mbs;
  493|  5.44k|        }
  494|  27.9k|    } while(uc_more_data_flag);
  ------------------
  |  Branch (494:13): [True: 27.7k, False: 182]
  ------------------
  495|       |
  496|    274|    ps_dec->u4_num_mbs_cur_nmb = 0;
  497|    274|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i4_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
  498|       |
  499|    274|    return ret;
  500|  2.12k|}
isvcd_parse_imb_cavlc:
  517|   206k|{
  518|   206k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  519|   206k|    WORD32 i4_delta_qp;
  520|   206k|    UWORD32 u4_temp;
  521|   206k|    UWORD32 ui_is_top_mb_available;
  522|   206k|    UWORD32 ui_is_left_mb_available;
  523|   206k|    UWORD32 u4_cbp;
  524|   206k|    UWORD32 u4_offset;
  525|   206k|    UWORD32 *pu4_bitstrm_buf;
  526|   206k|    WORD32 ret;
  527|   206k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
  528|   206k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  529|   206k|    UNUSED(u4_mb_num);
  ------------------
  |  |   45|   206k|#define UNUSED(x) ((void)(x))
  ------------------
  530|       |
  531|   206k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
  532|   206k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  533|   206k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
  534|   206k|    u4_temp = ps_dec->u1_mb_ngbr_availablity;
  535|   206k|    ui_is_top_mb_available = BOOLEAN(u4_temp & TOP_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|   206k|#define BOOLEAN(x) (!!(x))
  ------------------
  536|   206k|    ui_is_left_mb_available = BOOLEAN(u4_temp & LEFT_MB_AVAILABLE_MASK);
  ------------------
  |  |   61|   206k|#define BOOLEAN(x) (!!(x))
  ------------------
  537|   206k|    pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  538|       |
  539|   206k|    ps_cur_mb_info->ps_curmb->u1_mb_type = P_MB;
  ------------------
  |  |  419|   206k|#define P_MB        2
  ------------------
  540|   206k|    if(!ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (540:8): [True: 14.5k, False: 191k]
  ------------------
  541|  14.5k|    {
  542|  14.5k|        if(u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  14.5k|#define I_4x4_MB    0
  ------------------
  |  Branch (542:12): [True: 5.83k, False: 8.74k]
  ------------------
  543|  5.83k|        {
  544|  5.83k|            ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
  ------------------
  |  |  417|  5.83k|#define I_4x4_MB    0
  ------------------
  545|  5.83k|            u4_offset = 0;
  546|       |
  547|       |            /*--------------------------------------------------------------------*/
  548|       |            /* Read transform_size_8x8_flag if present                            */
  549|       |            /*--------------------------------------------------------------------*/
  550|  5.83k|            if(ps_dec->ps_cur_pps->i4_transform_8x8_mode_flag)
  ------------------
  |  Branch (550:16): [True: 4.68k, False: 1.15k]
  ------------------
  551|  4.68k|            {
  552|  4.68k|                ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
  553|  4.68k|                COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  554|  4.68k|                ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  555|  4.68k|            }
  556|       |
  557|       |            /*--------------------------------------------------------------------*/
  558|       |            /* Read the IntraPrediction modes for LUMA                            */
  559|       |            /*--------------------------------------------------------------------*/
  560|  5.83k|            if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (560:16): [True: 1.50k, False: 4.33k]
  ------------------
  561|  1.50k|            {
  562|  1.50k|                UWORD8 *pu1_temp;
  563|  1.50k|                ih264d_read_intra_pred_modes(ps_dec, ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data),
  564|  1.50k|                                             ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data + 16),
  565|  1.50k|                                             ps_cur_mb_info->u1_tran_form8x8);
  566|  1.50k|                pu1_temp = (UWORD8 *) ps_dec->pv_parse_tu_coeff_data;
  567|  1.50k|                pu1_temp += 32;
  568|  1.50k|                ps_dec->pv_parse_tu_coeff_data = (void *) pu1_temp;
  569|  1.50k|            }
  570|  4.33k|            else
  571|  4.33k|            {
  572|  4.33k|                UWORD8 *pu1_temp;
  573|  4.33k|                ih264d_read_intra_pred_modes(ps_dec, ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data),
  574|  4.33k|                                             ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data + 4),
  575|  4.33k|                                             ps_cur_mb_info->u1_tran_form8x8);
  576|  4.33k|                pu1_temp = (UWORD8 *) ps_dec->pv_parse_tu_coeff_data;
  577|  4.33k|                pu1_temp += 8;
  578|  4.33k|                ps_dec->pv_parse_tu_coeff_data = (void *) pu1_temp;
  579|  4.33k|            }
  580|       |            /*--------------------------------------------------------------------*/
  581|       |            /* Read the IntraPrediction mode for CHROMA                           */
  582|       |            /*--------------------------------------------------------------------*/
  583|       |            /* Inlined ih264d_uev */
  584|  5.83k|            {
  585|  5.83k|                UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  586|  5.83k|                UWORD32 u4_word, u4_ldz, u4_temp;
  587|       |
  588|       |                /***************************************************************/
  589|       |                /* Find leading zeros in next 32 bits                          */
  590|       |                /***************************************************************/
  591|  5.83k|                NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  5.83k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  5.83k|{                                                                           \
  |  |  152|  5.83k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  5.83k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  5.83k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  5.83k|                                                                            \
  |  |  156|  5.83k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  5.83k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 5.64k, False: 191]
  |  |  ------------------
  |  |  158|  5.83k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  5.64k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  5.83k|}
  ------------------
  592|  5.83k|                u4_ldz = CLZ(u4_word);
  593|       |                /* Flush the ps_bitstrm */
  594|  5.83k|                u4_bitstream_offset += (u4_ldz + 1);
  595|       |                /* Read the suffix from the ps_bitstrm */
  596|  5.83k|                u4_word = 0;
  597|  5.83k|                if(u4_ldz)
  ------------------
  |  Branch (597:20): [True: 1.33k, False: 4.50k]
  ------------------
  598|  1.33k|                {
  599|  1.33k|                    GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  1.33k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  1.33k|{                                                                           \
  |  |  122|  1.33k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  1.33k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  1.33k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  1.33k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  1.33k|                                                                            \
  |  |  127|  1.33k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 983, False: 354]
  |  |  ------------------
  |  |  128|  1.33k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|    983|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  1.33k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  1.33k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  1.33k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  1.33k|}                                                                           \
  ------------------
  600|  1.33k|                }
  601|  5.83k|                *pu4_bitstrm_ofst = u4_bitstream_offset;
  602|  5.83k|                u4_temp = ((1 << u4_ldz) + u4_word - 1);
  603|  5.83k|                if(u4_temp > 3)
  ------------------
  |  Branch (603:20): [True: 288, False: 5.55k]
  ------------------
  604|    288|                {
  605|    288|                    return ERROR_CHROMA_PRED_MODE;
  606|    288|                }
  607|  5.55k|                ps_cur_mb_info->u1_chroma_pred_mode = u4_temp;
  608|  5.55k|                COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode);
  609|  5.55k|            }
  610|       |            /*--------------------------------------------------------------------*/
  611|       |            /* Read the Coded block pattern                                       */
  612|       |            /*--------------------------------------------------------------------*/
  613|      0|            {
  614|  5.55k|                UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  615|  5.55k|                UWORD32 u4_word, u4_ldz;
  616|       |
  617|       |                /***************************************************************/
  618|       |                /* Find leading zeros in next 32 bits                          */
  619|       |                /***************************************************************/
  620|  5.55k|                NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  5.55k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  5.55k|{                                                                           \
  |  |  152|  5.55k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  5.55k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  5.55k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  5.55k|                                                                            \
  |  |  156|  5.55k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  5.55k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 5.20k, False: 341]
  |  |  ------------------
  |  |  158|  5.55k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  5.20k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  5.55k|}
  ------------------
  621|  5.55k|                u4_ldz = CLZ(u4_word);
  622|       |                /* Flush the ps_bitstrm */
  623|  5.55k|                u4_bitstream_offset += (u4_ldz + 1);
  624|       |                /* Read the suffix from the ps_bitstrm */
  625|  5.55k|                u4_word = 0;
  626|  5.55k|                if(u4_ldz)
  ------------------
  |  Branch (626:20): [True: 1.99k, False: 3.55k]
  ------------------
  627|  1.99k|                {
  628|  1.99k|                    GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  1.99k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  1.99k|{                                                                           \
  |  |  122|  1.99k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  1.99k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  1.99k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  1.99k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  1.99k|                                                                            \
  |  |  127|  1.99k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 1.87k, False: 120]
  |  |  ------------------
  |  |  128|  1.99k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  1.87k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  1.99k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  1.99k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  1.99k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  1.99k|}                                                                           \
  ------------------
  629|  1.99k|                }
  630|  5.55k|                *pu4_bitstrm_ofst = u4_bitstream_offset;
  631|  5.55k|                u4_cbp = ((1 << u4_ldz) + u4_word - 1);
  632|  5.55k|            }
  633|  5.55k|            if(u4_cbp > 47)
  ------------------
  |  Branch (633:16): [True: 203, False: 5.34k]
  ------------------
  634|    203|            {
  635|    203|                return ERROR_CBP;
  636|    203|            }
  637|       |
  638|  5.34k|            u4_cbp = gau1_ih264d_cbp_table[u4_cbp][0];
  639|  5.34k|            COPYTHECONTEXT("coded_block_pattern", u4_cbp);
  640|  5.34k|            ps_cur_mb_info->u1_cbp = u4_cbp;
  641|       |
  642|       |            /*--------------------------------------------------------------------*/
  643|       |            /* Read mb_qp_delta                                                   */
  644|       |            /*--------------------------------------------------------------------*/
  645|  5.34k|            if(ps_cur_mb_info->u1_cbp)
  ------------------
  |  Branch (645:16): [True: 4.63k, False: 713]
  ------------------
  646|  4.63k|            {
  647|  4.63k|                UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  648|  4.63k|                UWORD32 u4_word, u4_ldz, u4_abs_val;
  649|       |
  650|       |                /***************************************************************/
  651|       |                /* Find leading zeros in next 32 bits                          */
  652|       |                /***************************************************************/
  653|  4.63k|                NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  4.63k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  4.63k|{                                                                           \
  |  |  152|  4.63k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  4.63k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  4.63k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  4.63k|                                                                            \
  |  |  156|  4.63k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  4.63k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 4.35k, False: 277]
  |  |  ------------------
  |  |  158|  4.63k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  4.35k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  4.63k|}
  ------------------
  654|  4.63k|                u4_ldz = CLZ(u4_word);
  655|       |
  656|       |                /* Flush the ps_bitstrm */
  657|  4.63k|                u4_bitstream_offset += (u4_ldz + 1);
  658|       |
  659|       |                /* Read the suffix from the ps_bitstrm */
  660|  4.63k|                u4_word = 0;
  661|  4.63k|                if(u4_ldz)
  ------------------
  |  Branch (661:20): [True: 2.08k, False: 2.54k]
  ------------------
  662|  2.08k|                {
  663|  2.08k|                    GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  2.08k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.08k|{                                                                           \
  |  |  122|  2.08k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.08k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.08k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.08k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.08k|                                                                            \
  |  |  127|  2.08k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 1.74k, False: 346]
  |  |  ------------------
  |  |  128|  2.08k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  1.74k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.08k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.08k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.08k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.08k|}                                                                           \
  ------------------
  664|  2.08k|                }
  665|       |
  666|  4.63k|                *pu4_bitstrm_ofst = u4_bitstream_offset;
  667|  4.63k|                u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  668|       |
  669|  4.63k|                if(u4_word & 0x1)
  ------------------
  |  Branch (669:20): [True: 1.59k, False: 3.03k]
  ------------------
  670|  1.59k|                {
  671|  1.59k|                    i4_delta_qp = (-(WORD32) u4_abs_val);
  672|  1.59k|                }
  673|  3.03k|                else
  674|  3.03k|                {
  675|  3.03k|                    i4_delta_qp = (u4_abs_val);
  676|  3.03k|                }
  677|       |
  678|  4.63k|                if((i4_delta_qp < -26) || (i4_delta_qp > 25))
  ------------------
  |  Branch (678:20): [True: 160, False: 4.47k]
  |  Branch (678:43): [True: 291, False: 4.18k]
  ------------------
  679|    451|                {
  680|    451|                    return ERROR_INV_RANGE_QP_T;
  681|    451|                }
  682|       |
  683|  4.18k|                COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
  684|  4.18k|                if(i4_delta_qp != 0)
  ------------------
  |  Branch (684:20): [True: 1.63k, False: 2.54k]
  ------------------
  685|  1.63k|                {
  686|  1.63k|                    ret = ih264d_update_qp(ps_dec, (WORD8) i4_delta_qp);
  687|  1.63k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  1.63k|#define OK        0
  ------------------
  |  Branch (687:24): [True: 0, False: 1.63k]
  ------------------
  688|  1.63k|                }
  689|  4.18k|            }
  690|  5.34k|        }
  691|  8.74k|        else
  692|  8.74k|        {
  693|  8.74k|            u4_offset = 1;
  694|  8.74k|            ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
  ------------------
  |  |  418|  8.74k|#define I_16x16_MB  1
  ------------------
  695|       |            /*-------------------------------------------------------------------*/
  696|       |            /* Read the IntraPrediction mode for CHROMA                          */
  697|       |            /*-------------------------------------------------------------------*/
  698|  8.74k|            {
  699|  8.74k|                UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  700|  8.74k|                UWORD32 u4_word, u4_ldz;
  701|       |
  702|       |                /***************************************************************/
  703|       |                /* Find leading zeros in next 32 bits                          */
  704|       |                /***************************************************************/
  705|  8.74k|                NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  8.74k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  8.74k|{                                                                           \
  |  |  152|  8.74k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  8.74k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  8.74k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  8.74k|                                                                            \
  |  |  156|  8.74k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  8.74k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 7.94k, False: 799]
  |  |  ------------------
  |  |  158|  8.74k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  7.94k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  8.74k|}
  ------------------
  706|  8.74k|                u4_ldz = CLZ(u4_word);
  707|       |                /* Flush the ps_bitstrm */
  708|  8.74k|                u4_bitstream_offset += (u4_ldz + 1);
  709|       |                /* Read the suffix from the ps_bitstrm */
  710|  8.74k|                u4_word = 0;
  711|  8.74k|                if(u4_ldz)
  ------------------
  |  Branch (711:20): [True: 3.94k, False: 4.80k]
  ------------------
  712|  3.94k|                {
  713|  3.94k|                    GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  3.94k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  3.94k|{                                                                           \
  |  |  122|  3.94k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  3.94k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  3.94k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  3.94k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  3.94k|                                                                            \
  |  |  127|  3.94k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 3.54k, False: 400]
  |  |  ------------------
  |  |  128|  3.94k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  3.54k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  3.94k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  3.94k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  3.94k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  3.94k|}                                                                           \
  ------------------
  714|  3.94k|                }
  715|  8.74k|                *pu4_bitstrm_ofst = u4_bitstream_offset;
  716|  8.74k|                u4_temp = ((1 << u4_ldz) + u4_word - 1);
  717|       |
  718|       |                /* Inlined ih264d_uev */
  719|  8.74k|                if(u4_temp > 3)
  ------------------
  |  Branch (719:20): [True: 497, False: 8.24k]
  ------------------
  720|    497|                {
  721|    497|                    return ERROR_CHROMA_PRED_MODE;
  722|    497|                }
  723|  8.24k|                ps_cur_mb_info->u1_chroma_pred_mode = u4_temp;
  724|  8.24k|                COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode);
  725|  8.24k|            }
  726|       |            /*-------------------------------------------------------------------*/
  727|       |            /* Read the Coded block pattern                                      */
  728|       |            /*-------------------------------------------------------------------*/
  729|      0|            u4_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2];
  730|  8.24k|            ps_cur_mb_info->u1_cbp = u4_cbp;
  731|       |
  732|       |            /*-------------------------------------------------------------------*/
  733|       |            /* Read mb_qp_delta                                                  */
  734|       |            /*-------------------------------------------------------------------*/
  735|  8.24k|            {
  736|  8.24k|                UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  737|  8.24k|                UWORD32 u4_word, u4_ldz, u4_abs_val;
  738|       |
  739|       |                /***************************************************************/
  740|       |                /* Find leading zeros in next 32 bits                          */
  741|       |                /***************************************************************/
  742|  8.24k|                NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  8.24k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  8.24k|{                                                                           \
  |  |  152|  8.24k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  8.24k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  8.24k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  8.24k|                                                                            \
  |  |  156|  8.24k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  8.24k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 7.92k, False: 321]
  |  |  ------------------
  |  |  158|  8.24k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  7.92k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  8.24k|}
  ------------------
  743|  8.24k|                u4_ldz = CLZ(u4_word);
  744|       |
  745|       |                /* Flush the ps_bitstrm */
  746|  8.24k|                u4_bitstream_offset += (u4_ldz + 1);
  747|       |
  748|       |                /* Read the suffix from the ps_bitstrm */
  749|  8.24k|                u4_word = 0;
  750|  8.24k|                if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  2.56k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.56k|{                                                                           \
  |  |  122|  2.56k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.56k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.56k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.56k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.56k|                                                                            \
  |  |  127|  2.56k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.23k, False: 337]
  |  |  ------------------
  |  |  128|  2.56k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.23k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.56k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.56k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.56k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.56k|}                                                                           \
  ------------------
  |  Branch (750:20): [True: 2.56k, False: 5.67k]
  ------------------
  751|       |
  752|  8.24k|                *pu4_bitstrm_ofst = u4_bitstream_offset;
  753|  8.24k|                u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  754|       |
  755|  8.24k|                if(u4_word & 0x1)
  ------------------
  |  Branch (755:20): [True: 1.08k, False: 7.15k]
  ------------------
  756|  1.08k|                    i4_delta_qp = (-(WORD32) u4_abs_val);
  757|  7.15k|                else
  758|  7.15k|                    i4_delta_qp = (u4_abs_val);
  759|       |
  760|  8.24k|                if((i4_delta_qp < -26) || (i4_delta_qp > 25)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  Branch (760:20): [True: 70, False: 8.17k]
  |  Branch (760:43): [True: 56, False: 8.12k]
  ------------------
  761|  8.24k|            }
  762|       |            /* inlinined ih264d_sev */
  763|  8.12k|            COPYTHECONTEXT("Delta quant", i1_delta_qp);
  764|       |
  765|  8.12k|            if(i4_delta_qp != 0)
  ------------------
  |  Branch (765:16): [True: 2.44k, False: 5.67k]
  ------------------
  766|  2.44k|            {
  767|  2.44k|                ret = ih264d_update_qp(ps_dec, (WORD8) i4_delta_qp);
  768|  2.44k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  2.44k|#define OK        0
  ------------------
  |  Branch (768:20): [True: 0, False: 2.44k]
  ------------------
  769|  2.44k|            }
  770|       |
  771|  8.12k|            {
  772|  8.12k|                WORD16 i_scaleFactor;
  773|  8.12k|                UWORD32 ui_N = 0;
  774|  8.12k|                WORD16 *pi2_scale_matrix_ptr;
  775|       |                /*******************************************************************/
  776|       |                /* for luma DC coefficients the scaling is done during the parsing */
  777|       |                /* to preserve the precision                                       */
  778|       |                /*******************************************************************/
  779|  8.12k|                if(ps_dec->s_high_profile.u1_scaling_present)
  ------------------
  |  Branch (779:20): [True: 2.89k, False: 5.22k]
  ------------------
  780|  2.89k|                {
  781|  2.89k|                    pi2_scale_matrix_ptr = ps_dec->s_high_profile.i2_scalinglist4x4[0];
  782|  2.89k|                }
  783|  5.22k|                else
  784|  5.22k|                {
  785|  5.22k|                    i_scaleFactor = 16;
  786|  5.22k|                    pi2_scale_matrix_ptr = &i_scaleFactor;
  787|  5.22k|                }
  788|       |
  789|       |                /*---------------------------------------------------------------*/
  790|       |                /* Decode DC coefficients                                        */
  791|       |                /*---------------------------------------------------------------*/
  792|       |                /*---------------------------------------------------------------*/
  793|       |                /* Calculation of N                                              */
  794|       |                /*---------------------------------------------------------------*/
  795|  8.12k|                if(ui_is_left_mb_available)
  ------------------
  |  Branch (795:20): [True: 5.25k, False: 2.86k]
  ------------------
  796|  5.25k|                {
  797|  5.25k|                    if(ui_is_top_mb_available)
  ------------------
  |  Branch (797:24): [True: 4.40k, False: 844]
  ------------------
  798|  4.40k|                    {
  799|  4.40k|                        ui_N = ((ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0] +
  800|  4.40k|                                 ps_dec->pu1_left_nnz_y[0] + 1) >>
  801|  4.40k|                                1);
  802|  4.40k|                    }
  803|    844|                    else
  804|    844|                    {
  805|    844|                        ui_N = ps_dec->pu1_left_nnz_y[0];
  806|    844|                    }
  807|  5.25k|                }
  808|  2.86k|                else if(ui_is_top_mb_available)
  ------------------
  |  Branch (808:25): [True: 2.03k, False: 837]
  ------------------
  809|  2.03k|                {
  810|  2.03k|                    ui_N = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0];
  811|  2.03k|                }
  812|       |
  813|  8.12k|                {
  814|  8.12k|                    WORD16 pi2_dc_coef[16] = {0};
  815|  8.12k|                    WORD32 pi4_tmp[16] = {0};
  816|  8.12k|                    tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
  817|  8.12k|                        (tu_sblk4x4_coeff_data_t *) ps_dec->pv_parse_tu_coeff_data;
  818|  8.12k|                    WORD16 *pi2_coeff_block = (WORD16 *) ps_dec->pv_parse_tu_coeff_data;
  819|  8.12k|                    UWORD32 u4_num_coeff;
  820|  8.12k|                    ps_tu_4x4->u2_sig_coeff_map = 0;
  821|  8.12k|                    ret = ps_dec->pf_cavlc_parse4x4coeff[(ui_N > 7)](pi2_dc_coef, 0, ui_N, ps_dec,
  822|  8.12k|                                                                     &u4_num_coeff);
  823|  8.12k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  8.12k|#define OK        0
  ------------------
  |  Branch (823:24): [True: 93, False: 8.02k]
  ------------------
  824|       |
  825|  8.02k|                    if(EXCEED_OFFSET(ps_bitstrm)) return ERROR_EOB_TERMINATE_T;
  ------------------
  |  |   93|  8.02k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 121, False: 7.90k]
  |  |  ------------------
  ------------------
  826|  7.90k|                    if(ps_tu_4x4->u2_sig_coeff_map)
  ------------------
  |  Branch (826:24): [True: 2.29k, False: 5.61k]
  ------------------
  827|  2.29k|                    {
  828|  2.29k|                        memset(pi2_dc_coef, 0, sizeof(pi2_dc_coef));
  829|  2.29k|                        ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4, pi2_dc_coef,
  830|  2.29k|                                                         ps_dec->pu1_inv_scan);
  831|       |
  832|  2.29k|                        PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  2.29k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  833|  2.29k|                        ps_dec->pf_ihadamard_scaling_4x4(
  834|  2.29k|                            pi2_dc_coef, pi2_coeff_block, ps_dec->pu2_quant_scale_y,
  835|  2.29k|                            (UWORD16 *) pi2_scale_matrix_ptr, ps_dec->u1_qp_y_div6, pi4_tmp);
  836|  2.29k|                        pi2_coeff_block += 16;
  837|  2.29k|                        ps_dec->pv_parse_tu_coeff_data = (void *) pi2_coeff_block;
  838|  2.29k|                        SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 0);
  ------------------
  |  |  106|  2.29k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
  839|  2.29k|                    }
  840|  7.90k|                }
  841|  7.90k|            }
  842|  7.90k|        }
  843|  14.5k|    }
  844|   191k|    else
  845|   191k|    {
  846|   191k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
  847|   191k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  848|   191k|        u4_offset = 0;
  849|       |        /*--------------------------------------------------------------------*/
  850|       |        /* Read the Coded block pattern                                       */
  851|       |        /*--------------------------------------------------------------------*/
  852|   191k|        {
  853|   191k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  854|   191k|            UWORD32 u4_word, u4_ldz;
  855|       |
  856|       |            /***************************************************************/
  857|       |            /* Find leading zeros in next 32 bits                          */
  858|       |            /***************************************************************/
  859|   191k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|   191k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|   191k|{                                                                           \
  |  |  152|   191k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|   191k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|   191k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|   191k|                                                                            \
  |  |  156|   191k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|   191k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 185k, False: 6.13k]
  |  |  ------------------
  |  |  158|   191k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|   185k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|   191k|}
  ------------------
  860|   191k|            u4_ldz = CLZ(u4_word);
  861|       |            /* Flush the ps_bitstrm */
  862|   191k|            u4_bitstream_offset += (u4_ldz + 1);
  863|       |            /* Read the suffix from the ps_bitstrm */
  864|   191k|            u4_word = 0;
  865|   191k|            if(u4_ldz)
  ------------------
  |  Branch (865:16): [True: 20.5k, False: 171k]
  ------------------
  866|  20.5k|            {
  867|  20.5k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  20.5k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  20.5k|{                                                                           \
  |  |  122|  20.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  20.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  20.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  20.5k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  20.5k|                                                                            \
  |  |  127|  20.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 19.7k, False: 783]
  |  |  ------------------
  |  |  128|  20.5k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  19.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  20.5k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  20.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  20.5k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  20.5k|}                                                                           \
  ------------------
  868|  20.5k|            }
  869|   191k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  870|   191k|            u4_cbp = ((1 << u4_ldz) + u4_word - 1);
  871|   191k|        }
  872|   191k|        if(u4_cbp > 47)
  ------------------
  |  Branch (872:12): [True: 776, False: 191k]
  ------------------
  873|    776|        {
  874|    776|            return ERROR_CBP;
  875|    776|        }
  876|       |
  877|       |        /*  inter cbp table to be used for base mode flag*/
  878|   191k|        u4_cbp = gau1_ih264d_cbp_table[u4_cbp][1];
  879|   191k|        COPYTHECONTEXT("coded_block_pattern", u4_cbp);
  880|   191k|        ps_cur_mb_info->u1_cbp = u4_cbp;
  881|   191k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
  882|   191k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  883|       |
  884|       |        /*--------------------------------------------------------------------*/
  885|       |        /* Read transform_size_8x8_flag if present                            */
  886|       |        /*--------------------------------------------------------------------*/
  887|   191k|        if((ps_dec->ps_cur_pps->i4_transform_8x8_mode_flag) && (ps_cur_mb_info->u1_cbp & 0xf))
  ------------------
  |  Branch (887:12): [True: 73.6k, False: 117k]
  |  Branch (887:64): [True: 4.83k, False: 68.8k]
  ------------------
  888|  4.83k|        {
  889|  4.83k|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
  890|  4.83k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
  891|  4.83k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
  892|  4.83k|        }
  893|       |        /*--------------------------------------------------------------------*/
  894|       |        /* Read mb_qp_delta                                                   */
  895|       |        /*--------------------------------------------------------------------*/
  896|   191k|        if(ps_cur_mb_info->u1_cbp)
  ------------------
  |  Branch (896:12): [True: 19.7k, False: 171k]
  ------------------
  897|  19.7k|        {
  898|  19.7k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  899|  19.7k|            UWORD32 u4_word, u4_ldz, u4_abs_val;
  900|       |
  901|       |            /***************************************************************/
  902|       |            /* Find leading zeros in next 32 bits                          */
  903|       |            /***************************************************************/
  904|  19.7k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  19.7k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  19.7k|{                                                                           \
  |  |  152|  19.7k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  19.7k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  19.7k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  19.7k|                                                                            \
  |  |  156|  19.7k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  19.7k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 18.8k, False: 987]
  |  |  ------------------
  |  |  158|  19.7k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  18.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  19.7k|}
  ------------------
  905|  19.7k|            u4_ldz = CLZ(u4_word);
  906|       |
  907|       |            /* Flush the ps_bitstrm */
  908|  19.7k|            u4_bitstream_offset += (u4_ldz + 1);
  909|       |
  910|       |            /* Read the suffix from the ps_bitstrm */
  911|  19.7k|            u4_word = 0;
  912|  19.7k|            if(u4_ldz)
  ------------------
  |  Branch (912:16): [True: 7.05k, False: 12.7k]
  ------------------
  913|  7.05k|            {
  914|  7.05k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  7.05k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  7.05k|{                                                                           \
  |  |  122|  7.05k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  7.05k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  7.05k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  7.05k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  7.05k|                                                                            \
  |  |  127|  7.05k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 6.44k, False: 608]
  |  |  ------------------
  |  |  128|  7.05k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  6.44k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  7.05k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  7.05k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  7.05k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  7.05k|}                                                                           \
  ------------------
  915|  7.05k|            }
  916|       |
  917|  19.7k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  918|  19.7k|            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
  919|       |
  920|  19.7k|            if(u4_word & 0x1)
  ------------------
  |  Branch (920:16): [True: 2.28k, False: 17.5k]
  ------------------
  921|  2.28k|            {
  922|  2.28k|                i4_delta_qp = (-(WORD32) u4_abs_val);
  923|  2.28k|            }
  924|  17.5k|            else
  925|  17.5k|            {
  926|  17.5k|                i4_delta_qp = (u4_abs_val);
  927|  17.5k|            }
  928|       |
  929|  19.7k|            if((i4_delta_qp < -26) || (i4_delta_qp > 25))
  ------------------
  |  Branch (929:16): [True: 167, False: 19.6k]
  |  Branch (929:39): [True: 245, False: 19.3k]
  ------------------
  930|    412|            {
  931|    412|                return ERROR_INV_RANGE_QP_T;
  932|    412|            }
  933|       |
  934|  19.3k|            COPYTHECONTEXT("mb_qp_delta", i4_delta_qp);
  935|  19.3k|            if(i4_delta_qp != 0)
  ------------------
  |  Branch (935:16): [True: 6.64k, False: 12.7k]
  ------------------
  936|  6.64k|            {
  937|  6.64k|                ret = ih264d_update_qp(ps_dec, (WORD8) i4_delta_qp);
  938|  6.64k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  6.64k|#define OK        0
  ------------------
  |  Branch (938:20): [True: 0, False: 6.64k]
  ------------------
  939|  6.64k|            }
  940|  19.3k|        }
  941|   191k|    }
  942|   203k|    if(u4_cbp)
  ------------------
  |  Branch (942:8): [True: 24.2k, False: 179k]
  ------------------
  943|  24.2k|    {
  944|  24.2k|        ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, (UWORD8) u4_offset);
  945|  24.2k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  24.2k|#define OK        0
  ------------------
  |  Branch (945:12): [True: 602, False: 23.6k]
  ------------------
  946|  23.6k|        if(EXCEED_OFFSET(ps_bitstrm)) return ERROR_EOB_TERMINATE_T;
  ------------------
  |  |   93|  23.6k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 475, False: 23.2k]
  |  |  ------------------
  ------------------
  947|       |
  948|       |        /* Store Left Mb NNZ and TOP chroma NNZ */
  949|  23.6k|    }
  950|   179k|    else
  951|   179k|    {
  952|   179k|        ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6;
  953|   179k|        ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6;
  954|   179k|        ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6;
  955|   179k|        ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6;
  956|   179k|        ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6;
  957|   179k|        ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6;
  958|   179k|        ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|   179k|#define CAVLC  0
  ------------------
  959|   179k|    }
  960|       |
  961|   202k|    return OK;
  ------------------
  |  |  114|   202k|#define OK        0
  ------------------
  962|   203k|}
isvcd_parse_eislice:
  977|  5.79k|{
  978|  5.79k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  979|  5.79k|    WORD32 i_status = OK;
  ------------------
  |  |  114|  5.79k|#define OK        0
  ------------------
  980|  5.79k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
  981|  5.79k|    dec_slice_params_t *ps_slice = ps_dec->ps_cur_slice;
  982|  5.79k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
  983|  5.79k|    dec_seq_params_t *ps_seq;
  984|  5.79k|    dec_svc_seq_params_t *ps_subset_seq;
  985|  5.79k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
  986|  5.79k|    dec_subset_seq_params_t *ps_sps_svc_ext = NULL;
  987|  5.79k|    dec_nal_unit_svc_ext_params_t *ps_nal_svc_ext = NULL;
  988|  5.79k|    UWORD32 u4_temp;
  989|  5.79k|    WORD32 i_temp;
  990|  5.79k|    WORD32 ret;
  991|  5.79k|    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
  992|  5.79k|    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
  993|       |
  994|  5.79k|    ps_nal_svc_ext = ps_svc_lyr_dec->ps_nal_svc_ext;
  995|  5.79k|    ps_seq = ps_dec->ps_cur_sps;
  996|  5.79k|    ps_subset_seq =
  997|  5.79k|        &ps_svc_lyr_dec->ps_subset_sps[MAX_NUM_SEQ_PARAMS + ps_seq->u1_seq_parameter_set_id];
  ------------------
  |  |  521|  5.79k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  998|  5.79k|    ps_sps_svc_ext = &ps_subset_seq->s_sps_svc_ext;
  999|  5.79k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1000|       |
 1001|  5.79k|    if(0 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id)
  ------------------
  |  Branch (1001:8): [True: 5.79k, False: 0]
  ------------------
 1002|  5.79k|    {
 1003|       |        /*--------------------------------------------------------------------*/
 1004|       |        /* Read remaining contents of the slice header                        */
 1005|       |        /*--------------------------------------------------------------------*/
 1006|       |        /* dec_ref_pic_marking function */
 1007|       |        /* G050 */
 1008|  5.79k|        if(ps_slice->u1_nal_ref_idc != 0)
  ------------------
  |  Branch (1008:12): [True: 5.71k, False: 78]
  ------------------
 1009|  5.71k|        {
 1010|  5.71k|            if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (1010:16): [True: 5.54k, False: 168]
  ------------------
 1011|  5.54k|            {
 1012|  5.54k|                dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1013|  5.54k|                dec_seq_params_t *ps_sps_tmp = ps_pps->ps_sps;
 1014|  5.54k|                UWORD8 u1_nal_unit_type_tmp = ps_dec->u1_nal_unit_type;
 1015|       |
 1016|  5.54k|                ps_pps->ps_sps = ps_dec->ps_cur_sps;
 1017|  5.54k|                if(ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag)
  ------------------
  |  Branch (1017:20): [True: 3.94k, False: 1.60k]
  ------------------
 1018|  3.94k|                    ps_dec->u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  3.94k|#define IDR_SLICE_NAL                   5
  ------------------
 1019|       |
 1020|  5.54k|                i_temp = ih264d_read_mmco_commands(ps_dec);
 1021|       |
 1022|  5.54k|                ps_pps->ps_sps = ps_sps_tmp;
 1023|  5.54k|                ps_dec->u1_nal_unit_type = u1_nal_unit_type_tmp;
 1024|  5.54k|                if(i_temp < 0)
  ------------------
  |  Branch (1024:20): [True: 70, False: 5.47k]
  ------------------
 1025|     70|                {
 1026|     70|                    return ERROR_DBP_MANAGER_T;
 1027|     70|                }
 1028|  5.47k|                ps_dec->u4_bitoffset = i_temp;
 1029|  5.47k|            }
 1030|    168|            else
 1031|    168|                ps_dec->ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
 1032|       |
 1033|  5.64k|            if(!ps_sps_svc_ext->u1_slice_header_restriction_flag)
  ------------------
  |  Branch (1033:16): [True: 4.71k, False: 932]
  ------------------
 1034|  4.71k|            {
 1035|  4.71k|                ps_svc_slice_params->u1_store_ref_base_pic_flag = ih264d_get_bit_h264(ps_bitstrm);
 1036|  4.71k|                COPYTHECONTEXT("SPS_SVC_EXT: u1_store_ref_base_pic_flag",
 1037|  4.71k|                               ps_svc_slice_params->u1_store_ref_base_pic_flag);
 1038|       |
 1039|  4.71k|                if(0 != ps_svc_slice_params->u1_store_ref_base_pic_flag)
  ------------------
  |  Branch (1039:20): [True: 201, False: 4.51k]
  ------------------
 1040|    201|                {
 1041|    201|                    return NOT_OK;
  ------------------
  |  |  116|    201|#define NOT_OK    -1
  ------------------
 1042|    201|                }
 1043|  4.51k|                if(((1 == ps_nal_svc_ext->u1_use_ref_base_pic_flag) ||
  ------------------
  |  Branch (1043:21): [True: 0, False: 4.51k]
  ------------------
 1044|  4.51k|                    (1 == ps_svc_slice_params->u1_store_ref_base_pic_flag)) &&
  ------------------
  |  Branch (1044:21): [True: 0, False: 4.51k]
  ------------------
 1045|      0|                   (!ps_nal_svc_ext->u1_idr_flag))
  ------------------
  |  Branch (1045:20): [True: 0, False: 0]
  ------------------
 1046|      0|                {
 1047|      0|                    i_status = isvcd_dec_ref_base_pic_marking(
 1048|      0|                        &ps_svc_slice_params->s_ref_base_pic_marking_svc_ext, ps_bitstrm);
 1049|      0|                    if(i_status != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1049:24): [True: 0, False: 0]
  ------------------
 1050|      0|                    {
 1051|      0|                        return i_status;
 1052|      0|                    }
 1053|      0|                }
 1054|  4.51k|            }
 1055|  5.64k|        }
 1056|  5.79k|    }
 1057|       |
 1058|  5.52k|    {
 1059|       |        /* G050 */
 1060|       |        /* Read slice_qp_delta */
 1061|  5.52k|        WORD64 i8_temp =
 1062|  5.52k|            (WORD64) ps_pps->u1_pic_init_qp + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1063|  5.52k|        if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  629|  5.52k|#define MIN_H264_QP 0
  ------------------
                      if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  634|  5.42k|#define MAX_H264_QP 51
  ------------------
  |  Branch (1063:12): [True: 96, False: 5.42k]
  |  Branch (1063:39): [True: 135, False: 5.29k]
  ------------------
 1064|  5.29k|        ps_slice->u1_slice_qp = (UWORD8) i8_temp;
 1065|  5.29k|        COPYTHECONTEXT("Slice Header SVC ext: slice_qp_delta",
 1066|  5.29k|                       ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp);
 1067|  5.29k|    }
 1068|       |
 1069|  5.29k|    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
  ------------------
  |  Branch (1069:8): [True: 3.63k, False: 1.65k]
  ------------------
 1070|  3.63k|    {
 1071|  3.63k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1072|  3.63k|        COPYTHECONTEXT("Slice Header SVC ext: disable_deblocking_filter_idc", u4_temp);
 1073|       |
 1074|  3.63k|        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
  ------------------
  |  |  547|  3.63k|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (1074:12): [True: 256, False: 3.38k]
  ------------------
 1075|    256|        {
 1076|    256|            return ERROR_INV_SLICE_HDR_T;
 1077|    256|        }
 1078|  3.38k|        ps_slice->u1_disable_dblk_filter_idc = u4_temp;
 1079|  3.38k|        if(u4_temp != 1)
  ------------------
  |  Branch (1079:12): [True: 2.94k, False: 441]
  ------------------
 1080|  2.94k|        {
 1081|  2.94k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 1082|  2.94k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  2.94k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  2.84k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (1082:16): [True: 102, False: 2.84k]
  |  Branch (1082:47): [True: 76, False: 2.76k]
  ------------------
 1083|    178|            {
 1084|    178|                return ERROR_INV_SLICE_HDR_T;
 1085|    178|            }
 1086|  2.76k|            ps_slice->i1_slice_alpha_c0_offset = i_temp;
 1087|  2.76k|            COPYTHECONTEXT("Slice Header SVC ext: slice_alpha_c0_offset_div2",
 1088|  2.76k|                           ps_slice->i1_slice_alpha_c0_offset >> 1);
 1089|       |
 1090|  2.76k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 1091|  2.76k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  2.76k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  2.65k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (1091:16): [True: 105, False: 2.65k]
  |  Branch (1091:47): [True: 78, False: 2.58k]
  ------------------
 1092|    183|            {
 1093|    183|                return ERROR_INV_SLICE_HDR_T;
 1094|    183|            }
 1095|  2.58k|            ps_slice->i1_slice_beta_offset = i_temp;
 1096|  2.58k|            COPYTHECONTEXT("Slice Header SVC ext: slice_beta_offset_div2",
 1097|  2.58k|                           ps_slice->i1_slice_beta_offset >> 1);
 1098|  2.58k|        }
 1099|    441|        else
 1100|    441|        {
 1101|    441|            ps_slice->i1_slice_alpha_c0_offset = 0;
 1102|    441|            ps_slice->i1_slice_beta_offset = 0;
 1103|    441|        }
 1104|  3.38k|    }
 1105|  1.65k|    else
 1106|  1.65k|    {
 1107|  1.65k|        ps_slice->u1_disable_dblk_filter_idc = 0;
 1108|  1.65k|        ps_slice->i1_slice_alpha_c0_offset = 0;
 1109|  1.65k|        ps_slice->i1_slice_beta_offset = 0;
 1110|  1.65k|    }
 1111|       |
 1112|       |    /* add the remaining part of the code for svc extension from reference */
 1113|  4.67k|    ret = isvcd_set_default_slice_header_ext(ps_svc_lyr_dec);
 1114|  4.67k|    if(ret != OK)
  ------------------
  |  |  114|  4.67k|#define OK        0
  ------------------
  |  Branch (1114:8): [True: 0, False: 4.67k]
  ------------------
 1115|      0|    {
 1116|      0|        return ERROR_INV_SLICE_HDR_T;
 1117|      0|    }
 1118|       |
 1119|  4.67k|    ret = isvcd_parse_slice_header(ps_svc_lyr_dec);
 1120|  4.67k|    if(ret != OK)
  ------------------
  |  |  114|  4.67k|#define OK        0
  ------------------
  |  Branch (1120:8): [True: 239, False: 4.43k]
  ------------------
 1121|    239|    {
 1122|    239|        return ERROR_INV_SLICE_HDR_T;
 1123|    239|    }
 1124|       |
 1125|       |    /* Initialization to check if number of motion vector per 2 Mbs */
 1126|       |    /* are exceeding the range or not */
 1127|  4.43k|    ps_dec->u2_mv_2mb[0] = 0;
 1128|  4.43k|    ps_dec->u2_mv_2mb[1] = 0;
 1129|       |
 1130|       |    /*set slice header cone to 2 ,to indicate  correct header*/
 1131|  4.43k|    ps_dec->u1_slice_header_done = 2;
 1132|       |
 1133|  4.43k|    if(!ps_svc_slice_params->u1_slice_skip_flag)
  ------------------
  |  Branch (1133:8): [True: 4.31k, False: 123]
  ------------------
 1134|  4.31k|    {
 1135|  4.31k|        if(ps_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (1135:12): [True: 1.27k, False: 3.03k]
  ------------------
 1136|  1.27k|        {
 1137|  1.27k|            SWITCHOFFTRACE;
 1138|  1.27k|            SWITCHONTRACECABAC;
 1139|  1.27k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1139:16): [True: 0, False: 1.27k]
  ------------------
 1140|      0|            {
 1141|      0|                ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
 1142|      0|            }
 1143|  1.27k|            else
 1144|  1.27k|                ps_dec->pf_get_mb_info = isvcd_get_mb_info_cabac_nonmbaff;
 1145|       |
 1146|  1.27k|            ret = isvcd_parse_eislice_data_cabac(ps_svc_lyr_dec, ps_slice, u2_first_mb_in_slice);
 1147|  1.27k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  1.27k|#define OK        0
  ------------------
  |  Branch (1147:16): [True: 914, False: 363]
  ------------------
 1148|    363|            SWITCHONTRACE;
 1149|    363|            SWITCHOFFTRACECABAC;
 1150|    363|        }
 1151|  3.03k|        else
 1152|  3.03k|        {
 1153|  3.03k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1153:16): [True: 0, False: 3.03k]
  ------------------
 1154|      0|            {
 1155|      0|                ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
 1156|      0|            }
 1157|  3.03k|            else
 1158|  3.03k|                ps_dec->pf_get_mb_info = isvcd_get_mb_info_cavlc_nonmbaff;
 1159|  3.03k|            ret = isvcd_parse_eislice_data_cavlc(ps_svc_lyr_dec, ps_slice, u2_first_mb_in_slice);
 1160|  3.03k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  3.03k|#define OK        0
  ------------------
  |  Branch (1160:16): [True: 1.88k, False: 1.15k]
  ------------------
 1161|  3.03k|        }
 1162|  4.31k|    }
 1163|    123|    else
 1164|    123|    {
 1165|    123|        return ERROR_FEATURE_UNAVAIL;
 1166|    123|    }
 1167|       |
 1168|  1.51k|    return OK;
  ------------------
  |  |  114|  1.51k|#define OK        0
  ------------------
 1169|  4.43k|}
isvcd_parse_eislice_data_cabac:
 1197|  1.27k|{
 1198|  1.27k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1199|  1.27k|    UWORD8 uc_more_data_flag;
 1200|  1.27k|    UWORD8 u1_num_mbs, u1_mb_idx;
 1201|  1.27k|    dec_mb_info_t *ps_cur_mb_info;
 1202|  1.27k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
 1203|  1.27k|    deblk_mb_t *ps_cur_deblk_mb;
 1204|  1.27k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
 1205|  1.27k|    UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
 1206|  1.27k|    WORD16 i2_cur_mb_addr;
 1207|  1.27k|    UWORD8 u1_mbaff;
 1208|  1.27k|    UWORD8 u1_num_mbs_next, u1_end_of_row, u1_tfr_n_mb;
 1209|  1.27k|    WORD32 ret = OK;
  ------------------
  |  |  114|  1.27k|#define OK        0
  ------------------
 1210|  1.27k|    decoding_envirnoment_t *ps_cab_env;
 1211|  1.27k|    UWORD8 *pu1_cur_svc_base_mode_flag;
 1212|  1.27k|    UWORD8 u1_left_svc_base_mode_flag;
 1213|  1.27k|    UWORD8 u1_top_svc_base_mode_flag;
 1214|  1.27k|    UWORD32 u4_a, u4_b, u4_ctxt_inc;
 1215|  1.27k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1216|       |
 1217|  1.27k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1218|  1.27k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
 1219|  1.27k|    ih264d_update_qp(ps_dec, 0);
 1220|  1.27k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
 1221|       |
 1222|  1.27k|    if(ps_bitstrm->u4_ofst & 0x07)
  ------------------
  |  Branch (1222:8): [True: 1.05k, False: 218]
  ------------------
 1223|  1.05k|    {
 1224|  1.05k|        ps_bitstrm->u4_ofst += 8;
 1225|  1.05k|        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
 1226|  1.05k|    }
 1227|  1.27k|    ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
 1228|  1.27k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  1.27k|#define OK        0
  ------------------
  |  Branch (1228:8): [True: 111, False: 1.16k]
  ------------------
 1229|  1.16k|    isvcd_init_cabac_contexts(I_SLICE, ps_dec);
  ------------------
  |  |  370|  1.16k|#define I_SLICE  2
  ------------------
 1230|  1.16k|    ps_dec->i1_prev_mb_qp_delta = 0;
 1231|  1.16k|    ps_cab_env = &ps_dec->s_cab_dec_env;
 1232|       |    /* initializations */
 1233|  1.16k|    u1_mb_idx = ps_dec->u4_mb_idx;
 1234|  1.16k|    u1_num_mbs = u1_mb_idx;
 1235|  1.16k|    uc_more_data_flag = 1;
 1236|  1.16k|    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
 1237|  1.16k|    do
 1238|  18.5k|    {
 1239|  18.5k|        UWORD16 u2_mbx;
 1240|  18.5k|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
 1241|  18.5k|        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (1241:12): [True: 248, False: 18.3k]
  ------------------
 1242|    248|        {
 1243|    248|            break;
 1244|    248|        }
 1245|       |
 1246|  18.3k|        {
 1247|  18.3k|            UWORD8 u1_mb_type;
 1248|  18.3k|            ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
 1249|  18.3k|            ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + u1_num_mbs;
 1250|  18.3k|            ps_dec->u4_num_mbs_cur_nmb = u1_num_mbs;
 1251|  18.3k|            ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
 1252|  18.3k|            ps_cur_mb_info->u1_end_of_slice = 0;
 1253|       |            /***************************************************************/
 1254|       |            /* Get the required information for decoding of MB                  */
 1255|       |            /* mb_x, mb_y , neighbour availablity,                              */
 1256|       |            /***************************************************************/
 1257|  18.3k|            ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 0);
 1258|  18.3k|            u2_mbx = ps_dec->u2_mbx;
 1259|  18.3k|            ps_svc_cur_mb_info->u1_crop_window_flag =
 1260|  18.3k|                *(ps_svc_lyr_dec->pu1_crop_wnd_flag + ps_cur_mb_info->u2_mbx +
 1261|  18.3k|                  (ps_cur_mb_info->u2_mby * ps_dec->u2_frm_wd_in_mbs));
 1262|       |            /*********************************************************************/
 1263|       |            /* initialize u1_tran_form8x8 to zero to aviod uninitialized accesses */
 1264|       |            /*********************************************************************/
 1265|  18.3k|            ps_cur_mb_info->u1_tran_form8x8 = 0;
 1266|  18.3k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 1267|       |            /***************************************************************/
 1268|       |            /* Set the deblocking parameters for this MB                   */
 1269|       |            /***************************************************************/
 1270|  18.3k|            ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
 1271|  18.3k|            if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1271:16): [True: 18.3k, False: 0]
  ------------------
 1272|  18.3k|                ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
 1273|  18.3k|                                                 ps_dec->u1_mb_ngbr_availablity,
 1274|  18.3k|                                                 ps_dec->u1_cur_mb_fld_dec_flag);
 1275|       |
 1276|  18.3k|            ps_svc_cur_mb_info->u1_base_mode_flag = 0;
 1277|       |            /* Macroblock Layer Begins */
 1278|  18.3k|            if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (1278:16): [True: 17.7k, False: 583]
  ------------------
 1279|  17.7k|               ps_svc_slice_params->u1_adaptive_base_mode_flag)
  ------------------
  |  Branch (1279:16): [True: 13.0k, False: 4.76k]
  ------------------
 1280|  13.0k|            {
 1281|  13.0k|                pu1_cur_svc_base_mode_flag =
 1282|  13.0k|                    ps_svc_lyr_dec->pu1_svc_base_mode_flag + ps_cur_mb_info->u2_mbx;
 1283|  13.0k|                pu1_cur_svc_base_mode_flag +=
 1284|  13.0k|                    ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_svc_base_mode_cabac_stride;
 1285|       |
 1286|  13.0k|                u1_left_svc_base_mode_flag = 0;
 1287|  13.0k|                if(ps_dec->u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK)
  ------------------
  |  |   53|  13.0k|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  |  Branch (1287:20): [True: 9.36k, False: 3.63k]
  ------------------
 1288|  9.36k|                    u1_left_svc_base_mode_flag = *(pu1_cur_svc_base_mode_flag - 1);
 1289|       |
 1290|  13.0k|                u1_top_svc_base_mode_flag = 0;
 1291|  13.0k|                if(ps_dec->u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK)
  ------------------
  |  |   55|  13.0k|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  |  Branch (1291:20): [True: 10.3k, False: 2.68k]
  ------------------
 1292|  10.3k|                    u1_top_svc_base_mode_flag =
 1293|  10.3k|                        *(pu1_cur_svc_base_mode_flag -
 1294|  10.3k|                          ps_svc_lyr_dec->i4_frm_svc_base_mode_cabac_stride);
 1295|       |
 1296|  13.0k|                u4_a = 1;
 1297|  13.0k|                u4_b = 1;
 1298|       |
 1299|  13.0k|                if(u1_top_svc_base_mode_flag)
  ------------------
  |  Branch (1299:20): [True: 7.65k, False: 5.35k]
  ------------------
 1300|  7.65k|                {
 1301|  7.65k|                    u4_a = 0;
 1302|  7.65k|                }
 1303|       |
 1304|  13.0k|                if(u1_left_svc_base_mode_flag)
  ------------------
  |  Branch (1304:20): [True: 6.92k, False: 6.08k]
  ------------------
 1305|  6.92k|                {
 1306|  6.92k|                    u4_b = 0;
 1307|  6.92k|                }
 1308|       |
 1309|  13.0k|                u4_ctxt_inc = u4_a + u4_b;
 1310|  13.0k|                ps_svc_cur_mb_info->u1_base_mode_flag = ih264d_decode_bin(
 1311|  13.0k|                    u4_ctxt_inc, ps_svc_lyr_dec->ps_base_mode_flag, ps_bitstrm, ps_cab_env);
 1312|  13.0k|                COPYTHECONTEXT("SVC ext: u1_base_mode_flag", ps_cur_mb_info->u1_base_mode_flag);
 1313|  13.0k|                *pu1_cur_svc_base_mode_flag = ps_svc_cur_mb_info->u1_base_mode_flag;
 1314|  13.0k|            }
 1315|  5.34k|            else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1315:21): [True: 4.76k, False: 583]
  ------------------
 1316|  4.76k|            {
 1317|  4.76k|                ps_svc_cur_mb_info->u1_base_mode_flag =
 1318|  4.76k|                    ps_svc_slice_params->u1_default_base_mode_flag;
 1319|  4.76k|            }
 1320|       |
 1321|  18.3k|            if(!ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (1321:16): [True: 7.22k, False: 11.1k]
  ------------------
 1322|  7.22k|            {
 1323|       |                /* Decode the u1_mb_type */
 1324|  7.22k|                u1_mb_type = ih264d_parse_mb_type_intra_cabac(0, ps_dec);
 1325|  7.22k|                if(u1_mb_type > 25) return ERROR_MB_TYPE;
  ------------------
  |  Branch (1325:20): [True: 0, False: 7.22k]
  ------------------
 1326|  7.22k|                ps_cur_mb_info->u1_mb_type = u1_mb_type;
 1327|  7.22k|                COPYTHECONTEXT("u1_mb_type", u1_mb_type);
 1328|  7.22k|                ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_MB;
  ------------------
  |  |  382|  7.22k|#define D_INTRA_MB        1
  ------------------
 1329|  7.22k|            }
 1330|  11.1k|            else
 1331|  11.1k|            {
 1332|  11.1k|                ps_cur_mb_info->u1_mb_type = MB_INFER;
  ------------------
  |  |  112|  11.1k|#define MB_INFER 250
  ------------------
 1333|  11.1k|                ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_IBL;
  ------------------
  |  |   72|  11.1k|#define D_INTRA_IBL 16
  ------------------
 1334|  11.1k|            }
 1335|       |            /* Parse Macroblock Data */
 1336|  18.3k|            u1_mb_type = ps_cur_mb_info->u1_mb_type;
 1337|  18.3k|            if(25 == u1_mb_type)
  ------------------
  |  Branch (1337:16): [True: 136, False: 18.2k]
  ------------------
 1338|    136|            {
 1339|       |                /* I_PCM_MB */
 1340|    136|                ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|    136|#define I_PCM_MB    6
  ------------------
 1341|    136|                ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
 1342|    136|                if(ret != OK) return ret;
  ------------------
  |  |  114|    136|#define OK        0
  ------------------
  |  Branch (1342:20): [True: 70, False: 66]
  ------------------
 1343|     66|                ps_cur_deblk_mb->u1_mb_qp = 0;
 1344|     66|            }
 1345|  18.2k|            else
 1346|  18.2k|            {
 1347|  18.2k|                ret = isvcd_parse_imb_cabac(ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info,
 1348|  18.2k|                                            u1_mb_type);
 1349|  18.2k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  18.2k|#define OK        0
  ------------------
  |  Branch (1349:20): [True: 733, False: 17.4k]
  ------------------
 1350|  17.4k|                ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 1351|  17.4k|            }
 1352|       |
 1353|  17.5k|            if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (1353:16): [True: 0, False: 17.5k]
  ------------------
 1354|      0|            {
 1355|      0|                ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
 1356|      0|                                            ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
 1357|      0|            }
 1358|  17.5k|            if(u1_mbaff)
  ------------------
  |  Branch (1358:16): [True: 0, False: 17.5k]
  ------------------
 1359|      0|            {
 1360|      0|                ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
 1361|      0|            }
 1362|       |
 1363|  17.5k|            if(ps_cur_mb_info->u1_topmb && u1_mbaff)
  ------------------
  |  Branch (1363:16): [True: 17.5k, False: 0]
  |  Branch (1363:44): [True: 0, False: 17.5k]
  ------------------
 1364|      0|                uc_more_data_flag = 1;
 1365|  17.5k|            else
 1366|  17.5k|            {
 1367|  17.5k|                uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env, ps_bitstrm);
 1368|  17.5k|                uc_more_data_flag = !uc_more_data_flag;
 1369|  17.5k|                COPYTHECONTEXT("Decode Sliceterm", !uc_more_data_flag);
 1370|  17.5k|            }
 1371|       |
 1372|  17.5k|            if(u1_mbaff)
  ------------------
  |  Branch (1372:16): [True: 0, False: 17.5k]
  ------------------
 1373|      0|            {
 1374|      0|                if(!uc_more_data_flag && (0 == (i2_cur_mb_addr & 1)))
  ------------------
  |  Branch (1374:20): [True: 0, False: 0]
  |  Branch (1374:42): [True: 0, False: 0]
  ------------------
 1375|      0|                {
 1376|      0|                    return ERROR_EOB_FLUSHBITS_T;
 1377|      0|                }
 1378|      0|            }
 1379|       |            /* Next macroblock information */
 1380|  17.5k|            i2_cur_mb_addr++;
 1381|       |            /* Store the colocated information */
 1382|  17.5k|            {
 1383|  17.5k|                mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_num_mbs << 4);
 1384|  17.5k|                mv_pred_t s_mvPred = {{0, 0, 0, 0}, {-1, -1}, 0, 0};
 1385|  17.5k|                if(ps_mv_nmb_start)
  ------------------
  |  Branch (1385:20): [True: 17.5k, False: 0]
  ------------------
 1386|  17.5k|                {
 1387|  17.5k|                    ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
 1388|  17.5k|                                       (UWORD8) (ps_dec->u1_cur_mb_fld_dec_flag << 1), 4, 4);
 1389|  17.5k|                }
 1390|      0|                else
 1391|      0|                {
 1392|      0|                    return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1393|      0|                }
 1394|  17.5k|            }
 1395|       |
 1396|  17.5k|            u1_num_mbs++;
 1397|  17.5k|        }
 1398|       |
 1399|       |        /****************************************************************/
 1400|       |        /* Check for End Of Row                                         */
 1401|       |        /****************************************************************/
 1402|      0|        u1_num_mbs_next = i2_pic_wdin_mbs - u2_mbx - 1;
 1403|  17.5k|        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (1403:25): [True: 4.13k, False: 13.4k]
  |  Branch (1403:50): [True: 0, False: 4.13k]
  |  Branch (1403:62): [True: 0, False: 0]
  ------------------
 1404|  17.5k|        u1_tfr_n_mb =
 1405|  17.5k|            (u1_num_mbs == ps_dec->u4_recon_mb_grp) || u1_end_of_row || (!uc_more_data_flag);
  ------------------
  |  Branch (1405:13): [True: 4.13k, False: 13.4k]
  |  Branch (1405:56): [True: 0, False: 13.4k]
  |  Branch (1405:73): [True: 108, False: 13.3k]
  ------------------
 1406|  17.5k|        ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag);
 1407|       |
 1408|  17.5k|        if(u1_tfr_n_mb || (!uc_more_data_flag))
  ------------------
  |  Branch (1408:12): [True: 4.24k, False: 13.3k]
  |  Branch (1408:27): [True: 0, False: 13.3k]
  ------------------
 1409|  4.24k|        {
 1410|  4.24k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1410:16): [True: 1.44k, False: 2.80k]
  ------------------
 1411|  1.44k|            {
 1412|  1.44k|                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next, u1_tfr_n_mb,
 1413|  1.44k|                                     u1_end_of_row);
 1414|  1.44k|                ps_dec->ps_nmb_info += u1_num_mbs;
 1415|  1.44k|                ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
 1416|  1.44k|            }
 1417|  2.80k|            else
 1418|  2.80k|            {
 1419|  2.80k|                ret = isvcd_decode_recon_tfr_nmb_non_base_lyr(ps_svc_lyr_dec, u1_mb_idx, u1_num_mbs,
 1420|  2.80k|                                                              u1_num_mbs_next, u1_tfr_n_mb,
 1421|  2.80k|                                                              u1_end_of_row);
 1422|  2.80k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  2.80k|#define OK        0
  ------------------
  |  Branch (1422:20): [True: 0, False: 2.80k]
  ------------------
 1423|  2.80k|            }
 1424|  4.24k|            ps_dec->u4_total_mbs_coded += u1_num_mbs;
 1425|  4.24k|            if(u1_tfr_n_mb) u1_num_mbs = 0;
  ------------------
  |  Branch (1425:16): [True: 4.24k, False: 0]
  ------------------
 1426|  4.24k|            u1_mb_idx = u1_num_mbs;
 1427|  4.24k|            ps_dec->u4_mb_idx = u1_num_mbs;
 1428|  4.24k|        }
 1429|  17.5k|    } while(uc_more_data_flag);
  ------------------
  |  Branch (1429:13): [True: 17.4k, False: 115]
  ------------------
 1430|       |
 1431|    363|    ps_dec->u4_num_mbs_cur_nmb = 0;
 1432|    363|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
 1433|       |
 1434|    363|    return ret;
 1435|  1.16k|}
isvcd_parse_eislice_data_cavlc:
 1463|  3.03k|{
 1464|  3.03k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1465|  3.03k|    UWORD8 uc_more_data_flag;
 1466|  3.03k|    UWORD8 u1_num_mbs, u1_mb_idx;
 1467|  3.03k|    dec_mb_info_t *ps_cur_mb_info;
 1468|  3.03k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
 1469|  3.03k|    deblk_mb_t *ps_cur_deblk_mb;
 1470|  3.03k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
 1471|  3.03k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 1472|  3.03k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1473|  3.03k|    UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
 1474|  3.03k|    WORD16 i2_cur_mb_addr;
 1475|  3.03k|    UWORD8 u1_mbaff;
 1476|  3.03k|    UWORD8 u1_num_mbs_next, u1_end_of_row, u1_tfr_n_mb;
 1477|  3.03k|    WORD32 ret = OK;
  ------------------
  |  |  114|  3.03k|#define OK        0
  ------------------
 1478|  3.03k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1479|       |
 1480|  3.03k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1481|  3.03k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
 1482|  3.03k|    ih264d_update_qp(ps_dec, 0);
 1483|  3.03k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
 1484|       |
 1485|  3.03k|    u1_mb_idx = ps_dec->u4_mb_idx;
 1486|  3.03k|    u1_num_mbs = u1_mb_idx;
 1487|  3.03k|    uc_more_data_flag = 1;
 1488|  3.03k|    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
 1489|       |
 1490|  3.03k|    do
 1491|  56.8k|    {
 1492|  56.8k|        UWORD8 u1_mb_type;
 1493|  56.8k|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
 1494|  56.8k|        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (1494:12): [True: 674, False: 56.2k]
  ------------------
 1495|    674|        {
 1496|    674|            break;
 1497|    674|        }
 1498|       |
 1499|  56.2k|        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
 1500|  56.2k|        ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + u1_num_mbs;
 1501|  56.2k|        ps_dec->u4_num_mbs_cur_nmb = u1_num_mbs;
 1502|  56.2k|        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
 1503|  56.2k|        ps_cur_mb_info->u1_end_of_slice = 0;
 1504|       |        /***************************************************************/
 1505|       |        /* Get the required information for decoding of MB             */
 1506|       |        /* mb_x, mb_y , neighbour availablity,                         */
 1507|       |        /***************************************************************/
 1508|  56.2k|        ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 0);
 1509|  56.2k|        ps_svc_cur_mb_info->u1_crop_window_flag =
 1510|  56.2k|            *(ps_svc_lyr_dec->pu1_crop_wnd_flag + ps_cur_mb_info->u2_mbx +
 1511|  56.2k|              (ps_cur_mb_info->u2_mby * ps_dec->u2_frm_wd_in_mbs));
 1512|       |
 1513|       |        /***************************************************************/
 1514|       |        /* Set the deblocking parameters for this MB                   */
 1515|       |        /***************************************************************/
 1516|  56.2k|        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
 1517|  56.2k|        if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1517:12): [True: 56.2k, False: 0]
  ------------------
 1518|  56.2k|            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
 1519|  56.2k|                                             ps_dec->u1_mb_ngbr_availablity,
 1520|  56.2k|                                             ps_dec->u1_cur_mb_fld_dec_flag);
 1521|       |
 1522|  56.2k|        ps_svc_cur_mb_info->u1_base_mode_flag = 0;
 1523|  56.2k|        if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (1523:12): [True: 56.0k, False: 224]
  ------------------
 1524|  56.0k|           ps_svc_slice_params->u1_adaptive_base_mode_flag)
  ------------------
  |  Branch (1524:12): [True: 31.3k, False: 24.6k]
  ------------------
 1525|  31.3k|        {
 1526|  31.3k|            ps_svc_cur_mb_info->u1_base_mode_flag = ih264d_get_bit_h264(ps_bitstrm);
 1527|  31.3k|            COPYTHECONTEXT("SVC ext: u1_base_mode_flag", ps_cur_mb_info->u1_base_mode_flag);
 1528|  31.3k|        }
 1529|  24.8k|        else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1529:17): [True: 24.6k, False: 224]
  ------------------
 1530|  24.6k|        {
 1531|  24.6k|            ps_svc_cur_mb_info->u1_base_mode_flag = ps_svc_slice_params->u1_default_base_mode_flag;
 1532|  24.6k|        }
 1533|       |
 1534|       |        /**************************************************************/
 1535|       |        /* Macroblock Layer Begins, Decode the u1_mb_type                */
 1536|       |        /**************************************************************/
 1537|  56.2k|        if(!ps_svc_cur_mb_info->u1_base_mode_flag) /* Inlined ih264d_uev */
  ------------------
  |  Branch (1537:12): [True: 13.3k, False: 42.8k]
  ------------------
 1538|  13.3k|        {
 1539|  13.3k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 1540|  13.3k|            UWORD32 u4_word, u4_ldz, u4_temp;
 1541|       |
 1542|       |            /***************************************************************/
 1543|       |            /* Find leading zeros in next 32 bits                          */
 1544|       |            /***************************************************************/
 1545|  13.3k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  13.3k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  13.3k|{                                                                           \
  |  |  152|  13.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  13.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  13.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  13.3k|                                                                            \
  |  |  156|  13.3k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  13.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 12.8k, False: 580]
  |  |  ------------------
  |  |  158|  13.3k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  12.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  13.3k|}
  ------------------
 1546|  13.3k|            u4_ldz = CLZ(u4_word);
 1547|       |            /* Flush the ps_bitstrm */
 1548|  13.3k|            u4_bitstream_offset += (u4_ldz + 1);
 1549|       |            /* Read the suffix from the ps_bitstrm */
 1550|  13.3k|            u4_word = 0;
 1551|  13.3k|            if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  7.71k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  7.71k|{                                                                           \
  |  |  122|  7.71k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  7.71k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  7.71k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  7.71k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  7.71k|                                                                            \
  |  |  127|  7.71k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 6.76k, False: 949]
  |  |  ------------------
  |  |  128|  7.71k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  6.76k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  7.71k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  7.71k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  7.71k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  7.71k|}                                                                           \
  ------------------
  |  Branch (1551:16): [True: 7.71k, False: 5.68k]
  ------------------
 1552|  13.3k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
 1553|  13.3k|            u4_temp = ((1 << u4_ldz) + u4_word - 1);
 1554|  13.3k|            if(u4_temp > 25) return ERROR_MB_TYPE;
  ------------------
  |  Branch (1554:16): [True: 178, False: 13.2k]
  ------------------
 1555|  13.2k|            u1_mb_type = u4_temp;
 1556|       |
 1557|       |            /* Inlined ih264d_uev */
 1558|  13.2k|            ps_cur_mb_info->u1_mb_type = u1_mb_type;
 1559|  13.2k|            COPYTHECONTEXT("u1_mb_type", u1_mb_type);
 1560|       |
 1561|  13.2k|            ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_MB;
  ------------------
  |  |  382|  13.2k|#define D_INTRA_MB        1
  ------------------
 1562|  13.2k|        }
 1563|  42.8k|        else
 1564|  42.8k|        {
 1565|  42.8k|            ps_cur_mb_info->u1_mb_type = MB_INFER;
  ------------------
  |  |  112|  42.8k|#define MB_INFER 250
  ------------------
 1566|  42.8k|            ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_IBL;
  ------------------
  |  |   72|  42.8k|#define D_INTRA_IBL 16
  ------------------
 1567|  42.8k|        }
 1568|       |
 1569|       |        /**************************************************************/
 1570|       |        /* Parse Macroblock data                                      */
 1571|       |        /**************************************************************/
 1572|  56.0k|        u1_mb_type = ps_cur_mb_info->u1_mb_type;
 1573|  56.0k|        if(25 == u1_mb_type)
  ------------------
  |  Branch (1573:12): [True: 71, False: 55.9k]
  ------------------
 1574|     71|        {
 1575|       |            /* I_PCM_MB */
 1576|     71|            ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|     71|#define I_PCM_MB    6
  ------------------
 1577|     71|            ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
 1578|     71|            if(ret != OK) return ret;
  ------------------
  |  |  114|     71|#define OK        0
  ------------------
  |  Branch (1578:16): [True: 0, False: 71]
  ------------------
 1579|     71|            ps_cur_deblk_mb->u1_mb_qp = 0;
 1580|     71|        }
 1581|  55.9k|        else
 1582|  55.9k|        {
 1583|  55.9k|            ret = isvcd_parse_imb_cavlc(ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info,
 1584|  55.9k|                                        u1_num_mbs, u1_mb_type);
 1585|  55.9k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  55.9k|#define OK        0
  ------------------
  |  Branch (1585:16): [True: 1.70k, False: 54.2k]
  ------------------
 1586|  54.2k|            ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 1587|  54.2k|        }
 1588|       |
 1589|  54.3k|        if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (1589:12): [True: 0, False: 54.3k]
  ------------------
 1590|      0|        {
 1591|      0|            ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
 1592|      0|                                        ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
 1593|      0|        }
 1594|  54.3k|        uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
  ------------------
  |  |   97|  54.3k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|  54.3k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  ------------------
 1595|       |
 1596|  54.3k|        if(u1_mbaff)
  ------------------
  |  Branch (1596:12): [True: 0, False: 54.3k]
  ------------------
 1597|      0|        {
 1598|      0|            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
 1599|      0|            if(!uc_more_data_flag && (0 == (i2_cur_mb_addr & 1)))
  ------------------
  |  Branch (1599:16): [True: 0, False: 0]
  |  Branch (1599:38): [True: 0, False: 0]
  ------------------
 1600|      0|            {
 1601|      0|                return ERROR_EOB_FLUSHBITS_T;
 1602|      0|            }
 1603|      0|        }
 1604|       |        /**************************************************************/
 1605|       |        /* Get next Macroblock address                                */
 1606|       |        /**************************************************************/
 1607|  54.3k|        i2_cur_mb_addr++;
 1608|       |        /* Store the colocated information */
 1609|  54.3k|        {
 1610|  54.3k|            mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_num_mbs << 4);
 1611|  54.3k|            mv_pred_t s_mvPred = {{0, 0, 0, 0}, {-1, -1}, 0, 0};
 1612|  54.3k|            if(ps_mv_nmb_start)
  ------------------
  |  Branch (1612:16): [True: 54.3k, False: 0]
  ------------------
 1613|  54.3k|            {
 1614|  54.3k|                ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
 1615|  54.3k|                                   (UWORD8) (ps_dec->u1_cur_mb_fld_dec_flag << 1), 4, 4);
 1616|  54.3k|            }
 1617|      0|            else
 1618|      0|            {
 1619|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1620|      0|            }
 1621|  54.3k|        }
 1622|  54.3k|        u1_num_mbs++;
 1623|       |
 1624|       |        /****************************************************************/
 1625|       |        /* Check for End Of Row                                         */
 1626|       |        /****************************************************************/
 1627|  54.3k|        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
 1628|  54.3k|        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (1628:25): [True: 14.8k, False: 39.4k]
  |  Branch (1628:50): [True: 0, False: 14.8k]
  |  Branch (1628:62): [True: 0, False: 0]
  ------------------
 1629|  54.3k|        u1_tfr_n_mb =
 1630|  54.3k|            (u1_num_mbs == ps_dec->u4_recon_mb_grp) || u1_end_of_row || (!uc_more_data_flag);
  ------------------
  |  Branch (1630:13): [True: 14.6k, False: 39.6k]
  |  Branch (1630:56): [True: 159, False: 39.4k]
  |  Branch (1630:73): [True: 244, False: 39.2k]
  ------------------
 1631|  54.3k|        ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag);
 1632|       |
 1633|  54.3k|        if(u1_tfr_n_mb || (!uc_more_data_flag))
  ------------------
  |  Branch (1633:12): [True: 15.1k, False: 39.2k]
  |  Branch (1633:27): [True: 0, False: 39.2k]
  ------------------
 1634|  15.1k|        {
 1635|  15.1k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1635:16): [True: 10.6k, False: 4.49k]
  ------------------
 1636|  10.6k|            {
 1637|  10.6k|                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next, u1_tfr_n_mb,
 1638|  10.6k|                                     u1_end_of_row);
 1639|  10.6k|                ps_dec->ps_nmb_info += u1_num_mbs;
 1640|  10.6k|                ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
 1641|  10.6k|            }
 1642|  4.49k|            else
 1643|  4.49k|            {
 1644|  4.49k|                ret = isvcd_decode_recon_tfr_nmb_non_base_lyr(ps_svc_lyr_dec, u1_mb_idx, u1_num_mbs,
 1645|  4.49k|                                                              u1_num_mbs_next, u1_tfr_n_mb,
 1646|  4.49k|                                                              u1_end_of_row);
 1647|  4.49k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  4.49k|#define OK        0
  ------------------
  |  Branch (1647:20): [True: 0, False: 4.49k]
  ------------------
 1648|  4.49k|            }
 1649|  15.1k|            ps_dec->u4_total_mbs_coded += u1_num_mbs;
 1650|  15.1k|            if(u1_tfr_n_mb) u1_num_mbs = 0;
  ------------------
  |  Branch (1650:16): [True: 15.1k, False: 0]
  ------------------
 1651|  15.1k|            u1_mb_idx = u1_num_mbs;
 1652|  15.1k|            ps_dec->u4_mb_idx = u1_num_mbs;
 1653|  15.1k|        }
 1654|  54.3k|    } while(uc_more_data_flag);
  ------------------
  |  Branch (1654:13): [True: 53.8k, False: 480]
  ------------------
 1655|       |
 1656|  1.15k|    ps_dec->u4_num_mbs_cur_nmb = 0;
 1657|  1.15k|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
 1658|       |
 1659|  1.15k|    return ret;
 1660|  3.03k|}
isvcd_parse_imb_cabac:
 1677|  46.2k|{
 1678|  46.2k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1679|  46.2k|    WORD8 i1_delta_qp;
 1680|  46.2k|    UWORD8 u1_cbp;
 1681|  46.2k|    UWORD8 u1_offset;
 1682|       |    /* Variables for handling Cabac contexts */
 1683|  46.2k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
 1684|  46.2k|    ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
 1685|  46.2k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
 1686|  46.2k|    bin_ctxt_model_t *p_bin_ctxt;
 1687|  46.2k|    UWORD8 u1_intra_chrom_pred_mode;
 1688|  46.2k|    UWORD8 u1_dc_block_flag = 0;
 1689|  46.2k|    WORD32 ret;
 1690|       |
 1691|  46.2k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
 1692|       |
 1693|  46.2k|    if(ps_left_ctxt == ps_dec->ps_def_ctxt_mb_info)
  ------------------
  |  Branch (1693:8): [True: 11.3k, False: 34.8k]
  ------------------
 1694|  11.3k|    {
 1695|  11.3k|        ps_dec->pu1_left_yuv_dc_csbp[0] = 0xf;
 1696|  11.3k|    }
 1697|       |
 1698|  46.2k|    if(ps_dec->ps_cur_slice->u1_slice_type != I_SLICE)
  ------------------
  |  |  370|  46.2k|#define I_SLICE  2
  ------------------
  |  Branch (1698:8): [True: 28.0k, False: 18.2k]
  ------------------
 1699|  28.0k|    {
 1700|  28.0k|        MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|  28.0k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  28.0k|{                                                               \
  |  |  654|  28.0k|    memset(pu4_start,value,16);                                 \
  |  |  655|  28.0k|}
  ------------------
 1701|  28.0k|        *((UWORD32 *) ps_dec->pi1_left_ref_idx_ctxt_inc) = 0;
 1702|  28.0k|        MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
  ------------------
  |  |  652|  28.0k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  28.0k|{                                                               \
  |  |  654|  28.0k|    memset(pu4_start,value,16);                                 \
  |  |  655|  28.0k|}
  ------------------
 1703|  28.0k|        memset(p_curr_ctxt->i1_ref_idx, 0, 4);
 1704|  28.0k|    }
 1705|       |
 1706|       |    /* default */
 1707|  46.2k|    ps_cur_mb_info->ps_curmb->u1_mb_type = P_MB;
  ------------------
  |  |  419|  46.2k|#define P_MB        2
  ------------------
 1708|  46.2k|    if(!ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (1708:8): [True: 9.76k, False: 36.4k]
  ------------------
 1709|  9.76k|    {
 1710|  9.76k|        if(u1_mb_type == I_4x4_MB)
  ------------------
  |  |  417|  9.76k|#define I_4x4_MB    0
  ------------------
  |  Branch (1710:12): [True: 4.72k, False: 5.03k]
  ------------------
 1711|  4.72k|        {
 1712|  4.72k|            ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
  ------------------
  |  |  417|  4.72k|#define I_4x4_MB    0
  ------------------
 1713|  4.72k|            p_curr_ctxt->u1_mb_type = CAB_I4x4;
  ------------------
  |  |  394|  4.72k|#define CAB_I4x4          0x00 /* 0000 00x0 */
  ------------------
 1714|  4.72k|            u1_offset = 0;
 1715|  4.72k|            ps_cur_mb_info->u1_tran_form8x8 = 0;
 1716|  4.72k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 1717|       |            /*--------------------------------------------------------------------*/
 1718|       |            /* Read transform_size_8x8_flag if present                            */
 1719|       |            /*--------------------------------------------------------------------*/
 1720|  4.72k|            if(ps_dec->ps_cur_pps->i4_transform_8x8_mode_flag)
  ------------------
  |  Branch (1720:16): [True: 3.88k, False: 836]
  ------------------
 1721|  3.88k|            {
 1722|  3.88k|                ps_cur_mb_info->u1_tran_form8x8 =
 1723|  3.88k|                    ih264d_parse_transform8x8flag_cabac(ps_dec, ps_cur_mb_info);
 1724|  3.88k|                COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
 1725|  3.88k|                p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
 1726|  3.88k|                ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
 1727|  3.88k|            }
 1728|    836|            else
 1729|    836|            {
 1730|    836|                p_curr_ctxt->u1_transform8x8_ctxt = 0;
 1731|    836|            }
 1732|       |
 1733|       |            /*--------------------------------------------------------------------*/
 1734|       |            /* Read the IntraPrediction modes for LUMA                            */
 1735|       |            /*--------------------------------------------------------------------*/
 1736|  4.72k|            if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (1736:16): [True: 1.77k, False: 2.95k]
  ------------------
 1737|  1.77k|            {
 1738|  1.77k|                UWORD8 *pu1_temp;
 1739|  1.77k|                ih264d_read_intra_pred_modes_cabac(ps_dec,
 1740|  1.77k|                                                   ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data),
 1741|  1.77k|                                                   ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data + 16),
 1742|  1.77k|                                                   ps_cur_mb_info->u1_tran_form8x8);
 1743|  1.77k|                pu1_temp = (UWORD8 *) ps_dec->pv_parse_tu_coeff_data;
 1744|  1.77k|                pu1_temp += 32;
 1745|  1.77k|                ps_dec->pv_parse_tu_coeff_data = (void *) pu1_temp;
 1746|  1.77k|            }
 1747|  2.95k|            else
 1748|  2.95k|            {
 1749|  2.95k|                UWORD8 *pu1_temp;
 1750|  2.95k|                ih264d_read_intra_pred_modes_cabac(ps_dec,
 1751|  2.95k|                                                   ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data),
 1752|  2.95k|                                                   ((UWORD8 *) ps_dec->pv_parse_tu_coeff_data + 4),
 1753|  2.95k|                                                   ps_cur_mb_info->u1_tran_form8x8);
 1754|  2.95k|                pu1_temp = (UWORD8 *) ps_dec->pv_parse_tu_coeff_data;
 1755|  2.95k|                pu1_temp += 8;
 1756|  2.95k|                ps_dec->pv_parse_tu_coeff_data = (void *) pu1_temp;
 1757|  2.95k|            }
 1758|       |            /*--------------------------------------------------------------------*/
 1759|       |            /* Read the IntraPrediction mode for CHROMA                           */
 1760|       |            /*--------------------------------------------------------------------*/
 1761|  4.72k|            u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec);
 1762|  4.72k|            COPYTHECONTEXT("intra_chroma_pred_mode", u1_intra_chrom_pred_mode);
 1763|  4.72k|            p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode =
 1764|  4.72k|                u1_intra_chrom_pred_mode;
 1765|       |
 1766|       |            /*--------------------------------------------------------------------*/
 1767|       |            /* Read the Coded block pattern                                       */
 1768|       |            /*--------------------------------------------------------------------*/
 1769|  4.72k|            u1_cbp = ih264d_parse_ctx_cbp_cabac(ps_dec);
 1770|  4.72k|            COPYTHECONTEXT("coded_block_pattern", u1_cbp);
 1771|  4.72k|            ps_cur_mb_info->u1_cbp = u1_cbp;
 1772|  4.72k|            p_curr_ctxt->u1_cbp = u1_cbp;
 1773|       |
 1774|       |            /*--------------------------------------------------------------------*/
 1775|       |            /* Read mb_qp_delta                                                   */
 1776|       |            /*--------------------------------------------------------------------*/
 1777|  4.72k|            if(ps_cur_mb_info->u1_cbp)
  ------------------
  |  Branch (1777:16): [True: 3.59k, False: 1.13k]
  ------------------
 1778|  3.59k|            {
 1779|  3.59k|                ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp);
 1780|  3.59k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  3.59k|#define OK        0
  ------------------
  |  Branch (1780:20): [True: 69, False: 3.52k]
  ------------------
 1781|  3.52k|                COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
 1782|  3.52k|                if(i1_delta_qp != 0)
  ------------------
  |  Branch (1782:20): [True: 855, False: 2.67k]
  ------------------
 1783|    855|                {
 1784|    855|                    ret = ih264d_update_qp(ps_dec, i1_delta_qp);
 1785|    855|                    if(ret != OK) return ret;
  ------------------
  |  |  114|    855|#define OK        0
  ------------------
  |  Branch (1785:24): [True: 0, False: 855]
  ------------------
 1786|    855|                }
 1787|  3.52k|            }
 1788|  1.13k|            else
 1789|  1.13k|                ps_dec->i1_prev_mb_qp_delta = 0;
 1790|  4.65k|            p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
 1791|  4.65k|        }
 1792|  5.03k|        else
 1793|  5.03k|        {
 1794|  5.03k|            u1_offset = 1;
 1795|  5.03k|            ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
  ------------------
  |  |  418|  5.03k|#define I_16x16_MB  1
  ------------------
 1796|  5.03k|            p_curr_ctxt->u1_mb_type = CAB_I16x16;
  ------------------
  |  |  395|  5.03k|#define CAB_I16x16        0x01 /* 0000 00x1 */
  ------------------
 1797|  5.03k|            ps_cur_mb_info->u1_tran_form8x8 = 0;
 1798|  5.03k|            p_curr_ctxt->u1_transform8x8_ctxt = 0;
 1799|  5.03k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 1800|       |            /*--------------------------------------------------------------------*/
 1801|       |            /* Read the IntraPrediction mode for CHROMA                           */
 1802|       |            /*--------------------------------------------------------------------*/
 1803|  5.03k|            u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec);
 1804|  5.03k|            if(u1_intra_chrom_pred_mode > 3) return ERROR_CHROMA_PRED_MODE;
  ------------------
  |  Branch (1804:16): [True: 0, False: 5.03k]
  ------------------
 1805|       |
 1806|  5.03k|            COPYTHECONTEXT("Chroma intra_chroma_pred_mode pred mode", u1_intra_chrom_pred_mode);
 1807|  5.03k|            p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode =
 1808|  5.03k|                u1_intra_chrom_pred_mode;
 1809|       |
 1810|       |            /*--------------------------------------------------------------------*/
 1811|       |            /* Read the Coded block pattern                                       */
 1812|       |            /*--------------------------------------------------------------------*/
 1813|  5.03k|            u1_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2];
 1814|  5.03k|            ps_cur_mb_info->u1_cbp = u1_cbp;
 1815|  5.03k|            p_curr_ctxt->u1_cbp = u1_cbp;
 1816|       |
 1817|       |            /*--------------------------------------------------------------------*/
 1818|       |            /* Read mb_qp_delta                                                   */
 1819|       |            /*--------------------------------------------------------------------*/
 1820|  5.03k|            ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp);
 1821|  5.03k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  5.03k|#define OK        0
  ------------------
  |  Branch (1821:16): [True: 68, False: 4.96k]
  ------------------
 1822|  4.96k|            COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
 1823|  4.96k|            if(i1_delta_qp != 0)
  ------------------
  |  Branch (1823:16): [True: 1.12k, False: 3.83k]
  ------------------
 1824|  1.12k|            {
 1825|  1.12k|                ret = ih264d_update_qp(ps_dec, i1_delta_qp);
 1826|  1.12k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  1.12k|#define OK        0
  ------------------
  |  Branch (1826:20): [True: 0, False: 1.12k]
  ------------------
 1827|  1.12k|            }
 1828|       |
 1829|  4.96k|            {
 1830|  4.96k|                WORD16 i_scaleFactor;
 1831|  4.96k|                WORD16 *pi2_scale_matrix_ptr;
 1832|       |                /*******************************************************************/
 1833|       |                /* for luma DC coefficients the scaling is done during the parsing */
 1834|       |                /* to preserve the precision                                       */
 1835|       |                /*******************************************************************/
 1836|  4.96k|                if(ps_dec->s_high_profile.u1_scaling_present)
  ------------------
  |  Branch (1836:20): [True: 1.16k, False: 3.80k]
  ------------------
 1837|  1.16k|                {
 1838|  1.16k|                    pi2_scale_matrix_ptr = ps_dec->s_high_profile.i2_scalinglist4x4[0];
 1839|  1.16k|                }
 1840|  3.80k|                else
 1841|  3.80k|                {
 1842|  3.80k|                    i_scaleFactor = 16;
 1843|  3.80k|                    pi2_scale_matrix_ptr = &i_scaleFactor;
 1844|  3.80k|                }
 1845|  4.96k|                {
 1846|  4.96k|                    ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
 1847|  4.96k|                    UWORD8 uc_a, uc_b;
 1848|  4.96k|                    UWORD32 u4_ctx_inc;
 1849|  4.96k|                    INC_SYM_COUNT(&(ps_dec->s_cab_dec_env));
 1850|       |
 1851|       |                    /* if MbAddrN not available then CondTermN = 1 */
 1852|  4.96k|                    uc_b = ((ps_top_ctxt->u1_yuv_dc_csbp) & 0x01);
 1853|       |
 1854|       |                    /* if MbAddrN not available then CondTermN = 1 */
 1855|  4.96k|                    uc_a = ((ps_dec->pu1_left_yuv_dc_csbp[0]) & 0x01);
 1856|       |
 1857|  4.96k|                    u4_ctx_inc = (uc_a + (uc_b << 1));
 1858|  4.96k|                    {
 1859|  4.96k|                        WORD16 pi2_dc_coef[16] = {0};
 1860|  4.96k|                        tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
 1861|  4.96k|                            (tu_sblk4x4_coeff_data_t *) ps_dec->pv_parse_tu_coeff_data;
 1862|  4.96k|                        WORD16 *pi2_coeff_block = (WORD16 *) ps_dec->pv_parse_tu_coeff_data;
 1863|       |
 1864|  4.96k|                        p_bin_ctxt = (ps_dec->p_cbf_t[LUMA_DC_CTXCAT]) + u4_ctx_inc;
  ------------------
  |  |   71|  4.96k|#define LUMA_DC_CTXCAT    0
  ------------------
 1865|       |
 1866|  4.96k|                        u1_dc_block_flag = ih264d_read_coeff4x4_cabac(
 1867|  4.96k|                            ps_bitstrm, LUMA_DC_CTXCAT,
  ------------------
  |  |   71|  4.96k|#define LUMA_DC_CTXCAT    0
  ------------------
 1868|  4.96k|                            ps_dec->p_significant_coeff_flag_t[LUMA_DC_CTXCAT], ps_dec, p_bin_ctxt);
  ------------------
  |  |   71|  4.96k|#define LUMA_DC_CTXCAT    0
  ------------------
 1869|       |
 1870|       |                        /* Store coded_block_flag */
 1871|  4.96k|                        p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
 1872|  4.96k|                        p_curr_ctxt->u1_yuv_dc_csbp |= u1_dc_block_flag;
 1873|  4.96k|                        if(u1_dc_block_flag)
  ------------------
  |  Branch (1873:28): [True: 3.30k, False: 1.65k]
  ------------------
 1874|  3.30k|                        {
 1875|  3.30k|                            WORD32 pi4_tmp[16] = {0};
 1876|  3.30k|                            memset(pi2_dc_coef, 0, sizeof(pi2_dc_coef));
 1877|  3.30k|                            ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4, pi2_dc_coef,
 1878|  3.30k|                                                             ps_dec->pu1_inv_scan);
 1879|       |
 1880|  3.30k|                            PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  3.30k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 1881|  3.30k|                            ps_dec->pf_ihadamard_scaling_4x4(
 1882|  3.30k|                                pi2_dc_coef, pi2_coeff_block, ps_dec->pu2_quant_scale_y,
 1883|  3.30k|                                (UWORD16 *) pi2_scale_matrix_ptr, ps_dec->u1_qp_y_div6, pi4_tmp);
 1884|  3.30k|                            pi2_coeff_block += 16;
 1885|  3.30k|                            ps_dec->pv_parse_tu_coeff_data = (void *) pi2_coeff_block;
 1886|  3.30k|                            SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 0);
  ------------------
  |  |  106|  3.30k|#define SET_BIT(x, pos) (x) = (x) | (1 << pos);
  ------------------
 1887|  3.30k|                        }
 1888|  4.96k|                    }
 1889|  4.96k|                }
 1890|  4.96k|            }
 1891|  4.96k|        }
 1892|  9.62k|        ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
 1893|  9.62k|        ps_dec->pu1_left_yuv_dc_csbp[0] |= u1_dc_block_flag;
 1894|  9.62k|    }
 1895|  36.4k|    else
 1896|  36.4k|    {
 1897|  36.4k|        u1_offset = 0;
 1898|       |        /*--------------------------------------------------------------------*/
 1899|       |        /* Read the Coded block pattern                                       */
 1900|       |        /*--------------------------------------------------------------------*/
 1901|  36.4k|        u1_cbp = ih264d_parse_ctx_cbp_cabac(ps_dec);
 1902|  36.4k|        COPYTHECONTEXT("coded_block_pattern", u1_cbp);
 1903|       |
 1904|  36.4k|        ps_cur_mb_info->u1_cbp = u1_cbp;
 1905|  36.4k|        p_curr_ctxt->u1_cbp = u1_cbp;
 1906|  36.4k|        p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
 1907|  36.4k|        ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
 1908|  36.4k|        p_curr_ctxt->u1_mb_type = CAB_INFERRED;
  ------------------
  |  |   74|  36.4k|#define CAB_INFERRED 0xFF
  ------------------
 1909|  36.4k|        p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
 1910|  36.4k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
 1911|  36.4k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 1912|       |
 1913|       |        /*--------------------------------------------------------------------*/
 1914|       |        /* Read transform_size_8x8_flag if present                            */
 1915|       |        /*--------------------------------------------------------------------*/
 1916|  36.4k|        if((ps_dec->ps_cur_pps->i4_transform_8x8_mode_flag) && (u1_cbp & 0xf))
  ------------------
  |  Branch (1916:12): [True: 19.2k, False: 17.2k]
  |  Branch (1916:64): [True: 11.9k, False: 7.32k]
  ------------------
 1917|  11.9k|        {
 1918|  11.9k|            ps_cur_mb_info->u1_tran_form8x8 =
 1919|  11.9k|                ih264d_parse_transform8x8flag_cabac(ps_dec, ps_cur_mb_info);
 1920|  11.9k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
 1921|  11.9k|            p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
 1922|  11.9k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
 1923|  11.9k|        }
 1924|  24.5k|        else
 1925|  24.5k|        {
 1926|  24.5k|            p_curr_ctxt->u1_transform8x8_ctxt = 0;
 1927|  24.5k|        }
 1928|       |
 1929|       |        /*--------------------------------------------------------------------*/
 1930|       |        /* Read mb_qp_delta                                                   */
 1931|       |        /*--------------------------------------------------------------------*/
 1932|  36.4k|        if(ps_cur_mb_info->u1_cbp)
  ------------------
  |  Branch (1932:12): [True: 25.0k, False: 11.4k]
  ------------------
 1933|  25.0k|        {
 1934|  25.0k|            ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp);
 1935|  25.0k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  25.0k|#define OK        0
  ------------------
  |  Branch (1935:16): [True: 71, False: 24.9k]
  ------------------
 1936|  24.9k|            COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
 1937|  24.9k|            if(i1_delta_qp != 0)
  ------------------
  |  Branch (1937:16): [True: 4.34k, False: 20.5k]
  ------------------
 1938|  4.34k|            {
 1939|  4.34k|                ret = ih264d_update_qp(ps_dec, i1_delta_qp);
 1940|  4.34k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  4.34k|#define OK        0
  ------------------
  |  Branch (1940:20): [True: 0, False: 4.34k]
  ------------------
 1941|  4.34k|            }
 1942|  24.9k|        }
 1943|  11.4k|        else
 1944|  11.4k|            ps_dec->i1_prev_mb_qp_delta = 0;
 1945|  36.4k|    }
 1946|       |
 1947|  46.0k|    ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, u1_offset);
 1948|  46.0k|    if(EXCEED_OFFSET(ps_bitstrm)) return ERROR_EOB_TERMINATE_T;
  ------------------
  |  |   93|  46.0k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 1.60k, False: 44.4k]
  |  |  ------------------
  ------------------
 1949|  44.4k|    return OK;
  ------------------
  |  |  114|  44.4k|#define OK        0
  ------------------
 1950|  46.0k|}
isvcd_parse_islice:
 1965|  7.35k|{
 1966|  7.35k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1967|  7.35k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1968|  7.35k|    dec_slice_params_t *ps_slice = ps_dec->ps_cur_slice;
 1969|  7.35k|    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
 1970|  7.35k|    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
 1971|  7.35k|    UWORD32 u4_temp;
 1972|  7.35k|    WORD32 i_temp;
 1973|  7.35k|    WORD32 ret;
 1974|       |
 1975|       |    /*--------------------------------------------------------------------*/
 1976|       |    /* Read remaining contents of the slice header                        */
 1977|       |    /*--------------------------------------------------------------------*/
 1978|       |    /* dec_ref_pic_marking function */
 1979|       |    /* G050 */
 1980|  7.35k|    if(ps_slice->u1_nal_ref_idc != 0)
  ------------------
  |  Branch (1980:8): [True: 6.85k, False: 499]
  ------------------
 1981|  6.85k|    {
 1982|  6.85k|        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (1982:12): [True: 5.92k, False: 929]
  ------------------
 1983|  5.92k|        {
 1984|  5.92k|            dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1985|  5.92k|            dec_seq_params_t *ps_sps_tmp = ps_pps->ps_sps;
 1986|  5.92k|            UWORD8 u1_nal_unit_type_tmp = ps_dec->u1_nal_unit_type;
 1987|       |
 1988|  5.92k|            ps_pps->ps_sps = ps_dec->ps_cur_sps;
 1989|  5.92k|            if(ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag)
  ------------------
  |  Branch (1989:16): [True: 5.61k, False: 317]
  ------------------
 1990|  5.61k|                ps_dec->u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  5.61k|#define IDR_SLICE_NAL                   5
  ------------------
 1991|       |
 1992|  5.92k|            i_temp = ih264d_read_mmco_commands(ps_dec);
 1993|  5.92k|            ps_pps->ps_sps = ps_sps_tmp;
 1994|  5.92k|            ps_dec->u1_nal_unit_type = u1_nal_unit_type_tmp;
 1995|       |
 1996|  5.92k|            if(i_temp < 0)
  ------------------
  |  Branch (1996:16): [True: 154, False: 5.77k]
  ------------------
 1997|    154|            {
 1998|    154|                return ERROR_DBP_MANAGER_T;
 1999|    154|            }
 2000|  5.77k|            ps_dec->u4_bitoffset = i_temp;
 2001|  5.77k|        }
 2002|    929|        else
 2003|    929|            ps_dec->ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
 2004|  6.85k|    }
 2005|       |
 2006|  7.20k|    {
 2007|       |        /* G050 */
 2008|       |        /* Read slice_qp_delta */
 2009|  7.20k|        WORD64 i8_temp =
 2010|  7.20k|            (WORD64) ps_pps->u1_pic_init_qp + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2011|  7.20k|        if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  629|  7.20k|#define MIN_H264_QP 0
  ------------------
                      if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  634|  7.00k|#define MAX_H264_QP 51
  ------------------
  |  Branch (2011:12): [True: 203, False: 7.00k]
  |  Branch (2011:39): [True: 468, False: 6.53k]
  ------------------
 2012|  6.53k|        ps_slice->u1_slice_qp = (UWORD8) i8_temp;
 2013|  6.53k|        COPYTHECONTEXT("SH: slice_qp_delta", ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp);
 2014|  6.53k|    }
 2015|  6.53k|    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
  ------------------
  |  Branch (2015:8): [True: 3.47k, False: 3.06k]
  ------------------
 2016|  3.47k|    {
 2017|  3.47k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2018|  3.47k|        COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
 2019|       |
 2020|  3.47k|        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
  ------------------
  |  |  547|  3.47k|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (2020:12): [True: 199, False: 3.27k]
  ------------------
 2021|    199|        {
 2022|    199|            return ERROR_INV_SLICE_HDR_T;
 2023|    199|        }
 2024|  3.27k|        ps_slice->u1_disable_dblk_filter_idc = u4_temp;
 2025|  3.27k|        if(u4_temp != 1)
  ------------------
  |  Branch (2025:12): [True: 3.00k, False: 269]
  ------------------
 2026|  3.00k|        {
 2027|  3.00k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 2028|  3.00k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  3.00k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  2.84k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (2028:16): [True: 164, False: 2.84k]
  |  Branch (2028:47): [True: 92, False: 2.74k]
  ------------------
 2029|    256|            {
 2030|    256|                return ERROR_INV_SLICE_HDR_T;
 2031|    256|            }
 2032|  2.74k|            ps_slice->i1_slice_alpha_c0_offset = i_temp;
 2033|  2.74k|            COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
 2034|  2.74k|                           ps_slice->i1_slice_alpha_c0_offset >> 1);
 2035|       |
 2036|  2.74k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 2037|  2.74k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  2.74k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  2.58k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (2037:16): [True: 163, False: 2.58k]
  |  Branch (2037:47): [True: 114, False: 2.47k]
  ------------------
 2038|    277|            {
 2039|    277|                return ERROR_INV_SLICE_HDR_T;
 2040|    277|            }
 2041|  2.47k|            ps_slice->i1_slice_beta_offset = i_temp;
 2042|  2.47k|            COPYTHECONTEXT("SH: slice_beta_offset_div2", ps_slice->i1_slice_beta_offset >> 1);
 2043|  2.47k|        }
 2044|    269|        else
 2045|    269|        {
 2046|    269|            ps_slice->i1_slice_alpha_c0_offset = 0;
 2047|    269|            ps_slice->i1_slice_beta_offset = 0;
 2048|    269|        }
 2049|  3.27k|    }
 2050|  3.06k|    else
 2051|  3.06k|    {
 2052|  3.06k|        ps_slice->u1_disable_dblk_filter_idc = 0;
 2053|  3.06k|        ps_slice->i1_slice_alpha_c0_offset = 0;
 2054|  3.06k|        ps_slice->i1_slice_beta_offset = 0;
 2055|  3.06k|    }
 2056|       |
 2057|       |    /* Initialization to check if number of motion vector per 2 Mbs */
 2058|       |    /* are exceeding the range or not */
 2059|  5.80k|    ps_dec->u2_mv_2mb[0] = 0;
 2060|  5.80k|    ps_dec->u2_mv_2mb[1] = 0;
 2061|       |
 2062|       |    /*set slice header cone to 2 ,to indicate  correct header*/
 2063|  5.80k|    ps_dec->u1_slice_header_done = 2;
 2064|  5.80k|    if(ps_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (2064:8): [True: 3.67k, False: 2.12k]
  ------------------
 2065|  3.67k|    {
 2066|  3.67k|        SWITCHOFFTRACE;
 2067|  3.67k|        SWITCHONTRACECABAC;
 2068|  3.67k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (2068:12): [True: 0, False: 3.67k]
  ------------------
 2069|      0|        {
 2070|      0|            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
 2071|      0|        }
 2072|  3.67k|        else
 2073|  3.67k|            ps_dec->pf_get_mb_info = isvcd_get_mb_info_cabac_nonmbaff;
 2074|       |
 2075|  3.67k|        ret = isvcd_parse_islice_data_cabac(ps_svc_lyr_dec, ps_slice, u2_first_mb_in_slice);
 2076|  3.67k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  3.67k|#define OK        0
  ------------------
  |  Branch (2076:12): [True: 2.95k, False: 722]
  ------------------
 2077|    722|        SWITCHONTRACE;
 2078|    722|        SWITCHOFFTRACECABAC;
 2079|    722|    }
 2080|  2.12k|    else
 2081|  2.12k|    {
 2082|  2.12k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (2082:12): [True: 0, False: 2.12k]
  ------------------
 2083|      0|        {
 2084|      0|            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
 2085|      0|        }
 2086|  2.12k|        else
 2087|  2.12k|            ps_dec->pf_get_mb_info = isvcd_get_mb_info_cavlc_nonmbaff;
 2088|  2.12k|        ret = isvcd_parse_islice_data_cavlc(ps_svc_lyr_dec, ps_slice, u2_first_mb_in_slice);
 2089|  2.12k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  2.12k|#define OK        0
  ------------------
  |  Branch (2089:12): [True: 1.84k, False: 274]
  ------------------
 2090|  2.12k|    }
 2091|       |
 2092|    996|    return OK;
  ------------------
  |  |  114|    996|#define OK        0
  ------------------
 2093|  5.80k|}

isvcd_parse_epslice:
   99|  15.0k|{
  100|  15.0k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  101|  15.0k|    WORD32 i_status = OK;
  ------------------
  |  |  114|  15.0k|#define OK        0
  ------------------
  102|  15.0k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
  103|  15.0k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
  104|  15.0k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
  105|  15.0k|    dec_seq_params_t *ps_seq;
  106|  15.0k|    dec_svc_seq_params_t *ps_subset_seq;
  107|  15.0k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
  108|  15.0k|    dec_subset_seq_params_t *ps_sps_svc_ext = NULL;
  109|  15.0k|    dec_nal_unit_svc_ext_params_t *ps_nal_svc_ext = NULL;
  110|       |
  111|  15.0k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  112|  15.0k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  113|  15.0k|    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  114|  15.0k|    UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
  115|       |
  116|  15.0k|    UWORD64 u8_ref_idx_l0;
  117|  15.0k|    UWORD32 u4_temp;
  118|  15.0k|    WORD32 i_temp;
  119|  15.0k|    WORD32 ret;
  120|  15.0k|    WORD64 i8_temp;
  121|       |
  122|  15.0k|    ps_nal_svc_ext = ps_svc_lyr_dec->ps_nal_svc_ext;
  123|  15.0k|    ps_seq = ps_pps->ps_sps;
  124|  15.0k|    ps_seq += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  15.0k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  125|  15.0k|    ps_subset_seq =
  126|  15.0k|        &ps_svc_lyr_dec->ps_subset_sps[MAX_NUM_SEQ_PARAMS + ps_seq->u1_seq_parameter_set_id];
  ------------------
  |  |  521|  15.0k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  127|  15.0k|    ps_sps_svc_ext = &ps_subset_seq->s_sps_svc_ext;
  128|  15.0k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
  129|       |
  130|       |    /*--------------------------------------------------------------------*/
  131|       |    /* Read remaining contents of the slice header                        */
  132|       |    /*--------------------------------------------------------------------*/
  133|  15.0k|    {
  134|  15.0k|        WORD8 *pi1_buf;
  135|  15.0k|        WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv;
  136|  15.0k|        WORD32 *pi4_mv = (WORD32 *) pi2_mv;
  137|  15.0k|        WORD16 *pi16_refFrame;
  138|       |
  139|  15.0k|        pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame;
  140|  15.0k|        pi16_refFrame = (WORD16 *) pi1_buf;
  141|  15.0k|        *pi4_mv = 0;
  142|  15.0k|        *(pi4_mv + 1) = 0;
  143|  15.0k|        *pi16_refFrame = OUT_OF_RANGE_REF;
  ------------------
  |  |   45|  15.0k|#define OUT_OF_RANGE_REF  -1
  ------------------
  144|  15.0k|        ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8) -1;
  145|  15.0k|        ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8) -1;
  146|  15.0k|    }
  147|       |
  148|  15.0k|    if(0 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id)
  ------------------
  |  Branch (148:8): [True: 15.0k, False: 0]
  ------------------
  149|  15.0k|    {
  150|  15.0k|        ps_cur_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264(ps_bitstrm);
  151|       |
  152|  15.0k|        COPYTHECONTEXT("Slice Header SVC ext: num_ref_idx_override_flag",
  153|  15.0k|                       ps_cur_slice->u1_num_ref_idx_active_override_flag);
  154|       |
  155|  15.0k|        u8_ref_idx_l0 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0];
  156|  15.0k|        if(ps_cur_slice->u1_num_ref_idx_active_override_flag)
  ------------------
  |  Branch (156:12): [True: 9.36k, False: 5.66k]
  ------------------
  157|  9.36k|        {
  158|  9.36k|            u8_ref_idx_l0 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + (UWORD64) 1;
  159|  9.36k|        }
  160|       |
  161|  15.0k|        {
  162|  15.0k|            UWORD8 u1_max_ref_idx = H264_MAX_REF_PICS << u1_field_pic_flag;
  ------------------
  |  |  534|  15.0k|#define H264_MAX_REF_PICS         16
  ------------------
  163|  15.0k|            if(u8_ref_idx_l0 > u1_max_ref_idx)
  ------------------
  |  Branch (163:16): [True: 189, False: 14.8k]
  ------------------
  164|    189|            {
  165|    189|                return ERROR_NUM_REF;
  166|    189|            }
  167|  14.8k|            ps_cur_slice->u1_num_ref_idx_lx_active[0] = (UWORD8) u8_ref_idx_l0;
  168|  14.8k|            COPYTHECONTEXT("Slice Header SVC ext: num_ref_idx_l0_active_minus1",
  169|  14.8k|                           ps_cur_slice->u1_num_ref_idx_lx_active[0] - 1);
  170|  14.8k|        }
  171|       |
  172|      0|        {
  173|  14.8k|            UWORD8 uc_refIdxReFlagL0 = ih264d_get_bit_h264(ps_bitstrm);
  174|  14.8k|            COPYTHECONTEXT("Slice Header SVC ext: ref_pic_list_reordering_flag_l0",
  175|  14.8k|                           uc_refIdxReFlagL0);
  176|       |
  177|  14.8k|            ih264d_init_ref_idx_lx_p(ps_dec);
  178|       |            /* Store the value for future slices in the same picture */
  179|  14.8k|            ps_dec->u1_num_ref_idx_lx_active_prev = ps_cur_slice->u1_num_ref_idx_lx_active[0];
  180|       |
  181|       |            /* Modified temporarily */
  182|  14.8k|            if(uc_refIdxReFlagL0)
  ------------------
  |  Branch (182:16): [True: 8.27k, False: 6.56k]
  ------------------
  183|  8.27k|            {
  184|  8.27k|                WORD8 ret;
  185|  8.27k|                ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
  186|  8.27k|                ret = ih264d_ref_idx_reordering(ps_dec, 0);
  187|  8.27k|                if(ret == -1) return ERROR_REFIDX_ORDER_T;
  ------------------
  |  Branch (187:20): [True: 0, False: 8.27k]
  ------------------
  188|  8.27k|                ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
  189|  8.27k|            }
  190|  6.56k|            else
  191|  6.56k|                ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_init_dpb[0];
  192|  14.8k|        }
  193|       |        /* Create refIdx to POC mapping */
  194|  14.8k|        {
  195|  14.8k|            void **pui_map_ref_idx_to_poc_lx0, **pui_map_ref_idx_to_poc_lx1;
  196|  14.8k|            WORD8 idx;
  197|  14.8k|            struct pic_buffer_t *ps_pic;
  198|       |
  199|  14.8k|            pui_map_ref_idx_to_poc_lx0 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0;
  ------------------
  |  |   89|  14.8k|#define FRM_LIST_L0             0                                               //0
  ------------------
  200|  14.8k|            pui_map_ref_idx_to_poc_lx0[0] = 0;
  201|  14.8k|            pui_map_ref_idx_to_poc_lx0++;
  202|  47.7k|            for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (202:26): [True: 32.8k, False: 14.8k]
  ------------------
  203|  32.8k|            {
  204|  32.8k|                ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
  205|  32.8k|                pui_map_ref_idx_to_poc_lx0[idx] = (ps_pic->pu1_buf1);
  206|  32.8k|            }
  207|       |
  208|       |            /* Bug Fix Deblocking */
  209|  14.8k|            pui_map_ref_idx_to_poc_lx1 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1;
  ------------------
  |  |   90|  14.8k|#define FRM_LIST_L1             1 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L0 + POC_LIST_L0_TO_L1_DIFF        //0+33                  //(1 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|  14.8k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|  14.8k|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  210|  14.8k|            pui_map_ref_idx_to_poc_lx1[0] = 0;
  211|       |
  212|  14.8k|            if(u1_mbaff)
  ------------------
  |  Branch (212:16): [True: 0, False: 14.8k]
  ------------------
  213|      0|            {
  214|      0|                void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b;
  215|      0|                void **ppv_map_ref_idx_to_poc_lx_t1, **ppv_map_ref_idx_to_poc_lx_b1;
  216|      0|                ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L0;
  ------------------
  |  |   91|      0|#define TOP_LIST_FLD_L0         2 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L1 + POC_LIST_L0_TO_L1_DIFF        //0+33+33                   //(2 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  217|      0|                ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L0;
  ------------------
  |  |   93|      0|#define BOT_LIST_FLD_L0         4 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L1 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  218|       |
  219|      0|                ppv_map_ref_idx_to_poc_lx_t[0] = 0;
  220|      0|                ppv_map_ref_idx_to_poc_lx_t++;
  221|      0|                ppv_map_ref_idx_to_poc_lx_b[0] = 0;
  222|      0|                ppv_map_ref_idx_to_poc_lx_b++;
  223|       |
  224|      0|                idx = 0;
  225|      0|                for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (225:30): [True: 0, False: 0]
  ------------------
  226|      0|                {
  227|      0|                    ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
  228|      0|                    ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1);
  229|      0|                    ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1);
  230|       |
  231|      0|                    ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1;
  232|      0|                    ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1;
  233|       |
  234|      0|                    ppv_map_ref_idx_to_poc_lx_t += 2;
  235|      0|                    ppv_map_ref_idx_to_poc_lx_b += 2;
  236|      0|                }
  237|      0|                ppv_map_ref_idx_to_poc_lx_t1 = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L1;
  ------------------
  |  |   92|      0|#define TOP_LIST_FLD_L1         3 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17                //(3 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  238|      0|                ppv_map_ref_idx_to_poc_lx_t1[0] = 0;
  239|      0|                ppv_map_ref_idx_to_poc_lx_b1 = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L1;
  ------------------
  |  |   94|      0|#define BOT_LIST_FLD_L1         5 * POC_LIST_L0_TO_L1_DIFF//BOT_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  240|      0|                ppv_map_ref_idx_to_poc_lx_b1[0] = 0;
  241|      0|            }
  242|       |            /* BS is moved post recon gen in libsvc*/
  243|  14.8k|            if(ps_dec->u4_num_cores >= 2)
  ------------------
  |  Branch (243:16): [True: 9.40k, False: 5.43k]
  ------------------
  244|  9.40k|            {
  245|  9.40k|                WORD32 num_entries;
  246|  9.40k|                WORD32 size;
  247|       |
  248|  9.40k|                num_entries = MAX_FRAMES;
  ------------------
  |  |  600|  9.40k|#define MAX_FRAMES              16
  ------------------
  249|  9.40k|                if((1 >= ps_dec->ps_cur_sps->u1_num_ref_frames) && (0 == ps_dec->i4_display_delay))
  ------------------
  |  Branch (249:20): [True: 6.87k, False: 2.53k]
  |  Branch (249:68): [True: 0, False: 6.87k]
  ------------------
  250|      0|                {
  251|      0|                    num_entries = 1;
  252|      0|                }
  253|  9.40k|                num_entries = ((2 * num_entries) + 1);
  254|  9.40k|                num_entries *= 2;
  255|       |
  256|  9.40k|                size = num_entries * sizeof(void *);
  257|  9.40k|                size += PAD_MAP_IDX_POC * sizeof(void *);
  ------------------
  |  |  100|  9.40k|#define PAD_MAP_IDX_POC             (1)
  ------------------
  258|       |
  259|  9.40k|                memcpy((void *) ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc,
  260|  9.40k|                       ps_dec->ppv_map_ref_idx_to_poc, size);
  261|  9.40k|            }
  262|  14.8k|        }
  263|  14.8k|        if(ps_pps->u1_wted_pred_flag)
  ------------------
  |  Branch (263:12): [True: 2.63k, False: 12.2k]
  ------------------
  264|  2.63k|        {
  265|  2.63k|            if(!ps_nal_svc_ext->u1_no_inter_layer_pred_flag)
  ------------------
  |  Branch (265:16): [True: 2.63k, False: 0]
  ------------------
  266|  2.63k|            {
  267|  2.63k|                ps_svc_slice_params->u1_base_pred_weight_table_flag =
  268|  2.63k|                    ih264d_get_bit_h264(ps_bitstrm);
  269|  2.63k|                COPYTHECONTEXT("Slice Header SVC ext: u1_base_pred_weight_table_flag",
  270|  2.63k|                               ps_svc_slice_params->u1_base_pred_weight_table_flag);
  271|  2.63k|            }
  272|       |
  273|  2.63k|            if(ps_nal_svc_ext->u1_no_inter_layer_pred_flag ||
  ------------------
  |  Branch (273:16): [True: 0, False: 2.63k]
  ------------------
  274|  2.63k|               !ps_svc_slice_params->u1_base_pred_weight_table_flag)
  ------------------
  |  Branch (274:16): [True: 1.57k, False: 1.05k]
  ------------------
  275|  1.57k|            {
  276|  1.57k|                ret = ih264d_parse_pred_weight_table(ps_cur_slice, ps_bitstrm);
  277|  1.57k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  1.57k|#define OK        0
  ------------------
  |  Branch (277:20): [True: 116, False: 1.46k]
  ------------------
  278|       |
  279|  1.46k|                ih264d_form_pred_weight_matrix(ps_dec);
  280|  1.46k|                ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
  281|  1.46k|            }
  282|  2.63k|        }
  283|  12.2k|        else
  284|  12.2k|        {
  285|  12.2k|            ps_dec->ps_cur_slice->u2_log2Y_crwd = 0;
  286|  12.2k|            ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
  287|  12.2k|        }
  288|       |
  289|  14.7k|        ps_dec->ps_parse_cur_slice->u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd;
  290|       |
  291|  14.7k|        if(u1_mbaff && (u1_field_pic_flag == 0))
  ------------------
  |  Branch (291:12): [True: 0, False: 14.7k]
  |  Branch (291:24): [True: 0, False: 0]
  ------------------
  292|      0|        {
  293|      0|            ih264d_convert_frm_mbaff_list(ps_dec);
  294|      0|        }
  295|       |
  296|       |        /* G050 */
  297|  14.7k|        if(ps_cur_slice->u1_nal_ref_idc != 0)
  ------------------
  |  Branch (297:12): [True: 14.5k, False: 195]
  ------------------
  298|  14.5k|        {
  299|  14.5k|            if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (299:16): [True: 14.2k, False: 304]
  ------------------
  300|  14.2k|            {
  301|  14.2k|                dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
  302|  14.2k|                dec_seq_params_t *ps_sps_tmp = ps_pps->ps_sps;
  303|  14.2k|                UWORD8 u1_nal_unit_type_tmp = ps_dec->u1_nal_unit_type;
  304|       |
  305|  14.2k|                ps_pps->ps_sps = ps_dec->ps_cur_sps;
  306|       |
  307|  14.2k|                if(ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag)
  ------------------
  |  Branch (307:20): [True: 13.3k, False: 826]
  ------------------
  308|  13.3k|                    ps_dec->u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  13.3k|#define IDR_SLICE_NAL                   5
  ------------------
  309|       |
  310|  14.2k|                i_temp = ih264d_read_mmco_commands(ps_dec);
  311|       |
  312|  14.2k|                ps_pps->ps_sps = ps_sps_tmp;
  313|  14.2k|                ps_dec->u1_nal_unit_type = u1_nal_unit_type_tmp;
  314|       |
  315|  14.2k|                if(i_temp < 0)
  ------------------
  |  Branch (315:20): [True: 130, False: 14.0k]
  ------------------
  316|    130|                {
  317|    130|                    return ERROR_DBP_MANAGER_T;
  318|    130|                }
  319|  14.0k|                ps_dec->u4_bitoffset = i_temp;
  320|  14.0k|            }
  321|    304|            else
  322|    304|                ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
  323|       |
  324|  14.3k|            if(!ps_sps_svc_ext->u1_slice_header_restriction_flag)
  ------------------
  |  Branch (324:16): [True: 11.2k, False: 3.13k]
  ------------------
  325|  11.2k|            {
  326|  11.2k|                ps_svc_slice_params->u1_store_ref_base_pic_flag = ih264d_get_bit_h264(ps_bitstrm);
  327|  11.2k|                COPYTHECONTEXT("SPS_SVC_EXT: u1_store_ref_base_pic_flag",
  328|  11.2k|                               ps_svc_slice_params->u1_store_ref_base_pic_flag);
  329|       |
  330|  11.2k|                if(0 != ps_svc_slice_params->u1_store_ref_base_pic_flag)
  ------------------
  |  Branch (330:20): [True: 540, False: 10.7k]
  ------------------
  331|    540|                {
  332|    540|                    return NOT_OK;
  ------------------
  |  |  116|    540|#define NOT_OK    -1
  ------------------
  333|    540|                }
  334|  10.7k|                if(((1 == ps_nal_svc_ext->u1_use_ref_base_pic_flag) ||
  ------------------
  |  Branch (334:21): [True: 0, False: 10.7k]
  ------------------
  335|  10.7k|                    (1 == ps_svc_slice_params->u1_store_ref_base_pic_flag)) &&
  ------------------
  |  Branch (335:21): [True: 0, False: 10.7k]
  ------------------
  336|      0|                   (!ps_nal_svc_ext->u1_idr_flag))
  ------------------
  |  Branch (336:20): [True: 0, False: 0]
  ------------------
  337|      0|                {
  338|      0|                    i_status = isvcd_dec_ref_base_pic_marking(
  339|      0|                        &ps_svc_slice_params->s_ref_base_pic_marking_svc_ext, ps_bitstrm);
  340|      0|                    if(i_status != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (340:24): [True: 0, False: 0]
  ------------------
  341|      0|                    {
  342|      0|                        return i_status;
  343|      0|                    }
  344|      0|                }
  345|  10.7k|            }
  346|  14.3k|        }
  347|  14.7k|    }
  348|       |    /* G050 */
  349|       |
  350|  14.0k|    if(ps_pps->u1_entropy_coding_mode == CABAC)
  ------------------
  |  |  339|  14.0k|#define CABAC  1
  ------------------
  |  Branch (350:8): [True: 2.83k, False: 11.2k]
  ------------------
  351|  2.83k|    {
  352|  2.83k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  353|       |
  354|  2.83k|        if(u4_temp > MAX_CABAC_INIT_IDC)
  ------------------
  |  |  537|  2.83k|#define MAX_CABAC_INIT_IDC        2
  ------------------
  |  Branch (354:12): [True: 126, False: 2.70k]
  ------------------
  355|    126|        {
  356|    126|            return ERROR_INV_SLICE_HDR_T;
  357|    126|        }
  358|  2.70k|        ps_cur_slice->u1_cabac_init_idc = u4_temp;
  359|  2.70k|        COPYTHECONTEXT("Slice Header SVC ext: cabac_init_idc", ps_cur_slice->u1_cabac_init_idc);
  360|  2.70k|    }
  361|       |
  362|       |    /* Read slice_qp_delta */
  363|  13.9k|    i8_temp = (WORD64) ps_pps->u1_pic_init_qp + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  364|  13.9k|    if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  629|  13.9k|#define MIN_H264_QP 0
  ------------------
                  if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  634|  13.7k|#define MAX_H264_QP 51
  ------------------
  |  Branch (364:8): [True: 138, False: 13.7k]
  |  Branch (364:35): [True: 297, False: 13.4k]
  ------------------
  365|    435|    {
  366|    435|        return ERROR_INV_RANGE_QP_T;
  367|    435|    }
  368|  13.4k|    ps_cur_slice->u1_slice_qp = (UWORD8) i8_temp;
  369|  13.4k|    COPYTHECONTEXT("Slice Header SVC ext: slice_qp_delta",
  370|  13.4k|                   (WORD8) (ps_cur_slice->u1_slice_qp - ps_pps->u1_pic_init_qp));
  371|       |
  372|  13.4k|    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
  ------------------
  |  Branch (372:8): [True: 5.92k, False: 7.56k]
  ------------------
  373|  5.92k|    {
  374|  5.92k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  375|  5.92k|        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
  ------------------
  |  |  547|  5.92k|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (375:12): [True: 267, False: 5.66k]
  ------------------
  376|    267|        {
  377|    267|            return ERROR_INV_SLICE_HDR_T;
  378|    267|        }
  379|       |
  380|  5.66k|        COPYTHECONTEXT("Slice Header SVC ext: disable_deblocking_filter_idc", u4_temp);
  381|  5.66k|        ps_cur_slice->u1_disable_dblk_filter_idc = u4_temp;
  382|  5.66k|        if(u4_temp != 1)
  ------------------
  |  Branch (382:12): [True: 4.41k, False: 1.24k]
  ------------------
  383|  4.41k|        {
  384|  4.41k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
  385|  4.41k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  4.41k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  4.31k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (385:16): [True: 106, False: 4.31k]
  |  Branch (385:47): [True: 55, False: 4.25k]
  ------------------
  386|    161|            {
  387|    161|                return ERROR_INV_SLICE_HDR_T;
  388|    161|            }
  389|  4.25k|            ps_cur_slice->i1_slice_alpha_c0_offset = i_temp;
  390|  4.25k|            COPYTHECONTEXT("Slice Header SVC ext: slice_alpha_c0_offset_div2",
  391|  4.25k|                           ps_cur_slice->i1_slice_alpha_c0_offset >> 1);
  392|       |
  393|  4.25k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
  394|  4.25k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  4.25k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  4.16k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (394:16): [True: 91, False: 4.16k]
  |  Branch (394:47): [True: 45, False: 4.12k]
  ------------------
  395|    136|            {
  396|    136|                return ERROR_INV_SLICE_HDR_T;
  397|    136|            }
  398|  4.12k|            ps_cur_slice->i1_slice_beta_offset = i_temp;
  399|  4.12k|            COPYTHECONTEXT("Slice Header SVC ext: slice_beta_offset_div2",
  400|  4.12k|                           ps_cur_slice->i1_slice_beta_offset >> 1);
  401|  4.12k|        }
  402|  1.24k|        else
  403|  1.24k|        {
  404|  1.24k|            ps_cur_slice->i1_slice_alpha_c0_offset = 0;
  405|  1.24k|            ps_cur_slice->i1_slice_beta_offset = 0;
  406|  1.24k|        }
  407|  5.66k|    }
  408|  7.56k|    else
  409|  7.56k|    {
  410|  7.56k|        ps_cur_slice->u1_disable_dblk_filter_idc = 0;
  411|  7.56k|        ps_cur_slice->i1_slice_alpha_c0_offset = 0;
  412|  7.56k|        ps_cur_slice->i1_slice_beta_offset = 0;
  413|  7.56k|    }
  414|       |
  415|       |    /* add the remaining part of the code for svc extension from reference */
  416|  12.9k|    ret = isvcd_set_default_slice_header_ext(ps_svc_lyr_dec);
  417|  12.9k|    if(ret != OK)
  ------------------
  |  |  114|  12.9k|#define OK        0
  ------------------
  |  Branch (417:8): [True: 0, False: 12.9k]
  ------------------
  418|      0|    {
  419|      0|        return ERROR_INV_SLICE_HDR_T;
  420|      0|    }
  421|       |
  422|  12.9k|    ret = isvcd_parse_slice_header(ps_svc_lyr_dec);
  423|  12.9k|    if(ret != OK)
  ------------------
  |  |  114|  12.9k|#define OK        0
  ------------------
  |  Branch (423:8): [True: 949, False: 11.9k]
  ------------------
  424|    949|    {
  425|    949|        return ERROR_INV_SLICE_HDR_T;
  426|    949|    }
  427|       |
  428|  11.9k|    ps_dec->u1_slice_header_done = 2;
  429|       |
  430|  11.9k|    if(!ps_svc_slice_params->u1_slice_skip_flag)
  ------------------
  |  Branch (430:8): [True: 11.5k, False: 405]
  ------------------
  431|  11.5k|    {
  432|  11.5k|        if(ps_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (432:12): [True: 2.64k, False: 8.92k]
  ------------------
  433|  2.64k|        {
  434|  2.64k|            SWITCHOFFTRACE;
  435|  2.64k|            SWITCHONTRACECABAC;
  436|  2.64k|            ps_svc_lyr_dec->pf_parse_inter_slice_svc_ext =
  437|  2.64k|                isvcd_parse_inter_slice_data_cabac_enh_lyr;
  438|  2.64k|            ps_svc_lyr_dec->pf_parse_inter_mb_svc_ext = isvcd_parse_pmb_cabac;
  439|       |
  440|  2.64k|            isvcd_init_cabac_contexts(P_SLICE, ps_dec);
  ------------------
  |  |  368|  2.64k|#define P_SLICE  0
  ------------------
  441|       |
  442|  2.64k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (442:16): [True: 0, False: 2.64k]
  ------------------
  443|      0|                ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
  444|  2.64k|            else
  445|  2.64k|                ps_dec->pf_get_mb_info = isvcd_get_mb_info_cabac_nonmbaff;
  446|  2.64k|        }
  447|  8.92k|        else
  448|  8.92k|        {
  449|  8.92k|            SWITCHONTRACE;
  450|  8.92k|            SWITCHOFFTRACECABAC;
  451|  8.92k|            ps_svc_lyr_dec->pf_parse_inter_slice_svc_ext =
  452|  8.92k|                isvcd_parse_inter_slice_data_cavlc_enh_lyr;
  453|  8.92k|            ps_svc_lyr_dec->pf_parse_inter_mb_svc_ext = isvcd_parse_pmb_cavlc;
  454|       |
  455|  8.92k|            if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (455:16): [True: 0, False: 8.92k]
  ------------------
  456|      0|            {
  457|      0|                ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
  458|      0|            }
  459|  8.92k|            else
  460|  8.92k|                ps_dec->pf_get_mb_info = isvcd_get_mb_info_cavlc_nonmbaff;
  461|  8.92k|        }
  462|  11.5k|    }
  463|    405|    else
  464|    405|    {
  465|    405|        return ERROR_FEATURE_UNAVAIL;
  466|    405|    }
  467|       |
  468|  11.5k|    ps_dec->u1_B = 0;
  469|  11.5k|    ps_dec->pf_mvpred_ref_tfr_nby2mb = isvcd_mv_pred_ref_tfr_nby2_epmb;
  470|  11.5k|    ret = ps_svc_lyr_dec->pf_parse_inter_slice_svc_ext(ps_svc_lyr_dec, ps_cur_slice,
  471|  11.5k|                                                       u2_first_mb_in_slice);
  472|  11.5k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  11.5k|#define OK        0
  ------------------
  |  Branch (472:8): [True: 5.19k, False: 6.37k]
  ------------------
  473|       |
  474|  6.37k|    return OK;
  ------------------
  |  |  114|  6.37k|#define OK        0
  ------------------
  475|  11.5k|}
isvcd_parse_inter_slice_data_cabac:
  505|  17.1k|{
  506|  17.1k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  507|       |
  508|  17.1k|    UWORD32 uc_more_data_flag;
  509|  17.1k|    WORD32 i2_cur_mb_addr;
  510|  17.1k|    UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx;
  511|  17.1k|    UWORD32 u1_mbaff;
  512|  17.1k|    UWORD32 u1_num_mbs_next, u1_end_of_row;
  513|  17.1k|    const UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
  514|  17.1k|    UWORD32 u1_slice_end = 0;
  515|  17.1k|    UWORD32 u1_tfr_n_mb = 0;
  516|  17.1k|    UWORD32 u1_decode_nmb = 0;
  517|       |
  518|  17.1k|    deblk_mb_t *ps_cur_deblk_mb;
  519|  17.1k|    dec_mb_info_t *ps_cur_mb_info;
  520|  17.1k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data;
  521|  17.1k|    UWORD32 u1_inter_mb_skip_type;
  522|  17.1k|    UWORD32 u1_inter_mb_type;
  523|  17.1k|    UWORD32 u1_deblk_mb_type;
  524|  17.1k|    UWORD32 u1_mb_threshold;
  525|  17.1k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
  526|  17.1k|    WORD32 ret = OK;
  ------------------
  |  |  114|  17.1k|#define OK        0
  ------------------
  527|       |
  528|       |    /******************************************************/
  529|       |    /* Initialisations specific to B or P slice           */
  530|       |    /******************************************************/
  531|  17.1k|    if(ps_slice->u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  17.1k|#define P_SLICE  0
  ------------------
  |  Branch (531:8): [True: 7.64k, False: 9.46k]
  ------------------
  532|  7.64k|    {
  533|  7.64k|        u1_inter_mb_skip_type = CAB_P_SKIP;
  ------------------
  |  |  403|  7.64k|#define CAB_P_SKIP        0x16 /* 0001 x11x */
  ------------------
  534|  7.64k|        u1_inter_mb_type = P_MB;
  ------------------
  |  |  419|  7.64k|#define P_MB        2
  ------------------
  535|  7.64k|        u1_deblk_mb_type = D_INTER_MB;
  ------------------
  |  |  381|  7.64k|#define D_INTER_MB        0
  ------------------
  536|  7.64k|        u1_mb_threshold = 5;
  537|  7.64k|    }
  538|  9.46k|    else  // B_SLICE
  539|  9.46k|    {
  540|  9.46k|        u1_inter_mb_skip_type = CAB_B_SKIP;
  ------------------
  |  |  404|  9.46k|#define CAB_B_SKIP        0x14 /* 0001 x100 */
  ------------------
  541|  9.46k|        u1_inter_mb_type = B_MB;
  ------------------
  |  |  420|  9.46k|#define B_MB        3
  ------------------
  542|  9.46k|        u1_deblk_mb_type = D_B_SLICE;
  ------------------
  |  |  384|  9.46k|#define D_B_SLICE         4
  ------------------
  543|  9.46k|        u1_mb_threshold = 23;
  544|  9.46k|    }
  545|       |
  546|       |    /******************************************************/
  547|       |    /* Slice Level Initialisations                        */
  548|       |    /******************************************************/
  549|  17.1k|    i2_cur_mb_addr = u2_first_mb_in_slice;
  550|  17.1k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
  551|  17.1k|    ih264d_update_qp(ps_dec, 0);
  552|  17.1k|    u1_mb_idx = ps_dec->u4_mb_idx;
  553|  17.1k|    u1_num_mbs = u1_mb_idx;
  554|  17.1k|    u1_num_mbsNby2 = 0;
  555|  17.1k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
  556|  17.1k|    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
  557|  17.1k|    uc_more_data_flag = 1;
  558|       |
  559|       |    /* Initialisations specific to cabac */
  560|  17.1k|    if(ps_bitstrm->u4_ofst & 0x07)
  ------------------
  |  Branch (560:8): [True: 13.2k, False: 3.84k]
  ------------------
  561|  13.2k|    {
  562|  13.2k|        ps_bitstrm->u4_ofst += 8;
  563|  13.2k|        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
  564|  13.2k|    }
  565|       |
  566|  17.1k|    ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
  567|  17.1k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  17.1k|#define OK        0
  ------------------
  |  Branch (567:8): [True: 854, False: 16.2k]
  ------------------
  568|       |
  569|  16.2k|    ps_dec->i1_prev_mb_qp_delta = 0;
  570|       |
  571|  1.67M|    while(!u1_slice_end)
  ------------------
  |  Branch (571:11): [True: 1.67M, False: 3.60k]
  ------------------
  572|  1.67M|    {
  573|  1.67M|        UWORD8 u1_mb_type;
  574|  1.67M|        UWORD32 u4_mb_skip;
  575|       |
  576|  1.67M|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
  577|       |
  578|  1.67M|        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (578:12): [True: 7.19k, False: 1.66M]
  ------------------
  579|  7.19k|        {
  580|  7.19k|            break;
  581|  7.19k|        }
  582|       |
  583|  1.66M|        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
  584|  1.66M|        ps_dec->u4_num_mbs_cur_nmb = u1_num_mbs;
  585|       |
  586|  1.66M|        ps_cur_mb_info->u1_Mux = 0;
  587|  1.66M|        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
  588|  1.66M|        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
  589|  1.66M|        ps_cur_mb_info->u1_end_of_slice = 0;
  590|       |
  591|       |        /* Storing Default partition info */
  592|  1.66M|        ps_parse_mb_data->u1_num_part = 1;
  593|  1.66M|        ps_parse_mb_data->u4_isI_mb = 0;
  594|       |
  595|       |        /***************************************************************/
  596|       |        /* Get the required information for decoding of MB             */
  597|       |        /* mb_x, mb_y , neighbour availablity,                         */
  598|       |        /***************************************************************/
  599|  1.66M|        u4_mb_skip = ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 1);
  600|       |
  601|       |        /*********************************************************************/
  602|       |        /* initialize u1_tran_form8x8 to zero to aviod uninitialized accesses */
  603|       |        /*********************************************************************/
  604|  1.66M|        ps_cur_mb_info->u1_tran_form8x8 = 0;
  605|  1.66M|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
  606|       |
  607|       |        /***************************************************************/
  608|       |        /* Set the deblocking parameters for this MB                   */
  609|       |        /***************************************************************/
  610|  1.66M|        if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (610:12): [True: 1.66M, False: 0]
  ------------------
  611|  1.66M|            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
  612|  1.66M|                                             ps_dec->u1_mb_ngbr_availablity,
  613|  1.66M|                                             ps_dec->u1_cur_mb_fld_dec_flag);
  614|       |
  615|  1.66M|        if(u4_mb_skip)
  ------------------
  |  Branch (615:12): [True: 1.44M, False: 223k]
  ------------------
  616|  1.44M|        {
  617|       |            /* Set appropriate flags in ps_cur_mb_info and ps_dec */
  618|  1.44M|            memset(ps_dec->ps_curr_ctxt_mb_info, 0, sizeof(ctxt_inc_mb_info_t));
  619|  1.44M|            ps_dec->ps_curr_ctxt_mb_info->u1_mb_type = u1_inter_mb_skip_type;
  620|       |
  621|  1.44M|            MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|  1.44M|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  1.44M|{                                                               \
  |  |  654|  1.44M|    memset(pu4_start,value,16);                                 \
  |  |  655|  1.44M|}
  ------------------
  622|       |
  623|  1.44M|            *((UWORD32 *) ps_dec->pi1_left_ref_idx_ctxt_inc) = 0;
  624|  1.44M|            *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
  625|  1.44M|            ps_dec->i1_prev_mb_qp_delta = 0;
  626|  1.44M|            ps_cur_mb_info->u1_mb_type = MB_SKIP;
  ------------------
  |  |   59|  1.44M|#define MB_SKIP 255
  ------------------
  627|  1.44M|            ps_cur_mb_info->u1_cbp = 0;
  628|       |
  629|  1.44M|            {
  630|       |                /* Storing Skip partition info */
  631|  1.44M|                parse_part_params_t *ps_part_info = ps_dec->ps_part;
  632|  1.44M|                ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  1.44M|#define PART_DIRECT_16x16              2
  ------------------
  633|  1.44M|                ps_part_info->u1_sub_mb_num = 0;
  634|  1.44M|                ps_dec->ps_part++;
  635|  1.44M|            }
  636|       |
  637|       |            /* Update Nnzs */
  638|  1.44M|            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CABAC);
  ------------------
  |  |  339|  1.44M|#define CABAC  1
  ------------------
  639|  1.44M|            ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
  640|  1.44M|            ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
  641|  1.44M|            ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
  642|  1.44M|            if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  1.44M|#define TARGET_LAYER 2
  ------------------
  |  Branch (642:16): [True: 492k, False: 951k]
  ------------------
  643|   492k|            {
  644|   492k|                ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|   492k|#define MB_DISABLE_FILTERING          0x01
  ------------------
  645|   492k|            }
  646|  1.44M|        }
  647|   223k|        else
  648|   223k|        {
  649|       |            /* Macroblock Layer Begins */
  650|       |            /* Decode the u1_mb_type */
  651|   223k|            u1_mb_type = ih264d_parse_mb_type_cabac(ps_dec);
  652|   223k|            ps_cur_mb_info->u1_mb_type = u1_mb_type;
  653|   223k|            if(u1_mb_type > (25 + u1_mb_threshold)) return ERROR_MB_TYPE;
  ------------------
  |  Branch (653:16): [True: 0, False: 223k]
  ------------------
  654|       |
  655|       |            /* Parse Macroblock Data */
  656|   223k|            if(u1_mb_type < u1_mb_threshold)
  ------------------
  |  Branch (656:16): [True: 218k, False: 5.52k]
  ------------------
  657|   218k|            {
  658|   218k|                ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
  659|   218k|                *(ps_dec->pu1_left_yuv_dc_csbp) &= 0x6;
  660|       |
  661|   218k|                ret = ps_dec->pf_parse_inter_mb(ps_dec, ps_cur_mb_info, u1_num_mbs, u1_num_mbsNby2);
  662|   218k|                if(ret != OK) return ret;
  ------------------
  |  |  114|   218k|#define OK        0
  ------------------
  |  Branch (662:20): [True: 4.70k, False: 213k]
  ------------------
  663|   213k|                ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
  664|   213k|                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
  665|   213k|                if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|   213k|#define TARGET_LAYER 2
  ------------------
  |  Branch (665:20): [True: 39.1k, False: 174k]
  ------------------
  666|  39.1k|                {
  667|  39.1k|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|  39.1k|#define MB_DISABLE_FILTERING          0x01
  ------------------
  668|  39.1k|                }
  669|   213k|            }
  670|  5.52k|            else
  671|  5.52k|            {
  672|       |                /* Storing Intra partition info */
  673|  5.52k|                ps_parse_mb_data->u1_num_part = 0;
  674|  5.52k|                ps_parse_mb_data->u4_isI_mb = 1;
  675|       |
  676|  5.52k|                if((25 + u1_mb_threshold) == u1_mb_type)
  ------------------
  |  Branch (676:20): [True: 220, False: 5.30k]
  ------------------
  677|    220|                {
  678|       |                    /* I_PCM_MB */
  679|    220|                    ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|    220|#define I_PCM_MB    6
  ------------------
  680|    220|                    ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
  681|    220|                    if(ret != OK) return ret;
  ------------------
  |  |  114|    220|#define OK        0
  ------------------
  |  Branch (681:24): [True: 154, False: 66]
  ------------------
  682|     66|                    ps_cur_deblk_mb->u1_mb_qp = 0;
  683|     66|                }
  684|  5.30k|                else
  685|  5.30k|                {
  686|  5.30k|                    if(u1_mb_type == u1_mb_threshold)
  ------------------
  |  Branch (686:24): [True: 3.38k, False: 1.91k]
  ------------------
  687|  3.38k|                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
  ------------------
  |  |  417|  3.38k|#define I_4x4_MB    0
  ------------------
  688|  1.91k|                    else
  689|  1.91k|                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
  ------------------
  |  |  418|  1.91k|#define I_16x16_MB  1
  ------------------
  690|       |
  691|  5.30k|                    ret = ih264d_parse_imb_cabac(ps_dec, ps_cur_mb_info,
  692|  5.30k|                                                 (UWORD8) (u1_mb_type - u1_mb_threshold));
  693|  5.30k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  5.30k|#define OK        0
  ------------------
  |  Branch (693:24): [True: 592, False: 4.71k]
  ------------------
  694|  4.71k|                    ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
  695|  4.71k|                }
  696|  4.77k|                ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
  ------------------
  |  |  382|  4.77k|#define D_INTRA_MB        1
  ------------------
  697|  4.77k|            }
  698|   223k|        }
  699|       |
  700|  1.66M|        if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (700:12): [True: 0, False: 1.66M]
  ------------------
  701|      0|        {
  702|      0|            ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
  703|      0|                                        ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
  704|      0|        }
  705|  1.66M|        if(u1_mbaff)
  ------------------
  |  Branch (705:12): [True: 0, False: 1.66M]
  ------------------
  706|      0|        {
  707|      0|            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
  708|      0|        }
  709|       |
  710|  1.66M|        if(ps_cur_mb_info->u1_topmb && u1_mbaff)
  ------------------
  |  Branch (710:12): [True: 1.66M, False: 0]
  |  Branch (710:40): [True: 0, False: 1.66M]
  ------------------
  711|      0|            uc_more_data_flag = 1;
  712|  1.66M|        else
  713|  1.66M|        {
  714|  1.66M|            uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env, ps_bitstrm);
  715|  1.66M|            uc_more_data_flag = !uc_more_data_flag;
  716|  1.66M|            COPYTHECONTEXT("Decode Sliceterm", !uc_more_data_flag);
  717|  1.66M|        }
  718|       |
  719|  1.66M|        if(u1_mbaff)
  ------------------
  |  Branch (719:12): [True: 0, False: 1.66M]
  ------------------
  720|      0|        {
  721|      0|            if(!uc_more_data_flag && (0 == (i2_cur_mb_addr & 1)))
  ------------------
  |  Branch (721:16): [True: 0, False: 0]
  |  Branch (721:38): [True: 0, False: 0]
  ------------------
  722|      0|            {
  723|      0|                return ERROR_EOB_FLUSHBITS_T;
  724|      0|            }
  725|      0|        }
  726|       |        /* Next macroblock information */
  727|  1.66M|        i2_cur_mb_addr++;
  728|  1.66M|        u1_num_mbs++;
  729|  1.66M|        u1_num_mbsNby2++;
  730|  1.66M|        ps_parse_mb_data++;
  731|       |
  732|       |        /****************************************************************/
  733|       |        /* Check for End Of Row and other flags that determine when to  */
  734|       |        /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for   */
  735|       |        /* N-Mb                                                         */
  736|       |        /****************************************************************/
  737|  1.66M|        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
  738|  1.66M|        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (738:25): [True: 113k, False: 1.54M]
  |  Branch (738:50): [True: 0, False: 113k]
  |  Branch (738:62): [True: 0, False: 0]
  ------------------
  739|  1.66M|        u1_slice_end = !uc_more_data_flag;
  740|  1.66M|        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u4_recon_mb_grp) || u1_end_of_row || u1_slice_end;
  ------------------
  |  Branch (740:23): [True: 111k, False: 1.55M]
  |  Branch (740:66): [True: 1.52k, False: 1.54M]
  |  Branch (740:83): [True: 3.24k, False: 1.54M]
  ------------------
  741|  1.66M|        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
  ------------------
  |  Branch (741:25): [True: 116k, False: 1.54M]
  |  Branch (741:40): [True: 0, False: 1.54M]
  ------------------
  742|  1.66M|        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
  743|       |
  744|  1.66M|        if(u1_decode_nmb)
  ------------------
  |  Branch (744:12): [True: 116k, False: 1.54M]
  ------------------
  745|   116k|        {
  746|   116k|            ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
  747|   116k|            u1_num_mbsNby2 = 0;
  748|   116k|            {
  749|   116k|                ps_parse_mb_data = ps_dec->ps_parse_mb_data;
  750|   116k|                ps_dec->ps_part = ps_dec->ps_parse_part_params;
  751|   116k|            }
  752|   116k|            if(ret != OK) return ret;
  ------------------
  |  |  114|   116k|#define OK        0
  ------------------
  |  Branch (752:16): [True: 0, False: 116k]
  ------------------
  753|   116k|        }
  754|       |
  755|  1.66M|        if(u1_decode_nmb)
  ------------------
  |  Branch (755:12): [True: 116k, False: 1.54M]
  ------------------
  756|   116k|        {
  757|   116k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (757:16): [True: 30.5k, False: 85.8k]
  ------------------
  758|  30.5k|            {
  759|  30.5k|                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next, u1_tfr_n_mb,
  760|  30.5k|                                     u1_end_of_row);
  761|  30.5k|                ps_dec->ps_nmb_info += u1_num_mbs;
  762|  30.5k|                ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
  763|  30.5k|            }
  764|  85.8k|            else
  765|  85.8k|            {
  766|  85.8k|                if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|  85.8k|#define TARGET_LAYER 2
  ------------------
  |  Branch (766:20): [True: 34.9k, False: 50.9k]
  ------------------
  767|  34.9k|                {
  768|  34.9k|                    ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next,
  769|  34.9k|                                                u1_tfr_n_mb, u1_end_of_row);
  770|  34.9k|                }
  771|  50.9k|                else
  772|  50.9k|                {
  773|  50.9k|                    isvcd_decode_recon_tfr_nmb_base_lyr(ps_svc_lyr_dec, u1_mb_idx, u1_num_mbs,
  774|  50.9k|                                                        u1_num_mbs_next, u1_tfr_n_mb,
  775|  50.9k|                                                        u1_end_of_row);
  776|  50.9k|                }
  777|  85.8k|            }
  778|   116k|            ps_dec->u4_total_mbs_coded += u1_num_mbs;
  779|   116k|            if(u1_tfr_n_mb) u1_num_mbs = 0;
  ------------------
  |  Branch (779:16): [True: 116k, False: 0]
  ------------------
  780|   116k|            u1_mb_idx = u1_num_mbs;
  781|   116k|            ps_dec->u4_mb_idx = u1_num_mbs;
  782|   116k|        }
  783|  1.66M|    }
  784|       |
  785|  10.8k|    ps_dec->u4_num_mbs_cur_nmb = 0;
  786|  10.8k|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
  787|       |
  788|  10.8k|    return ret;
  789|  16.2k|}
isvcd_parse_inter_slice_data_cavlc:
  820|  56.8k|{
  821|  56.8k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  822|  56.8k|    UWORD32 uc_more_data_flag;
  823|  56.8k|    WORD32 i2_cur_mb_addr;
  824|  56.8k|    UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx;
  825|  56.8k|    UWORD32 i2_mb_skip_run;
  826|  56.8k|    UWORD32 u1_read_mb_type;
  827|       |
  828|  56.8k|    UWORD32 u1_mbaff;
  829|  56.8k|    UWORD32 u1_num_mbs_next, u1_end_of_row;
  830|  56.8k|    const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
  831|  56.8k|    UWORD32 u1_slice_end = 0;
  832|  56.8k|    UWORD32 u1_tfr_n_mb = 0;
  833|  56.8k|    UWORD32 u1_decode_nmb = 0;
  834|       |
  835|  56.8k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
  836|  56.8k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  837|  56.8k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  838|  56.8k|    deblk_mb_t *ps_cur_deblk_mb;
  839|  56.8k|    dec_mb_info_t *ps_cur_mb_info;
  840|  56.8k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data;
  841|  56.8k|    UWORD32 u1_inter_mb_type;
  842|  56.8k|    UWORD32 u1_deblk_mb_type;
  843|  56.8k|    UWORD32 u1_mb_threshold;
  844|  56.8k|    WORD32 ret = OK;
  ------------------
  |  |  114|  56.8k|#define OK        0
  ------------------
  845|       |
  846|       |    /******************************************************/
  847|       |    /* Initialisations specific to B or P slice           */
  848|       |    /******************************************************/
  849|  56.8k|    if(ps_slice->u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  56.8k|#define P_SLICE  0
  ------------------
  |  Branch (849:8): [True: 44.3k, False: 12.5k]
  ------------------
  850|  44.3k|    {
  851|  44.3k|        u1_inter_mb_type = P_MB;
  ------------------
  |  |  419|  44.3k|#define P_MB        2
  ------------------
  852|  44.3k|        u1_deblk_mb_type = D_INTER_MB;
  ------------------
  |  |  381|  44.3k|#define D_INTER_MB        0
  ------------------
  853|  44.3k|        u1_mb_threshold = 5;
  854|  44.3k|    }
  855|  12.5k|    else  // B_SLICE
  856|  12.5k|    {
  857|  12.5k|        u1_inter_mb_type = B_MB;
  ------------------
  |  |  420|  12.5k|#define B_MB        3
  ------------------
  858|  12.5k|        u1_deblk_mb_type = D_B_SLICE;
  ------------------
  |  |  384|  12.5k|#define D_B_SLICE         4
  ------------------
  859|  12.5k|        u1_mb_threshold = 23;
  860|  12.5k|    }
  861|       |
  862|       |    /******************************************************/
  863|       |    /* Slice Level Initialisations                        */
  864|       |    /******************************************************/
  865|  56.8k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
  866|  56.8k|    ih264d_update_qp(ps_dec, 0);
  867|  56.8k|    u1_mb_idx = ps_dec->u4_mb_idx;
  868|  56.8k|    u1_num_mbs = u1_mb_idx;
  869|       |
  870|  56.8k|    u1_num_mbsNby2 = 0;
  871|  56.8k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
  872|  56.8k|    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
  873|  56.8k|    i2_mb_skip_run = 0;
  874|  56.8k|    uc_more_data_flag = 1;
  875|  56.8k|    u1_read_mb_type = 0;
  876|       |
  877|  1.07M|    while(!u1_slice_end)
  ------------------
  |  Branch (877:11): [True: 1.05M, False: 24.2k]
  ------------------
  878|  1.05M|    {
  879|  1.05M|        UWORD8 u1_mb_type;
  880|       |
  881|  1.05M|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
  882|  1.05M|        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (882:12): [True: 17.7k, False: 1.03M]
  ------------------
  883|  17.7k|        {
  884|  17.7k|            break;
  885|  17.7k|        }
  886|       |
  887|  1.03M|        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
  888|  1.03M|        ps_dec->u4_num_mbs_cur_nmb = u1_num_mbs;
  889|  1.03M|        ps_cur_mb_info->u1_Mux = 0;
  890|  1.03M|        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
  891|  1.03M|        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
  892|       |
  893|  1.03M|        ps_cur_mb_info->u1_end_of_slice = 0;
  894|       |
  895|       |        /* Storing Default partition info */
  896|  1.03M|        ps_parse_mb_data->u1_num_part = 1;
  897|  1.03M|        ps_parse_mb_data->u4_isI_mb = 0;
  898|       |
  899|  1.03M|        if((!i2_mb_skip_run) && (!u1_read_mb_type))
  ------------------
  |  Branch (899:12): [True: 210k, False: 823k]
  |  Branch (899:33): [True: 169k, False: 40.4k]
  ------------------
  900|   169k|        {
  901|       |            // Inlined ih264d_uev
  902|   169k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  903|   169k|            UWORD32 u4_word, u4_ldz;
  904|       |
  905|       |            /***************************************************************/
  906|       |            /* Find leading zeros in next 32 bits                          */
  907|       |            /***************************************************************/
  908|   169k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|   169k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|   169k|{                                                                           \
  |  |  152|   169k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|   169k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|   169k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|   169k|                                                                            \
  |  |  156|   169k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|   169k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 162k, False: 7.02k]
  |  |  ------------------
  |  |  158|   169k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|   162k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|   169k|}
  ------------------
  909|       |
  910|   169k|            u4_ldz = CLZ(u4_word);
  911|       |
  912|       |            /* Flush the ps_bitstrm */
  913|   169k|            u4_bitstream_offset += (u4_ldz + 1);
  914|       |            /* Read the suffix from the ps_bitstrm */
  915|   169k|            u4_word = 0;
  916|   169k|            if(u4_ldz)
  ------------------
  |  Branch (916:16): [True: 63.5k, False: 106k]
  ------------------
  917|  63.5k|            {
  918|  63.5k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  63.5k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  63.5k|{                                                                           \
  |  |  122|  63.5k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  63.5k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  63.5k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  63.5k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  63.5k|                                                                            \
  |  |  127|  63.5k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 59.2k, False: 4.29k]
  |  |  ------------------
  |  |  128|  63.5k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  59.2k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  63.5k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  63.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  63.5k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  63.5k|}                                                                           \
  ------------------
  919|  63.5k|            }
  920|   169k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
  921|   169k|            i2_mb_skip_run = ((1 << u4_ldz) + u4_word - 1);
  922|       |            // Inlined ih264d_uev
  923|   169k|            COPYTHECONTEXT("mb_skip_run", i2_mb_skip_run);
  924|   169k|            uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
  ------------------
  |  |   97|   169k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|   169k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  ------------------
  925|   169k|            u1_read_mb_type = uc_more_data_flag;
  926|   169k|        }
  927|       |
  928|       |        /***************************************************************/
  929|       |        /* Get the required information for decoding of MB            */
  930|       |        /* mb_x, mb_y , neighbour availablity,                              */
  931|       |        /***************************************************************/
  932|  1.03M|        ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, i2_mb_skip_run);
  933|       |
  934|       |        /***************************************************************/
  935|       |        /* Set the deblocking parameters for this MB */
  936|       |        /***************************************************************/
  937|  1.03M|        if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (937:12): [True: 1.03M, False: 0]
  ------------------
  938|  1.03M|            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
  939|  1.03M|                                             ps_dec->u1_mb_ngbr_availablity,
  940|  1.03M|                                             ps_dec->u1_cur_mb_fld_dec_flag);
  941|       |
  942|  1.03M|        if(i2_mb_skip_run)
  ------------------
  |  Branch (942:12): [True: 886k, False: 146k]
  ------------------
  943|   886k|        {
  944|       |            /* Set appropriate flags in ps_cur_mb_info and ps_dec */
  945|   886k|            ps_dec->i1_prev_mb_qp_delta = 0;
  946|   886k|            ps_dec->u1_sub_mb_num = 0;
  947|   886k|            ps_cur_mb_info->u1_mb_type = MB_SKIP;
  ------------------
  |  |   59|   886k|#define MB_SKIP 255
  ------------------
  948|   886k|            ps_cur_mb_info->u1_mb_mc_mode = PRED_16x16;
  ------------------
  |  |  450|   886k|#define PRED_16x16  0
  ------------------
  949|   886k|            ps_cur_mb_info->u1_cbp = 0;
  950|       |
  951|   886k|            {
  952|       |                /* Storing Skip partition info */
  953|   886k|                parse_part_params_t *ps_part_info = ps_dec->ps_part;
  954|   886k|                ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|   886k|#define PART_DIRECT_16x16              2
  ------------------
  955|   886k|                ps_part_info->u1_sub_mb_num = 0;
  956|   886k|                ps_dec->ps_part++;
  957|   886k|            }
  958|       |
  959|       |            /* Update Nnzs */
  960|   886k|            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|   886k|#define CAVLC  0
  ------------------
  961|       |
  962|   886k|            ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
  963|   886k|            ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
  964|       |
  965|   886k|            if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|   886k|#define TARGET_LAYER 2
  ------------------
  |  Branch (965:16): [True: 188k, False: 698k]
  ------------------
  966|   188k|            {
  967|   188k|                ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|   188k|#define MB_DISABLE_FILTERING          0x01
  ------------------
  968|   188k|            }
  969|       |
  970|   886k|            i2_mb_skip_run--;
  971|   886k|        }
  972|   146k|        else
  973|   146k|        {
  974|   146k|            u1_read_mb_type = 0;
  975|       |            /**************************************************************/
  976|       |            /* Macroblock Layer Begins, Decode the u1_mb_type                */
  977|       |            /**************************************************************/
  978|   146k|            {
  979|   146k|                UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
  980|   146k|                UWORD32 u4_word, u4_ldz, u4_temp;
  981|       |
  982|       |                // Inlined ih264d_uev
  983|       |                /***************************************************************/
  984|       |                /* Find leading zeros in next 32 bits                          */
  985|       |                /***************************************************************/
  986|   146k|                NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|   146k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|   146k|{                                                                           \
  |  |  152|   146k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|   146k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|   146k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|   146k|                                                                            \
  |  |  156|   146k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|   146k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 142k, False: 4.50k]
  |  |  ------------------
  |  |  158|   146k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|   142k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|   146k|}
  ------------------
  987|   146k|                u4_ldz = CLZ(u4_word);
  988|       |                /* Flush the ps_bitstrm */
  989|   146k|                u4_bitstream_offset += (u4_ldz + 1);
  990|       |                /* Read the suffix from the ps_bitstrm */
  991|   146k|                u4_word = 0;
  992|   146k|                if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  70.0k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  70.0k|{                                                                           \
  |  |  122|  70.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  70.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  70.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  70.0k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  70.0k|                                                                            \
  |  |  127|  70.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 65.5k, False: 4.50k]
  |  |  ------------------
  |  |  128|  70.0k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  65.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  70.0k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  70.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  70.0k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  70.0k|}                                                                           \
  ------------------
  |  Branch (992:20): [True: 70.0k, False: 76.5k]
  ------------------
  993|   146k|                *pu4_bitstrm_ofst = u4_bitstream_offset;
  994|   146k|                u4_temp = ((1 << u4_ldz) + u4_word - 1);
  995|       |                // Inlined ih264d_uev
  996|   146k|                if(u4_temp > (UWORD32) (25 + u1_mb_threshold)) return ERROR_MB_TYPE;
  ------------------
  |  Branch (996:20): [True: 1.26k, False: 145k]
  ------------------
  997|   145k|                u1_mb_type = u4_temp;
  998|   145k|                COPYTHECONTEXT("u1_mb_type", u1_mb_type);
  999|   145k|            }
 1000|      0|            ps_cur_mb_info->u1_mb_type = u1_mb_type;
 1001|       |
 1002|       |            /**************************************************************/
 1003|       |            /* Parse Macroblock data                                      */
 1004|       |            /**************************************************************/
 1005|   145k|            if(u1_mb_type < u1_mb_threshold)
  ------------------
  |  Branch (1005:16): [True: 119k, False: 25.5k]
  ------------------
 1006|   119k|            {
 1007|   119k|                ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
 1008|       |
 1009|   119k|                ret = ps_dec->pf_parse_inter_mb(ps_dec, ps_cur_mb_info, u1_num_mbs, u1_num_mbsNby2);
 1010|   119k|                if(ret != OK) return ret;
  ------------------
  |  |  114|   119k|#define OK        0
  ------------------
  |  Branch (1010:20): [True: 10.3k, False: 109k]
  ------------------
 1011|   109k|                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 1012|       |
 1013|   109k|                if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|   109k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1013:20): [True: 41.6k, False: 67.8k]
  ------------------
 1014|  41.6k|                {
 1015|  41.6k|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|  41.6k|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1016|  41.6k|                }
 1017|   109k|            }
 1018|  25.5k|            else
 1019|  25.5k|            {
 1020|       |                /* Storing Intra partition info */
 1021|  25.5k|                ps_parse_mb_data->u1_num_part = 0;
 1022|  25.5k|                ps_parse_mb_data->u4_isI_mb = 1;
 1023|       |
 1024|  25.5k|                if((25 + u1_mb_threshold) == u1_mb_type)
  ------------------
  |  Branch (1024:20): [True: 921, False: 24.5k]
  ------------------
 1025|    921|                {
 1026|       |                    /* I_PCM_MB */
 1027|    921|                    ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|    921|#define I_PCM_MB    6
  ------------------
 1028|    921|                    ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
 1029|    921|                    if(ret != OK) return ret;
  ------------------
  |  |  114|    921|#define OK        0
  ------------------
  |  Branch (1029:24): [True: 0, False: 921]
  ------------------
 1030|    921|                    ps_dec->u1_qp = 0;
 1031|    921|                }
 1032|  24.5k|                else
 1033|  24.5k|                {
 1034|  24.5k|                    ret = ih264d_parse_imb_cavlc(ps_dec, ps_cur_mb_info, u1_num_mbs,
 1035|  24.5k|                                                 (UWORD8) (u1_mb_type - u1_mb_threshold));
 1036|  24.5k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  24.5k|#define OK        0
  ------------------
  |  Branch (1036:24): [True: 3.24k, False: 21.3k]
  ------------------
 1037|  24.5k|                }
 1038|       |
 1039|  22.2k|                ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
  ------------------
  |  |  382|  22.2k|#define D_INTRA_MB        1
  ------------------
 1040|  22.2k|            }
 1041|   131k|            uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
  ------------------
  |  |   97|   131k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|   131k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  ------------------
 1042|   131k|        }
 1043|  1.01M|        ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 1044|       |
 1045|  1.01M|        if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (1045:12): [True: 0, False: 1.01M]
  ------------------
 1046|      0|        {
 1047|      0|            ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
 1048|      0|                                        ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
 1049|      0|        }
 1050|  1.01M|        if(u1_mbaff)
  ------------------
  |  Branch (1050:12): [True: 0, False: 1.01M]
  ------------------
 1051|      0|        {
 1052|      0|            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
 1053|      0|            if(!uc_more_data_flag && !i2_mb_skip_run && (0 == (i2_cur_mb_addr & 1)))
  ------------------
  |  Branch (1053:16): [True: 0, False: 0]
  |  Branch (1053:38): [True: 0, False: 0]
  |  Branch (1053:57): [True: 0, False: 0]
  ------------------
 1054|      0|            {
 1055|      0|                return ERROR_EOB_FLUSHBITS_T;
 1056|      0|            }
 1057|      0|        }
 1058|       |
 1059|       |        /**************************************************************/
 1060|       |        /* Get next Macroblock address                                */
 1061|       |        /**************************************************************/
 1062|  1.01M|        i2_cur_mb_addr++;
 1063|       |
 1064|  1.01M|        u1_num_mbs++;
 1065|  1.01M|        u1_num_mbsNby2++;
 1066|  1.01M|        ps_parse_mb_data++;
 1067|       |
 1068|       |        /****************************************************************/
 1069|       |        /* Check for End Of Row and other flags that determine when to  */
 1070|       |        /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for   */
 1071|       |        /* N-Mb                                                         */
 1072|       |        /****************************************************************/
 1073|  1.01M|        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
 1074|  1.01M|        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (1074:25): [True: 184k, False: 834k]
  |  Branch (1074:50): [True: 0, False: 184k]
  |  Branch (1074:62): [True: 0, False: 0]
  ------------------
 1075|  1.01M|        u1_slice_end = (!(uc_more_data_flag || i2_mb_skip_run));
  ------------------
  |  Branch (1075:27): [True: 409k, False: 609k]
  |  Branch (1075:48): [True: 585k, False: 24.2k]
  ------------------
 1076|  1.01M|        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u4_recon_mb_grp) || u1_end_of_row || u1_slice_end;
  ------------------
  |  Branch (1076:23): [True: 184k, False: 834k]
  |  Branch (1076:66): [True: 225, False: 834k]
  |  Branch (1076:83): [True: 21.2k, False: 812k]
  ------------------
 1077|  1.01M|        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
  ------------------
  |  Branch (1077:25): [True: 205k, False: 812k]
  |  Branch (1077:40): [True: 0, False: 812k]
  ------------------
 1078|  1.01M|        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
 1079|       |
 1080|  1.01M|        if(u1_decode_nmb)
  ------------------
  |  Branch (1080:12): [True: 205k, False: 812k]
  ------------------
 1081|   205k|        {
 1082|   205k|            ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
 1083|   205k|            u1_num_mbsNby2 = 0;
 1084|   205k|            ps_parse_mb_data = ps_dec->ps_parse_mb_data;
 1085|   205k|            ps_dec->ps_part = ps_dec->ps_parse_part_params;
 1086|   205k|            if(ret != OK) return ret;
  ------------------
  |  |  114|   205k|#define OK        0
  ------------------
  |  Branch (1086:16): [True: 0, False: 205k]
  ------------------
 1087|   205k|        }
 1088|       |
 1089|  1.01M|        if(u1_decode_nmb)
  ------------------
  |  Branch (1089:12): [True: 205k, False: 812k]
  ------------------
 1090|   205k|        {
 1091|   205k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1091:16): [True: 52.1k, False: 153k]
  ------------------
 1092|  52.1k|            {
 1093|  52.1k|                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next, u1_tfr_n_mb,
 1094|  52.1k|                                     u1_end_of_row);
 1095|  52.1k|                ps_dec->ps_nmb_info += u1_num_mbs;
 1096|  52.1k|                ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
 1097|  52.1k|            }
 1098|   153k|            else
 1099|   153k|            {
 1100|   153k|                if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|   153k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1100:20): [True: 43.2k, False: 110k]
  ------------------
 1101|  43.2k|                {
 1102|  43.2k|                    ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next,
 1103|  43.2k|                                                u1_tfr_n_mb, u1_end_of_row);
 1104|  43.2k|                }
 1105|   110k|                else
 1106|   110k|                {
 1107|   110k|                    isvcd_decode_recon_tfr_nmb_base_lyr(ps_svc_lyr_dec, u1_mb_idx, u1_num_mbs,
 1108|   110k|                                                        u1_num_mbs_next, u1_tfr_n_mb,
 1109|   110k|                                                        u1_end_of_row);
 1110|   110k|                }
 1111|   153k|            }
 1112|   205k|            ps_dec->u4_total_mbs_coded += u1_num_mbs;
 1113|   205k|            if(u1_tfr_n_mb) u1_num_mbs = 0;
  ------------------
  |  Branch (1113:16): [True: 205k, False: 0]
  ------------------
 1114|   205k|            u1_mb_idx = u1_num_mbs;
 1115|   205k|            ps_dec->u4_mb_idx = u1_num_mbs;
 1116|   205k|        }
 1117|  1.01M|    }
 1118|       |
 1119|  42.0k|    ps_dec->u4_num_mbs_cur_nmb = 0;
 1120|  42.0k|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
 1121|       |
 1122|  42.0k|    return ret;
 1123|  56.8k|}
isvcd_parse_inter_slice_data_cabac_enh_lyr:
 1154|  5.06k|{
 1155|  5.06k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1156|  5.06k|    UWORD32 uc_more_data_flag;
 1157|  5.06k|    WORD32 i2_cur_mb_addr;
 1158|  5.06k|    UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx;
 1159|  5.06k|    UWORD32 u1_mbaff;
 1160|  5.06k|    UWORD32 u1_num_mbs_next, u1_end_of_row;
 1161|  5.06k|    const UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
 1162|  5.06k|    UWORD32 u1_slice_end = 0;
 1163|  5.06k|    UWORD32 u1_tfr_n_mb = 0;
 1164|  5.06k|    UWORD32 u1_decode_nmb = 0;
 1165|       |
 1166|  5.06k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1167|  5.06k|    deblk_mb_t *ps_cur_deblk_mb;
 1168|  5.06k|    dec_mb_info_t *ps_cur_mb_info;
 1169|  5.06k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
 1170|       |
 1171|  5.06k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data;
 1172|  5.06k|    UWORD32 u1_inter_mb_skip_type;
 1173|  5.06k|    UWORD32 u1_inter_mb_type;
 1174|  5.06k|    UWORD32 u1_deblk_mb_type;
 1175|  5.06k|    UWORD32 u1_mb_threshold;
 1176|  5.06k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
 1177|  5.06k|    decoding_envirnoment_t *ps_cab_env = NULL;
 1178|  5.06k|    WORD32 ret = OK;
  ------------------
  |  |  114|  5.06k|#define OK        0
  ------------------
 1179|       |
 1180|  5.06k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1181|       |
 1182|       |    /******************************************************/
 1183|       |    /* Initialisations specific to B or P slice           */
 1184|       |    /******************************************************/
 1185|  5.06k|    if(ps_slice->u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  5.06k|#define P_SLICE  0
  ------------------
  |  Branch (1185:8): [True: 2.64k, False: 2.42k]
  ------------------
 1186|  2.64k|    {
 1187|  2.64k|        u1_inter_mb_skip_type = CAB_P_SKIP;
  ------------------
  |  |  403|  2.64k|#define CAB_P_SKIP        0x16 /* 0001 x11x */
  ------------------
 1188|  2.64k|        u1_inter_mb_type = P_MB;
  ------------------
  |  |  419|  2.64k|#define P_MB        2
  ------------------
 1189|  2.64k|        u1_deblk_mb_type = D_INTER_MB;
  ------------------
  |  |  381|  2.64k|#define D_INTER_MB        0
  ------------------
 1190|  2.64k|        u1_mb_threshold = 5;
 1191|  2.64k|    }
 1192|  2.42k|    else  // EB_SLICE
 1193|  2.42k|    {
 1194|  2.42k|        u1_inter_mb_skip_type = CAB_B_SKIP;
  ------------------
  |  |  404|  2.42k|#define CAB_B_SKIP        0x14 /* 0001 x100 */
  ------------------
 1195|  2.42k|        u1_inter_mb_type = B_MB;
  ------------------
  |  |  420|  2.42k|#define B_MB        3
  ------------------
 1196|  2.42k|        u1_deblk_mb_type = D_B_SLICE;
  ------------------
  |  |  384|  2.42k|#define D_B_SLICE         4
  ------------------
 1197|  2.42k|        u1_mb_threshold = 23;
 1198|  2.42k|    }
 1199|       |
 1200|       |    /******************************************************/
 1201|       |    /* Slice Level Initialisations                        */
 1202|       |    /******************************************************/
 1203|  5.06k|    i2_cur_mb_addr = u2_first_mb_in_slice;
 1204|  5.06k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
 1205|  5.06k|    ih264d_update_qp(ps_dec, 0);
 1206|  5.06k|    u1_mb_idx = ps_dec->u4_mb_idx;
 1207|  5.06k|    u1_num_mbs = u1_mb_idx;
 1208|  5.06k|    u1_num_mbsNby2 = 0;
 1209|  5.06k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
 1210|  5.06k|    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
 1211|  5.06k|    uc_more_data_flag = 1;
 1212|       |
 1213|       |    /* Initialisations specific to cabac */
 1214|  5.06k|    if(ps_bitstrm->u4_ofst & 0x07)
  ------------------
  |  Branch (1214:8): [True: 4.43k, False: 636]
  ------------------
 1215|  4.43k|    {
 1216|  4.43k|        ps_bitstrm->u4_ofst += 8;
 1217|  4.43k|        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
 1218|  4.43k|    }
 1219|       |
 1220|  5.06k|    ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
 1221|  5.06k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  5.06k|#define OK        0
  ------------------
  |  Branch (1221:8): [True: 109, False: 4.95k]
  ------------------
 1222|       |
 1223|  4.95k|    ps_cab_env = &ps_dec->s_cab_dec_env;
 1224|  4.95k|    ps_dec->i1_prev_mb_qp_delta = 0;
 1225|       |
 1226|  86.8k|    while(!u1_slice_end)
  ------------------
  |  Branch (1226:11): [True: 86.1k, False: 658]
  ------------------
 1227|  86.1k|    {
 1228|  86.1k|        UWORD8 u1_mb_type;
 1229|  86.1k|        UWORD32 u4_mb_skip;
 1230|  86.1k|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
 1231|  86.1k|        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (1231:12): [True: 907, False: 85.2k]
  ------------------
 1232|    907|        {
 1233|    907|            break;
 1234|    907|        }
 1235|       |
 1236|  85.2k|        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
 1237|  85.2k|        ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + u1_num_mbs;
 1238|  85.2k|        ps_dec->u4_num_mbs_cur_nmb = u1_num_mbs;
 1239|  85.2k|        ps_cur_mb_info->u1_Mux = 0;
 1240|  85.2k|        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
 1241|  85.2k|        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
 1242|  85.2k|        ps_cur_mb_info->u1_end_of_slice = 0;
 1243|       |
 1244|       |        /* Storing Default partition info */
 1245|  85.2k|        ps_parse_mb_data->u1_num_part = 1;
 1246|  85.2k|        ps_parse_mb_data->u4_isI_mb = 0;
 1247|       |
 1248|       |        /***************************************************************/
 1249|       |        /* Get the required information for decoding of MB             */
 1250|       |        /* mb_x, mb_y , neighbour availablity,                         */
 1251|       |        /***************************************************************/
 1252|  85.2k|        u4_mb_skip = ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 1);
 1253|  85.2k|        ps_svc_cur_mb_info->u1_crop_window_flag =
 1254|  85.2k|            *(ps_svc_lyr_dec->pu1_crop_wnd_flag + ps_cur_mb_info->u2_mbx +
 1255|  85.2k|              (ps_cur_mb_info->u2_mby * ps_dec->u2_frm_wd_in_mbs));
 1256|       |        /*********************************************************************/
 1257|       |        /* initialize u1_tran_form8x8 to zero to avoid uninitialized accesses */
 1258|       |        /*********************************************************************/
 1259|  85.2k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
 1260|  85.2k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 1261|       |
 1262|       |        /***************************************************************/
 1263|       |        /* Set the deblocking parameters for this MB                   */
 1264|       |        /***************************************************************/
 1265|  85.2k|        if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1265:12): [True: 85.2k, False: 0]
  ------------------
 1266|  85.2k|            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
 1267|  85.2k|                                             ps_dec->u1_mb_ngbr_availablity,
 1268|  85.2k|                                             ps_dec->u1_cur_mb_fld_dec_flag);
 1269|       |
 1270|  85.2k|        if(u4_mb_skip)
  ------------------
  |  Branch (1270:12): [True: 25.1k, False: 60.1k]
  ------------------
 1271|  25.1k|        {
 1272|       |            /* Set appropriate flags in ps_cur_mb_info and ps_dec */
 1273|  25.1k|            memset(ps_dec->ps_curr_ctxt_mb_info, 0, sizeof(ctxt_inc_mb_info_t));
 1274|  25.1k|            ps_dec->ps_curr_ctxt_mb_info->u1_mb_type = u1_inter_mb_skip_type;
 1275|       |
 1276|  25.1k|            MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
  ------------------
  |  |  652|  25.1k|#define MEMSET_16BYTES(pu4_start,value)                         \
  |  |  653|  25.1k|{                                                               \
  |  |  654|  25.1k|    memset(pu4_start,value,16);                                 \
  |  |  655|  25.1k|}
  ------------------
 1277|       |
 1278|  25.1k|            *((UWORD32 *) ps_dec->pi1_left_ref_idx_ctxt_inc) = 0;
 1279|  25.1k|            *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
 1280|       |
 1281|  25.1k|            ps_dec->i1_prev_mb_qp_delta = 0;
 1282|  25.1k|            ps_cur_mb_info->u1_mb_type = MB_SKIP;
  ------------------
  |  |   59|  25.1k|#define MB_SKIP 255
  ------------------
 1283|  25.1k|            ps_cur_mb_info->u1_cbp = 0;
 1284|  25.1k|            ps_svc_cur_mb_info->u1_base_mode_flag = 0;
 1285|  25.1k|            ps_svc_cur_mb_info->au1_motion_pred_flag[0] = 0;
 1286|  25.1k|            ps_svc_cur_mb_info->au1_motion_pred_flag[1] = 0;
 1287|  25.1k|            ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
 1288|       |
 1289|  25.1k|            {
 1290|       |                /* Storing Skip partition info */
 1291|  25.1k|                parse_part_params_t *ps_part_info = ps_dec->ps_part;
 1292|  25.1k|                ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  25.1k|#define PART_DIRECT_16x16              2
  ------------------
 1293|  25.1k|                ps_part_info->u1_sub_mb_num = 0;
 1294|  25.1k|                ps_dec->ps_part++;
 1295|  25.1k|            }
 1296|       |
 1297|       |            /* Update Nnzs */
 1298|  25.1k|            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CABAC);
  ------------------
  |  |  339|  25.1k|#define CABAC  1
  ------------------
 1299|  25.1k|            ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
 1300|  25.1k|            ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 1301|  25.1k|            ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 1302|  25.1k|            if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  25.1k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1302:16): [True: 0, False: 25.1k]
  ------------------
 1303|      0|            {
 1304|      0|                ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|      0|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1305|      0|            }
 1306|  25.1k|        }
 1307|  60.1k|        else
 1308|  60.1k|        {
 1309|       |            /* Variables for handling Cabac contexts */
 1310|  60.1k|            UWORD8 *pu1_cur_svc_base_mode_flag;
 1311|  60.1k|            UWORD8 u1_left_svc_base_mode_flag;
 1312|  60.1k|            UWORD8 u1_top_svc_base_mode_flag;
 1313|       |
 1314|  60.1k|            UWORD32 u4_a, u4_b, u4_ctxt_inc;
 1315|  60.1k|            ps_svc_cur_mb_info->u1_base_mode_flag = 0;
 1316|       |            /* Macroblock Layer Begins */
 1317|  60.1k|            if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (1317:16): [True: 59.6k, False: 462]
  ------------------
 1318|  59.6k|               ps_svc_slice_params->u1_adaptive_base_mode_flag)
  ------------------
  |  Branch (1318:16): [True: 15.7k, False: 43.8k]
  ------------------
 1319|  15.7k|            {
 1320|  15.7k|                pu1_cur_svc_base_mode_flag =
 1321|  15.7k|                    ps_svc_lyr_dec->pu1_svc_base_mode_flag + ps_cur_mb_info->u2_mbx;
 1322|  15.7k|                pu1_cur_svc_base_mode_flag +=
 1323|  15.7k|                    ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_svc_base_mode_cabac_stride;
 1324|       |
 1325|  15.7k|                u1_left_svc_base_mode_flag = 0;
 1326|  15.7k|                if(ps_dec->u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK)
  ------------------
  |  |   53|  15.7k|#define LEFT_MB_AVAILABLE_MASK      0x01
  ------------------
  |  Branch (1326:20): [True: 11.9k, False: 3.88k]
  ------------------
 1327|  11.9k|                    u1_left_svc_base_mode_flag = *(pu1_cur_svc_base_mode_flag - 1);
 1328|       |
 1329|  15.7k|                u1_top_svc_base_mode_flag = 0;
 1330|  15.7k|                if(ps_dec->u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK)
  ------------------
  |  |   55|  15.7k|#define TOP_MB_AVAILABLE_MASK       0x04
  ------------------
  |  Branch (1330:20): [True: 13.3k, False: 2.48k]
  ------------------
 1331|  13.3k|                    u1_top_svc_base_mode_flag =
 1332|  13.3k|                        *(pu1_cur_svc_base_mode_flag -
 1333|  13.3k|                          ps_svc_lyr_dec->i4_frm_svc_base_mode_cabac_stride);
 1334|       |
 1335|  15.7k|                u4_a = 1;
 1336|  15.7k|                u4_b = 1;
 1337|       |
 1338|  15.7k|                if(u1_top_svc_base_mode_flag)
  ------------------
  |  Branch (1338:20): [True: 4.80k, False: 10.9k]
  ------------------
 1339|  4.80k|                {
 1340|  4.80k|                    u4_a = 0;
 1341|  4.80k|                }
 1342|       |
 1343|  15.7k|                if(u1_left_svc_base_mode_flag)
  ------------------
  |  Branch (1343:20): [True: 4.34k, False: 11.4k]
  ------------------
 1344|  4.34k|                {
 1345|  4.34k|                    u4_b = 0;
 1346|  4.34k|                }
 1347|       |
 1348|  15.7k|                u4_ctxt_inc = u4_a + u4_b;
 1349|  15.7k|                ps_svc_cur_mb_info->u1_base_mode_flag = ih264d_decode_bin(
 1350|  15.7k|                    u4_ctxt_inc, ps_svc_lyr_dec->ps_base_mode_flag, ps_bitstrm, ps_cab_env);
 1351|  15.7k|                COPYTHECONTEXT("SVC ext: u1_base_mode_flag", ps_svc_cur_mb_info->u1_base_mode_flag);
 1352|  15.7k|                *pu1_cur_svc_base_mode_flag = ps_svc_cur_mb_info->u1_base_mode_flag;
 1353|  15.7k|            }
 1354|  44.3k|            else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1354:21): [True: 43.8k, False: 462]
  ------------------
 1355|  43.8k|            {
 1356|  43.8k|                ps_svc_cur_mb_info->u1_base_mode_flag =
 1357|  43.8k|                    ps_svc_slice_params->u1_default_base_mode_flag;
 1358|  43.8k|            }
 1359|       |
 1360|  60.1k|            if(!ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (1360:16): [True: 34.7k, False: 25.3k]
  ------------------
 1361|  34.7k|            {
 1362|       |                /* Decode the u1_mb_type */
 1363|  34.7k|                u1_mb_type = ih264d_parse_mb_type_cabac(ps_dec);
 1364|  34.7k|                ps_cur_mb_info->u1_mb_type = u1_mb_type;
 1365|  34.7k|                if(u1_mb_type > (25 + u1_mb_threshold)) return ERROR_MB_TYPE;
  ------------------
  |  Branch (1365:20): [True: 0, False: 34.7k]
  ------------------
 1366|  34.7k|                COPYTHECONTEXT("u1_mb_type", u1_mb_type);
 1367|  34.7k|            }
 1368|  25.3k|            else
 1369|  25.3k|            {
 1370|       |                // default initialization for Base mode flag : reserved
 1371|  25.3k|                ps_dec->ps_part += MAX_NUM_MB_PART;
  ------------------
  |  |   62|  25.3k|#define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   59|  25.3k|#define NUM_MB_PARTS 4
  |  |  ------------------
  |  |               #define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   60|  25.3k|#define NUM_SUB_MB_PARTS 4
  |  |  ------------------
  ------------------
 1372|       |
 1373|  25.3k|                ps_svc_cur_mb_info->au1_motion_pred_flag[0] = 0;
 1374|  25.3k|                ps_svc_cur_mb_info->au1_motion_pred_flag[1] = 0;
 1375|  25.3k|                ps_cur_mb_info->u1_mb_type = MB_INFER;
  ------------------
  |  |  112|  25.3k|#define MB_INFER 250
  ------------------
 1376|  25.3k|                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 1377|  25.3k|                if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  25.3k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1377:20): [True: 0, False: 25.3k]
  ------------------
 1378|      0|                {
 1379|      0|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|      0|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1380|      0|                }
 1381|       |
 1382|       |                /*SVC EXT needs to update incropwindow*/
 1383|  25.3k|                if(ps_svc_slice_params->u1_adaptive_residual_prediction_flag &&
  ------------------
  |  Branch (1383:20): [True: 19.9k, False: 5.45k]
  ------------------
 1384|  19.9k|                   ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1384:20): [True: 19.9k, False: 0]
  ------------------
 1385|  19.9k|                {
 1386|  19.9k|                    ps_svc_cur_mb_info->u1_residual_prediction_flag = ih264d_decode_bin(
 1387|  19.9k|                        0, ps_svc_lyr_dec->ps_residual_prediction_flag, ps_bitstrm, ps_cab_env);
 1388|  19.9k|                    COPYTHECONTEXT("SVC ext: u1_residual_prediction_flag",
 1389|  19.9k|                                   ps_svc_cur_mb_info->u1_residual_prediction_flag);
 1390|  19.9k|                }
 1391|  5.45k|                else
 1392|  5.45k|                {
 1393|       |                    /*residual flag inference code */
 1394|  5.45k|                    if(1 == ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1394:24): [True: 5.45k, False: 0]
  ------------------
 1395|  5.45k|                    {
 1396|  5.45k|                        ps_svc_cur_mb_info->u1_residual_prediction_flag =
 1397|  5.45k|                            ps_svc_slice_params->u1_default_residual_prediction_flag;
 1398|  5.45k|                    }
 1399|      0|                    else
 1400|      0|                    {
 1401|      0|                        ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
 1402|      0|                    }
 1403|  5.45k|                }
 1404|  25.3k|            }
 1405|       |
 1406|       |            /* Parse Macroblock Data */
 1407|  60.1k|            u1_mb_type = ps_cur_mb_info->u1_mb_type;
 1408|  60.1k|            if(u1_mb_type < u1_mb_threshold)
  ------------------
  |  Branch (1408:16): [True: 31.7k, False: 28.3k]
  ------------------
 1409|  31.7k|            {
 1410|  31.7k|                ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
 1411|  31.7k|                *(ps_dec->pu1_left_yuv_dc_csbp) &= 0x6;
 1412|       |
 1413|  31.7k|                ret = ps_svc_lyr_dec->pf_parse_inter_mb_svc_ext(
 1414|  31.7k|                    ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info, u1_num_mbs, u1_num_mbsNby2);
 1415|  31.7k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  31.7k|#define OK        0
  ------------------
  |  Branch (1415:20): [True: 1.94k, False: 29.7k]
  ------------------
 1416|  29.7k|                ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 1417|  29.7k|                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 1418|  29.7k|                if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  29.7k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1418:20): [True: 0, False: 29.7k]
  ------------------
 1419|      0|                {
 1420|      0|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|      0|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1421|      0|                }
 1422|  29.7k|            }
 1423|  28.3k|            else
 1424|  28.3k|            {
 1425|       |                /* Storing Intra partition info */
 1426|  28.3k|                ps_parse_mb_data->u1_num_part = 0;
 1427|  28.3k|                ps_parse_mb_data->u4_isI_mb = 1;
 1428|       |
 1429|  28.3k|                if((25 + u1_mb_threshold) == u1_mb_type)
  ------------------
  |  Branch (1429:20): [True: 366, False: 28.0k]
  ------------------
 1430|    366|                {
 1431|       |                    /* I_PCM_MB */
 1432|    366|                    ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|    366|#define I_PCM_MB    6
  ------------------
 1433|    366|                    ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
 1434|    366|                    if(ret != OK) return ret;
  ------------------
  |  |  114|    366|#define OK        0
  ------------------
  |  Branch (1434:24): [True: 300, False: 66]
  ------------------
 1435|     66|                    ps_cur_deblk_mb->u1_mb_qp = 0;
 1436|     66|                    ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
  ------------------
  |  |  382|     66|#define D_INTRA_MB        1
  ------------------
 1437|     66|                }
 1438|  28.0k|                else
 1439|  28.0k|                {
 1440|  28.0k|                    if(u1_mb_type == u1_mb_threshold)
  ------------------
  |  Branch (1440:24): [True: 1.41k, False: 26.6k]
  ------------------
 1441|  1.41k|                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
  ------------------
  |  |  417|  1.41k|#define I_4x4_MB    0
  ------------------
 1442|  26.6k|                    else
 1443|  26.6k|                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
  ------------------
  |  |  418|  26.6k|#define I_16x16_MB  1
  ------------------
 1444|       |
 1445|  28.0k|                    ret = isvcd_parse_imb_cabac(ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info,
 1446|  28.0k|                                                (UWORD8) (u1_mb_type - u1_mb_threshold));
 1447|  28.0k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  28.0k|#define OK        0
  ------------------
  |  Branch (1447:24): [True: 1.07k, False: 26.9k]
  ------------------
 1448|  26.9k|                    ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 1449|  26.9k|                    if(0 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (1449:24): [True: 2.55k, False: 24.4k]
  ------------------
 1450|  2.55k|                    {
 1451|  2.55k|                        ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
  ------------------
  |  |  382|  2.55k|#define D_INTRA_MB        1
  ------------------
 1452|  2.55k|                    }
 1453|  26.9k|                }
 1454|  27.0k|                ps_parse_mb_data->u4_isI_mb = !ps_svc_cur_mb_info->u1_base_mode_flag;
 1455|  27.0k|            }
 1456|  60.1k|        }
 1457|       |
 1458|  81.9k|        if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (1458:12): [True: 0, False: 81.9k]
  ------------------
 1459|      0|        {
 1460|      0|            ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
 1461|      0|                                        ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
 1462|      0|        }
 1463|  81.9k|        if(u1_mbaff)
  ------------------
  |  Branch (1463:12): [True: 0, False: 81.9k]
  ------------------
 1464|      0|        {
 1465|      0|            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
 1466|      0|        }
 1467|       |
 1468|  81.9k|        if(ps_cur_mb_info->u1_topmb && u1_mbaff)
  ------------------
  |  Branch (1468:12): [True: 81.9k, False: 0]
  |  Branch (1468:40): [True: 0, False: 81.9k]
  ------------------
 1469|      0|            uc_more_data_flag = 1;
 1470|  81.9k|        else
 1471|  81.9k|        {
 1472|  81.9k|            uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env, ps_bitstrm);
 1473|  81.9k|            uc_more_data_flag = !uc_more_data_flag;
 1474|  81.9k|            COPYTHECONTEXT("Decode Sliceterm", !uc_more_data_flag);
 1475|  81.9k|        }
 1476|       |
 1477|  81.9k|        if(u1_mbaff)
  ------------------
  |  Branch (1477:12): [True: 0, False: 81.9k]
  ------------------
 1478|      0|        {
 1479|      0|            if(!uc_more_data_flag && (0 == (i2_cur_mb_addr & 1)))
  ------------------
  |  Branch (1479:16): [True: 0, False: 0]
  |  Branch (1479:38): [True: 0, False: 0]
  ------------------
 1480|      0|            {
 1481|      0|                return ERROR_EOB_FLUSHBITS_T;
 1482|      0|            }
 1483|      0|        }
 1484|       |        /* Next macroblock information */
 1485|  81.9k|        i2_cur_mb_addr++;
 1486|  81.9k|        u1_num_mbs++;
 1487|  81.9k|        u1_num_mbsNby2++;
 1488|  81.9k|        ps_parse_mb_data++;
 1489|       |
 1490|       |        /****************************************************************/
 1491|       |        /* Check for End Of Row and other flags that determine when to  */
 1492|       |        /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for   */
 1493|       |        /* N-Mb                                                         */
 1494|       |        /****************************************************************/
 1495|  81.9k|        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
 1496|  81.9k|        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (1496:25): [True: 19.2k, False: 62.6k]
  |  Branch (1496:50): [True: 0, False: 19.2k]
  |  Branch (1496:62): [True: 0, False: 0]
  ------------------
 1497|  81.9k|        u1_slice_end = !uc_more_data_flag;
 1498|  81.9k|        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u4_recon_mb_grp) || u1_end_of_row || u1_slice_end;
  ------------------
  |  Branch (1498:23): [True: 19.0k, False: 62.9k]
  |  Branch (1498:66): [True: 259, False: 62.6k]
  |  Branch (1498:83): [True: 632, False: 62.0k]
  ------------------
 1499|  81.9k|        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
  ------------------
  |  Branch (1499:25): [True: 19.9k, False: 62.0k]
  |  Branch (1499:40): [True: 0, False: 62.0k]
  ------------------
 1500|  81.9k|        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
 1501|       |
 1502|  81.9k|        if(u1_decode_nmb)
  ------------------
  |  Branch (1502:12): [True: 19.9k, False: 62.0k]
  ------------------
 1503|  19.9k|        {
 1504|  19.9k|            ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
 1505|  19.9k|            u1_num_mbsNby2 = 0;
 1506|  19.9k|            ps_parse_mb_data = ps_dec->ps_parse_mb_data;
 1507|  19.9k|            ps_dec->ps_part = ps_dec->ps_parse_part_params;
 1508|  19.9k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  19.9k|#define OK        0
  ------------------
  |  Branch (1508:16): [True: 70, False: 19.8k]
  ------------------
 1509|  19.9k|        }
 1510|       |
 1511|  81.8k|        if(u1_decode_nmb)
  ------------------
  |  Branch (1511:12): [True: 19.8k, False: 62.0k]
  ------------------
 1512|  19.8k|        {
 1513|  19.8k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1513:16): [True: 13.0k, False: 6.81k]
  ------------------
 1514|  13.0k|            {
 1515|  13.0k|                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next, u1_tfr_n_mb,
 1516|  13.0k|                                     u1_end_of_row);
 1517|  13.0k|                ps_dec->ps_nmb_info += u1_num_mbs;
 1518|  13.0k|                ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
 1519|  13.0k|            }
 1520|  6.81k|            else
 1521|  6.81k|            {
 1522|  6.81k|                ret = isvcd_decode_recon_tfr_nmb_non_base_lyr(ps_svc_lyr_dec, u1_mb_idx, u1_num_mbs,
 1523|  6.81k|                                                              u1_num_mbs_next, u1_tfr_n_mb,
 1524|  6.81k|                                                              u1_end_of_row);
 1525|  6.81k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  6.81k|#define OK        0
  ------------------
  |  Branch (1525:20): [True: 0, False: 6.81k]
  ------------------
 1526|  6.81k|            }
 1527|  19.8k|            ps_dec->u4_total_mbs_coded += u1_num_mbs;
 1528|  19.8k|            if(u1_tfr_n_mb) u1_num_mbs = 0;
  ------------------
  |  Branch (1528:16): [True: 19.8k, False: 0]
  ------------------
 1529|  19.8k|            u1_mb_idx = u1_num_mbs;
 1530|  19.8k|            ps_dec->u4_mb_idx = u1_num_mbs;
 1531|  19.8k|        }
 1532|  81.8k|    }
 1533|       |
 1534|  1.56k|    ps_dec->u4_num_mbs_cur_nmb = 0;
 1535|  1.56k|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
 1536|       |
 1537|  1.56k|    return ret;
 1538|  4.95k|}
isvcd_parse_inter_slice_data_cavlc_enh_lyr:
 1569|  14.7k|{
 1570|  14.7k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1571|  14.7k|    UWORD32 uc_more_data_flag;
 1572|  14.7k|    WORD32 i2_cur_mb_addr;
 1573|  14.7k|    UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx;
 1574|  14.7k|    UWORD32 i2_mb_skip_run;
 1575|  14.7k|    UWORD32 u1_read_mb_type;
 1576|       |
 1577|  14.7k|    UWORD32 u1_mbaff;
 1578|  14.7k|    UWORD32 u1_num_mbs_next, u1_end_of_row;
 1579|  14.7k|    const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
 1580|  14.7k|    UWORD32 u1_slice_end = 0;
 1581|  14.7k|    UWORD32 u1_tfr_n_mb = 0;
 1582|  14.7k|    UWORD32 u1_decode_nmb = 0;
 1583|       |
 1584|  14.7k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
 1585|  14.7k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1586|  14.7k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 1587|       |
 1588|  14.7k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1589|       |
 1590|  14.7k|    deblk_mb_t *ps_cur_deblk_mb;
 1591|  14.7k|    dec_mb_info_t *ps_cur_mb_info;
 1592|  14.7k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
 1593|       |
 1594|  14.7k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data;
 1595|  14.7k|    UWORD32 u1_inter_mb_type;
 1596|  14.7k|    UWORD32 u1_deblk_mb_type;
 1597|  14.7k|    UWORD32 u1_mb_threshold;
 1598|  14.7k|    WORD32 ret = OK;
  ------------------
  |  |  114|  14.7k|#define OK        0
  ------------------
 1599|       |
 1600|  14.7k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1601|       |
 1602|       |    /******************************************************/
 1603|       |    /* Initialisations specific to EB or EP slice           */
 1604|       |    /******************************************************/
 1605|       |
 1606|  14.7k|    if(ps_slice->u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  14.7k|#define P_SLICE  0
  ------------------
  |  Branch (1606:8): [True: 8.92k, False: 5.77k]
  ------------------
 1607|  8.92k|    {
 1608|  8.92k|        u1_inter_mb_type = P_MB;
  ------------------
  |  |  419|  8.92k|#define P_MB        2
  ------------------
 1609|  8.92k|        u1_deblk_mb_type = D_INTER_MB;
  ------------------
  |  |  381|  8.92k|#define D_INTER_MB        0
  ------------------
 1610|  8.92k|        u1_mb_threshold = 5;
 1611|  8.92k|    }
 1612|  5.77k|    else
 1613|  5.77k|    {
 1614|  5.77k|        u1_inter_mb_type = B_MB;
  ------------------
  |  |  420|  5.77k|#define B_MB        3
  ------------------
 1615|  5.77k|        u1_deblk_mb_type = D_B_SLICE;
  ------------------
  |  |  384|  5.77k|#define D_B_SLICE         4
  ------------------
 1616|  5.77k|        u1_mb_threshold = 23;
 1617|  5.77k|    }
 1618|       |
 1619|       |    /******************************************************/
 1620|       |    /* Slice Level Initialisations                        */
 1621|       |    /******************************************************/
 1622|  14.7k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
 1623|  14.7k|    ih264d_update_qp(ps_dec, 0);
 1624|  14.7k|    u1_mb_idx = ps_dec->u4_mb_idx;
 1625|  14.7k|    u1_num_mbs = u1_mb_idx;
 1626|  14.7k|    u1_num_mbsNby2 = 0;
 1627|  14.7k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
 1628|  14.7k|    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
 1629|  14.7k|    i2_mb_skip_run = 0;
 1630|  14.7k|    uc_more_data_flag = 1;
 1631|  14.7k|    u1_read_mb_type = 0;
 1632|       |
 1633|   301k|    while(!u1_slice_end)
  ------------------
  |  Branch (1633:11): [True: 297k, False: 3.64k]
  ------------------
 1634|   297k|    {
 1635|   297k|        UWORD8 u1_mb_type;
 1636|       |
 1637|   297k|        ps_dec->pv_prev_mb_parse_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
 1638|       |
 1639|   297k|        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (1639:12): [True: 5.11k, False: 292k]
  ------------------
 1640|  5.11k|        {
 1641|  5.11k|            break;
 1642|  5.11k|        }
 1643|       |
 1644|   292k|        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
 1645|   292k|        ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + u1_num_mbs;
 1646|   292k|        ps_dec->u4_num_mbs_cur_nmb = u1_num_mbs;
 1647|       |
 1648|   292k|        ps_cur_mb_info->u1_Mux = 0;
 1649|   292k|        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
 1650|   292k|        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
 1651|       |
 1652|   292k|        ps_cur_mb_info->u1_end_of_slice = 0;
 1653|       |
 1654|       |        /* Storing Default partition info */
 1655|   292k|        ps_parse_mb_data->u1_num_part = 1;
 1656|   292k|        ps_parse_mb_data->u4_isI_mb = 0;
 1657|       |
 1658|   292k|        if((!i2_mb_skip_run) && (!u1_read_mb_type))
  ------------------
  |  Branch (1658:12): [True: 219k, False: 73.0k]
  |  Branch (1658:33): [True: 200k, False: 19.8k]
  ------------------
 1659|   200k|        {
 1660|   200k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 1661|   200k|            UWORD32 u4_word, u4_ldz;
 1662|       |
 1663|       |            /***************************************************************/
 1664|       |            /* Find leading zeros in next 32 bits                          */
 1665|       |            /***************************************************************/
 1666|   200k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|   200k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|   200k|{                                                                           \
  |  |  152|   200k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|   200k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|   200k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|   200k|                                                                            \
  |  |  156|   200k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|   200k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 193k, False: 6.60k]
  |  |  ------------------
  |  |  158|   200k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|   193k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|   200k|}
  ------------------
 1667|       |
 1668|   200k|            u4_ldz = CLZ(u4_word);
 1669|       |
 1670|       |            /* Flush the ps_bitstrm */
 1671|   200k|            u4_bitstream_offset += (u4_ldz + 1);
 1672|       |            /* Read the suffix from the ps_bitstrm */
 1673|   200k|            u4_word = 0;
 1674|   200k|            if(u4_ldz)
  ------------------
  |  Branch (1674:16): [True: 23.8k, False: 176k]
  ------------------
 1675|  23.8k|            {
 1676|  23.8k|                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  23.8k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  23.8k|{                                                                           \
  |  |  122|  23.8k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  23.8k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  23.8k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  23.8k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  23.8k|                                                                            \
  |  |  127|  23.8k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 22.7k, False: 1.08k]
  |  |  ------------------
  |  |  128|  23.8k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  22.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  23.8k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  23.8k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  23.8k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  23.8k|}                                                                           \
  ------------------
 1677|  23.8k|            }
 1678|   200k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
 1679|   200k|            i2_mb_skip_run = ((1 << u4_ldz) + u4_word - 1);
 1680|       |
 1681|   200k|            COPYTHECONTEXT("mb_skip_run", i2_mb_skip_run);
 1682|   200k|            uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
  ------------------
  |  |   97|   200k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|   200k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  ------------------
 1683|   200k|            u1_read_mb_type = uc_more_data_flag;
 1684|   200k|        }
 1685|       |
 1686|       |        /***************************************************************/
 1687|       |        /* Get the required information for decoding of MB             */
 1688|       |        /* mb_x, mb_y , neighbour availablity,                         */
 1689|       |        /***************************************************************/
 1690|   292k|        ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, i2_mb_skip_run);
 1691|   292k|        ps_svc_cur_mb_info->u1_crop_window_flag =
 1692|   292k|            *(ps_svc_lyr_dec->pu1_crop_wnd_flag + ps_cur_mb_info->u2_mbx +
 1693|   292k|              (ps_cur_mb_info->u2_mby * ps_dec->u2_frm_wd_in_mbs));
 1694|       |        /***************************************************************/
 1695|       |        /* Set the deblocking parameters for this MB                   */
 1696|       |        /***************************************************************/
 1697|   292k|        if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1697:12): [True: 292k, False: 0]
  ------------------
 1698|   292k|            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
 1699|   292k|                                             ps_dec->u1_mb_ngbr_availablity,
 1700|   292k|                                             ps_dec->u1_cur_mb_fld_dec_flag);
 1701|       |
 1702|   292k|        if(i2_mb_skip_run)
  ------------------
  |  Branch (1702:12): [True: 96.8k, False: 195k]
  ------------------
 1703|  96.8k|        {
 1704|       |            /* Set appropriate flags in ps_cur_mb_info and ps_dec */
 1705|  96.8k|            ps_dec->i1_prev_mb_qp_delta = 0;
 1706|  96.8k|            ps_dec->u1_sub_mb_num = 0;
 1707|  96.8k|            ps_cur_mb_info->u1_mb_type = MB_SKIP;
  ------------------
  |  |   59|  96.8k|#define MB_SKIP 255
  ------------------
 1708|  96.8k|            ps_cur_mb_info->u1_mb_mc_mode = PRED_16x16;
  ------------------
  |  |  450|  96.8k|#define PRED_16x16  0
  ------------------
 1709|  96.8k|            ps_cur_mb_info->u1_cbp = 0;
 1710|  96.8k|            ps_svc_cur_mb_info->u1_base_mode_flag = 0;
 1711|  96.8k|            ps_svc_cur_mb_info->au1_motion_pred_flag[0] = 0;
 1712|  96.8k|            ps_svc_cur_mb_info->au1_motion_pred_flag[1] = 0;
 1713|  96.8k|            ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
 1714|       |
 1715|  96.8k|            {
 1716|       |                /* Storing Skip partition info */
 1717|  96.8k|                parse_part_params_t *ps_part_info = ps_dec->ps_part;
 1718|  96.8k|                ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  96.8k|#define PART_DIRECT_16x16              2
  ------------------
 1719|  96.8k|                ps_part_info->u1_sub_mb_num = 0;
 1720|  96.8k|                ps_dec->ps_part++;
 1721|  96.8k|            }
 1722|       |
 1723|       |            /* Update Nnzs */
 1724|  96.8k|            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|  96.8k|#define CAVLC  0
  ------------------
 1725|       |
 1726|  96.8k|            ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
 1727|  96.8k|            ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 1728|       |
 1729|  96.8k|            if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  96.8k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1729:16): [True: 0, False: 96.8k]
  ------------------
 1730|      0|            {
 1731|      0|                ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|      0|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1732|      0|            }
 1733|  96.8k|            i2_mb_skip_run--;
 1734|  96.8k|        }
 1735|   195k|        else
 1736|   195k|        {
 1737|   195k|            UWORD32 u4_word, u4_ldz, u4_temp;
 1738|       |
 1739|   195k|            ps_svc_cur_mb_info->u1_base_mode_flag = 0;
 1740|   195k|            if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (1740:16): [True: 195k, False: 949]
  ------------------
 1741|   195k|               ps_svc_slice_params->u1_adaptive_base_mode_flag)
  ------------------
  |  Branch (1741:16): [True: 104k, False: 90.9k]
  ------------------
 1742|   104k|            {
 1743|   104k|                ps_svc_cur_mb_info->u1_base_mode_flag = ih264d_get_bit_h264(ps_bitstrm);
 1744|   104k|                COPYTHECONTEXT("SVC :u1_base_mode_flag", ps_cur_mb_info->u1_base_mode_flag);
 1745|   104k|            }
 1746|  91.8k|            else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1746:21): [True: 90.9k, False: 949]
  ------------------
 1747|  90.9k|            {
 1748|  90.9k|                ps_svc_cur_mb_info->u1_base_mode_flag =
 1749|  90.9k|                    ps_svc_slice_params->u1_default_base_mode_flag;
 1750|  90.9k|            }
 1751|       |
 1752|   195k|            if(!ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (1752:16): [True: 46.9k, False: 149k]
  ------------------
 1753|  46.9k|            {
 1754|  46.9k|                UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 1755|       |
 1756|  46.9k|                u1_read_mb_type = 0;
 1757|       |                /**************************************************************/
 1758|       |                /* Macroblock Layer Begins, Decode the u1_mb_type              */
 1759|       |                /**************************************************************/
 1760|  46.9k|                {
 1761|       |                    /***************************************************************/
 1762|       |                    /* Find leading zeros in next 32 bits                          */
 1763|       |                    /***************************************************************/
 1764|  46.9k|                    NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  46.9k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  46.9k|{                                                                           \
  |  |  152|  46.9k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  46.9k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  46.9k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  46.9k|                                                                            \
  |  |  156|  46.9k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  46.9k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 45.5k, False: 1.36k]
  |  |  ------------------
  |  |  158|  46.9k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  45.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  46.9k|}
  ------------------
 1765|  46.9k|                    u4_ldz = CLZ(u4_word);
 1766|       |                    /* Flush the ps_bitstrm */
 1767|  46.9k|                    u4_bitstream_offset += (u4_ldz + 1);
 1768|       |                    /* Read the suffix from the ps_bitstrm */
 1769|  46.9k|                    u4_word = 0;
 1770|  46.9k|                    if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  25.3k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  25.3k|{                                                                           \
  |  |  122|  25.3k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  25.3k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  25.3k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  25.3k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  25.3k|                                                                            \
  |  |  127|  25.3k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 24.2k, False: 1.05k]
  |  |  ------------------
  |  |  128|  25.3k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  24.2k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  25.3k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  25.3k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  25.3k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  25.3k|}                                                                           \
  ------------------
  |  Branch (1770:24): [True: 25.3k, False: 21.6k]
  ------------------
 1771|  46.9k|                    *pu4_bitstrm_ofst = u4_bitstream_offset;
 1772|  46.9k|                    u4_temp = ((1 << u4_ldz) + u4_word - 1);
 1773|       |
 1774|  46.9k|                    if(u4_temp > (UWORD32) (25 + u1_mb_threshold)) return ERROR_MB_TYPE;
  ------------------
  |  Branch (1774:24): [True: 326, False: 46.6k]
  ------------------
 1775|  46.6k|                    u1_mb_type = u4_temp;
 1776|  46.6k|                    COPYTHECONTEXT("u1_mb_type", u1_mb_type);
 1777|  46.6k|                }
 1778|      0|                ps_cur_mb_info->u1_mb_type = u1_mb_type;
 1779|  46.6k|            }
 1780|   149k|            else
 1781|   149k|            {
 1782|       |                /* default intialization for Base mode flag : reserved */
 1783|   149k|                ps_dec->ps_part += MAX_NUM_MB_PART;
  ------------------
  |  |   62|   149k|#define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   59|   149k|#define NUM_MB_PARTS 4
  |  |  ------------------
  |  |               #define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   60|   149k|#define NUM_SUB_MB_PARTS 4
  |  |  ------------------
  ------------------
 1784|   149k|                u1_read_mb_type = 0;
 1785|   149k|                ps_svc_cur_mb_info->au1_motion_pred_flag[0] = 0;
 1786|   149k|                ps_svc_cur_mb_info->au1_motion_pred_flag[1] = 0;
 1787|   149k|                ps_cur_mb_info->u1_mb_type = MB_INFER;
  ------------------
  |  |  112|   149k|#define MB_INFER 250
  ------------------
 1788|   149k|                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 1789|   149k|                if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|   149k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1789:20): [True: 0, False: 149k]
  ------------------
 1790|      0|                {
 1791|      0|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|      0|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1792|      0|                }
 1793|       |
 1794|       |                /*SVC EXT needs to update incropwindow*/
 1795|   149k|                if(ps_svc_slice_params->u1_adaptive_residual_prediction_flag &&
  ------------------
  |  Branch (1795:20): [True: 23.8k, False: 125k]
  ------------------
 1796|  23.8k|                   ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1796:20): [True: 23.8k, False: 0]
  ------------------
 1797|  23.8k|                {
 1798|  23.8k|                    ps_svc_cur_mb_info->u1_residual_prediction_flag =
 1799|  23.8k|                        ih264d_get_bit_h264(ps_bitstrm);
 1800|  23.8k|                    COPYTHECONTEXT("SVC ext: u1_residual_prediction_flag",
 1801|  23.8k|                                   ps_cur_mb_info->u1_residual_prediction_flag);
 1802|  23.8k|                }
 1803|   125k|                else
 1804|   125k|                {
 1805|       |                    /*residual flag inference code */
 1806|   125k|                    if(1 == ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (1806:24): [True: 125k, False: 0]
  ------------------
 1807|   125k|                    {
 1808|   125k|                        ps_svc_cur_mb_info->u1_residual_prediction_flag =
 1809|   125k|                            ps_svc_slice_params->u1_default_residual_prediction_flag;
 1810|   125k|                    }
 1811|      0|                    else
 1812|      0|                    {
 1813|      0|                        ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
 1814|      0|                    }
 1815|   125k|                }
 1816|   149k|            }
 1817|       |
 1818|       |            /**************************************************************/
 1819|       |            /* Parse Macroblock data                                      */
 1820|       |            /**************************************************************/
 1821|   195k|            u1_mb_type = ps_cur_mb_info->u1_mb_type;
 1822|   195k|            if(u1_mb_type < u1_mb_threshold)
  ------------------
  |  Branch (1822:16): [True: 45.0k, False: 150k]
  ------------------
 1823|  45.0k|            {
 1824|  45.0k|                ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
 1825|       |
 1826|  45.0k|                ret = ps_svc_lyr_dec->pf_parse_inter_mb_svc_ext(
 1827|  45.0k|                    ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info, u1_num_mbs, u1_num_mbsNby2);
 1828|  45.0k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  45.0k|#define OK        0
  ------------------
  |  Branch (1828:20): [True: 3.08k, False: 41.9k]
  ------------------
 1829|  41.9k|                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 1830|       |
 1831|  41.9k|                if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  41.9k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1831:20): [True: 0, False: 41.9k]
  ------------------
 1832|      0|                {
 1833|      0|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|      0|#define MB_DISABLE_FILTERING          0x01
  ------------------
 1834|      0|                }
 1835|  41.9k|            }
 1836|   150k|            else
 1837|   150k|            {
 1838|       |                /* Storing Intra partition info */
 1839|   150k|                ps_parse_mb_data->u1_num_part = 0;
 1840|       |
 1841|   150k|                if((25 + u1_mb_threshold) == u1_mb_type)
  ------------------
  |  Branch (1841:20): [True: 130, False: 150k]
  ------------------
 1842|    130|                {
 1843|       |                    /* I_PCM_MB */
 1844|    130|                    ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
  ------------------
  |  |  423|    130|#define I_PCM_MB    6
  ------------------
 1845|    130|                    ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
 1846|    130|                    if(ret != OK) return ret;
  ------------------
  |  |  114|    130|#define OK        0
  ------------------
  |  Branch (1846:24): [True: 0, False: 130]
  ------------------
 1847|    130|                    ps_dec->u1_qp = 0;
 1848|    130|                    ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
  ------------------
  |  |  382|    130|#define D_INTRA_MB        1
  ------------------
 1849|    130|                }
 1850|   150k|                else
 1851|   150k|                {
 1852|   150k|                    ret =
 1853|   150k|                        isvcd_parse_imb_cavlc(ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info,
 1854|   150k|                                              u1_num_mbs, (UWORD8) (u1_mb_type - u1_mb_threshold));
 1855|   150k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|   150k|#define OK        0
  ------------------
  |  Branch (1855:24): [True: 2.34k, False: 148k]
  ------------------
 1856|       |
 1857|   148k|                    if(0 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (1857:24): [True: 845, False: 147k]
  ------------------
 1858|    845|                    {
 1859|    845|                        ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
  ------------------
  |  |  382|    845|#define D_INTRA_MB        1
  ------------------
 1860|    845|                    }
 1861|   148k|                }
 1862|   148k|                ps_parse_mb_data->u4_isI_mb = !ps_svc_cur_mb_info->u1_base_mode_flag;
 1863|   148k|            }
 1864|   190k|            uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
  ------------------
  |  |   97|   190k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|   190k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  ------------------
 1865|   190k|        }
 1866|   287k|        ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 1867|       |
 1868|   287k|        if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (1868:12): [True: 0, False: 287k]
  ------------------
 1869|      0|        {
 1870|      0|            ih264d_populate_mb_info_map(ps_dec, ps_cur_mb_info, ps_cur_mb_info->u2_mbx << 1,
 1871|      0|                                        ps_cur_mb_info->u2_mby << 1, ps_cur_deblk_mb->u1_mb_qp);
 1872|      0|        }
 1873|   287k|        if(u1_mbaff)
  ------------------
  |  Branch (1873:12): [True: 0, False: 287k]
  ------------------
 1874|      0|        {
 1875|      0|            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
 1876|      0|            if(!uc_more_data_flag && !i2_mb_skip_run && (0 == (i2_cur_mb_addr & 1)))
  ------------------
  |  Branch (1876:16): [True: 0, False: 0]
  |  Branch (1876:38): [True: 0, False: 0]
  |  Branch (1876:57): [True: 0, False: 0]
  ------------------
 1877|      0|            {
 1878|      0|                return ERROR_EOB_FLUSHBITS_T;
 1879|      0|            }
 1880|      0|        }
 1881|       |        /**************************************************************/
 1882|       |        /* Get next Macroblock address                                */
 1883|       |        /**************************************************************/
 1884|   287k|        i2_cur_mb_addr++;
 1885|   287k|        u1_num_mbs++;
 1886|   287k|        u1_num_mbsNby2++;
 1887|   287k|        ps_parse_mb_data++;
 1888|       |
 1889|       |        /****************************************************************/
 1890|       |        /* Check for End Of Row and other flags that determine when to  */
 1891|       |        /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for   */
 1892|       |        /* N-Mb                                                         */
 1893|       |        /****************************************************************/
 1894|   287k|        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
 1895|   287k|        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (1895:25): [True: 86.2k, False: 200k]
  |  Branch (1895:50): [True: 0, False: 86.2k]
  |  Branch (1895:62): [True: 0, False: 0]
  ------------------
 1896|   287k|        u1_slice_end = (!(uc_more_data_flag || i2_mb_skip_run));
  ------------------
  |  Branch (1896:27): [True: 248k, False: 39.0k]
  |  Branch (1896:48): [True: 35.2k, False: 3.76k]
  ------------------
 1897|   287k|        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u4_recon_mb_grp) || u1_end_of_row || u1_slice_end;
  ------------------
  |  Branch (1897:23): [True: 86.1k, False: 200k]
  |  Branch (1897:66): [True: 69, False: 200k]
  |  Branch (1897:83): [True: 2.72k, False: 198k]
  ------------------
 1898|   287k|        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
  ------------------
  |  Branch (1898:25): [True: 88.9k, False: 198k]
  |  Branch (1898:40): [True: 0, False: 198k]
  ------------------
 1899|   287k|        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
 1900|       |
 1901|   287k|        if(u1_decode_nmb)
  ------------------
  |  Branch (1901:12): [True: 88.9k, False: 198k]
  ------------------
 1902|  88.9k|        {
 1903|  88.9k|            ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
 1904|  88.9k|            u1_num_mbsNby2 = 0;
 1905|  88.9k|            ps_parse_mb_data = ps_dec->ps_parse_mb_data;
 1906|  88.9k|            ps_dec->ps_part = ps_dec->ps_parse_part_params;
 1907|  88.9k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  88.9k|#define OK        0
  ------------------
  |  Branch (1907:16): [True: 202, False: 88.7k]
  ------------------
 1908|  88.9k|        }
 1909|       |
 1910|   286k|        if(u1_decode_nmb)
  ------------------
  |  Branch (1910:12): [True: 88.7k, False: 198k]
  ------------------
 1911|  88.7k|        {
 1912|  88.7k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1912:16): [True: 62.0k, False: 26.7k]
  ------------------
 1913|  62.0k|            {
 1914|  62.0k|                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next, u1_tfr_n_mb,
 1915|  62.0k|                                     u1_end_of_row);
 1916|  62.0k|                ps_dec->ps_nmb_info += u1_num_mbs;
 1917|  62.0k|                ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
 1918|  62.0k|            }
 1919|  26.7k|            else
 1920|  26.7k|            {
 1921|  26.7k|                ret = isvcd_decode_recon_tfr_nmb_non_base_lyr(ps_svc_lyr_dec, u1_mb_idx, u1_num_mbs,
 1922|  26.7k|                                                              u1_num_mbs_next, u1_tfr_n_mb,
 1923|  26.7k|                                                              u1_end_of_row);
 1924|  26.7k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  26.7k|#define OK        0
  ------------------
  |  Branch (1924:20): [True: 0, False: 26.7k]
  ------------------
 1925|  26.7k|            }
 1926|  88.7k|            ps_dec->u4_total_mbs_coded += u1_num_mbs;
 1927|  88.7k|            if(u1_tfr_n_mb) u1_num_mbs = 0;
  ------------------
  |  Branch (1927:16): [True: 88.7k, False: 0]
  ------------------
 1928|  88.7k|            u1_mb_idx = u1_num_mbs;
 1929|  88.7k|            ps_dec->u4_mb_idx = u1_num_mbs;
 1930|  88.7k|        }
 1931|   286k|    }
 1932|       |
 1933|  8.75k|    ps_dec->u4_num_mbs_cur_nmb = 0;
 1934|  8.75k|    ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr - (u2_first_mb_in_slice << u1_mbaff);
 1935|       |
 1936|  8.75k|    return ret;
 1937|  14.7k|}
isvcd_parse_pmb_cabac:
 1953|  16.8k|{
 1954|  16.8k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1955|  16.8k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1956|       |
 1957|  16.8k|    UWORD32 u1_num_mb_part;
 1958|  16.8k|    UWORD32 uc_sub_mb;
 1959|  16.8k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data + u4_num_mbsNby2;
 1960|  16.8k|    WORD8 *pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
 1961|  16.8k|    const UWORD8 *pu1_num_mb_part = (const UWORD8 *) gau1_ih264d_num_mb_part;
 1962|  16.8k|    const UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
 1963|  16.8k|    UWORD8 *pu1_col_info = ps_parse_mb_data->u1_col_info;
 1964|  16.8k|    UWORD32 u1_mb_mc_mode = u1_mb_type;
 1965|  16.8k|    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
 1966|  16.8k|    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
 1967|  16.8k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1968|  16.8k|    UWORD32 u4_sub_mb_pack = 0;
 1969|  16.8k|    WORD32 ret;
 1970|       |
 1971|  16.8k|    UWORD8 u1_no_submb_part_size_lt8x8_flag = 1;
 1972|  16.8k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1973|       |
 1974|  16.8k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
 1975|  16.8k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 1976|  16.8k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
 1977|  16.8k|    p_curr_ctxt->u1_mb_type = CAB_P;
  ------------------
  |  |  398|  16.8k|#define CAB_P             0x07 /* 0000 0111 */
  ------------------
 1978|  16.8k|    ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
 1979|  16.8k|    uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  453|  16.8k|#define PRED_8x8    3
  ------------------
                  uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  454|  16.8k|#define PRED_8x8R0  4
  ------------------
 1980|       |
 1981|       |    /* Reading the subMB type */
 1982|  16.8k|    if(uc_sub_mb)
  ------------------
  |  Branch (1982:8): [True: 3.71k, False: 13.1k]
  ------------------
 1983|  3.71k|    {
 1984|  3.71k|        UWORD8 u1_colz = (PRED_8x8 << 6);
  ------------------
  |  |  453|  3.71k|#define PRED_8x8    3
  ------------------
 1985|  3.71k|        u1_mb_mc_mode = 0;
 1986|  3.71k|        {
 1987|  3.71k|            UWORD8 u1_sub_mb_mode;
 1988|  3.71k|            u1_sub_mb_mode =
 1989|  3.71k|                ih264d_parse_submb_type_cabac(0, ps_cab_env, ps_bitstrm, ps_dec->p_sub_mb_type_t);
 1990|  3.71k|            if(u1_sub_mb_mode > 3) return ERROR_SUB_MB_TYPE;
  ------------------
  |  Branch (1990:16): [True: 0, False: 3.71k]
  ------------------
 1991|       |
 1992|  3.71k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
 1993|       |            /* Storing collocated information */
 1994|  3.71k|            *pu1_col_info++ = u1_colz | ((UWORD8) (u1_sub_mb_mode << 4));
 1995|  3.71k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
 1996|       |            /* check if Motion compensation is done below 8x8 */
 1997|  3.71k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  3.71k|#define P_L0_8x8    0
  ------------------
  |  Branch (1997:16): [True: 2.60k, False: 1.10k]
  ------------------
 1998|  2.60k|            {
 1999|  2.60k|                u1_no_submb_part_size_lt8x8_flag = 0;
 2000|  2.60k|            }
 2001|  3.71k|        }
 2002|      0|        {
 2003|  3.71k|            UWORD8 u1_sub_mb_mode;
 2004|  3.71k|            u1_sub_mb_mode =
 2005|  3.71k|                ih264d_parse_submb_type_cabac(0, ps_cab_env, ps_bitstrm, ps_dec->p_sub_mb_type_t);
 2006|  3.71k|            if(u1_sub_mb_mode > 3) return ERROR_SUB_MB_TYPE;
  ------------------
  |  Branch (2006:16): [True: 0, False: 3.71k]
  ------------------
 2007|       |
 2008|  3.71k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
 2009|       |            /* Storing collocated information */
 2010|  3.71k|            *pu1_col_info++ = u1_colz | ((UWORD8) (u1_sub_mb_mode << 4));
 2011|  3.71k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
 2012|       |            /* check if Motion compensation is done below 8x8 */
 2013|  3.71k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  3.71k|#define P_L0_8x8    0
  ------------------
  |  Branch (2013:16): [True: 2.50k, False: 1.21k]
  ------------------
 2014|  2.50k|            {
 2015|  2.50k|                u1_no_submb_part_size_lt8x8_flag = 0;
 2016|  2.50k|            }
 2017|  3.71k|        }
 2018|      0|        {
 2019|  3.71k|            UWORD8 u1_sub_mb_mode;
 2020|  3.71k|            u1_sub_mb_mode =
 2021|  3.71k|                ih264d_parse_submb_type_cabac(0, ps_cab_env, ps_bitstrm, ps_dec->p_sub_mb_type_t);
 2022|  3.71k|            if(u1_sub_mb_mode > 3) return ERROR_SUB_MB_TYPE;
  ------------------
  |  Branch (2022:16): [True: 0, False: 3.71k]
  ------------------
 2023|       |
 2024|  3.71k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
 2025|       |            /* Storing collocated information */
 2026|  3.71k|            *pu1_col_info++ = u1_colz | ((UWORD8) (u1_sub_mb_mode << 4));
 2027|  3.71k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
 2028|       |            /* check if Motion compensation is done below 8x8 */
 2029|  3.71k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  3.71k|#define P_L0_8x8    0
  ------------------
  |  Branch (2029:16): [True: 2.64k, False: 1.07k]
  ------------------
 2030|  2.64k|            {
 2031|  2.64k|                u1_no_submb_part_size_lt8x8_flag = 0;
 2032|  2.64k|            }
 2033|  3.71k|        }
 2034|      0|        {
 2035|  3.71k|            UWORD8 u1_sub_mb_mode;
 2036|  3.71k|            u1_sub_mb_mode =
 2037|  3.71k|                ih264d_parse_submb_type_cabac(0, ps_cab_env, ps_bitstrm, ps_dec->p_sub_mb_type_t);
 2038|  3.71k|            if(u1_sub_mb_mode > 3) return ERROR_SUB_MB_TYPE;
  ------------------
  |  Branch (2038:16): [True: 0, False: 3.71k]
  ------------------
 2039|       |
 2040|  3.71k|            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
 2041|       |            /* Storing collocated information */
 2042|  3.71k|            *pu1_col_info++ = u1_colz | ((UWORD8) (u1_sub_mb_mode << 4));
 2043|  3.71k|            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
 2044|       |            /* check if Motion compensation is done below 8x8 */
 2045|  3.71k|            if(u1_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  3.71k|#define P_L0_8x8    0
  ------------------
  |  Branch (2045:16): [True: 2.58k, False: 1.13k]
  ------------------
 2046|  2.58k|            {
 2047|  2.58k|                u1_no_submb_part_size_lt8x8_flag = 0;
 2048|  2.58k|            }
 2049|  3.71k|        }
 2050|      0|        u1_num_mb_part = 4;
 2051|  3.71k|    }
 2052|  13.1k|    else
 2053|  13.1k|    {
 2054|  13.1k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
 2055|       |        /* Storing collocated Mb and SubMb mode information */
 2056|  13.1k|        *pu1_col_info++ = (u1_mb_type << 6);
 2057|  13.1k|        if(u1_mb_type) *pu1_col_info++ = (u1_mb_type << 6);
  ------------------
  |  Branch (2057:12): [True: 1.93k, False: 11.2k]
  ------------------
 2058|  13.1k|    }
 2059|       |
 2060|       |    /*Adding code to get Motion Prediction Flags*/
 2061|  16.8k|    {
 2062|  16.8k|        UWORD8 uc_i;
 2063|  16.8k|        UWORD8 *pu1_motion_pred_flag_l0;
 2064|  16.8k|        UWORD8 u1_mvp_flag;
 2065|  16.8k|        WORD8 *pi1_top_ref_idx_ctx_inc_arr = p_curr_ctxt->i1_ref_idx;
 2066|  16.8k|        WORD8 *pi1_left_ref_idx_ctxt_inc = ps_dec->pi1_left_ref_idx_ctxt_inc;
 2067|  16.8k|        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 2068|  16.8k|        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
 2069|  16.8k|        UWORD8 uc_num_ref_idx_l0_active_minus1 =
 2070|  16.8k|            (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] << (u1_mbaff & uc_field)) - 1;
 2071|  16.8k|        pu1_motion_pred_flag_l0 = &ps_svc_cur_mb_info->au1_motion_pred_flag[0];
 2072|  16.8k|        *pu1_motion_pred_flag_l0 = 0;
 2073|       |
 2074|  16.8k|        if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (2074:12): [True: 16.6k, False: 218]
  ------------------
 2075|  16.6k|           ps_svc_slice_params->u1_adaptive_motion_prediction_flag)
  ------------------
  |  Branch (2075:12): [True: 7.01k, False: 9.61k]
  ------------------
 2076|  7.01k|        {
 2077|  23.6k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (2077:27): [True: 16.6k, False: 7.01k]
  ------------------
 2078|  16.6k|            {
 2079|       |                /*usage of bins and ctxt check*/
 2080|  16.6k|                u1_mvp_flag = ih264d_decode_bin(0, ps_svc_lyr_dec->ps_motion_prediction_flag_l0,
 2081|  16.6k|                                                ps_bitstrm, ps_cab_env);
 2082|  16.6k|                COPYTHECONTEXT("SVC ext: ps_motion_prediction_flag_l0", u1_mvp_flag);
 2083|       |
 2084|  16.6k|                *pu1_motion_pred_flag_l0 |= (u1_mvp_flag << uc_i);
 2085|  16.6k|                if(u1_mvp_flag)
  ------------------
  |  Branch (2085:20): [True: 5.08k, False: 11.5k]
  ------------------
 2086|  5.08k|                {
 2087|  5.08k|                    pi1_ref_idx[uc_i] = -1;
 2088|  5.08k|                }
 2089|  16.6k|            }
 2090|  7.01k|        }
 2091|  9.83k|        else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (2091:17): [True: 9.61k, False: 218]
  ------------------
 2092|  9.61k|        {
 2093|  9.61k|            *pu1_motion_pred_flag_l0 = ps_svc_slice_params->u1_default_motion_prediction_flag
  ------------------
  |  Branch (2093:40): [True: 1.47k, False: 8.14k]
  ------------------
 2094|  9.61k|                                           ? ((1 << u1_num_mb_part) - 1)
 2095|  9.61k|                                           : 0;
 2096|  9.61k|            if(ps_svc_slice_params->u1_default_motion_prediction_flag)
  ------------------
  |  Branch (2096:16): [True: 1.47k, False: 8.14k]
  ------------------
 2097|  1.47k|            {
 2098|  1.47k|                pi1_ref_idx[0] = -1;
 2099|  1.47k|                pi1_ref_idx[1] = -1;
 2100|  1.47k|                pi1_ref_idx[2] = -1;
 2101|  1.47k|                pi1_ref_idx[3] = -1;
 2102|  1.47k|            }
 2103|  9.61k|        }
 2104|       |
 2105|       |        /* Decoding reference index 0: For simple profile the following   */
 2106|       |        /* conditions are always true (mb_field_decoding_flag == 0);      */
 2107|       |        /* (MbPartPredMode != PredL1)                                     */
 2108|  16.8k|        if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
  ------------------
  |  |  454|  16.8k|#define PRED_8x8R0  4
  ------------------
  |  Branch (2108:12): [True: 457, False: 16.3k]
  ------------------
 2109|    457|        {
 2110|       |            /* force the routine to decode ref idx for each partition */
 2111|       |            /*SVC added motion_prediction_flag to force it to take it only for
 2112|       |             * !motion_pred_flag_l0*/
 2113|       |
 2114|    457|            ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0, uc_num_ref_idx_l0_active_minus1,
 2115|    457|                                             u1_mb_mc_mode, pi1_ref_idx, pi1_left_ref_idx_ctxt_inc,
 2116|    457|                                             pi1_top_ref_idx_ctx_inc_arr, ps_cab_env, ps_bitstrm,
 2117|    457|                                             ps_dec->p_ref_idx_t);
 2118|    457|            if(ret != OK)
  ------------------
  |  |  114|    457|#define OK        0
  ------------------
  |  Branch (2118:16): [True: 69, False: 388]
  ------------------
 2119|     69|            {
 2120|     69|                return ret;
 2121|     69|            }
 2122|    457|        }
 2123|  16.3k|        else
 2124|  16.3k|        {
 2125|       |            /* When there exists only a single frame to predict from */
 2126|  16.3k|            pi1_left_ref_idx_ctxt_inc[0] = 0;
 2127|  16.3k|            pi1_left_ref_idx_ctxt_inc[1] = 0;
 2128|  16.3k|            pi1_top_ref_idx_ctx_inc_arr[0] = 0;
 2129|  16.3k|            pi1_top_ref_idx_ctx_inc_arr[1] = 0;
 2130|  16.3k|            *((UWORD32 *) pi1_ref_idx) = 0;
 2131|  16.3k|        }
 2132|  16.8k|    }
 2133|       |
 2134|  16.7k|    {
 2135|  16.7k|        UWORD8 u1_p_idx;
 2136|  16.7k|        UWORD32 uc_i;
 2137|  16.7k|        parse_part_params_t *ps_part = ps_dec->ps_part;
 2138|  16.7k|        UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
 2139|  16.7k|        UWORD32 u4_sub_mb_num;
 2140|  16.7k|        const UWORD8 *pu1_top_left_sub_mb_indx;
 2141|  16.7k|        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
 2142|  16.7k|        UWORD16 u2_sub_mb_num_pack = 0x028A;
 2143|       |
 2144|       |        /* Loading the table pointers */
 2145|  16.7k|        const UWORD8 *pu1_mb_partw = (const UWORD8 *) gau1_ih264d_mb_partw;
 2146|  16.7k|        const UWORD8 *pu1_mb_parth = (const UWORD8 *) gau1_ih264d_mb_parth;
 2147|  16.7k|        const UWORD8 *pu1_sub_mb_indx_mod =
 2148|  16.7k|            (const UWORD8 *) (gau1_ih264d_submb_indx_mod) + (uc_sub_mb * 6);
 2149|  16.7k|        const UWORD8 *pu1_sub_mb_partw = (const UWORD8 *) gau1_ih264d_submb_partw;
 2150|  16.7k|        const UWORD8 *pu1_sub_mb_parth = (const UWORD8 *) gau1_ih264d_submb_parth;
 2151|  16.7k|        const UWORD8 *pu1_num_sub_mb_part = (const UWORD8 *) gau1_ih264d_num_submb_part;
 2152|       |
 2153|       |        /*********************************************************/
 2154|       |        /* default initialisations for condition (uc_sub_mb == 0) */
 2155|       |        /* i.e. all are subpartitions of 8x8                     */
 2156|       |        /*********************************************************/
 2157|  16.7k|        u1_sub_mb_mode = 0;
 2158|  16.7k|        u1_num_subpart = 1;
 2159|  16.7k|        u1_mb_part_width = pu1_mb_partw[u1_mb_type];
 2160|  16.7k|        u1_mb_part_height = pu1_mb_parth[u1_mb_type];
 2161|  16.7k|        pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
 2162|  16.7k|        u4_sub_mb_num = 0;
 2163|       |
 2164|       |        /* Loop on number of partitions */
 2165|  46.5k|        for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (2165:37): [True: 29.7k, False: 16.7k]
  ------------------
 2166|  29.7k|        {
 2167|  29.7k|            UWORD8 uc_j;
 2168|  29.7k|            if(uc_sub_mb)
  ------------------
  |  Branch (2168:16): [True: 14.8k, False: 14.9k]
  ------------------
 2169|  14.8k|            {
 2170|  14.8k|                u1_sub_mb_mode = u4_sub_mb_pack >> 24;
 2171|  14.8k|                u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
 2172|  14.8k|                u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
 2173|  14.8k|                u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
 2174|  14.8k|                pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
 2175|  14.8k|                u4_sub_mb_num = u2_sub_mb_num_pack >> 12;
 2176|  14.8k|                u4_sub_mb_pack <<= 8;
 2177|  14.8k|                u2_sub_mb_num_pack <<= 4;
 2178|  14.8k|            }
 2179|       |            /* Loop on Number of sub-partitions */
 2180|  72.9k|            for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
  ------------------
  |  Branch (2180:27): [True: 43.1k, False: 29.7k]
  ------------------
 2181|  43.1k|            {
 2182|  43.1k|                mv_pred_t *ps_mv;
 2183|  43.1k|                u4_sub_mb_num += *pu1_top_left_sub_mb_indx;
 2184|  43.1k|                ps_mv = ps_mv_start + u4_sub_mb_num;
 2185|       |
 2186|       |                /* Storing Info for partitions */
 2187|  43.1k|                ps_part->u1_is_direct = PART_NOT_DIRECT;
  ------------------
  |  |  570|  43.1k|#define PART_NOT_DIRECT                0
  ------------------
 2188|  43.1k|                ps_part->u1_sub_mb_num = u4_sub_mb_num;
 2189|  43.1k|                ps_part->u1_partheight = u1_mb_part_height;
 2190|  43.1k|                ps_part->u1_partwidth = u1_mb_part_width;
 2191|       |
 2192|       |                /* Increment partition Index */
 2193|  43.1k|                u1_p_idx++;
 2194|  43.1k|                ps_part++;
 2195|       |
 2196|  43.1k|                ih264d_get_mvd_cabac(u4_sub_mb_num, 0, u1_mb_part_width, u1_mb_part_height, 1,
 2197|  43.1k|                                     ps_dec, ps_mv);
 2198|  43.1k|            }
 2199|  29.7k|        }
 2200|  16.7k|        ps_parse_mb_data->u1_num_part = u1_p_idx;
 2201|  16.7k|        ps_dec->ps_part = ps_part;
 2202|  16.7k|    }
 2203|       |
 2204|       |    /* update incropwindow*/
 2205|  16.7k|    if(ps_svc_slice_params->u1_adaptive_residual_prediction_flag &&
  ------------------
  |  Branch (2205:8): [True: 2.02k, False: 14.7k]
  ------------------
 2206|  2.02k|       ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (2206:8): [True: 1.80k, False: 218]
  ------------------
 2207|  1.80k|    {
 2208|  1.80k|        ps_svc_cur_mb_info->u1_residual_prediction_flag = ih264d_decode_bin(
 2209|  1.80k|            1, ps_svc_lyr_dec->ps_residual_prediction_flag, ps_bitstrm, ps_cab_env);
 2210|  1.80k|        COPYTHECONTEXT("SVC ext: u1_residual_prediction_flag",
 2211|  1.80k|                       ps_cur_mb_info->u1_residual_prediction_flag);
 2212|  1.80k|    }
 2213|  14.9k|    else
 2214|  14.9k|    {
 2215|       |        /*residual flag inference code */
 2216|  14.9k|        if(1 == ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (2216:12): [True: 14.7k, False: 218]
  ------------------
 2217|  14.7k|        {
 2218|  14.7k|            ps_svc_cur_mb_info->u1_residual_prediction_flag =
 2219|  14.7k|                ps_svc_slice_params->u1_default_residual_prediction_flag;
 2220|  14.7k|        }
 2221|    218|        else
 2222|    218|        {
 2223|    218|            ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
 2224|    218|        }
 2225|  14.9k|    }
 2226|       |
 2227|  16.7k|    if(ps_svc_slice_params->u1_scan_idx_end >= ps_svc_slice_params->u1_scan_idx_start)
  ------------------
  |  Branch (2227:8): [True: 16.7k, False: 0]
  ------------------
 2228|  16.7k|    {
 2229|  16.7k|        UWORD8 u1_cbp;
 2230|       |        /* Read the Coded block pattern */
 2231|  16.7k|        u1_cbp = (WORD8) ih264d_parse_ctx_cbp_cabac(ps_dec);
 2232|  16.7k|        COPYTHECONTEXT("coded_block_pattern", u1_cbp);
 2233|  16.7k|        ps_cur_mb_info->u1_cbp = u1_cbp;
 2234|  16.7k|        p_curr_ctxt->u1_cbp = u1_cbp;
 2235|  16.7k|        p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
 2236|  16.7k|        p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
 2237|  16.7k|        ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
 2238|       |
 2239|  16.7k|        if(u1_cbp > 47) return ERROR_CBP;
  ------------------
  |  Branch (2239:12): [True: 0, False: 16.7k]
  ------------------
 2240|       |
 2241|  16.7k|        ps_cur_mb_info->u1_tran_form8x8 = 0;
 2242|  16.7k|        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 2243|       |
 2244|       |        /* Read the transform8x8 u4_flag if present */
 2245|  16.7k|        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & 0xf) &&
  ------------------
  |  Branch (2245:12): [True: 13.5k, False: 3.20k]
  |  Branch (2245:64): [True: 9.98k, False: 3.59k]
  ------------------
 2246|  9.98k|           u1_no_submb_part_size_lt8x8_flag)
  ------------------
  |  Branch (2246:12): [True: 7.16k, False: 2.81k]
  ------------------
 2247|  7.16k|        {
 2248|  7.16k|            ps_cur_mb_info->u1_tran_form8x8 =
 2249|  7.16k|                ih264d_parse_transform8x8flag_cabac(ps_dec, ps_cur_mb_info);
 2250|  7.16k|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
 2251|  7.16k|            p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
 2252|  7.16k|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
 2253|  7.16k|        }
 2254|  9.61k|        else
 2255|  9.61k|        {
 2256|  9.61k|            p_curr_ctxt->u1_transform8x8_ctxt = 0;
 2257|  9.61k|        }
 2258|       |
 2259|       |        /* Read mb_qp_delta */
 2260|  16.7k|        if(u1_cbp)
  ------------------
  |  Branch (2260:12): [True: 11.9k, False: 4.87k]
  ------------------
 2261|  11.9k|        {
 2262|  11.9k|            WORD8 c_temp;
 2263|  11.9k|            ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
 2264|  11.9k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  11.9k|#define OK        0
  ------------------
  |  Branch (2264:16): [True: 135, False: 11.7k]
  ------------------
 2265|  11.7k|            COPYTHECONTEXT("mb_qp_delta", c_temp);
 2266|  11.7k|            if(c_temp != 0)
  ------------------
  |  Branch (2266:16): [True: 3.89k, False: 7.88k]
  ------------------
 2267|  3.89k|            {
 2268|  3.89k|                ret = ih264d_update_qp(ps_dec, c_temp);
 2269|  3.89k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  3.89k|#define OK        0
  ------------------
  |  Branch (2269:20): [True: 0, False: 3.89k]
  ------------------
 2270|  3.89k|            }
 2271|  11.7k|        }
 2272|  4.87k|        else
 2273|  4.87k|            ps_dec->i1_prev_mb_qp_delta = 0;
 2274|       |
 2275|       |        /*residual from start to end idx */
 2276|  16.6k|        ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0);
 2277|  16.6k|        if(EXCEED_OFFSET(ps_dec->ps_bitstrm)) return ERROR_EOB_TERMINATE_T;
  ------------------
  |  |   93|  16.6k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 839, False: 15.8k]
  |  |  ------------------
  ------------------
 2278|  16.6k|    }
 2279|  15.8k|    return OK;
  ------------------
  |  |  114|  15.8k|#define OK        0
  ------------------
 2280|  16.7k|}
isvcd_parse_pmb_cavlc:
 2296|  20.0k|{
 2297|  20.0k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 2298|  20.0k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 2299|  20.0k|    UWORD32 u1_num_mb_part;
 2300|  20.0k|    UWORD32 uc_sub_mb;
 2301|  20.0k|    dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
 2302|  20.0k|    UWORD32 *const pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 2303|  20.0k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 2304|  20.0k|    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data + u4_num_mbsNby2;
 2305|  20.0k|    WORD8 *pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
 2306|  20.0k|    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 2307|  20.0k|    const UWORD8 *pu1_num_mb_part = (const UWORD8 *) gau1_ih264d_num_mb_part;
 2308|  20.0k|    UWORD8 *pu1_col_info = ps_parse_mb_data->u1_col_info;
 2309|  20.0k|    UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
 2310|  20.0k|    UWORD32 u4_sum_mb_mode_pack = 0;
 2311|  20.0k|    WORD32 ret;
 2312|  20.0k|    UWORD8 u1_no_submb_part_size_lt8x8_flag = 1;
 2313|       |
 2314|  20.0k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 2315|  20.0k|    ps_cur_mb_info->u1_tran_form8x8 = 0;
 2316|  20.0k|    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
 2317|  20.0k|    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
 2318|  20.0k|    ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
 2319|  20.0k|    uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  453|  20.0k|#define PRED_8x8    3
  ------------------
                  uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
  ------------------
  |  |  454|  20.0k|#define PRED_8x8R0  4
  ------------------
 2320|       |
 2321|       |    /* Reading the subMB type */
 2322|  20.0k|    if(uc_sub_mb)
  ------------------
  |  Branch (2322:8): [True: 1.69k, False: 18.3k]
  ------------------
 2323|  1.69k|    {
 2324|  1.69k|        WORD32 i;
 2325|  1.69k|        UWORD8 u1_colz = (PRED_8x8 << 6);
  ------------------
  |  |  453|  1.69k|#define PRED_8x8    3
  ------------------
 2326|       |
 2327|  7.87k|        for(i = 0; i < 4; i++)
  ------------------
  |  Branch (2327:20): [True: 6.42k, False: 1.44k]
  ------------------
 2328|  6.42k|        {
 2329|  6.42k|            UWORD32 ui_sub_mb_mode;
 2330|       |
 2331|       |            // Inlined ih264d_uev
 2332|  6.42k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 2333|  6.42k|            UWORD32 u4_word, u4_ldz;
 2334|       |
 2335|       |            /***************************************************************/
 2336|       |            /* Find leading zeros in next 32 bits                          */
 2337|       |            /***************************************************************/
 2338|  6.42k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  6.42k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  6.42k|{                                                                           \
  |  |  152|  6.42k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  6.42k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  6.42k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  6.42k|                                                                            \
  |  |  156|  6.42k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  6.42k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 5.87k, False: 551]
  |  |  ------------------
  |  |  158|  6.42k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  5.87k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  6.42k|}
  ------------------
 2339|  6.42k|            u4_ldz = CLZ(u4_word);
 2340|       |            /* Flush the ps_bitstrm */
 2341|  6.42k|            u4_bitstream_offset += (u4_ldz + 1);
 2342|       |            /* Read the suffix from the ps_bitstrm */
 2343|  6.42k|            u4_word = 0;
 2344|  6.42k|            if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  2.92k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  2.92k|{                                                                           \
  |  |  122|  2.92k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  2.92k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  2.92k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  2.92k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  2.92k|                                                                            \
  |  |  127|  2.92k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 2.27k, False: 653]
  |  |  ------------------
  |  |  128|  2.92k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  2.27k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  2.92k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  2.92k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  2.92k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  2.92k|}                                                                           \
  ------------------
  |  Branch (2344:16): [True: 2.92k, False: 3.50k]
  ------------------
 2345|  6.42k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
 2346|  6.42k|            ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1);
 2347|       |            // Inlined ih264d_uev
 2348|       |
 2349|  6.42k|            if(ui_sub_mb_mode > 3)
  ------------------
  |  Branch (2349:16): [True: 249, False: 6.17k]
  ------------------
 2350|    249|            {
 2351|    249|                return ERROR_SUB_MB_TYPE;
 2352|    249|            }
 2353|  6.17k|            else
 2354|  6.17k|            {
 2355|  6.17k|                u4_sum_mb_mode_pack = (u4_sum_mb_mode_pack << 8) | ui_sub_mb_mode;
 2356|       |                /* Storing collocated information */
 2357|  6.17k|                *pu1_col_info++ = u1_colz | (UWORD8) (ui_sub_mb_mode << 4);
 2358|       |
 2359|  6.17k|                COPYTHECONTEXT("sub_mb_type", ui_sub_mb_mode);
 2360|  6.17k|            }
 2361|       |
 2362|       |            /* check if Motion compensation is done below 8x8 */
 2363|  6.17k|            if(ui_sub_mb_mode != P_L0_8x8)
  ------------------
  |  |  459|  6.17k|#define P_L0_8x8    0
  ------------------
  |  Branch (2363:16): [True: 2.67k, False: 3.50k]
  ------------------
 2364|  2.67k|            {
 2365|  2.67k|                u1_no_submb_part_size_lt8x8_flag = 0;
 2366|  2.67k|            }
 2367|  6.17k|        }
 2368|  1.44k|        u1_num_mb_part = 4;
 2369|  1.44k|    }
 2370|  18.3k|    else
 2371|  18.3k|    {
 2372|  18.3k|        *pu1_col_info++ = (u1_mb_type << 6);
 2373|  18.3k|        if(u1_mb_type) *pu1_col_info++ = (u1_mb_type << 6);
  ------------------
  |  Branch (2373:12): [True: 6.80k, False: 11.5k]
  ------------------
 2374|  18.3k|        u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
 2375|  18.3k|    }
 2376|       |
 2377|       |    /*Adding code to get Motion Prediction Flags*/
 2378|  19.8k|    {
 2379|       |        /*free the scratch buffer once used*/
 2380|  19.8k|        UWORD8 uc_i;
 2381|  19.8k|        UWORD8 *pu1_motion_pred_flag_l0;
 2382|  19.8k|        UWORD8 u1_mvp_flag;
 2383|  19.8k|        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
 2384|  19.8k|        UWORD8 uc_num_ref_idx_l0_active_minus1 =
 2385|  19.8k|            (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] << (u1_mbaff & uc_field)) - 1;
 2386|       |
 2387|  19.8k|        pu1_motion_pred_flag_l0 = &ps_svc_cur_mb_info->au1_motion_pred_flag[0];
 2388|  19.8k|        *pu1_motion_pred_flag_l0 = 0;
 2389|  19.8k|        if(ps_svc_cur_mb_info->u1_crop_window_flag &&
  ------------------
  |  Branch (2389:12): [True: 19.6k, False: 198]
  ------------------
 2390|  19.6k|           ps_svc_slice_params->u1_adaptive_motion_prediction_flag)
  ------------------
  |  Branch (2390:12): [True: 7.20k, False: 12.4k]
  ------------------
 2391|  7.20k|        {
 2392|  20.7k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (2392:27): [True: 13.5k, False: 7.20k]
  ------------------
 2393|  13.5k|            {
 2394|       |                /*usage of bins and ctxt check*/
 2395|  13.5k|                u1_mvp_flag = ih264d_get_bit_h264(ps_bitstrm);
 2396|  13.5k|                COPYTHECONTEXT("SVC ext: ps_motion_prediction_flag_l0", u1_mvp_flag);
 2397|       |
 2398|  13.5k|                *pu1_motion_pred_flag_l0 |= (u1_mvp_flag << uc_i);
 2399|  13.5k|            }
 2400|  7.20k|        }
 2401|  12.6k|        else if(ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (2401:17): [True: 12.4k, False: 198]
  ------------------
 2402|  12.4k|        {
 2403|  12.4k|            *pu1_motion_pred_flag_l0 = ps_svc_slice_params->u1_default_motion_prediction_flag
  ------------------
  |  Branch (2403:40): [True: 3.88k, False: 8.55k]
  ------------------
 2404|  12.4k|                                           ? ((1 << u1_num_mb_part) - 1)
 2405|  12.4k|                                           : 0;
 2406|  12.4k|        }
 2407|       |
 2408|       |        /* Decoding reference index 0: For simple profile the following   */
 2409|       |        /* conditions are always true (mb_field_decoding_flag == 0);      */
 2410|       |        /* (MbPartPredMode != PredL1)                                     */
 2411|  19.8k|        if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
  ------------------
  |  |  454|  19.8k|#define PRED_8x8R0  4
  ------------------
  |  Branch (2411:12): [True: 11.8k, False: 8.03k]
  ------------------
 2412|  11.8k|        {
 2413|  11.8k|            if(1 == uc_num_ref_idx_l0_active_minus1)
  ------------------
  |  Branch (2413:16): [True: 3.34k, False: 8.46k]
  ------------------
 2414|  3.34k|                isvcd_parse_pmb_ref_index_cavlc_range1(u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
 2415|  3.34k|                                                       uc_num_ref_idx_l0_active_minus1,
 2416|  3.34k|                                                       pu1_motion_pred_flag_l0);
 2417|  8.46k|            else
 2418|  8.46k|            {
 2419|  8.46k|                ret = isvcd_parse_pmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
 2420|  8.46k|                                                      uc_num_ref_idx_l0_active_minus1,
 2421|  8.46k|                                                      pu1_motion_pred_flag_l0);
 2422|  8.46k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  8.46k|#define OK        0
  ------------------
  |  Branch (2422:20): [True: 226, False: 8.24k]
  ------------------
 2423|  8.46k|            }
 2424|  11.8k|        }
 2425|  8.03k|        else
 2426|  8.03k|        {
 2427|       |            /* When there exists only a single frame to predict from */
 2428|  8.03k|            UWORD32 uc_i;
 2429|  18.9k|            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++) /* Storing Reference Idx Information */
  ------------------
  |  Branch (2429:27): [True: 10.9k, False: 8.03k]
  ------------------
 2430|  10.9k|                pi1_ref_idx[uc_i] = 0;
 2431|  8.03k|        }
 2432|  19.8k|    }
 2433|       |
 2434|  19.6k|    {
 2435|  19.6k|        UWORD8 u1_p_idx;
 2436|  19.6k|        UWORD32 uc_i;
 2437|  19.6k|        parse_part_params_t *ps_part = ps_dec->ps_part;
 2438|  19.6k|        UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
 2439|  19.6k|        UWORD32 u4_sub_mb_num;
 2440|  19.6k|        const UWORD8 *pu1_top_left_sub_mb_indx;
 2441|  19.6k|        mv_pred_t *ps_mv, *ps_mv_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
 2442|       |        /* Loading the table pointers */
 2443|  19.6k|        const UWORD8 *pu1_mb_partw = (const UWORD8 *) gau1_ih264d_mb_partw;
 2444|  19.6k|        const UWORD8 *pu1_mb_parth = (const UWORD8 *) gau1_ih264d_mb_parth;
 2445|  19.6k|        const UWORD8 *pu1_sub_mb_indx_mod =
 2446|  19.6k|            (const UWORD8 *) (gau1_ih264d_submb_indx_mod) + (uc_sub_mb * 6);
 2447|  19.6k|        const UWORD8 *pu1_sub_mb_partw = (const UWORD8 *) gau1_ih264d_submb_partw;
 2448|  19.6k|        const UWORD8 *pu1_sub_mb_parth = (const UWORD8 *) gau1_ih264d_submb_parth;
 2449|  19.6k|        const UWORD8 *pu1_num_sub_mb_part = (const UWORD8 *) gau1_ih264d_num_submb_part;
 2450|  19.6k|        UWORD16 u2_sub_mb_num = 0x028A;
 2451|       |
 2452|       |        /*********************************************************/
 2453|       |        /* default initialisations for condition (uc_sub_mb == 0) */
 2454|       |        /* i.e. all are subpartitions of 8x8                     */
 2455|       |        /*********************************************************/
 2456|  19.6k|        u1_sub_mb_mode = 0;
 2457|  19.6k|        u1_num_subpart = 1;
 2458|  19.6k|        u1_mb_part_width = pu1_mb_partw[u1_mb_type];
 2459|  19.6k|        u1_mb_part_height = pu1_mb_parth[u1_mb_type];
 2460|  19.6k|        pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
 2461|  19.6k|        u4_sub_mb_num = 0;
 2462|       |
 2463|       |        /* Loop on number of partitions */
 2464|  50.1k|        for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
  ------------------
  |  Branch (2464:37): [True: 30.5k, False: 19.6k]
  ------------------
 2465|  30.5k|        {
 2466|  30.5k|            UWORD8 uc_j;
 2467|  30.5k|            if(uc_sub_mb)
  ------------------
  |  Branch (2467:16): [True: 5.75k, False: 24.7k]
  ------------------
 2468|  5.75k|            {
 2469|  5.75k|                u1_sub_mb_mode = u4_sum_mb_mode_pack >> 24;
 2470|  5.75k|                u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
 2471|  5.75k|                u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
 2472|  5.75k|                u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
 2473|  5.75k|                pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
 2474|  5.75k|                u4_sub_mb_num = u2_sub_mb_num >> 12;
 2475|  5.75k|                u4_sum_mb_mode_pack <<= 8;
 2476|  5.75k|                u2_sub_mb_num <<= 4;
 2477|  5.75k|            }
 2478|       |            /* Loop on Number of sub-partitions */
 2479|  65.1k|            for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
  ------------------
  |  Branch (2479:27): [True: 34.6k, False: 30.5k]
  ------------------
 2480|  34.6k|            {
 2481|  34.6k|                WORD16 i2_mvx, i2_mvy;
 2482|  34.6k|                u4_sub_mb_num += *pu1_top_left_sub_mb_indx;
 2483|  34.6k|                ps_mv = ps_mv_start + u4_sub_mb_num;
 2484|       |
 2485|       |                /* Reading the differential Mv from the bitstream */
 2486|       |                // inlining ih264d_sev
 2487|  34.6k|                {
 2488|  34.6k|                    UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 2489|  34.6k|                    UWORD32 u4_word, u4_ldz, u4_abs_val;
 2490|       |
 2491|       |                    /***************************************************************/
 2492|       |                    /* Find leading zeros in next 32 bits                          */
 2493|       |                    /***************************************************************/
 2494|  34.6k|                    NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  34.6k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  34.6k|{                                                                           \
  |  |  152|  34.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  34.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  34.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  34.6k|                                                                            \
  |  |  156|  34.6k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  34.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 33.5k, False: 1.13k]
  |  |  ------------------
  |  |  158|  34.6k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  33.5k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  34.6k|}
  ------------------
 2495|  34.6k|                    u4_ldz = CLZ(u4_word);
 2496|       |
 2497|       |                    /* Flush the ps_bitstrm */
 2498|  34.6k|                    u4_bitstream_offset += (u4_ldz + 1);
 2499|       |
 2500|       |                    /* Read the suffix from the ps_bitstrm */
 2501|  34.6k|                    u4_word = 0;
 2502|  34.6k|                    if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  17.0k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  17.0k|{                                                                           \
  |  |  122|  17.0k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  17.0k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  17.0k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  17.0k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  17.0k|                                                                            \
  |  |  127|  17.0k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 15.7k, False: 1.24k]
  |  |  ------------------
  |  |  128|  17.0k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  15.7k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  17.0k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  17.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  17.0k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  17.0k|}                                                                           \
  ------------------
  |  Branch (2502:24): [True: 17.0k, False: 17.6k]
  ------------------
 2503|       |
 2504|  34.6k|                    *pu4_bitstrm_ofst = u4_bitstream_offset;
 2505|  34.6k|                    u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
 2506|       |
 2507|  34.6k|                    if(u4_word & 0x1)
  ------------------
  |  Branch (2507:24): [True: 10.6k, False: 24.0k]
  ------------------
 2508|  10.6k|                        i2_mvx = (-(WORD32) u4_abs_val);
 2509|  24.0k|                    else
 2510|  24.0k|                        i2_mvx = (u4_abs_val);
 2511|  34.6k|                }
 2512|       |                // inlinined ih264d_sev
 2513|  34.6k|                COPYTHECONTEXT("MVD", i2_mvx);
 2514|  34.6k|                i2_mvy = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2515|  34.6k|                COPYTHECONTEXT("MVD", i2_mvy);
 2516|       |
 2517|       |                /* Storing Info for partitions */
 2518|  34.6k|                ps_part->u1_is_direct = PART_NOT_DIRECT;
  ------------------
  |  |  570|  34.6k|#define PART_NOT_DIRECT                0
  ------------------
 2519|  34.6k|                ps_part->u1_sub_mb_num = u4_sub_mb_num;
 2520|  34.6k|                ps_part->u1_partheight = u1_mb_part_height;
 2521|  34.6k|                ps_part->u1_partwidth = u1_mb_part_width;
 2522|       |
 2523|       |                /* Storing Mv residuals */
 2524|  34.6k|                ps_mv->i2_mv[0] = i2_mvx;
 2525|  34.6k|                ps_mv->i2_mv[1] = i2_mvy;
 2526|       |
 2527|       |                /* Increment partition Index */
 2528|  34.6k|                u1_p_idx++;
 2529|  34.6k|                ps_part++;
 2530|  34.6k|            }
 2531|  30.5k|        }
 2532|  19.6k|        ps_parse_mb_data->u1_num_part = u1_p_idx;
 2533|  19.6k|        ps_dec->ps_part = ps_part;
 2534|  19.6k|    }
 2535|       |
 2536|  19.6k|    if(ps_svc_slice_params->u1_adaptive_residual_prediction_flag &&
  ------------------
  |  Branch (2536:8): [True: 3.46k, False: 16.1k]
  ------------------
 2537|  3.46k|       ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (2537:8): [True: 3.27k, False: 198]
  ------------------
 2538|  3.27k|    {
 2539|  3.27k|        ps_svc_cur_mb_info->u1_residual_prediction_flag = ih264d_get_bit_h264(ps_bitstrm);
 2540|  3.27k|        COPYTHECONTEXT("SVC ext: u1_residual_prediction_flag",
 2541|  3.27k|                       ps_svc_cur_mb_info->u1_residual_prediction_flag);
 2542|  3.27k|    }
 2543|  16.3k|    else
 2544|  16.3k|    {
 2545|       |        /*residual flag inference code */
 2546|  16.3k|        if(1 == ps_svc_cur_mb_info->u1_crop_window_flag)
  ------------------
  |  Branch (2546:12): [True: 16.1k, False: 198]
  ------------------
 2547|  16.1k|        {
 2548|  16.1k|            ps_svc_cur_mb_info->u1_residual_prediction_flag =
 2549|  16.1k|                ps_svc_slice_params->u1_default_residual_prediction_flag;
 2550|  16.1k|        }
 2551|    198|        else
 2552|    198|        {
 2553|    198|            ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
 2554|    198|        }
 2555|  16.3k|    }
 2556|       |
 2557|  19.6k|    if(ps_svc_slice_params->u1_scan_idx_end >= ps_svc_slice_params->u1_scan_idx_start)
  ------------------
  |  Branch (2557:8): [True: 19.6k, False: 0]
  ------------------
 2558|  19.6k|    {
 2559|  19.6k|        UWORD32 u4_cbp;
 2560|       |        /* Read the Coded block pattern */
 2561|  19.6k|        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 2562|  19.6k|        UWORD32 u4_word, u4_ldz;
 2563|       |
 2564|       |        /***************************************************************/
 2565|       |        /* Find leading zeros in next 32 bits                          */
 2566|       |        /***************************************************************/
 2567|  19.6k|        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  19.6k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  19.6k|{                                                                           \
  |  |  152|  19.6k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  19.6k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  19.6k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  19.6k|                                                                            \
  |  |  156|  19.6k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  19.6k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 19.0k, False: 578]
  |  |  ------------------
  |  |  158|  19.6k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  19.0k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  19.6k|}
  ------------------
 2568|  19.6k|        u4_ldz = CLZ(u4_word);
 2569|       |        /* Flush the ps_bitstrm */
 2570|  19.6k|        u4_bitstream_offset += (u4_ldz + 1);
 2571|       |        /* Read the suffix from the ps_bitstrm */
 2572|  19.6k|        u4_word = 0;
 2573|  19.6k|        if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  6.74k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  6.74k|{                                                                           \
  |  |  122|  6.74k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  6.74k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  6.74k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  6.74k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  6.74k|                                                                            \
  |  |  127|  6.74k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 6.19k, False: 553]
  |  |  ------------------
  |  |  128|  6.74k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  6.19k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  6.74k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  6.74k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  6.74k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  6.74k|}                                                                           \
  ------------------
  |  Branch (2573:12): [True: 6.74k, False: 12.8k]
  ------------------
 2574|  19.6k|        *pu4_bitstrm_ofst = u4_bitstream_offset;
 2575|  19.6k|        u4_cbp = ((1 << u4_ldz) + u4_word - 1);
 2576|       |
 2577|  19.6k|        if(u4_cbp > 47) return ERROR_CBP;
  ------------------
  |  Branch (2577:12): [True: 362, False: 19.2k]
  ------------------
 2578|       |
 2579|  19.2k|        u4_cbp = *((UWORD8 *) gau1_ih264d_cbp_inter + u4_cbp);
 2580|  19.2k|        COPYTHECONTEXT("coded_block_pattern", u4_cbp);
 2581|  19.2k|        ps_cur_mb_info->u1_cbp = u4_cbp;
 2582|       |
 2583|       |        /* Read the transform8x8 u4_flag if present */
 2584|  19.2k|        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & 0xf) &&
  ------------------
  |  Branch (2584:12): [True: 3.31k, False: 15.9k]
  |  Branch (2584:64): [True: 1.01k, False: 2.30k]
  ------------------
 2585|  1.01k|           u1_no_submb_part_size_lt8x8_flag)
  ------------------
  |  Branch (2585:12): [True: 940, False: 71]
  ------------------
 2586|    940|        {
 2587|    940|            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
 2588|    940|            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
 2589|    940|            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
 2590|    940|        }
 2591|       |
 2592|       |        /* Read mb_qp_delta */
 2593|  19.2k|        if(u4_cbp)
  ------------------
  |  Branch (2593:12): [True: 6.38k, False: 12.8k]
  ------------------
 2594|  6.38k|        {
 2595|  6.38k|            WORD32 i_temp;
 2596|  6.38k|            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
 2597|  6.38k|            UWORD32 u4_word, u4_ldz, u4_abs_val;
 2598|       |
 2599|       |            /***************************************************************/
 2600|       |            /* Find leading zeros in next 32 bits                          */
 2601|       |            /***************************************************************/
 2602|  6.38k|            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
  ------------------
  |  |  150|  6.38k|#define     NEXTBITS_32(u4_word, u4_offset, pu4_bitstream)                  \
  |  |  151|  6.38k|{                                                                           \
  |  |  152|  6.38k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  153|  6.38k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  154|  6.38k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  155|  6.38k|                                                                            \
  |  |  156|  6.38k|    u4_word = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  157|  6.38k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (157:8): [True: 6.00k, False: 381]
  |  |  ------------------
  |  |  158|  6.38k|    u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));        \
  |  |  ------------------
  |  |  |  |   45|  6.00k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  159|  6.38k|}
  ------------------
 2603|  6.38k|            u4_ldz = CLZ(u4_word);
 2604|       |
 2605|       |            /* Flush the ps_bitstrm */
 2606|  6.38k|            u4_bitstream_offset += (u4_ldz + 1);
 2607|       |
 2608|       |            /* Read the suffix from the ps_bitstrm */
 2609|  6.38k|            u4_word = 0;
 2610|  6.38k|            if(u4_ldz) GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
  ------------------
  |  |  120|  3.94k|#define     GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits)          \
  |  |  121|  3.94k|{                                                                           \
  |  |  122|  3.94k|    UWORD32 *pu4_buf =  (pu4_bitstream);                                    \
  |  |  123|  3.94k|    UWORD32 u4_word_off = ((u4_offset) >> 5);                               \
  |  |  124|  3.94k|    UWORD32 u4_bit_off = (u4_offset) & 0x1F;                                \
  |  |  125|  3.94k|    u4_code = pu4_buf[u4_word_off++] << u4_bit_off;                         \
  |  |  126|  3.94k|                                                                            \
  |  |  127|  3.94k|    if(u4_bit_off)                                                          \
  |  |  ------------------
  |  |  |  Branch (127:8): [True: 3.58k, False: 353]
  |  |  ------------------
  |  |  128|  3.94k|        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off));    \
  |  |  ------------------
  |  |  |  |   45|  3.58k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  129|  3.94k|    u4_code = u4_code >> (INT_IN_BITS - u4_no_bits);                        \
  |  |  ------------------
  |  |  |  |   45|  3.94k|#define INT_IN_BITS         32
  |  |  ------------------
  |  |  130|  3.94k|    (u4_offset) += u4_no_bits;                                              \
  |  |  131|  3.94k|}                                                                           \
  ------------------
  |  Branch (2610:16): [True: 3.94k, False: 2.44k]
  ------------------
 2611|       |
 2612|  6.38k|            *pu4_bitstrm_ofst = u4_bitstream_offset;
 2613|  6.38k|            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
 2614|       |
 2615|  6.38k|            if(u4_word & 0x1)
  ------------------
  |  Branch (2615:16): [True: 1.91k, False: 4.47k]
  ------------------
 2616|  1.91k|                i_temp = (-(WORD32) u4_abs_val);
 2617|  4.47k|            else
 2618|  4.47k|                i_temp = (u4_abs_val);
 2619|       |
 2620|  6.38k|            if((i_temp < -26) || (i_temp > 25)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  Branch (2620:16): [True: 88, False: 6.29k]
  |  Branch (2620:34): [True: 139, False: 6.15k]
  ------------------
 2621|       |            // inlinined ih264d_sev
 2622|       |
 2623|  6.15k|            COPYTHECONTEXT("mb_qp_delta", i_temp);
 2624|  6.15k|            if(i_temp)
  ------------------
  |  Branch (2624:16): [True: 3.71k, False: 2.44k]
  ------------------
 2625|  3.71k|            {
 2626|  3.71k|                ret = ih264d_update_qp(ps_dec, (WORD8) i_temp);
 2627|  3.71k|                if(ret != OK) return ret;
  ------------------
  |  |  114|  3.71k|#define OK        0
  ------------------
  |  Branch (2627:20): [True: 0, False: 3.71k]
  ------------------
 2628|  3.71k|            }
 2629|       |
 2630|       |            /*change to support start to end idx*/
 2631|  6.15k|            ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
 2632|  6.15k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  6.15k|#define OK        0
  ------------------
  |  Branch (2632:16): [True: 230, False: 5.92k]
  ------------------
 2633|  5.92k|            if(EXCEED_OFFSET(ps_bitstrm)) return ERROR_EOB_TERMINATE_T;
  ------------------
  |  |   93|  5.92k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 226, False: 5.69k]
  |  |  ------------------
  ------------------
 2634|  5.92k|        }
 2635|  12.8k|        else
 2636|  12.8k|        {
 2637|  12.8k|            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|  12.8k|#define CAVLC  0
  ------------------
 2638|  12.8k|        }
 2639|  19.2k|    }
 2640|       |
 2641|  18.5k|    return OK;
  ------------------
  |  |  114|  18.5k|#define OK        0
  ------------------
 2642|  19.6k|}
isvcd_mark_err_slice_skip:
 2657|  94.5k|{
 2658|  94.5k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 2659|  94.5k|    WORD32 i2_cur_mb_addr;
 2660|  94.5k|    UWORD32 u1_num_mbs;
 2661|  94.5k|    UWORD32 u1_mb_idx = ps_dec->u4_mb_idx;
 2662|  94.5k|    UWORD32 i2_mb_skip_run;
 2663|  94.5k|    UWORD32 u1_num_mbs_next, u1_end_of_row;
 2664|  94.5k|    const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
 2665|  94.5k|    UWORD32 u1_slice_end;
 2666|  94.5k|    UWORD32 u1_tfr_n_mb;
 2667|  94.5k|    UWORD32 u1_decode_nmb;
 2668|  94.5k|    dec_slice_params_t *ps_slice = ps_dec->ps_cur_slice;
 2669|  94.5k|    deblk_mb_t *ps_cur_deblk_mb;
 2670|  94.5k|    dec_mb_info_t *ps_cur_mb_info;
 2671|  94.5k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
 2672|  94.5k|    parse_pmbarams_t *ps_parse_mb_data;
 2673|  94.5k|    UWORD32 u1_inter_mb_type;
 2674|  94.5k|    UWORD32 u1_deblk_mb_type;
 2675|  94.5k|    UWORD32 u1_mbaff;
 2676|  94.5k|    parse_part_params_t *ps_part_info;
 2677|  94.5k|    WORD32 ret;
 2678|  94.5k|    UNUSED(u1_is_idr_slice);
  ------------------
  |  |   45|  94.5k|#define UNUSED(x) ((void)(x))
  ------------------
 2679|       |
 2680|  94.5k|    ps_svc_lyr_dec->u1_error_in_cur_frame = 1;
 2681|  94.5k|    if(ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC)
  ------------------
  |  |  602|  94.5k|#define REJECT_CUR_PIC    (0x01)
  ------------------
  |  Branch (2681:8): [True: 0, False: 94.5k]
  ------------------
 2682|      0|    {
 2683|      0|        ih264d_err_pic_dispbuf_mgr(ps_dec);
 2684|      0|        return 0;
 2685|      0|    }
 2686|       |
 2687|  94.5k|    if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag && (num_mb_skip & 1))
  ------------------
  |  Branch (2687:8): [True: 0, False: 94.5k]
  |  Branch (2687:53): [True: 0, False: 0]
  ------------------
 2688|      0|    {
 2689|      0|        num_mb_skip++;
 2690|      0|    }
 2691|  94.5k|    ps_dec->ps_dpb_cmds->u1_long_term_reference_flag = 0;
 2692|  94.5k|    if(prev_slice_err == 1)
  ------------------
  |  Branch (2692:8): [True: 49.5k, False: 44.9k]
  ------------------
 2693|  49.5k|    {
 2694|       |        /* first slice - missing/header corruption */
 2695|  49.5k|        ps_dec->ps_cur_slice->u2_frame_num = u2_frame_num;
 2696|  49.5k|        {
 2697|  49.5k|            WORD32 i, j, poc = 0;
 2698|  49.5k|            ps_dec->ps_cur_slice->u2_first_mb_in_slice = 0;
 2699|  49.5k|            ps_dec->pf_mvpred = ih264d_mvpred_nonmbaff;
 2700|  49.5k|            ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_bp;
 2701|  49.5k|            ps_dec->p_motion_compensate = ih264d_motion_compensate_bp;
 2702|       |
 2703|  49.5k|            if(ps_dec->ps_cur_pic != NULL)
  ------------------
  |  Branch (2703:16): [True: 49.5k, False: 0]
  ------------------
 2704|  49.5k|            {
 2705|  49.5k|                poc = ps_dec->ps_cur_pic->i4_poc;
 2706|  49.5k|                if(poc <= INT32_MAX - 2) poc += 2;
  ------------------
  |  Branch (2706:20): [True: 49.2k, False: 224]
  ------------------
 2707|  49.5k|            }
 2708|       |
 2709|  49.5k|            j = -1;
 2710|  81.8k|            for(i = 0; i < MAX_NUM_PIC_PARAMS; i++)
  ------------------
  |  |  524|  81.8k|#define MAX_NUM_PIC_PARAMS 256
  ------------------
  |  Branch (2710:24): [True: 81.8k, False: 0]
  ------------------
 2711|  81.8k|            {
 2712|  81.8k|                if(ps_dec->ps_pps[i].u1_is_valid == TRUE)
  ------------------
  |  |  591|  81.8k|#define TRUE    1
  ------------------
  |  Branch (2712:20): [True: 49.7k, False: 32.1k]
  ------------------
 2713|  49.7k|                {
 2714|  49.7k|                    if(ps_dec->ps_pps[i].ps_sps->u1_is_valid == TRUE)
  ------------------
  |  |  591|  49.7k|#define TRUE    1
  ------------------
  |  Branch (2714:24): [True: 49.5k, False: 224]
  ------------------
 2715|  49.5k|                    {
 2716|  49.5k|                        j = i;
 2717|  49.5k|                        break;
 2718|  49.5k|                    }
 2719|  49.7k|                }
 2720|  81.8k|            }
 2721|       |
 2722|       |            // if valid SPS PPS is not found return error
 2723|  49.5k|            if(j == -1)
  ------------------
  |  Branch (2723:16): [True: 0, False: 49.5k]
  ------------------
 2724|      0|            {
 2725|      0|                return ERROR_INV_SLICE_HDR_T;
 2726|      0|            }
 2727|       |
 2728|       |            /* call ih264d_start_of_pic only if it was not called earlier*/
 2729|  49.5k|            if(ps_dec->u4_pic_buf_got == 0)
  ------------------
  |  Branch (2729:16): [True: 0, False: 49.5k]
  ------------------
 2730|      0|            {
 2731|       |                // initialize slice params required by isvcd_start_of_pic to valid values
 2732|      0|                ps_dec->ps_cur_slice->u1_slice_type = P_SLICE;
  ------------------
  |  |  368|      0|#define P_SLICE  0
  ------------------
 2733|      0|                ps_dec->ps_cur_slice->u1_nal_ref_idc = 1;
 2734|      0|                ps_dec->ps_cur_slice->u1_nal_unit_type = 1;
 2735|      0|                ret = isvcd_start_of_pic(ps_svc_lyr_dec, poc, ps_cur_poc,
 2736|      0|                                         ps_dec->ps_cur_slice->u2_frame_num, &ps_dec->ps_pps[j]);
 2737|       |
 2738|      0|                if(ret != OK)
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (2738:20): [True: 0, False: 0]
  ------------------
 2739|      0|                {
 2740|      0|                    return ret;
 2741|      0|                }
 2742|       |                /*inter layer buffer intialization */
 2743|      0|                ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 2744|      0|                    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start;
 2745|      0|                ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_cur_mb =
 2746|      0|                    ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_base;
 2747|      0|            }
 2748|       |
 2749|  49.5k|            ps_dec->ps_ref_pic_buf_lx[0][0]->u1_pic_buf_id = 0;
 2750|  49.5k|            ps_dec->u4_output_present = 0;
 2751|       |
 2752|  49.5k|            {
 2753|  49.5k|                ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer, &(ps_dec->s_disp_op));
 2754|       |                /* If error code is non-zero then there is no buffer available for
 2755|       |                display, hence avoid format conversion */
 2756|       |
 2757|  49.5k|                if(0 != ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (2757:20): [True: 46.5k, False: 3.00k]
  ------------------
 2758|  46.5k|                {
 2759|  46.5k|                    ps_dec->u4_output_present = 0;
 2760|  46.5k|                    ps_dec->u4_fmt_conv_cur_row = ps_dec->s_disp_frame_info.u4_y_ht;
 2761|  46.5k|                }
 2762|  3.00k|                else
 2763|  3.00k|                    ps_dec->u4_output_present = 1;
 2764|  49.5k|            }
 2765|       |
 2766|  49.5k|            if(ps_dec->u1_separate_parse == 1)
  ------------------
  |  Branch (2766:16): [True: 28.4k, False: 21.0k]
  ------------------
 2767|  28.4k|            {
 2768|       |#ifdef KEEP_THREADS_ACTIVE
 2769|       |                ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
 2770|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2771|       |
 2772|       |                ps_dec->ai4_process_start[0] = PROC_START;
 2773|       |                ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[0]);
 2774|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2775|       |
 2776|       |                ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
 2777|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2778|       |#endif
 2779|       |#ifdef KEEP_THREADS_ACTIVE
 2780|       |                if(ps_dec->u4_bs_deblk_thread_created)
 2781|       |                {
 2782|       |                    ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
 2783|       |                    RETURN_IF((ret != IV_SUCCESS), ret);
 2784|       |
 2785|       |                    ps_dec->ai4_process_start[1] = PROC_START;
 2786|       |                    ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[1]);
 2787|       |                    RETURN_IF((ret != IV_SUCCESS), ret);
 2788|       |
 2789|       |                    ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
 2790|       |                    RETURN_IF((ret != IV_SUCCESS), ret);
 2791|       |                }
 2792|       |#endif
 2793|  28.4k|            }
 2794|  49.5k|        }
 2795|  49.5k|    }
 2796|  44.9k|    else
 2797|  44.9k|    {
 2798|       |        // Middle / last slice
 2799|  44.9k|        dec_slice_struct_t *ps_parse_cur_slice;
 2800|  44.9k|        ps_parse_cur_slice = ps_dec->ps_dec_slice_buf + ps_dec->u2_cur_slice_num;
 2801|       |
 2802|  44.9k|        if(ps_dec->u1_slice_header_done && ps_parse_cur_slice == ps_dec->ps_parse_cur_slice)
  ------------------
  |  Branch (2802:12): [True: 44.7k, False: 212]
  |  Branch (2802:44): [True: 44.7k, False: 0]
  ------------------
 2803|  44.7k|        {
 2804|       |            // Slice data corrupted
 2805|       |            // in the case of mbaff, conceal from the even mb.
 2806|  44.7k|            if((ps_dec->ps_cur_slice->u1_mbaff_frame_flag) && (ps_dec->u4_num_mbs_cur_nmb & 1))
  ------------------
  |  Branch (2806:16): [True: 0, False: 44.7k]
  |  Branch (2806:63): [True: 0, False: 0]
  ------------------
 2807|      0|            {
 2808|      0|                ps_dec->u4_num_mbs_cur_nmb = ps_dec->u4_num_mbs_cur_nmb - 1;
 2809|      0|                ps_dec->u4_cur_mb_addr--;
 2810|      0|            }
 2811|       |
 2812|  44.7k|            u1_num_mbs = ps_dec->u4_num_mbs_cur_nmb;
 2813|  44.7k|            if(u1_num_mbs)
  ------------------
  |  Branch (2813:16): [True: 10.6k, False: 34.1k]
  ------------------
 2814|  10.6k|            {
 2815|  10.6k|                ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs - 1;
 2816|  10.6k|            }
 2817|  34.1k|            else
 2818|  34.1k|            {
 2819|  34.1k|                if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (2819:20): [True: 9.54k, False: 24.5k]
  ------------------
 2820|  9.54k|                {
 2821|  9.54k|                    ps_cur_mb_info = ps_dec->ps_nmb_info;
 2822|  9.54k|                }
 2823|  24.5k|                else
 2824|  24.5k|                {
 2825|  24.5k|                    ps_cur_mb_info = ps_dec->ps_nmb_info + ps_dec->u4_num_mbs_prev_nmb - 1;
 2826|  24.5k|                }
 2827|  34.1k|            }
 2828|       |
 2829|  44.7k|            ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
 2830|  44.7k|            ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
 2831|  44.7k|            ps_dec->u1_mb_ngbr_availablity = ps_cur_mb_info->u1_mb_ngbr_availablity;
 2832|       |
 2833|  44.7k|            if(u1_num_mbs)
  ------------------
  |  Branch (2833:16): [True: 10.6k, False: 34.1k]
  ------------------
 2834|  10.6k|            {
 2835|       |                // Going back 1 mb
 2836|  10.6k|                ps_dec->pv_parse_tu_coeff_data = ps_dec->pv_prev_mb_parse_tu_coeff_data;
 2837|  10.6k|                ps_dec->u4_cur_mb_addr--;
 2838|  10.6k|                ps_dec->i4_submb_ofst -= SUB_BLK_SIZE;
  ------------------
  |  |  562|  10.6k|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  10.6k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  10.6k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
 2839|       |
 2840|       |                // Parse/decode N-MB left unparsed
 2841|  10.6k|                if(ps_dec->u1_pr_sl_type == P_SLICE || ps_dec->u1_pr_sl_type == B_SLICE)
  ------------------
  |  |  368|  21.3k|#define P_SLICE  0
  ------------------
                              if(ps_dec->u1_pr_sl_type == P_SLICE || ps_dec->u1_pr_sl_type == B_SLICE)
  ------------------
  |  |  369|  6.32k|#define B_SLICE  1
  ------------------
  |  Branch (2841:20): [True: 4.35k, False: 6.32k]
  |  Branch (2841:56): [True: 4.33k, False: 1.99k]
  ------------------
 2842|  8.68k|                {
 2843|  8.68k|                    if((ps_dec->i4_submb_ofst - ((WORD32) ((u1_num_mbs - u1_mb_idx) << 4))) < 0)
  ------------------
  |  Branch (2843:24): [True: 0, False: 8.68k]
  ------------------
 2844|      0|                    {
 2845|      0|                        ps_dec->i4_submb_ofst = ((u1_num_mbs - u1_mb_idx) << 4);
 2846|      0|                    }
 2847|       |
 2848|  8.68k|                    ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
 2849|  8.68k|                    ps_dec->ps_part = ps_dec->ps_parse_part_params;
 2850|  8.68k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  8.68k|#define OK        0
  ------------------
  |  Branch (2850:24): [True: 188, False: 8.49k]
  ------------------
 2851|  8.68k|                }
 2852|       |
 2853|  10.4k|                u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
 2854|  10.4k|                u1_end_of_row =
 2855|  10.4k|                    (!u1_num_mbs_next) &&
  ------------------
  |  Branch (2855:21): [True: 0, False: 10.4k]
  ------------------
 2856|      0|                    (!(ps_dec->ps_cur_slice->u1_mbaff_frame_flag && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (2856:24): [True: 0, False: 0]
  |  Branch (2856:69): [True: 0, False: 0]
  ------------------
 2857|  10.4k|                u1_slice_end = 1;
 2858|  10.4k|                u1_tfr_n_mb = 1;
 2859|  10.4k|                ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
 2860|       |
 2861|  10.4k|                if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (2861:20): [True: 5.79k, False: 4.70k]
  ------------------
 2862|  5.79k|                {
 2863|  5.79k|                    ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next,
 2864|  5.79k|                                         u1_tfr_n_mb, u1_end_of_row);
 2865|  5.79k|                    ps_dec->ps_nmb_info += u1_num_mbs;
 2866|  5.79k|                    ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
 2867|  5.79k|                }
 2868|  4.70k|                else
 2869|  4.70k|                {
 2870|  4.70k|                    ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next,
 2871|  4.70k|                                                u1_tfr_n_mb, u1_end_of_row);
 2872|  4.70k|                }
 2873|  10.4k|                ps_dec->u4_total_mbs_coded += u1_num_mbs;
 2874|  10.4k|                ps_dec->u4_mb_idx = 0;
 2875|  10.4k|                ps_dec->u4_num_mbs_cur_nmb = 0;
 2876|  10.4k|            }
 2877|       |
 2878|  44.5k|            if(ps_dec->u4_total_mbs_coded >= ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
  ------------------
  |  Branch (2878:16): [True: 0, False: 44.5k]
  ------------------
 2879|      0|            {
 2880|      0|                ps_dec->u1_pic_decode_done = 1;
 2881|      0|                return 0;
 2882|      0|            }
 2883|       |
 2884|       |            /* Inserting new slice only if the current slice has atleast 1 MB*/
 2885|  44.5k|            if(ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice <
  ------------------
  |  Branch (2885:16): [True: 44.3k, False: 197]
  ------------------
 2886|  44.5k|               (UWORD32) (ps_dec->u4_total_mbs_coded >> ps_slice->u1_mbaff_frame_flag))
 2887|  44.3k|            {
 2888|  44.3k|                ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
 2889|  44.3k|                ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
 2890|  44.3k|                ps_dec->u2_cur_slice_num++;
 2891|  44.3k|                ps_dec->ps_parse_cur_slice++;
 2892|  44.3k|            }
 2893|  44.5k|        }
 2894|    212|        else
 2895|    212|        {
 2896|       |            // Slice missing / header corrupted
 2897|    212|            ps_dec->ps_parse_cur_slice = ps_dec->ps_dec_slice_buf + ps_dec->u2_cur_slice_num;
 2898|    212|        }
 2899|  44.9k|    }
 2900|       |
 2901|       |    /******************************************************/
 2902|       |    /* Initializations to new slice                       */
 2903|       |    /******************************************************/
 2904|  94.3k|    {
 2905|  94.3k|        WORD32 num_entries;
 2906|  94.3k|        WORD32 size;
 2907|  94.3k|        UWORD8 *pu1_buf;
 2908|       |
 2909|  94.3k|        num_entries = MAX_FRAMES;
  ------------------
  |  |  600|  94.3k|#define MAX_FRAMES              16
  ------------------
 2910|  94.3k|        if((1 >= ps_dec->ps_cur_sps->u1_num_ref_frames) && (0 == ps_dec->i4_display_delay))
  ------------------
  |  Branch (2910:12): [True: 68.9k, False: 25.3k]
  |  Branch (2910:60): [True: 0, False: 68.9k]
  ------------------
 2911|      0|        {
 2912|      0|            num_entries = 1;
 2913|      0|        }
 2914|  94.3k|        num_entries = ((2 * num_entries) + 1);
 2915|  94.3k|        num_entries *= 2;
 2916|       |
 2917|  94.3k|        size = num_entries * sizeof(void *);
 2918|  94.3k|        size += PAD_MAP_IDX_POC * sizeof(void *);
  ------------------
  |  |  100|  94.3k|#define PAD_MAP_IDX_POC             (1)
  ------------------
 2919|       |
 2920|  94.3k|        pu1_buf = (UWORD8 *) ps_dec->pv_map_ref_idx_to_poc_buf;
 2921|  94.3k|        pu1_buf += size * ps_dec->u2_cur_slice_num;
 2922|  94.3k|        ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc = (volatile void **) pu1_buf;
 2923|  94.3k|    }
 2924|  94.3k|    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
 2925|  94.3k|    ps_dec->ps_cur_slice->u2_first_mb_in_slice = ps_dec->u4_total_mbs_coded >> u1_mbaff;
 2926|  94.3k|    ps_dec->ps_cur_slice->i1_slice_alpha_c0_offset = 0;
 2927|  94.3k|    ps_dec->ps_cur_slice->i1_slice_beta_offset = 0;
 2928|       |
 2929|  94.3k|    if(ps_dec->ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (2929:8): [True: 0, False: 94.3k]
  ------------------
 2930|      0|        ps_dec->u2_prv_frame_num = ps_dec->ps_cur_slice->u2_frame_num;
 2931|       |
 2932|  94.3k|    ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice = ps_dec->u4_total_mbs_coded >> u1_mbaff;
 2933|  94.3k|    ps_dec->ps_parse_cur_slice->u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd;
 2934|       |
 2935|  94.3k|    if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (2935:8): [True: 43.8k, False: 50.5k]
  ------------------
 2936|  43.8k|    {
 2937|  43.8k|        ps_dec->ps_parse_cur_slice->pv_tu_coeff_data_start = ps_dec->pv_parse_tu_coeff_data;
 2938|  43.8k|    }
 2939|  50.5k|    else
 2940|  50.5k|    {
 2941|  50.5k|        ps_dec->pv_proc_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
 2942|  50.5k|    }
 2943|       |
 2944|       |    /******************************************************/
 2945|       |    /* Initializations specific to P slice                */
 2946|       |    /******************************************************/
 2947|  94.3k|    u1_inter_mb_type = P_MB;
  ------------------
  |  |  419|  94.3k|#define P_MB        2
  ------------------
 2948|  94.3k|    u1_deblk_mb_type = D_INTER_MB;
  ------------------
  |  |  381|  94.3k|#define D_INTER_MB        0
  ------------------
 2949|       |
 2950|  94.3k|    ps_dec->ps_cur_slice->u1_slice_type = P_SLICE;
  ------------------
  |  |  368|  94.3k|#define P_SLICE  0
  ------------------
 2951|  94.3k|    ps_dec->ps_parse_cur_slice->slice_type = P_SLICE;
  ------------------
  |  |  368|  94.3k|#define P_SLICE  0
  ------------------
 2952|  94.3k|    ps_dec->pf_mvpred_ref_tfr_nby2mb = ih264d_mv_pred_ref_tfr_nby2_pmb;
 2953|  94.3k|    ps_dec->ps_part = ps_dec->ps_parse_part_params;
 2954|  94.3k|    ps_dec->u2_mbx =
 2955|  94.3k|        (MOD(ps_dec->ps_cur_slice->u2_first_mb_in_slice - 1, ps_dec->u2_frm_wd_in_mbs));
  ------------------
  |  |   64|  94.3k|#define MOD(x,y) ((x)%(y))
  ------------------
 2956|  94.3k|    ps_dec->u2_mby =
 2957|  94.3k|        (DIV(ps_dec->ps_cur_slice->u2_first_mb_in_slice - 1, ps_dec->u2_frm_wd_in_mbs));
  ------------------
  |  |   65|  94.3k|#define DIV(x,y) ((x)/(y))
  ------------------
 2958|  94.3k|    ps_dec->u2_mby <<= u1_mbaff;
 2959|       |
 2960|       |    /******************************************************/
 2961|       |    /* Parsing / decoding the slice                       */
 2962|       |    /******************************************************/
 2963|  94.3k|    ret = isvcd_parse_interlayer_resamp_func_init(ps_svc_lyr_dec,
 2964|  94.3k|                                                  ps_dec->ps_cur_slice->u2_first_mb_in_slice);
 2965|  94.3k|    ps_dec->u1_slice_header_done = 2;
 2966|       |
 2967|  94.3k|    ps_dec->u1_qp = ps_slice->u1_slice_qp;
 2968|  94.3k|    ih264d_update_qp(ps_dec, 0);
 2969|  94.3k|    u1_mb_idx = ps_dec->u4_mb_idx;
 2970|  94.3k|    ps_parse_mb_data = ps_dec->ps_parse_mb_data;
 2971|  94.3k|    u1_num_mbs = u1_mb_idx;
 2972|  94.3k|    u1_slice_end = 0;
 2973|  94.3k|    u1_tfr_n_mb = 0;
 2974|  94.3k|    u1_decode_nmb = 0;
 2975|  94.3k|    i2_cur_mb_addr = ps_dec->u4_total_mbs_coded;
 2976|  94.3k|    i2_mb_skip_run = num_mb_skip;
 2977|  94.3k|    if(0 == ps_dec->u4_total_mbs_coded)
  ------------------
  |  Branch (2977:8): [True: 49.5k, False: 44.8k]
  ------------------
 2978|  49.5k|    {
 2979|  49.5k|        ps_dec->ps_cur_mb_row = ps_dec->ps_nbr_mb_row;  //[0];
 2980|       |        // Increment by 2 ,so that left mb (mbaff decrements by 2)  will always be valid
 2981|  49.5k|        ps_dec->ps_cur_mb_row += 2;
 2982|  49.5k|        ps_dec->ps_top_mb_row = ps_dec->ps_nbr_mb_row;
 2983|  49.5k|        ps_dec->ps_top_mb_row +=
 2984|  49.5k|            ((ps_dec->u2_frm_wd_in_mbs + 2) << (1 - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag));
 2985|       |        // Increment by 2 ,so that left mb (mbaff decrements by 2)  will always be valid
 2986|  49.5k|        ps_dec->ps_top_mb_row += 2;
 2987|       |
 2988|       |        /* CHANGED CODE */
 2989|  49.5k|        ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv;
 2990|  49.5k|        ps_dec->ps_mv_left = ps_dec->s_cur_pic.ps_mv;
 2991|  49.5k|        ps_dec->ps_mv_top = ps_dec->ps_mv_top_p[0];
 2992|  49.5k|        ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic;
 2993|  49.5k|        ps_dec->ps_mv = ps_dec->s_cur_pic.ps_mv;
 2994|  49.5k|        ps_dec->ps_mv_bank_cur = ps_dec->s_cur_pic.ps_mv;
 2995|  49.5k|        ps_dec->pu1_col_zero_flag = ps_dec->s_cur_pic.pu1_col_zero_flag;
 2996|  49.5k|        ps_dec->ps_part = ps_dec->ps_parse_part_params;
 2997|  49.5k|        ps_dec->pf_mvpred_ref_tfr_nby2mb = isvcd_mv_pred_ref_tfr_nby2_epmb;
 2998|  49.5k|    }
 2999|  10.5M|    while(!u1_slice_end)
  ------------------
  |  Branch (2999:11): [True: 10.4M, False: 83.8k]
  ------------------
 3000|  10.4M|    {
 3001|  10.4M|        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u4_max_mb_addr) break;
  ------------------
  |  Branch (3001:12): [True: 10.4k, False: 10.4M]
  ------------------
 3002|       |
 3003|  10.4M|        ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + u1_num_mbs;
 3004|  10.4M|        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
 3005|  10.4M|        ps_dec->u4_num_mbs_cur_nmb = u1_num_mbs;
 3006|  10.4M|        ps_cur_mb_info->u1_Mux = 0;
 3007|  10.4M|        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
 3008|  10.4M|        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
 3009|  10.4M|        ps_cur_mb_info->u1_end_of_slice = 0;
 3010|       |
 3011|       |        /* Storing Default partition info */
 3012|  10.4M|        ps_parse_mb_data->u1_num_part = 1;
 3013|  10.4M|        ps_parse_mb_data->u4_isI_mb = 0;
 3014|       |
 3015|       |        /**************************************************************/
 3016|       |        /* Get the required information for decoding of MB            */
 3017|       |        /**************************************************************/
 3018|       |        /* mb_x, mb_y, neighbor availablity, */
 3019|  10.4M|        if(u1_mbaff)
  ------------------
  |  Branch (3019:12): [True: 0, False: 10.4M]
  ------------------
 3020|      0|            ih264d_get_mb_info_cavlc_mbaff(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, i2_mb_skip_run);
 3021|  10.4M|        else
 3022|  10.4M|            isvcd_get_mb_info_cavlc_nonmbaff(ps_dec, i2_cur_mb_addr, ps_cur_mb_info,
 3023|  10.4M|                                             i2_mb_skip_run);
 3024|       |
 3025|  10.4M|        {
 3026|  10.4M|            UWORD16 *pu2_res_luma_csbp;
 3027|       |
 3028|       |            /*Pointer assignment for Residual NNZ */
 3029|  10.4M|            pu2_res_luma_csbp = ps_svc_lyr_dec->pu2_frm_res_luma_csbp + ps_cur_mb_info->u2_mbx;
 3030|  10.4M|            pu2_res_luma_csbp +=
 3031|  10.4M|                ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride;
 3032|  10.4M|            *pu2_res_luma_csbp = 0;
 3033|  10.4M|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 3034|  10.4M|                ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
 3035|  10.4M|                (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
 3036|  10.4M|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode = SVC_INTER_MB;
  ------------------
  |  |  114|  10.4M|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 3037|  10.4M|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size =
 3038|  10.4M|                ps_cur_mb_info->u1_tran_form8x8;
 3039|  10.4M|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = 0;
 3040|  10.4M|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz = 0;
 3041|  10.4M|        }
 3042|       |
 3043|       |        /* Set the deblocking parameters for this MB */
 3044|  10.4M|        if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (3044:12): [True: 10.4M, False: 0]
  ------------------
 3045|  10.4M|        {
 3046|  10.4M|            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
 3047|  10.4M|                                             ps_dec->u1_mb_ngbr_availablity,
 3048|  10.4M|                                             ps_dec->u1_cur_mb_fld_dec_flag);
 3049|  10.4M|        }
 3050|       |
 3051|       |        /* Set appropriate flags in ps_cur_mb_info and ps_dec */
 3052|  10.4M|        ps_dec->i1_prev_mb_qp_delta = 0;
 3053|  10.4M|        ps_dec->u1_sub_mb_num = 0;
 3054|  10.4M|        ps_cur_mb_info->u1_mb_type = MB_SKIP;
  ------------------
  |  |   59|  10.4M|#define MB_SKIP 255
  ------------------
 3055|  10.4M|        ps_cur_mb_info->u1_mb_mc_mode = PRED_16x16;
  ------------------
  |  |  450|  10.4M|#define PRED_16x16  0
  ------------------
 3056|  10.4M|        ps_cur_mb_info->u1_cbp = 0;
 3057|       |
 3058|       |        /* set appropriat flags in svc cur MB info */
 3059|  10.4M|        ps_svc_cur_mb_info->u1_base_mode_flag = 0;
 3060|  10.4M|        ps_svc_cur_mb_info->u1_residual_prediction_flag = 0;
 3061|  10.4M|        ps_svc_cur_mb_info->u1_crop_window_flag = 0;
 3062|  10.4M|        ps_svc_cur_mb_info->au1_motion_pred_flag[0] = 0;
 3063|  10.4M|        ps_svc_cur_mb_info->au1_motion_pred_flag[1] = 0;
 3064|       |
 3065|       |        /* Storing Skip partition info */
 3066|  10.4M|        ps_part_info = ps_dec->ps_part;
 3067|  10.4M|        ps_part_info->u1_is_direct = PART_DIRECT_16x16;
  ------------------
  |  |  572|  10.4M|#define PART_DIRECT_16x16              2
  ------------------
 3068|  10.4M|        ps_part_info->u1_sub_mb_num = 0;
 3069|  10.4M|        ps_dec->ps_part++;
 3070|       |
 3071|       |        /* Update Nnzs */
 3072|  10.4M|        ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
  ------------------
  |  |  338|  10.4M|#define CAVLC  0
  ------------------
 3073|       |
 3074|  10.4M|        ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
 3075|  10.4M|        ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
 3076|       |
 3077|  10.4M|        if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  10.4M|#define TARGET_LAYER 2
  ------------------
  |  Branch (3077:12): [True: 317k, False: 10.1M]
  ------------------
 3078|   317k|        {
 3079|   317k|            ps_cur_deblk_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
  ------------------
  |  |   70|   317k|#define MB_DISABLE_FILTERING          0x01
  ------------------
 3080|   317k|        }
 3081|       |
 3082|  10.4M|        i2_mb_skip_run--;
 3083|  10.4M|        ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
 3084|       |
 3085|  10.4M|        if(u1_mbaff)
  ------------------
  |  Branch (3085:12): [True: 0, False: 10.4M]
  ------------------
 3086|      0|        {
 3087|      0|            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
 3088|      0|        }
 3089|       |
 3090|       |        /**************************************************************/
 3091|       |        /* Get next Macroblock address                                */
 3092|       |        /**************************************************************/
 3093|  10.4M|        i2_cur_mb_addr++;
 3094|  10.4M|        u1_num_mbs++;
 3095|  10.4M|        ps_parse_mb_data++;
 3096|       |
 3097|       |        /****************************************************************/
 3098|       |        /* Check for End Of Row and other flags that determine when to  */
 3099|       |        /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for   */
 3100|       |        /* N-Mb                                                         */
 3101|       |        /****************************************************************/
 3102|  10.4M|        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
 3103|  10.4M|        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
  ------------------
  |  Branch (3103:25): [True: 1.80M, False: 8.64M]
  |  Branch (3103:50): [True: 0, False: 1.80M]
  |  Branch (3103:62): [True: 0, False: 0]
  ------------------
 3104|  10.4M|        u1_slice_end = !i2_mb_skip_run;
 3105|  10.4M|        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u4_recon_mb_grp) || u1_end_of_row || u1_slice_end;
  ------------------
  |  Branch (3105:23): [True: 1.76M, False: 8.67M]
  |  Branch (3105:66): [True: 36.2k, False: 8.64M]
  |  Branch (3105:83): [True: 106, False: 8.64M]
  ------------------
 3106|  10.4M|        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
  ------------------
  |  Branch (3106:25): [True: 1.80M, False: 8.64M]
  |  Branch (3106:40): [True: 0, False: 8.64M]
  ------------------
 3107|  10.4M|        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
 3108|       |
 3109|  10.4M|        if(u1_decode_nmb)
  ------------------
  |  Branch (3109:12): [True: 1.80M, False: 8.64M]
  ------------------
 3110|  1.80M|        {
 3111|  1.80M|            if((ps_dec->i4_submb_ofst - ((WORD32) ((u1_num_mbs - u1_mb_idx) << 4))) < 0)
  ------------------
  |  Branch (3111:16): [True: 30.5k, False: 1.77M]
  ------------------
 3112|  30.5k|            {
 3113|  30.5k|                ps_dec->i4_submb_ofst = ((u1_num_mbs - u1_mb_idx) << 4);
 3114|  30.5k|            }
 3115|       |
 3116|  1.80M|            ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
 3117|  1.80M|            ps_parse_mb_data = ps_dec->ps_parse_mb_data;
 3118|  1.80M|            ps_dec->ps_part = ps_dec->ps_parse_part_params;
 3119|       |
 3120|  1.80M|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (3120:16): [True: 931k, False: 872k]
  ------------------
 3121|   931k|            {
 3122|   931k|                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next, u1_tfr_n_mb,
 3123|   931k|                                     u1_end_of_row);
 3124|   931k|                ps_dec->ps_nmb_info += u1_num_mbs;
 3125|   931k|                ps_svc_lyr_dec->ps_svc_nmb_info += u1_num_mbs;
 3126|   931k|            }
 3127|   872k|            else
 3128|   872k|            {
 3129|   872k|                if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|   872k|#define TARGET_LAYER 2
  ------------------
  |  Branch (3129:20): [True: 778k, False: 93.9k]
  ------------------
 3130|   778k|                {
 3131|   778k|                    ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, u1_num_mbs_next,
 3132|   778k|                                                u1_tfr_n_mb, u1_end_of_row);
 3133|   778k|                }
 3134|  93.9k|                else
 3135|  93.9k|                {
 3136|  93.9k|                    isvcd_decode_recon_tfr_nmb_base_lyr(ps_svc_lyr_dec, u1_mb_idx, u1_num_mbs,
 3137|  93.9k|                                                        u1_num_mbs_next, u1_tfr_n_mb,
 3138|  93.9k|                                                        u1_end_of_row);
 3139|  93.9k|                }
 3140|   872k|            }
 3141|  1.80M|            ps_dec->u4_total_mbs_coded += u1_num_mbs;
 3142|  1.80M|            if(u1_tfr_n_mb) u1_num_mbs = 0;
  ------------------
  |  Branch (3142:16): [True: 1.80M, False: 0]
  ------------------
 3143|  1.80M|            u1_mb_idx = u1_num_mbs;
 3144|  1.80M|            ps_dec->u4_mb_idx = u1_num_mbs;
 3145|  1.80M|        }
 3146|  10.4M|    }
 3147|       |
 3148|  94.3k|    ps_dec->u4_num_mbs_cur_nmb = 0;
 3149|  94.3k|    ps_dec->ps_cur_slice->u4_mbs_in_slice =
 3150|  94.3k|        i2_cur_mb_addr - ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice;
 3151|       |
 3152|  94.3k|    H264_DEC_DEBUG_PRINT("Mbs in slice: %d\n", ps_dec->ps_cur_slice->u4_mbs_in_slice);
  ------------------
  |  |   39|  94.3k|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 3153|       |
 3154|       |    /* incremented here only if first slice is inserted */
 3155|  94.3k|    if(ps_dec->u4_first_slice_in_pic != 0)
  ------------------
  |  Branch (3155:8): [True: 0, False: 94.3k]
  ------------------
 3156|      0|    {
 3157|      0|        ps_dec->ps_parse_cur_slice++;
 3158|      0|        ps_dec->u2_cur_slice_num++;
 3159|      0|    }
 3160|       |
 3161|  94.3k|    ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
 3162|  94.3k|    ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
 3163|       |
 3164|  94.3k|    if(ps_dec->u4_total_mbs_coded >= ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
  ------------------
  |  Branch (3164:8): [True: 94.1k, False: 202]
  ------------------
 3165|  94.1k|    {
 3166|  94.1k|        ps_dec->u1_pic_decode_done = 1;
 3167|  94.1k|    }
 3168|       |
 3169|  94.3k|    return 0;
 3170|  94.5k|}
isvcd_parse_interlayer_resamp_func_init:
 3184|   227k|{
 3185|   227k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 3186|   227k|    dec_slice_params_t *ps_slice = ps_dec->ps_cur_slice;
 3187|   227k|    WORD32 ret = OK;
  ------------------
  |  |  114|   227k|#define OK        0
  ------------------
 3188|   227k|    if(ps_svc_lyr_dec->u1_res_init_done == 1)
  ------------------
  |  Branch (3188:8): [True: 94.3k, False: 133k]
  ------------------
 3189|  94.3k|        return ret;
 3190|       |
 3191|   133k|    if(TARGET_LAYER != ps_svc_lyr_dec->u1_layer_identifier)
  ------------------
  |  |  110|   133k|#define TARGET_LAYER 2
  ------------------
  |  Branch (3191:8): [True: 44.4k, False: 88.9k]
  ------------------
 3192|  44.4k|    {
 3193|  44.4k|        ps_slice->u1_disable_dblk_filter_idc = ps_svc_lyr_dec->u1_inter_lyr_disable_dblk_filter_idc;
 3194|  44.4k|        ps_slice->i1_slice_alpha_c0_offset = ps_svc_lyr_dec->i1_inter_lyr_slice_alpha_c0_offset;
 3195|  44.4k|        ps_slice->i1_slice_beta_offset = ps_svc_lyr_dec->i1_inter_lyr_slice_beta_offset;
 3196|  44.4k|    }
 3197|       |
 3198|   133k|    if(0 == u2_first_mb_in_slice)
  ------------------
  |  Branch (3198:8): [True: 133k, False: 0]
  ------------------
 3199|   133k|    {
 3200|   133k|        ret = isvcd_populate_res_prms(ps_svc_lyr_dec);
 3201|   133k|        if(ret != OK) return NOT_OK;
  ------------------
  |  |  114|   133k|#define OK        0
  ------------------
                      if(ret != OK) return NOT_OK;
  ------------------
  |  |  116|    508|#define NOT_OK    -1
  ------------------
  |  Branch (3201:12): [True: 508, False: 132k]
  ------------------
 3202|   132k|        isvcd_crop_wnd_flag_res_int(ps_svc_lyr_dec);
 3203|   132k|        ret = isvcd_comp_mode_mv_res_init(ps_svc_lyr_dec);
 3204|   132k|        if(ret != OK) return NOT_OK;
  ------------------
  |  |  114|   132k|#define OK        0
  ------------------
                      if(ret != OK) return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (3204:12): [True: 0, False: 132k]
  ------------------
 3205|   132k|        ret = isvcd_ii_pred_res_init(ps_svc_lyr_dec);
 3206|   132k|        if(ret != OK) return NOT_OK;
  ------------------
  |  |  114|   132k|#define OK        0
  ------------------
                      if(ret != OK) return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (3206:12): [True: 0, False: 132k]
  ------------------
 3207|   132k|        ret = isvcd_intra_resamp_res_init(ps_svc_lyr_dec);
 3208|   132k|        if(ret != OK) return NOT_OK;
  ------------------
  |  |  114|   132k|#define OK        0
  ------------------
                      if(ret != OK) return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (3208:12): [True: 0, False: 132k]
  ------------------
 3209|   132k|        ret = isvcd_residual_samp_res_init(ps_svc_lyr_dec);
 3210|   132k|        if(ret != OK) return NOT_OK;
  ------------------
  |  |  114|   132k|#define OK        0
  ------------------
                      if(ret != OK) return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (3210:12): [True: 0, False: 132k]
  ------------------
 3211|       |
 3212|   132k|        ps_svc_lyr_dec->u1_res_init_done = 1;
 3213|   132k|    }
 3214|       |
 3215|   132k|    return ret;
 3216|   133k|}
isvcd_parse_pslice:
 3231|  63.4k|{
 3232|  63.4k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 3233|  63.4k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 3234|  63.4k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
 3235|  63.4k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 3236|  63.4k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 3237|  63.4k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 3238|  63.4k|    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 3239|  63.4k|    UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
 3240|  63.4k|    UWORD64 u8_ref_idx_l0;
 3241|  63.4k|    UWORD32 u4_temp;
 3242|  63.4k|    WORD32 i_temp;
 3243|  63.4k|    WORD32 ret;
 3244|  63.4k|    WORD64 i8_temp;
 3245|       |
 3246|       |    /*--------------------------------------------------------------------*/
 3247|       |    /* Read remaining contents of the slice header                        */
 3248|       |    /*--------------------------------------------------------------------*/
 3249|  63.4k|    {
 3250|  63.4k|        WORD8 *pi1_buf;
 3251|  63.4k|        WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv;
 3252|  63.4k|        WORD32 *pi4_mv = (WORD32 *) pi2_mv;
 3253|  63.4k|        WORD16 *pi16_refFrame;
 3254|       |
 3255|  63.4k|        pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame;
 3256|  63.4k|        pi16_refFrame = (WORD16 *) pi1_buf;
 3257|  63.4k|        *pi4_mv = 0;
 3258|  63.4k|        *(pi4_mv + 1) = 0;
 3259|  63.4k|        *pi16_refFrame = OUT_OF_RANGE_REF;
  ------------------
  |  |   45|  63.4k|#define OUT_OF_RANGE_REF  -1
  ------------------
 3260|  63.4k|        ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8) -1;
 3261|  63.4k|        ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8) -1;
 3262|  63.4k|    }
 3263|       |
 3264|  63.4k|    ps_cur_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264(ps_bitstrm);
 3265|  63.4k|    COPYTHECONTEXT("SH: num_ref_idx_override_flag",
 3266|  63.4k|                   ps_cur_slice->u1_num_ref_idx_active_override_flag);
 3267|       |
 3268|  63.4k|    u8_ref_idx_l0 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0];
 3269|  63.4k|    if(ps_cur_slice->u1_num_ref_idx_active_override_flag)
  ------------------
  |  Branch (3269:8): [True: 37.8k, False: 25.6k]
  ------------------
 3270|  37.8k|    {
 3271|  37.8k|        u8_ref_idx_l0 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + (UWORD64) 1;
 3272|  37.8k|    }
 3273|       |
 3274|  63.4k|    {
 3275|  63.4k|        UWORD8 u1_max_ref_idx = H264_MAX_REF_PICS << u1_field_pic_flag;
  ------------------
  |  |  534|  63.4k|#define H264_MAX_REF_PICS         16
  ------------------
 3276|  63.4k|        if(u8_ref_idx_l0 >= u1_max_ref_idx)
  ------------------
  |  Branch (3276:12): [True: 736, False: 62.7k]
  ------------------
 3277|    736|        {
 3278|    736|            return ERROR_NUM_REF;
 3279|    736|        }
 3280|  62.7k|        ps_cur_slice->u1_num_ref_idx_lx_active[0] = (UWORD8) u8_ref_idx_l0;
 3281|  62.7k|        COPYTHECONTEXT("SH: num_ref_idx_l0_active_minus1",
 3282|  62.7k|                       ps_cur_slice->u1_num_ref_idx_lx_active[0] - 1);
 3283|  62.7k|    }
 3284|       |
 3285|      0|    {
 3286|  62.7k|        UWORD8 uc_refIdxReFlagL0 = ih264d_get_bit_h264(ps_bitstrm);
 3287|  62.7k|        COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l0", uc_refIdxReFlagL0);
 3288|       |
 3289|  62.7k|        ih264d_init_ref_idx_lx_p(ps_dec);
 3290|       |        /* Store the value for future slices in the same picture */
 3291|  62.7k|        ps_dec->u1_num_ref_idx_lx_active_prev = ps_cur_slice->u1_num_ref_idx_lx_active[0];
 3292|       |
 3293|       |        /* Modified temporarily */
 3294|  62.7k|        if(uc_refIdxReFlagL0)
  ------------------
  |  Branch (3294:12): [True: 25.7k, False: 36.9k]
  ------------------
 3295|  25.7k|        {
 3296|  25.7k|            WORD8 ret;
 3297|  25.7k|            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
 3298|  25.7k|            ret = ih264d_ref_idx_reordering(ps_dec, 0);
 3299|  25.7k|            if(ret == -1) return ERROR_REFIDX_ORDER_T;
  ------------------
  |  Branch (3299:16): [True: 0, False: 25.7k]
  ------------------
 3300|  25.7k|            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
 3301|  25.7k|        }
 3302|  36.9k|        else
 3303|  36.9k|            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_init_dpb[0];
 3304|  62.7k|    }
 3305|       |    /* Create refIdx to POC mapping */
 3306|  62.7k|    {
 3307|  62.7k|        void **pui_map_ref_idx_to_poc_lx0, **pui_map_ref_idx_to_poc_lx1;
 3308|  62.7k|        WORD8 idx;
 3309|  62.7k|        struct pic_buffer_t *ps_pic;
 3310|       |
 3311|  62.7k|        pui_map_ref_idx_to_poc_lx0 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0;
  ------------------
  |  |   89|  62.7k|#define FRM_LIST_L0             0                                               //0
  ------------------
 3312|  62.7k|        pui_map_ref_idx_to_poc_lx0[0] = 0;
 3313|  62.7k|        pui_map_ref_idx_to_poc_lx0++;
 3314|   193k|        for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (3314:22): [True: 130k, False: 62.7k]
  ------------------
 3315|   130k|        {
 3316|   130k|            ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
 3317|   130k|            pui_map_ref_idx_to_poc_lx0[idx] = (ps_pic->pu1_buf1);
 3318|   130k|        }
 3319|       |
 3320|       |        /* Bug Fix Deblocking */
 3321|  62.7k|        pui_map_ref_idx_to_poc_lx1 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1;
  ------------------
  |  |   90|  62.7k|#define FRM_LIST_L1             1 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L0 + POC_LIST_L0_TO_L1_DIFF        //0+33                  //(1 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|  62.7k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|  62.7k|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 3322|  62.7k|        pui_map_ref_idx_to_poc_lx1[0] = 0;
 3323|       |
 3324|  62.7k|        if(u1_mbaff)
  ------------------
  |  Branch (3324:12): [True: 0, False: 62.7k]
  ------------------
 3325|      0|        {
 3326|      0|            void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b;
 3327|      0|            void **ppv_map_ref_idx_to_poc_lx_t1, **ppv_map_ref_idx_to_poc_lx_b1;
 3328|      0|            ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L0;
  ------------------
  |  |   91|      0|#define TOP_LIST_FLD_L0         2 * POC_LIST_L0_TO_L1_DIFF//FRM_LIST_L1 + POC_LIST_L0_TO_L1_DIFF        //0+33+33                   //(2 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 3329|      0|            ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L0;
  ------------------
  |  |   93|      0|#define BOT_LIST_FLD_L0         4 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L1 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 3330|      0|            ppv_map_ref_idx_to_poc_lx_t[0] = 0;
 3331|      0|            ppv_map_ref_idx_to_poc_lx_t++;
 3332|      0|            ppv_map_ref_idx_to_poc_lx_b[0] = 0;
 3333|      0|            ppv_map_ref_idx_to_poc_lx_b++;
 3334|      0|            idx = 0;
 3335|      0|            for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
  ------------------
  |  Branch (3335:26): [True: 0, False: 0]
  ------------------
 3336|      0|            {
 3337|      0|                ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
 3338|      0|                ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1);
 3339|      0|                ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1);
 3340|      0|                ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1;
 3341|      0|                ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1;
 3342|      0|                ppv_map_ref_idx_to_poc_lx_t += 2;
 3343|      0|                ppv_map_ref_idx_to_poc_lx_b += 2;
 3344|      0|            }
 3345|      0|            ppv_map_ref_idx_to_poc_lx_t1 = ps_dec->ppv_map_ref_idx_to_poc + TOP_LIST_FLD_L1;
  ------------------
  |  |   92|      0|#define TOP_LIST_FLD_L1         3 * POC_LIST_L0_TO_L1_DIFF//TOP_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17                //(3 * POC_LIST_L0_TO_L1_DIFF)
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 3346|      0|            ppv_map_ref_idx_to_poc_lx_t1[0] = 0;
 3347|      0|            ppv_map_ref_idx_to_poc_lx_b1 = ps_dec->ppv_map_ref_idx_to_poc + BOT_LIST_FLD_L1;
  ------------------
  |  |   94|      0|#define BOT_LIST_FLD_L1         5 * POC_LIST_L0_TO_L1_DIFF//BOT_LIST_FLD_L0 + POC_LIST_L0_TO_L1_DIFF_1  //0+33+33+17+17+17
  |  |  ------------------
  |  |  |  |   86|      0|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |  600|      0|#define MAX_FRAMES              16
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
 3348|      0|            ppv_map_ref_idx_to_poc_lx_b1[0] = 0;
 3349|      0|        }
 3350|  62.7k|    }
 3351|  62.7k|    if(ps_pps->u1_wted_pred_flag)
  ------------------
  |  Branch (3351:8): [True: 16.0k, False: 46.6k]
  ------------------
 3352|  16.0k|    {
 3353|  16.0k|        ret = ih264d_parse_pred_weight_table(ps_cur_slice, ps_bitstrm);
 3354|  16.0k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  16.0k|#define OK        0
  ------------------
  |  Branch (3354:12): [True: 4.68k, False: 11.4k]
  ------------------
 3355|       |
 3356|  11.4k|        ih264d_form_pred_weight_matrix(ps_dec);
 3357|  11.4k|        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
 3358|  11.4k|    }
 3359|  46.6k|    else
 3360|  46.6k|    {
 3361|  46.6k|        ps_dec->ps_cur_slice->u2_log2Y_crwd = 0;
 3362|  46.6k|        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
 3363|  46.6k|    }
 3364|       |
 3365|  58.0k|    ps_dec->ps_parse_cur_slice->u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd;
 3366|       |
 3367|  58.0k|    if(u1_mbaff && (u1_field_pic_flag == 0))
  ------------------
  |  Branch (3367:8): [True: 0, False: 58.0k]
  |  Branch (3367:20): [True: 0, False: 0]
  ------------------
 3368|      0|    {
 3369|      0|        ih264d_convert_frm_mbaff_list(ps_dec);
 3370|      0|    }
 3371|       |
 3372|       |    /* G050 */
 3373|  58.0k|    if(ps_cur_slice->u1_nal_ref_idc != 0)
  ------------------
  |  Branch (3373:8): [True: 56.0k, False: 2.06k]
  ------------------
 3374|  56.0k|    {
 3375|  56.0k|        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (3375:12): [True: 55.7k, False: 259]
  ------------------
 3376|  55.7k|        {
 3377|  55.7k|            dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 3378|  55.7k|            dec_seq_params_t *ps_sps_tmp = ps_pps->ps_sps;
 3379|  55.7k|            UWORD8 u1_nal_unit_type_tmp = ps_dec->u1_nal_unit_type;
 3380|       |
 3381|  55.7k|            ps_pps->ps_sps = ps_dec->ps_cur_sps;
 3382|  55.7k|            if(ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag)
  ------------------
  |  Branch (3382:16): [True: 54.1k, False: 1.58k]
  ------------------
 3383|  54.1k|                ps_dec->u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  54.1k|#define IDR_SLICE_NAL                   5
  ------------------
 3384|       |
 3385|  55.7k|            i_temp = ih264d_read_mmco_commands(ps_dec);
 3386|       |
 3387|  55.7k|            ps_pps->ps_sps = ps_sps_tmp;
 3388|  55.7k|            ps_dec->u1_nal_unit_type = u1_nal_unit_type_tmp;
 3389|       |
 3390|  55.7k|            if(i_temp < 0)
  ------------------
  |  Branch (3390:16): [True: 119, False: 55.6k]
  ------------------
 3391|    119|            {
 3392|    119|                return ERROR_DBP_MANAGER_T;
 3393|    119|            }
 3394|  55.6k|            ps_dec->u4_bitoffset = i_temp;
 3395|  55.6k|        }
 3396|    259|        else
 3397|    259|            ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
 3398|  56.0k|    }
 3399|       |    /* G050 */
 3400|       |
 3401|  57.9k|    if(ps_pps->u1_entropy_coding_mode == CABAC)
  ------------------
  |  |  339|  57.9k|#define CABAC  1
  ------------------
  |  Branch (3401:8): [True: 8.63k, False: 49.3k]
  ------------------
 3402|  8.63k|    {
 3403|  8.63k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3404|       |
 3405|  8.63k|        if(u4_temp > MAX_CABAC_INIT_IDC)
  ------------------
  |  |  537|  8.63k|#define MAX_CABAC_INIT_IDC        2
  ------------------
  |  Branch (3405:12): [True: 820, False: 7.81k]
  ------------------
 3406|    820|        {
 3407|    820|            return ERROR_INV_SLICE_HDR_T;
 3408|    820|        }
 3409|  7.81k|        ps_cur_slice->u1_cabac_init_idc = u4_temp;
 3410|  7.81k|        COPYTHECONTEXT("SH: cabac_init_idc", ps_cur_slice->u1_cabac_init_idc);
 3411|  7.81k|    }
 3412|       |
 3413|       |    /* Read slice_qp_delta */
 3414|  57.1k|    i8_temp = (WORD64) ps_pps->u1_pic_init_qp + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3415|  57.1k|    if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  629|  57.1k|#define MIN_H264_QP 0
  ------------------
                  if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP))
  ------------------
  |  |  634|  56.5k|#define MAX_H264_QP 51
  ------------------
  |  Branch (3415:8): [True: 582, False: 56.5k]
  |  Branch (3415:35): [True: 2.41k, False: 54.1k]
  ------------------
 3416|  2.99k|    {
 3417|  2.99k|        return ERROR_INV_RANGE_QP_T;
 3418|  2.99k|    }
 3419|  54.1k|    ps_cur_slice->u1_slice_qp = (UWORD8) i8_temp;
 3420|  54.1k|    COPYTHECONTEXT("SH: slice_qp_delta",
 3421|  54.1k|                   (WORD8) (ps_cur_slice->u1_slice_qp - ps_pps->u1_pic_init_qp));
 3422|       |
 3423|  54.1k|    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
  ------------------
  |  Branch (3423:8): [True: 30.8k, False: 23.2k]
  ------------------
 3424|  30.8k|    {
 3425|  30.8k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 3426|  30.8k|        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
  ------------------
  |  |  547|  30.8k|#define SLICE_BOUNDARY_DBLK_DISABLED  2
  ------------------
  |  Branch (3426:12): [True: 1.14k, False: 29.7k]
  ------------------
 3427|  1.14k|        {
 3428|  1.14k|            return ERROR_INV_SLICE_HDR_T;
 3429|  1.14k|        }
 3430|       |
 3431|  29.7k|        COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
 3432|  29.7k|        ps_cur_slice->u1_disable_dblk_filter_idc = u4_temp;
 3433|  29.7k|        if(u4_temp != 1)
  ------------------
  |  Branch (3433:12): [True: 25.1k, False: 4.55k]
  ------------------
 3434|  25.1k|        {
 3435|  25.1k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 3436|  25.1k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  25.1k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  24.9k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (3436:16): [True: 201, False: 24.9k]
  |  Branch (3436:47): [True: 319, False: 24.6k]
  ------------------
 3437|    520|            {
 3438|    520|                return ERROR_INV_SLICE_HDR_T;
 3439|    520|            }
 3440|  24.6k|            ps_cur_slice->i1_slice_alpha_c0_offset = i_temp;
 3441|  24.6k|            COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
 3442|  24.6k|                           ps_cur_slice->i1_slice_alpha_c0_offset >> 1);
 3443|       |
 3444|  24.6k|            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) << 1;
 3445|  24.6k|            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  550|  24.6k|#define MIN_DBLK_FIL_OFF              -12
  ------------------
                          if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
  ------------------
  |  |  551|  24.3k|#define MAX_DBLK_FIL_OFF              12
  ------------------
  |  Branch (3445:16): [True: 313, False: 24.3k]
  |  Branch (3445:47): [True: 173, False: 24.1k]
  ------------------
 3446|    486|            {
 3447|    486|                return ERROR_INV_SLICE_HDR_T;
 3448|    486|            }
 3449|  24.1k|            ps_cur_slice->i1_slice_beta_offset = i_temp;
 3450|  24.1k|            COPYTHECONTEXT("SH: slice_beta_offset_div2", ps_cur_slice->i1_slice_beta_offset >> 1);
 3451|  24.1k|        }
 3452|  4.55k|        else
 3453|  4.55k|        {
 3454|  4.55k|            ps_cur_slice->i1_slice_alpha_c0_offset = 0;
 3455|  4.55k|            ps_cur_slice->i1_slice_beta_offset = 0;
 3456|  4.55k|        }
 3457|  29.7k|    }
 3458|  23.2k|    else
 3459|  23.2k|    {
 3460|  23.2k|        ps_cur_slice->u1_disable_dblk_filter_idc = 0;
 3461|  23.2k|        ps_cur_slice->i1_slice_alpha_c0_offset = 0;
 3462|  23.2k|        ps_cur_slice->i1_slice_beta_offset = 0;
 3463|  23.2k|    }
 3464|       |
 3465|  51.9k|    ps_dec->u1_slice_header_done = 2;
 3466|  51.9k|    if(ps_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (3466:8): [True: 7.64k, False: 44.3k]
  ------------------
 3467|  7.64k|    {
 3468|  7.64k|        SWITCHOFFTRACE;
 3469|  7.64k|        SWITCHONTRACECABAC;
 3470|  7.64k|        ps_svc_lyr_dec->pf_parse_svc_inter_slice = isvcd_parse_inter_slice_data_cabac;
 3471|  7.64k|        ps_dec->pf_parse_inter_mb = ih264d_parse_pmb_cabac;
 3472|  7.64k|        ih264d_init_cabac_contexts(P_SLICE, ps_dec);
  ------------------
  |  |  368|  7.64k|#define P_SLICE  0
  ------------------
 3473|       |
 3474|  7.64k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (3474:12): [True: 0, False: 7.64k]
  ------------------
 3475|      0|            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
 3476|  7.64k|        else
 3477|  7.64k|            ps_dec->pf_get_mb_info = isvcd_get_mb_info_cabac_nonmbaff;
 3478|  7.64k|    }
 3479|  44.3k|    else
 3480|  44.3k|    {
 3481|  44.3k|        SWITCHONTRACE;
 3482|  44.3k|        SWITCHOFFTRACECABAC;
 3483|  44.3k|        ps_svc_lyr_dec->pf_parse_svc_inter_slice = isvcd_parse_inter_slice_data_cavlc;
 3484|  44.3k|        ps_dec->pf_parse_inter_mb = ih264d_parse_pmb_cavlc;
 3485|  44.3k|        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (3485:12): [True: 0, False: 44.3k]
  ------------------
 3486|      0|        {
 3487|      0|            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
 3488|      0|        }
 3489|  44.3k|        else
 3490|  44.3k|            ps_dec->pf_get_mb_info = isvcd_get_mb_info_cavlc_nonmbaff;
 3491|  44.3k|    }
 3492|       |
 3493|  51.9k|    ps_dec->u1_B = 0;
 3494|  51.9k|    ps_dec->pf_mvpred_ref_tfr_nby2mb = ih264d_mv_pred_ref_tfr_nby2_pmb;
 3495|  51.9k|    ret = ps_svc_lyr_dec->pf_parse_svc_inter_slice(ps_svc_lyr_dec, ps_cur_slice,
 3496|  51.9k|                                                   u2_first_mb_in_slice);
 3497|  51.9k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  51.9k|#define OK        0
  ------------------
  |  Branch (3497:8): [True: 13.7k, False: 38.1k]
  ------------------
 3498|       |
 3499|  38.1k|    return OK;
  ------------------
  |  |  114|  38.1k|#define OK        0
  ------------------
 3500|  51.9k|}

isvcd_set_default_seq_svc_ext:
   97|  19.7k|{
   98|  19.7k|    ps_seq_svc_ext->u1_inter_layer_deblocking_filter_control_present_flag = 0;
   99|  19.7k|    ps_seq_svc_ext->u1_extended_spatial_scalability_idc = 0;
  100|  19.7k|    ps_seq_svc_ext->u1_chroma_phase_x_plus1_flag = 1;
  101|  19.7k|    ps_seq_svc_ext->u1_chroma_phase_y_plus1 = 1;
  102|  19.7k|    ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_x_plus1_flag =
  103|  19.7k|        ps_seq_svc_ext->u1_chroma_phase_x_plus1_flag;
  104|  19.7k|    ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_y_plus1 = ps_seq_svc_ext->u1_chroma_phase_y_plus1;
  105|  19.7k|    ps_seq_svc_ext->i4_seq_scaled_ref_layer_left_offset = 0;
  106|  19.7k|    ps_seq_svc_ext->i4_seq_scaled_ref_layer_top_offset = 0;
  107|  19.7k|    ps_seq_svc_ext->i4_seq_scaled_ref_layer_right_offset = 0;
  108|  19.7k|    ps_seq_svc_ext->i4_seq_scaled_ref_layer_bottom_offset = 0;
  109|  19.7k|    ps_seq_svc_ext->u1_seq_tcoeff_level_prediction_flag =
  110|  19.7k|        ps_seq_svc_ext->u1_adaptive_tcoeff_level_prediction_flag = 0;
  111|  19.7k|    ps_seq_svc_ext->u1_slice_header_restriction_flag = 0;
  112|  19.7k|    ps_seq_svc_ext->u1_svc_vui_parameters_present_flag = 0;
  113|  19.7k|}
isvcd_parse_subset_sps:
  127|  24.0k|{
  128|  24.0k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  129|  24.0k|    UWORD8 i;
  130|  24.0k|    dec_seq_params_t *ps_seq = NULL;
  131|  24.0k|    dec_svc_seq_params_t *ps_subset_seq = NULL;
  132|  24.0k|    dec_subset_seq_params_t *ps_seq_svc_ext;
  133|  24.0k|    UWORD8 u1_profile_idc, u1_level_idc, u1_seq_parameter_set_id, u1_mb_aff_flag = 0;
  134|  24.0k|    UWORD16 i2_max_frm_num;
  135|  24.0k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  136|  24.0k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  137|  24.0k|    UWORD8 u1_frm, uc_constraint_set0_flag, uc_constraint_set1_flag, uc_constraint_set2_flag;
  138|  24.0k|    WORD32 i4_cropped_ht, i4_cropped_wd;
  139|  24.0k|    UWORD32 u4_temp;
  140|  24.0k|    UWORD64 u8_temp;
  141|  24.0k|    UWORD32 u4_pic_height_in_map_units, u4_pic_width_in_mbs;
  142|  24.0k|    UWORD32 u2_pic_wd = 0;
  143|  24.0k|    UWORD32 u2_pic_ht = 0;
  144|  24.0k|    UWORD32 u2_frm_wd_y = 0;
  145|  24.0k|    UWORD32 u2_frm_ht_y = 0;
  146|  24.0k|    UWORD32 u2_frm_wd_uv = 0;
  147|  24.0k|    UWORD32 u2_frm_ht_uv = 0;
  148|  24.0k|    UWORD32 u2_crop_offset_y = 0;
  149|  24.0k|    UWORD32 u2_crop_offset_uv = 0;
  150|  24.0k|    WORD32 ret;
  151|       |    /* High profile related syntax element */
  152|  24.0k|    WORD32 i4_i;
  153|       |    /* G050 */
  154|  24.0k|    UWORD8 u1_frame_cropping_flag,
  155|  24.0k|        u1_frame_cropping_rect_left_ofst = 0, u1_frame_cropping_rect_right_ofst = 0,
  156|  24.0k|        u1_frame_cropping_rect_top_ofst = 0, u1_frame_cropping_rect_bottom_ofst = 0;
  157|       |    /* G050 */
  158|       |    /*--------------------------------------------------------------------*/
  159|       |    /* Decode seq_parameter_set_id and profile and level values           */
  160|       |    /*--------------------------------------------------------------------*/
  161|  24.0k|    SWITCHONTRACE;
  162|  24.0k|    u1_profile_idc = ih264d_get_bits_h264(ps_bitstrm, 8);
  163|  24.0k|    COPYTHECONTEXT("SPS: profile_idc", u1_profile_idc);
  164|       |
  165|       |    /* G050 */
  166|  24.0k|    uc_constraint_set0_flag = ih264d_get_bit_h264(ps_bitstrm);
  167|  24.0k|    uc_constraint_set1_flag = ih264d_get_bit_h264(ps_bitstrm);
  168|  24.0k|    uc_constraint_set2_flag = ih264d_get_bit_h264(ps_bitstrm);
  169|  24.0k|    UNUSED(uc_constraint_set1_flag);
  ------------------
  |  |   45|  24.0k|#define UNUSED(x) ((void)(x))
  ------------------
  170|  24.0k|    UNUSED(uc_constraint_set2_flag);
  ------------------
  |  |   45|  24.0k|#define UNUSED(x) ((void)(x))
  ------------------
  171|       |
  172|       |    /*****************************************************/
  173|       |    /* Read 5 bits for uc_constraint_set3_flag (1 bit)   */
  174|       |    /* and reserved_zero_4bits (4 bits) - Sushant        */
  175|       |    /*****************************************************/
  176|  24.0k|    ih264d_get_bits_h264(ps_bitstrm, 5);
  177|       |    /* G050 */
  178|  24.0k|    u1_level_idc = (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 8);
  179|  24.0k|    COPYTHECONTEXT("SPS: u4_level_idc", u1_level_idc);
  180|       |
  181|  24.0k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  182|  24.0k|    if(u4_temp & MASK_ERR_SEQ_SET_ID) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  |  526|  24.0k|#define MASK_ERR_SEQ_SET_ID   (0xFFFFFFE0)
  ------------------
  |  Branch (182:8): [True: 512, False: 23.5k]
  ------------------
  183|  23.5k|    u1_seq_parameter_set_id = u4_temp;
  184|  23.5k|    COPYTHECONTEXT("SPS: seq_parameter_set_id", u1_seq_parameter_set_id);
  185|       |
  186|  23.5k|    if(u1_seq_parameter_set_id >= MAX_NUM_SEQ_PARAMS) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  |  521|  23.5k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  |  Branch (186:8): [True: 0, False: 23.5k]
  ------------------
  187|       |
  188|       |    /*--------------------------------------------------------------------*/
  189|       |    /* Find an seq param entry in seqparam array of decStruct             */
  190|       |    /*--------------------------------------------------------------------*/
  191|  23.5k|    ps_subset_seq = ps_svc_lyr_dec->pv_scratch_subset_sps;
  192|  23.5k|    memset(ps_subset_seq, 0, sizeof(dec_svc_seq_params_t));
  193|  23.5k|    ps_seq = ps_dec->pv_scratch_sps_pps;
  194|  23.5k|    memset(ps_seq, 0, sizeof(dec_seq_params_t));
  195|       |
  196|  23.5k|    ps_seq->u1_profile_idc = u1_profile_idc;
  197|  23.5k|    ps_seq->u1_level_idc = u1_level_idc;
  198|  23.5k|    ps_seq->u1_seq_parameter_set_id = u1_seq_parameter_set_id;
  199|       |
  200|       |    /* subset_seq_sps_will be stored from location 32 : MAX_NUM_SEQ_PARAMS*/
  201|  23.5k|    u1_seq_parameter_set_id += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  23.5k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  202|  23.5k|    ps_subset_seq->ps_seq = &ps_dec->ps_sps[u1_seq_parameter_set_id];
  203|       |
  204|  23.5k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (204:8): [True: 11.1k, False: 12.3k]
  ------------------
  205|  11.1k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (205:8): [True: 9.74k, False: 1.43k]
  ------------------
  206|  9.74k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_profile_idc != u1_profile_idc))
  ------------------
  |  Branch (206:8): [True: 238, False: 9.50k]
  ------------------
  207|    238|    {
  208|    238|        ps_dec->u1_res_changed = 1;
  209|    238|        return IVD_RES_CHANGED;
  210|    238|    }
  211|       |
  212|  23.2k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (212:8): [True: 10.9k, False: 12.3k]
  ------------------
  213|  10.9k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (213:8): [True: 9.50k, False: 1.43k]
  ------------------
  214|  9.50k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_level_idc != u1_level_idc))
  ------------------
  |  Branch (214:8): [True: 168, False: 9.33k]
  ------------------
  215|    168|    {
  216|    168|        ps_dec->u1_res_changed = 1;
  217|    168|        return IVD_RES_CHANGED;
  218|    168|    }
  219|       |    /*******************************************************************/
  220|       |    /* Initializations for high profile - Sushant                      */
  221|       |    /*******************************************************************/
  222|  23.1k|    ps_seq->i4_chroma_format_idc = 1;
  223|  23.1k|    ps_seq->i4_bit_depth_luma_minus8 = 0;
  224|  23.1k|    ps_seq->i4_bit_depth_chroma_minus8 = 0;
  225|  23.1k|    ps_seq->i4_qpprime_y_zero_transform_bypass_flag = 0;
  226|  23.1k|    ps_seq->i4_seq_scaling_matrix_present_flag = 0;
  227|  23.1k|    if(u1_profile_idc == HIGH_PROFILE_IDC || u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC ||
  ------------------
  |  |  278|  46.2k|#define HIGH_PROFILE_IDC   100
  ------------------
                  if(u1_profile_idc == HIGH_PROFILE_IDC || u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC ||
  ------------------
  |  |   59|  45.9k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (227:8): [True: 295, False: 22.8k]
  |  Branch (227:46): [True: 2.24k, False: 20.5k]
  ------------------
  228|  20.5k|       u1_profile_idc == SCALABLE_HIGH_PROFILE_IDC)
  ------------------
  |  |   60|  20.5k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (228:8): [True: 3.24k, False: 17.3k]
  ------------------
  229|  5.78k|    {
  230|       |        /* reading chroma_format_idc   */
  231|  5.78k|        ps_seq->i4_chroma_format_idc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  232|       |
  233|       |        /* Monochrome is not supported */
  234|  5.78k|        if(ps_seq->i4_chroma_format_idc != 1)
  ------------------
  |  Branch (234:12): [True: 162, False: 5.62k]
  ------------------
  235|    162|        {
  236|    162|            return ERROR_FEATURE_UNAVAIL;
  237|    162|        }
  238|       |
  239|       |        /* reading bit_depth_luma_minus8   */
  240|  5.62k|        ps_seq->i4_bit_depth_luma_minus8 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  241|       |
  242|  5.62k|        if(ps_seq->i4_bit_depth_luma_minus8 != 0)
  ------------------
  |  Branch (242:12): [True: 99, False: 5.52k]
  ------------------
  243|     99|        {
  244|     99|            return ERROR_FEATURE_UNAVAIL;
  245|     99|        }
  246|       |
  247|       |        /* reading bit_depth_chroma_minus8   */
  248|  5.52k|        ps_seq->i4_bit_depth_chroma_minus8 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  249|       |
  250|  5.52k|        if(ps_seq->i4_bit_depth_chroma_minus8 != 0)
  ------------------
  |  Branch (250:12): [True: 84, False: 5.44k]
  ------------------
  251|     84|        {
  252|     84|            return ERROR_FEATURE_UNAVAIL;
  253|     84|        }
  254|       |
  255|       |        /* reading qpprime_y_zero_transform_bypass_flag   */
  256|  5.44k|        ps_seq->i4_qpprime_y_zero_transform_bypass_flag = (WORD32) ih264d_get_bit_h264(ps_bitstrm);
  257|       |
  258|  5.44k|        if(ps_seq->i4_qpprime_y_zero_transform_bypass_flag != 0)
  ------------------
  |  Branch (258:12): [True: 68, False: 5.37k]
  ------------------
  259|     68|        {
  260|     68|            return ERROR_INV_SPS_PPS_T;
  261|     68|        }
  262|       |
  263|       |        /* reading seq_scaling_matrix_present_flag   */
  264|  5.37k|        ps_seq->i4_seq_scaling_matrix_present_flag = (WORD32) ih264d_get_bit_h264(ps_bitstrm);
  265|       |
  266|  5.37k|        if(ps_seq->i4_seq_scaling_matrix_present_flag)
  ------------------
  |  Branch (266:12): [True: 3.74k, False: 1.63k]
  ------------------
  267|  3.74k|        {
  268|  32.6k|            for(i4_i = 0; i4_i < 8; i4_i++)
  ------------------
  |  Branch (268:27): [True: 29.1k, False: 3.56k]
  ------------------
  269|  29.1k|            {
  270|  29.1k|                ps_seq->u1_seq_scaling_list_present_flag[i4_i] = ih264d_get_bit_h264(ps_bitstrm);
  271|       |
  272|       |                /* initialize u1_use_default_scaling_matrix_flag[i4_i] to zero */
  273|       |                /* before calling scaling list                             */
  274|  29.1k|                ps_seq->u1_use_default_scaling_matrix_flag[i4_i] = 0;
  275|       |
  276|  29.1k|                if(ps_seq->u1_seq_scaling_list_present_flag[i4_i])
  ------------------
  |  Branch (276:20): [True: 13.7k, False: 15.3k]
  ------------------
  277|  13.7k|                {
  278|  13.7k|                    if(i4_i < 6)
  ------------------
  |  Branch (278:24): [True: 10.5k, False: 3.29k]
  ------------------
  279|  10.5k|                    {
  280|  10.5k|                        ret = ih264d_scaling_list(ps_seq->i2_scalinglist4x4[i4_i], 16,
  281|  10.5k|                                                  &ps_seq->u1_use_default_scaling_matrix_flag[i4_i],
  282|  10.5k|                                                  ps_bitstrm);
  283|  10.5k|                    }
  284|  3.29k|                    else
  285|  3.29k|                    {
  286|  3.29k|                        ret = ih264d_scaling_list(ps_seq->i2_scalinglist8x8[i4_i - 6], 64,
  287|  3.29k|                                                  &ps_seq->u1_use_default_scaling_matrix_flag[i4_i],
  288|  3.29k|                                                  ps_bitstrm);
  289|  3.29k|                    }
  290|  13.7k|                    if(ret != OK)
  ------------------
  |  |  114|  13.7k|#define OK        0
  ------------------
  |  Branch (290:24): [True: 177, False: 13.6k]
  ------------------
  291|    177|                    {
  292|    177|                        return ret;
  293|    177|                    }
  294|  13.7k|                }
  295|  29.1k|            }
  296|  3.74k|        }
  297|  5.37k|    }
  298|       |    /*--------------------------------------------------------------------*/
  299|       |    /* Decode MaxFrameNum                                                 */
  300|       |    /*--------------------------------------------------------------------*/
  301|  22.5k|    u8_temp = (UWORD64) 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  302|  22.5k|    if(u8_temp > MAX_BITS_IN_FRAME_NUM)
  ------------------
  |  |  531|  22.5k|#define MAX_BITS_IN_FRAME_NUM     16
  ------------------
  |  Branch (302:8): [True: 235, False: 22.2k]
  ------------------
  303|    235|    {
  304|    235|        return ERROR_INV_SPS_PPS_T;
  305|    235|    }
  306|  22.2k|    ps_seq->u1_bits_in_frm_num = (UWORD8) u8_temp;
  307|  22.2k|    COPYTHECONTEXT("SPS: log2_max_frame_num_minus4", (ps_seq->u1_bits_in_frm_num - 4));
  308|       |
  309|  22.2k|    i2_max_frm_num = (1 << (ps_seq->u1_bits_in_frm_num));
  310|  22.2k|    ps_seq->u2_u4_max_pic_num_minus1 = i2_max_frm_num - 1;
  311|       |    /*--------------------------------------------------------------------*/
  312|       |    /* Decode picture order count and related values                      */
  313|       |    /*--------------------------------------------------------------------*/
  314|  22.2k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  315|       |
  316|  22.2k|    if(u4_temp > MAX_PIC_ORDER_CNT_TYPE)
  ------------------
  |  |  529|  22.2k|#define MAX_PIC_ORDER_CNT_TYPE    2
  ------------------
  |  Branch (316:8): [True: 172, False: 22.1k]
  ------------------
  317|    172|    {
  318|    172|        return ERROR_INV_POC_TYPE_T;
  319|    172|    }
  320|  22.1k|    ps_seq->u1_pic_order_cnt_type = u4_temp;
  321|  22.1k|    COPYTHECONTEXT("SPS: pic_order_cnt_type", ps_seq->u1_pic_order_cnt_type);
  322|       |
  323|  22.1k|    ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = 1;
  324|  22.1k|    if(ps_seq->u1_pic_order_cnt_type == 0)
  ------------------
  |  Branch (324:8): [True: 17.7k, False: 4.38k]
  ------------------
  325|  17.7k|    {
  326|  17.7k|        u8_temp = (UWORD64) 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  327|  17.7k|        if(u8_temp > MAX_BITS_IN_POC_LSB)
  ------------------
  |  |  532|  17.7k|#define MAX_BITS_IN_POC_LSB       16
  ------------------
  |  Branch (327:12): [True: 94, False: 17.6k]
  ------------------
  328|     94|        {
  329|     94|            return ERROR_INV_SPS_PPS_T;
  330|     94|        }
  331|  17.6k|        ps_seq->u1_log2_max_pic_order_cnt_lsb_minus = (UWORD8) u8_temp;
  332|  17.6k|        ps_seq->i4_max_pic_order_cntLsb = (1 << u8_temp);
  333|  17.6k|        COPYTHECONTEXT("SPS: log2_max_pic_order_cnt_lsb_minus4", (u8_temp - 4));
  334|  17.6k|    }
  335|  4.38k|    else if(ps_seq->u1_pic_order_cnt_type == 1)
  ------------------
  |  Branch (335:13): [True: 2.64k, False: 1.74k]
  ------------------
  336|  2.64k|    {
  337|  2.64k|        ps_seq->u1_delta_pic_order_always_zero_flag = ih264d_get_bit_h264(ps_bitstrm);
  338|  2.64k|        COPYTHECONTEXT("SPS: delta_pic_order_always_zero_flag",
  339|  2.64k|                       ps_seq->u1_delta_pic_order_always_zero_flag);
  340|       |
  341|  2.64k|        ps_seq->i4_ofst_for_non_ref_pic = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  342|  2.64k|        COPYTHECONTEXT("SPS: offset_for_non_ref_pic", ps_seq->i4_ofst_for_non_ref_pic);
  343|       |
  344|  2.64k|        ps_seq->i4_ofst_for_top_to_bottom_field = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  345|  2.64k|        COPYTHECONTEXT("SPS: offset_for_top_to_bottom_field",
  346|  2.64k|                       ps_seq->i4_ofst_for_top_to_bottom_field);
  347|       |
  348|  2.64k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  349|  2.64k|        if(u4_temp > 255) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  Branch (349:12): [True: 118, False: 2.52k]
  ------------------
  350|  2.52k|        ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = u4_temp;
  351|  2.52k|        COPYTHECONTEXT("SPS: num_ref_frames_in_pic_order_cnt_cycle",
  352|  2.52k|                       ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle);
  353|       |
  354|  19.0k|        for(i = 0; i < ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle; i++)
  ------------------
  |  Branch (354:20): [True: 16.5k, False: 2.52k]
  ------------------
  355|  16.5k|        {
  356|  16.5k|            ps_seq->i4_ofst_for_ref_frame[i] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  357|  16.5k|            COPYTHECONTEXT("SPS: offset_for_ref_frame", ps_seq->i4_ofst_for_ref_frame[i]);
  358|  16.5k|        }
  359|  2.52k|    }
  360|       |
  361|  21.8k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  362|       |
  363|  21.8k|    if((u4_temp > H264_MAX_REF_PICS))
  ------------------
  |  |  534|  21.8k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (363:8): [True: 168, False: 21.7k]
  ------------------
  364|    168|    {
  365|    168|        return ERROR_NUM_REF;
  366|    168|    }
  367|       |
  368|       |    /* Compare with older num_ref_frames if header is already decoded once */
  369|  21.7k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (369:8): [True: 10.6k, False: 11.0k]
  ------------------
  370|  10.6k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (370:8): [True: 9.33k, False: 1.32k]
  ------------------
  371|  9.33k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_num_ref_frames != u4_temp))
  ------------------
  |  Branch (371:8): [True: 80, False: 9.25k]
  ------------------
  372|     80|    {
  373|     80|        ps_dec->u1_res_changed = 1;
  374|     80|        return IVD_RES_CHANGED;
  375|     80|    }
  376|  21.6k|    ps_seq->u1_num_ref_frames = u4_temp;
  377|  21.6k|    COPYTHECONTEXT("SPS: num_ref_frames", ps_seq->u1_num_ref_frames);
  378|       |
  379|  21.6k|    ps_seq->u1_gaps_in_frame_num_value_allowed_flag = ih264d_get_bit_h264(ps_bitstrm);
  380|  21.6k|    COPYTHECONTEXT("SPS: gaps_in_frame_num_value_allowed_flag",
  381|  21.6k|                   ps_seq->u1_gaps_in_frame_num_value_allowed_flag);
  382|       |    /* SVC_DEC_REVIEW */
  383|  21.6k|    ps_seq->u1_gaps_in_frame_num_value_allowed_flag = 0;
  384|       |
  385|       |    /*--------------------------------------------------------------------*/
  386|       |    /* Decode FrameWidth and FrameHeight and related values               */
  387|       |    /*--------------------------------------------------------------------*/
  388|  21.6k|    u8_temp = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  389|       |    /* Check  for unsupported resolutions*/
  390|  21.6k|    if(u8_temp > (H264_MAX_FRAME_WIDTH >> 4))
  ------------------
  |  |   39|  21.6k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (390:8): [True: 132, False: 21.5k]
  ------------------
  391|    132|    {
  392|    132|        return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
  393|    132|    }
  394|  21.5k|    u4_pic_width_in_mbs = (UWORD32) u8_temp;
  395|  21.5k|    COPYTHECONTEXT("SPS: pic_width_in_mbs_minus1", u4_pic_width_in_mbs - 1);
  396|       |
  397|  21.5k|    u8_temp = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  398|  21.5k|    if(u8_temp > (H264_MAX_FRAME_HEIGHT >> 4))
  ------------------
  |  |   40|  21.5k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (398:8): [True: 202, False: 21.3k]
  ------------------
  399|    202|    {
  400|    202|        return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
  401|    202|    }
  402|  21.3k|    u4_pic_height_in_map_units = (UWORD32) u8_temp;
  403|       |
  404|  21.3k|    ps_seq->u2_frm_wd_in_mbs = u4_pic_width_in_mbs;
  405|  21.3k|    ps_seq->u2_frm_ht_in_mbs = u4_pic_height_in_map_units;
  406|       |
  407|  21.3k|    u2_pic_wd = (u4_pic_width_in_mbs << 4);
  408|  21.3k|    u2_pic_ht = (u4_pic_height_in_map_units << 4);
  409|  21.3k|    if(ps_svc_lyr_dec->pic_width < u2_pic_wd)
  ------------------
  |  Branch (409:8): [True: 7.90k, False: 13.4k]
  ------------------
  410|  7.90k|    {
  411|  7.90k|        ps_svc_lyr_dec->pic_width = u2_pic_wd;
  412|  7.90k|    }
  413|  21.3k|    if(ps_svc_lyr_dec->pic_height < u2_pic_ht)
  ------------------
  |  Branch (413:8): [True: 7.94k, False: 13.3k]
  ------------------
  414|  7.94k|    {
  415|  7.94k|        ps_svc_lyr_dec->pic_height = u2_pic_ht;
  416|  7.94k|    }
  417|       |
  418|       |    /*--------------------------------------------------------------------*/
  419|       |    /* Get the value of MaxMbAddress and Number of bits needed for it     */
  420|       |    /*--------------------------------------------------------------------*/
  421|  21.3k|    ps_seq->u4_max_mb_addr = (ps_seq->u2_frm_wd_in_mbs * ps_seq->u2_frm_ht_in_mbs) - 1;
  422|       |
  423|  21.3k|    ps_seq->u4_total_num_of_mbs = ps_seq->u4_max_mb_addr + 1;
  424|       |
  425|  21.3k|    ps_seq->u1_level_idc = ih264d_correct_level_idc(u1_level_idc, ps_seq->u4_total_num_of_mbs);
  426|       |
  427|  21.3k|    u1_frm = ih264d_get_bit_h264(ps_bitstrm);
  428|       |
  429|  21.3k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (429:8): [True: 10.4k, False: 10.8k]
  ------------------
  430|  10.4k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (430:8): [True: 9.25k, False: 1.22k]
  ------------------
  431|  9.25k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_frame_mbs_only_flag != u1_frm))
  ------------------
  |  Branch (431:8): [True: 107, False: 9.14k]
  ------------------
  432|    107|    {
  433|    107|        ps_dec->u1_res_changed = 1;
  434|    107|        return IVD_RES_CHANGED;
  435|    107|    }
  436|  21.2k|    ps_seq->u1_frame_mbs_only_flag = u1_frm;
  437|       |
  438|  21.2k|    COPYTHECONTEXT("SPS: frame_mbs_only_flag", u1_frm);
  439|       |
  440|  21.2k|    if(!u1_frm) u1_mb_aff_flag = ih264d_get_bit_h264(ps_bitstrm);
  ------------------
  |  Branch (440:8): [True: 3.76k, False: 17.4k]
  ------------------
  441|  21.2k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (441:8): [True: 10.3k, False: 10.8k]
  ------------------
  442|  10.3k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (442:8): [True: 9.14k, False: 1.22k]
  ------------------
  443|  9.14k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_mb_aff_flag != u1_mb_aff_flag))
  ------------------
  |  Branch (443:8): [True: 72, False: 9.07k]
  ------------------
  444|     72|    {
  445|     72|        ps_dec->u1_res_changed = 1;
  446|     72|        return IVD_RES_CHANGED;
  447|     72|    }
  448|  21.1k|    if(!u1_frm)
  ------------------
  |  Branch (448:8): [True: 3.69k, False: 17.4k]
  ------------------
  449|  3.69k|    {
  450|  3.69k|        u2_pic_ht <<= 1;
  451|  3.69k|        ps_seq->u1_mb_aff_flag = u1_mb_aff_flag;
  452|  3.69k|        COPYTHECONTEXT("SPS: mb_adaptive_frame_field_flag", ps_seq->u1_mb_aff_flag);
  453|  3.69k|    }
  454|  17.4k|    else
  455|  17.4k|        ps_seq->u1_mb_aff_flag = 0;
  456|       |
  457|  21.1k|    ps_seq->u1_direct_8x8_inference_flag = ih264d_get_bit_h264(ps_bitstrm);
  458|       |
  459|  21.1k|    COPYTHECONTEXT("SPS: direct_8x8_inference_flag", ps_seq->u1_direct_8x8_inference_flag);
  460|       |
  461|       |    /* G050 */
  462|  21.1k|    u1_frame_cropping_flag = ih264d_get_bit_h264(ps_bitstrm);
  463|  21.1k|    COPYTHECONTEXT("SPS: frame_cropping_flag", u1_frame_cropping_flag);
  464|       |
  465|  21.1k|    if(u1_frame_cropping_flag)
  ------------------
  |  Branch (465:8): [True: 5.84k, False: 15.2k]
  ------------------
  466|  5.84k|    {
  467|  5.84k|        u1_frame_cropping_rect_left_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  468|  5.84k|        COPYTHECONTEXT("SPS: frame_cropping_rect_left_offset", u1_frame_cropping_rect_left_ofst);
  469|  5.84k|        u1_frame_cropping_rect_right_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  470|  5.84k|        COPYTHECONTEXT("SPS: frame_cropping_rect_right_offset", u1_frame_cropping_rect_right_ofst);
  471|  5.84k|        u1_frame_cropping_rect_top_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  472|  5.84k|        COPYTHECONTEXT("SPS: frame_cropping_rect_top_offset", u1_frame_cropping_rect_top_ofst);
  473|  5.84k|        u1_frame_cropping_rect_bottom_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  474|  5.84k|        COPYTHECONTEXT("SPS: frame_cropping_rect_bottom_offset",
  475|  5.84k|                       u1_frame_cropping_rect_bottom_ofst);
  476|  5.84k|    }
  477|       |    /* G050 */
  478|  21.1k|    ps_seq->u1_vui_parameters_present_flag = ih264d_get_bit_h264(ps_bitstrm);
  479|  21.1k|    COPYTHECONTEXT("SPS: vui_parameters_present_flag", ps_seq->u1_vui_parameters_present_flag);
  480|       |
  481|  21.1k|    u2_frm_wd_y = u2_pic_wd + (UWORD8) (PAD_LEN_Y_H << 1);
  ------------------
  |  |  571|  21.1k|#define PAD_LEN_Y_H                   32
  ------------------
  482|  21.1k|    if(1 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (482:8): [True: 0, False: 21.1k]
  ------------------
  483|      0|    {
  484|      0|        if(ps_dec->u4_app_disp_width > u2_frm_wd_y) u2_frm_wd_y = ps_dec->u4_app_disp_width;
  ------------------
  |  Branch (484:12): [True: 0, False: 0]
  ------------------
  485|      0|    }
  486|       |
  487|  21.1k|    u2_frm_ht_y = u2_pic_ht + (UWORD8) (PAD_LEN_Y_V << 2);
  ------------------
  |  |  572|  21.1k|#define PAD_LEN_Y_V                   20
  ------------------
  488|  21.1k|    u2_frm_wd_uv = u2_pic_wd + (UWORD8) (PAD_LEN_UV_H << 2);
  ------------------
  |  |  573|  21.1k|#define PAD_LEN_UV_H                  16
  ------------------
  489|  21.1k|    u2_frm_wd_uv = MAX(u2_frm_wd_uv, u2_frm_wd_y);
  ------------------
  |  |   60|  21.1k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 21.1k]
  |  |  ------------------
  ------------------
  490|       |
  491|  21.1k|    u2_frm_ht_uv = (u2_pic_ht >> 1) + (UWORD8) (PAD_LEN_UV_V << 2);
  ------------------
  |  |  574|  21.1k|#define PAD_LEN_UV_V                  8
  ------------------
  492|  21.1k|    u2_frm_ht_uv = MAX(u2_frm_ht_uv, (u2_frm_ht_y >> 1));
  ------------------
  |  |   60|  21.1k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 21.1k]
  |  |  ------------------
  ------------------
  493|       |
  494|       |    /* Calculate display picture width, height and start u4_ofst from YUV420 */
  495|       |    /* picture buffers as per cropping information parsed above             */
  496|  21.1k|    {
  497|  21.1k|        UWORD16 u2_rgt_ofst = 0;
  498|  21.1k|        UWORD16 u2_lft_ofst = 0;
  499|  21.1k|        UWORD16 u2_top_ofst = 0;
  500|  21.1k|        UWORD16 u2_btm_ofst = 0;
  501|  21.1k|        UWORD8 u1_frm_mbs_flag;
  502|  21.1k|        UWORD8 u1_vert_mult_factor;
  503|       |
  504|  21.1k|        if(u1_frame_cropping_flag)
  ------------------
  |  Branch (504:12): [True: 5.84k, False: 15.2k]
  ------------------
  505|  5.84k|        {
  506|       |            /* Calculate right and left u4_ofst for cropped picture           */
  507|  5.84k|            u2_rgt_ofst = u1_frame_cropping_rect_right_ofst << 1;
  508|  5.84k|            u2_lft_ofst = u1_frame_cropping_rect_left_ofst << 1;
  509|       |
  510|       |            /* Know frame MBs only u4_flag                                      */
  511|  5.84k|            u1_frm_mbs_flag = (1 == ps_seq->u1_frame_mbs_only_flag);
  512|       |
  513|       |            /* Simplify the vertical u4_ofst calculation from field/frame     */
  514|  5.84k|            u1_vert_mult_factor = (2 - u1_frm_mbs_flag);
  515|       |
  516|       |            /* Calculate bottom and top u4_ofst for cropped  picture          */
  517|  5.84k|            u2_btm_ofst = (u1_frame_cropping_rect_bottom_ofst << u1_vert_mult_factor);
  518|  5.84k|            u2_top_ofst = (u1_frame_cropping_rect_top_ofst << u1_vert_mult_factor);
  519|  5.84k|        }
  520|       |
  521|       |        /* Calculate u4_ofst from start of YUV 420 picture buffer to start of*/
  522|       |        /* cropped picture buffer                                           */
  523|  21.1k|        u2_crop_offset_y = (u2_frm_wd_y * u2_top_ofst) + (u2_lft_ofst);
  524|  21.1k|        u2_crop_offset_uv =
  525|  21.1k|            (u2_frm_wd_uv * (u2_top_ofst >> 1)) + (u2_lft_ofst >> 1) * YUV420SP_FACTOR;
  ------------------
  |  |  119|  21.1k|#define YUV420SP_FACTOR 2
  ------------------
  526|       |        /* Calculate the display picture width and height based on crop      */
  527|       |        /* information                                                       */
  528|  21.1k|        i4_cropped_ht = (WORD32) u2_pic_ht - (WORD32) (u2_btm_ofst + u2_top_ofst);
  529|  21.1k|        i4_cropped_wd = (WORD32) u2_pic_wd - (WORD32) (u2_rgt_ofst + u2_lft_ofst);
  530|       |
  531|  21.1k|        if((i4_cropped_ht < MB_SIZE) || (i4_cropped_wd < MB_SIZE))
  ------------------
  |  |  554|  21.1k|#define MB_SIZE             16
  ------------------
                      if((i4_cropped_ht < MB_SIZE) || (i4_cropped_wd < MB_SIZE))
  ------------------
  |  |  554|  21.0k|#define MB_SIZE             16
  ------------------
  |  Branch (531:12): [True: 120, False: 21.0k]
  |  Branch (531:41): [True: 97, False: 20.9k]
  ------------------
  532|    217|        {
  533|    217|            return ERROR_INV_SPS_PPS_T;
  534|    217|        }
  535|       |
  536|  20.9k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (536:12): [True: 10.2k, False: 10.6k]
  ------------------
  537|  10.2k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (537:12): [True: 9.07k, False: 1.21k]
  ------------------
  538|  9.07k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_pic_wd != u2_pic_wd))
  ------------------
  |  Branch (538:12): [True: 59, False: 9.01k]
  ------------------
  539|     59|        {
  540|     59|            ps_dec->u1_res_changed = 1;
  541|     59|            return IVD_RES_CHANGED;
  542|     59|        }
  543|       |
  544|  20.8k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (544:12): [True: 10.2k, False: 10.6k]
  ------------------
  545|  10.2k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (545:12): [True: 9.01k, False: 1.21k]
  ------------------
  546|  9.01k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_disp_width != i4_cropped_wd))
  ------------------
  |  Branch (546:12): [True: 76, False: 8.94k]
  ------------------
  547|     76|        {
  548|     76|            ps_dec->u1_res_changed = 1;
  549|     76|            return IVD_RES_CHANGED;
  550|     76|        }
  551|       |
  552|  20.7k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (552:12): [True: 10.1k, False: 10.6k]
  ------------------
  553|  10.1k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (553:12): [True: 8.94k, False: 1.21k]
  ------------------
  554|  8.94k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_pic_ht != u2_pic_ht))
  ------------------
  |  Branch (554:12): [True: 95, False: 8.84k]
  ------------------
  555|     95|        {
  556|     95|            ps_dec->u1_res_changed = 1;
  557|     95|            return IVD_RES_CHANGED;
  558|     95|        }
  559|       |
  560|  20.6k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (560:12): [True: 10.0k, False: 10.6k]
  ------------------
  561|  10.0k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (561:12): [True: 8.84k, False: 1.21k]
  ------------------
  562|  8.84k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_disp_height != i4_cropped_ht))
  ------------------
  |  Branch (562:12): [True: 67, False: 8.77k]
  ------------------
  563|     67|        {
  564|     67|            ps_dec->u1_res_changed = 1;
  565|     67|            return IVD_RES_CHANGED;
  566|     67|        }
  567|       |        /* Check again for unsupported resolutions with updated values*/
  568|  20.6k|        if((u2_pic_wd > SVCD_MAX_FRAME_WIDTH) || (u2_pic_ht > SVCD_MAX_FRAME_HEIGHT) ||
  ------------------
  |  |   52|  20.6k|#define SVCD_MAX_FRAME_WIDTH 4096
  ------------------
                      if((u2_pic_wd > SVCD_MAX_FRAME_WIDTH) || (u2_pic_ht > SVCD_MAX_FRAME_HEIGHT) ||
  ------------------
  |  |   53|  20.6k|#define SVCD_MAX_FRAME_HEIGHT 4096
  ------------------
  |  Branch (568:12): [True: 0, False: 20.6k]
  |  Branch (568:50): [True: 85, False: 20.5k]
  ------------------
  569|  20.5k|           (u2_pic_wd < SVCD_MIN_FRAME_WIDTH) || (u2_pic_ht < SVCD_MIN_FRAME_HEIGHT) ||
  ------------------
  |  |   56|  20.5k|#define SVCD_MIN_FRAME_WIDTH 32
  ------------------
                         (u2_pic_wd < SVCD_MIN_FRAME_WIDTH) || (u2_pic_ht < SVCD_MIN_FRAME_HEIGHT) ||
  ------------------
  |  |   57|  20.3k|#define SVCD_MIN_FRAME_HEIGHT 32
  ------------------
  |  Branch (569:12): [True: 218, False: 20.3k]
  |  Branch (569:50): [True: 82, False: 20.2k]
  ------------------
  570|  20.2k|           (u2_pic_wd * (UWORD32) u2_pic_ht > SVCD_MAX_FRAME_SIZE))
  ------------------
  |  |   54|  20.2k|#define SVCD_MAX_FRAME_SIZE (4096 * 4096)
  ------------------
  |  Branch (570:12): [True: 0, False: 20.2k]
  ------------------
  571|    385|        {
  572|    385|            return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
  573|    385|        }
  574|       |
  575|       |        /* If MBAff is enabled, decoder support is limited to streams with
  576|       |         * width of at most half of H264_MAX_FRAME_WIDTH.
  577|       |         * In case of MBAff decoder processes two rows at a time
  578|       |         */
  579|  20.2k|        if((u2_pic_wd << ps_seq->u1_mb_aff_flag) > H264_MAX_FRAME_WIDTH)
  ------------------
  |  |   39|  20.2k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (579:12): [True: 140, False: 20.0k]
  ------------------
  580|    140|        {
  581|    140|            return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
  582|    140|        }
  583|  20.2k|    }
  584|       |
  585|  20.0k|    if(1 == ps_seq->u1_vui_parameters_present_flag)
  ------------------
  |  Branch (585:8): [True: 4.41k, False: 15.6k]
  ------------------
  586|  4.41k|    {
  587|  4.41k|        ret = ih264d_parse_vui_parametres(&ps_seq->s_vui, ps_bitstrm);
  588|  4.41k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  4.41k|#define OK        0
  ------------------
  |  Branch (588:12): [True: 322, False: 4.09k]
  ------------------
  589|  4.41k|    }
  590|  19.7k|    ps_seq_svc_ext = &ps_subset_seq->s_sps_svc_ext;
  591|       |
  592|  19.7k|    isvcd_set_default_seq_svc_ext(ps_seq_svc_ext);
  593|       |
  594|  19.7k|    if(SCALABLE_BASELINE_PROFILE_IDC == ps_seq->u1_profile_idc ||
  ------------------
  |  |   59|  19.7k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (594:8): [True: 1.93k, False: 17.8k]
  ------------------
  595|  17.8k|       SCALABLE_HIGH_PROFILE_IDC == ps_seq->u1_profile_idc)
  ------------------
  |  |   60|  17.8k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (595:8): [True: 2.84k, False: 14.9k]
  ------------------
  596|  4.78k|    {
  597|  4.78k|        SWITCHONTRACE;
  598|  4.78k|        ps_seq_svc_ext->u1_inter_layer_deblocking_filter_control_present_flag =
  599|  4.78k|            ih264d_get_bit_h264(ps_bitstrm);
  600|  4.78k|        COPYTHECONTEXT("SPS_EXt: u1_inter_layer_deblocking_filter_control_present_flag",
  601|  4.78k|                       ps_seq_svc_ext->u1_inter_layer_deblocking_filter_control_present_flag);
  602|       |
  603|  4.78k|        ps_seq_svc_ext->u1_extended_spatial_scalability_idc = ih264d_get_bits_h264(ps_bitstrm, 2);
  604|  4.78k|        COPYTHECONTEXT("SPS_EXt: u1_extended_spatial_scalability_idc",
  605|  4.78k|                       ps_seq_svc_ext->u1_extended_spatial_scalability_idc);
  606|       |
  607|       |        /* u1_extended_spatial_scalability_idc value 0, 1 and 2 are supported */
  608|  4.78k|        if(ps_seq_svc_ext->u1_extended_spatial_scalability_idc > 2)
  ------------------
  |  Branch (608:12): [True: 174, False: 4.61k]
  ------------------
  609|    174|        {
  610|    174|            return ERROR_SVC_INV_SUBSET_SPS;
  611|    174|        }
  612|       |
  613|       |        /* ChromaArrayType = i4_chroma_format_idc  if  separate_colour_plane_flag =
  614|       |         * 0 for all chroma format except 4:4:4 */
  615|  4.61k|        if(1 == ps_seq->i4_chroma_format_idc || 2 == ps_seq->i4_chroma_format_idc)
  ------------------
  |  Branch (615:12): [True: 4.61k, False: 0]
  |  Branch (615:49): [True: 0, False: 0]
  ------------------
  616|  4.61k|        {
  617|  4.61k|            ps_seq_svc_ext->u1_chroma_phase_x_plus1_flag = ih264d_get_bit_h264(ps_bitstrm);
  618|  4.61k|            COPYTHECONTEXT("SPS_EXt: u1_chroma_phase_x_plus1_flag",
  619|  4.61k|                           ps_seq_svc_ext->u1_chroma_phase_x_plus1_flag);
  620|  4.61k|        }
  621|       |
  622|  4.61k|        if(1 == ps_seq->i4_chroma_format_idc)
  ------------------
  |  Branch (622:12): [True: 4.61k, False: 0]
  ------------------
  623|  4.61k|        {
  624|  4.61k|            ps_seq_svc_ext->u1_chroma_phase_y_plus1 = ih264d_get_bits_h264(ps_bitstrm, 2);
  625|  4.61k|            COPYTHECONTEXT("SPS_EXt: u1_chroma_phase_y_plus1",
  626|  4.61k|                           ps_seq_svc_ext->u1_chroma_phase_y_plus1);
  627|       |
  628|  4.61k|            if(ps_seq_svc_ext->u1_chroma_phase_y_plus1 >= 3)
  ------------------
  |  Branch (628:16): [True: 73, False: 4.53k]
  ------------------
  629|     73|            {
  630|     73|                return ERROR_SVC_INV_SUBSET_SPS;
  631|     73|            }
  632|  4.61k|        }
  633|       |
  634|       |        /* inferred values not covered in isvcd_set_default_seq_svc_ext*/
  635|  4.53k|        ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_x_plus1_flag =
  636|  4.53k|            ps_seq_svc_ext->u1_chroma_phase_x_plus1_flag;
  637|  4.53k|        ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_y_plus1 =
  638|  4.53k|            ps_seq_svc_ext->u1_chroma_phase_y_plus1;
  639|       |
  640|  4.53k|        if(1 == ps_seq_svc_ext->u1_extended_spatial_scalability_idc)
  ------------------
  |  Branch (640:12): [True: 1.90k, False: 2.63k]
  ------------------
  641|  1.90k|        {
  642|  1.90k|            if(ps_seq->i4_chroma_format_idc > 0)
  ------------------
  |  Branch (642:16): [True: 1.90k, False: 0]
  ------------------
  643|  1.90k|            {
  644|  1.90k|                ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_x_plus1_flag =
  645|  1.90k|                    ih264d_get_bit_h264(ps_bitstrm);
  646|  1.90k|                COPYTHECONTEXT("SPS_EXt: u1_seq_ref_layer_chroma_phase_x_plus1_flag",
  647|  1.90k|                               ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_x_plus1_flag);
  648|       |
  649|  1.90k|                ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_y_plus1 =
  650|  1.90k|                    ih264d_get_bits_h264(ps_bitstrm, 2);
  651|  1.90k|                COPYTHECONTEXT("SPS_EXt: u1_seq_ref_layer_chroma_phase_y_plus1",
  652|  1.90k|                               ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_y_plus1);
  653|       |
  654|  1.90k|                if(ps_seq_svc_ext->u1_seq_ref_layer_chroma_phase_y_plus1 >= 3)
  ------------------
  |  Branch (654:20): [True: 69, False: 1.83k]
  ------------------
  655|     69|                {
  656|     69|                    return ERROR_SVC_INV_SUBSET_SPS;
  657|     69|                }
  658|  1.90k|            }
  659|       |
  660|  1.83k|            ps_seq_svc_ext->i4_seq_scaled_ref_layer_left_offset =
  661|  1.83k|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  662|  1.83k|            COPYTHECONTEXT("SPS_EXt: i4_seq_scaled_ref_layer_left_offset",
  663|  1.83k|                           ps_seq_svc_ext->i4_seq_scaled_ref_layer_left_offset);
  664|       |
  665|  1.83k|            if(ps_seq_svc_ext->i4_seq_scaled_ref_layer_left_offset != 0)
  ------------------
  |  Branch (665:16): [True: 147, False: 1.68k]
  ------------------
  666|    147|            {
  667|    147|                return ERROR_SVC_INV_SUBSET_SPS;
  668|    147|            }
  669|       |
  670|  1.68k|            if(ps_seq_svc_ext->i4_seq_scaled_ref_layer_left_offset >= MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|  3.36k|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
  |  Branch (670:16): [True: 0, False: 1.68k]
  ------------------
  671|  1.68k|               ps_seq_svc_ext->i4_seq_scaled_ref_layer_left_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|  1.68k|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (671:16): [True: 0, False: 1.68k]
  ------------------
  672|      0|            {
  673|      0|                return ERROR_SVC_INV_SUBSET_SPS;
  674|      0|            }
  675|       |
  676|  1.68k|            ps_seq_svc_ext->i4_seq_scaled_ref_layer_top_offset =
  677|  1.68k|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  678|  1.68k|            COPYTHECONTEXT("SPS_EXt: i4_seq_scaled_ref_layer_top_offset",
  679|  1.68k|                           ps_seq_svc_ext->i4_seq_scaled_ref_layer_top_offset);
  680|       |
  681|  1.68k|            if(ps_seq_svc_ext->i4_seq_scaled_ref_layer_top_offset != 0)
  ------------------
  |  Branch (681:16): [True: 100, False: 1.58k]
  ------------------
  682|    100|            {
  683|    100|                return ERROR_SVC_INV_SUBSET_SPS;
  684|    100|            }
  685|       |
  686|  1.58k|            if(ps_seq_svc_ext->i4_seq_scaled_ref_layer_top_offset >= MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|  3.16k|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
  |  Branch (686:16): [True: 0, False: 1.58k]
  ------------------
  687|  1.58k|               ps_seq_svc_ext->i4_seq_scaled_ref_layer_top_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|  1.58k|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (687:16): [True: 0, False: 1.58k]
  ------------------
  688|      0|            {
  689|      0|                return ERROR_SVC_INV_SUBSET_SPS;
  690|      0|            }
  691|       |
  692|  1.58k|            ps_seq_svc_ext->i4_seq_scaled_ref_layer_right_offset =
  693|  1.58k|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  694|  1.58k|            COPYTHECONTEXT("SPS_EXt: i4_seq_scaled_ref_layer_right_offset",
  695|  1.58k|                           ps_seq_svc_ext->i4_seq_scaled_ref_layer_right_offset);
  696|       |
  697|  1.58k|            if(ps_seq_svc_ext->i4_seq_scaled_ref_layer_right_offset >= MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|  3.16k|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
  |  Branch (697:16): [True: 79, False: 1.50k]
  ------------------
  698|  1.50k|               ps_seq_svc_ext->i4_seq_scaled_ref_layer_right_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|  1.50k|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (698:16): [True: 96, False: 1.40k]
  ------------------
  699|    175|            {
  700|    175|                return ERROR_SVC_INV_SUBSET_SPS;
  701|    175|            }
  702|       |
  703|  1.40k|            ps_seq_svc_ext->i4_seq_scaled_ref_layer_bottom_offset =
  704|  1.40k|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  705|  1.40k|            COPYTHECONTEXT("SPS_EXt: i4_seq_scaled_ref_layer_bottom_offset",
  706|  1.40k|                           ps_seq_svc_ext->i4_seq_scaled_ref_layer_bottom_offset);
  707|       |
  708|  1.40k|            if(ps_seq_svc_ext->i4_seq_scaled_ref_layer_bottom_offset >= MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|  2.81k|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
  |  Branch (708:16): [True: 79, False: 1.33k]
  ------------------
  709|  1.33k|               ps_seq_svc_ext->i4_seq_scaled_ref_layer_bottom_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|  1.33k|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (709:16): [True: 90, False: 1.24k]
  ------------------
  710|    169|            {
  711|    169|                return ERROR_INV_SLICE_HDR_T;
  712|    169|            }
  713|  1.40k|        }
  714|       |
  715|  3.87k|        ps_seq_svc_ext->u1_seq_tcoeff_level_prediction_flag = ih264d_get_bit_h264(ps_bitstrm);
  716|  3.87k|        COPYTHECONTEXT("SPS_EXt: u1_seq_tcoeff_level_prediction_flag",
  717|  3.87k|                       ps_seq_svc_ext->u1_seq_tcoeff_level_prediction_flag);
  718|       |
  719|  3.87k|        if(1 == ps_seq_svc_ext->u1_seq_tcoeff_level_prediction_flag)
  ------------------
  |  Branch (719:12): [True: 2.35k, False: 1.52k]
  ------------------
  720|  2.35k|        {
  721|  2.35k|            ps_seq_svc_ext->u1_adaptive_tcoeff_level_prediction_flag =
  722|  2.35k|                ih264d_get_bit_h264(ps_bitstrm);
  723|  2.35k|            COPYTHECONTEXT("SPS_EXt: u1_adaptive_tcoeff_level_prediction_flag",
  724|  2.35k|                           ps_seq_svc_ext->u1_adaptive_tcoeff_level_prediction_flag);
  725|  2.35k|        }
  726|       |
  727|  3.87k|        ps_seq_svc_ext->u1_slice_header_restriction_flag = ih264d_get_bit_h264(ps_bitstrm);
  728|  3.87k|        COPYTHECONTEXT("SPS_EXt: u1_slice_header_restriction_flag",
  729|  3.87k|                       ps_seq_svc_ext->u1_slice_header_restriction_flag);
  730|       |
  731|  3.87k|        ps_seq_svc_ext->u1_svc_vui_parameters_present_flag = ih264d_get_bit_h264(ps_bitstrm);
  732|  3.87k|        COPYTHECONTEXT("SPS_EXt: u1_svc_vui_parameters_present_flag",
  733|  3.87k|                       ps_seq_svc_ext->u1_svc_vui_parameters_present_flag);
  734|       |
  735|  3.87k|        if(1 == ps_seq_svc_ext->u1_svc_vui_parameters_present_flag)
  ------------------
  |  Branch (735:12): [True: 1.17k, False: 2.70k]
  ------------------
  736|  1.17k|        {
  737|  1.17k|            if(NULL ==
  ------------------
  |  Branch (737:16): [True: 391, False: 785]
  ------------------
  738|  1.17k|               ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].s_sps_svc_ext.ps_svc_vui_ext)
  739|    391|            {
  740|    391|                void *pv_buf;
  741|    391|                UWORD32 size;
  742|       |                /* Memory allocation only if VUI is enabled in a particular subset SPS*/
  743|    391|                size = sizeof(svc_vui_ext_t);
  744|    391|                pv_buf = ps_dec->pf_aligned_alloc(ps_dec->pv_mem_ctxt, 128, size);
  745|    391|                RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|    391|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 391]
  |  |  ------------------
  ------------------
  746|    391|                memset(pv_buf, 0, size);
  747|    391|                ps_seq_svc_ext->ps_svc_vui_ext = pv_buf;
  748|    391|                ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id]
  749|    391|                    .s_sps_svc_ext.ps_svc_vui_ext = pv_buf;
  750|    391|            }
  751|    785|            else
  752|    785|            {
  753|    785|                ps_seq_svc_ext->ps_svc_vui_ext =
  754|    785|                    ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id]
  755|    785|                        .s_sps_svc_ext.ps_svc_vui_ext;
  756|    785|            }
  757|  1.17k|            ret = isvcd_parse_vui_ext_parametres(ps_seq_svc_ext->ps_svc_vui_ext, ps_bitstrm);
  758|  1.17k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  1.17k|#define OK        0
  ------------------
  |  Branch (758:16): [True: 246, False: 930]
  ------------------
  759|  1.17k|        }
  760|  3.87k|    }
  761|       |    /* Add conditions for SCALABLE BASELINE PROFILE */
  762|  18.6k|    if(SCALABLE_BASELINE_PROFILE_IDC == ps_seq->u1_profile_idc ||
  ------------------
  |  |   59|  18.6k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (762:8): [True: 1.69k, False: 16.9k]
  ------------------
  763|  16.9k|       ((SCALABLE_HIGH_PROFILE_IDC == ps_seq->u1_profile_idc) && (1 == uc_constraint_set0_flag)))
  ------------------
  |  |   60|  16.9k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (763:9): [True: 1.94k, False: 14.9k]
  |  Branch (763:66): [True: 373, False: 1.56k]
  ------------------
  764|  2.06k|    {
  765|  2.06k|        if(ps_seq->i4_chroma_format_idc != 1)
  ------------------
  |  Branch (765:12): [True: 0, False: 2.06k]
  ------------------
  766|      0|        {
  767|      0|            return ERROR_FEATURE_UNAVAIL;
  768|      0|        }
  769|       |
  770|  2.06k|        if(ps_seq->i4_bit_depth_luma_minus8 != 0)
  ------------------
  |  Branch (770:12): [True: 0, False: 2.06k]
  ------------------
  771|      0|        {
  772|      0|            return ERROR_FEATURE_UNAVAIL;
  773|      0|        }
  774|       |
  775|  2.06k|        if(ps_seq->i4_bit_depth_chroma_minus8 != 0)
  ------------------
  |  Branch (775:12): [True: 0, False: 2.06k]
  ------------------
  776|      0|        {
  777|      0|            return ERROR_FEATURE_UNAVAIL;
  778|      0|        }
  779|       |
  780|  2.06k|        if(ps_seq->i4_qpprime_y_zero_transform_bypass_flag != 0)
  ------------------
  |  Branch (780:12): [True: 0, False: 2.06k]
  ------------------
  781|      0|        {
  782|      0|            return ERROR_FEATURE_UNAVAIL;
  783|      0|        }
  784|       |
  785|  2.06k|        if(ps_seq->u1_frame_mbs_only_flag != 1)
  ------------------
  |  Branch (785:12): [True: 271, False: 1.79k]
  ------------------
  786|    271|        {
  787|    271|            return ERROR_FEATURE_UNAVAIL;
  788|    271|        }
  789|       |
  790|  1.79k|        if((0 != ps_seq_svc_ext->i4_seq_scaled_ref_layer_left_offset % 16) &&
  ------------------
  |  Branch (790:12): [True: 0, False: 1.79k]
  ------------------
  791|      0|           (0 != ps_seq_svc_ext->i4_seq_scaled_ref_layer_top_offset % 16))
  ------------------
  |  Branch (791:12): [True: 0, False: 0]
  ------------------
  792|      0|        {
  793|      0|            return ERROR_FEATURE_UNAVAIL;
  794|      0|        }
  795|  1.79k|    }
  796|       |    /* Compare older num_reorder_frames with the new one if header is already
  797|       |     * decoded */
  798|  18.3k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (798:8): [True: 9.92k, False: 8.42k]
  ------------------
  799|  9.92k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (799:8): [True: 8.77k, False: 1.14k]
  ------------------
  800|  8.77k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_vui_parameters_present_flag) &&
  ------------------
  |  Branch (800:8): [True: 1.56k, False: 7.21k]
  ------------------
  801|  1.56k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].s_vui.u1_bitstream_restriction_flag))
  ------------------
  |  Branch (801:8): [True: 78, False: 1.48k]
  ------------------
  802|     78|    {
  803|     78|        ps_dec->u1_res_changed = 1;
  804|     78|        return IVD_RES_CHANGED;
  805|     78|    }
  806|       |    /* In case bitstream read has exceeded the filled size, then return an error */
  807|  18.2k|    if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  18.2k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 609, False: 17.6k]
  |  |  ------------------
  ------------------
  808|    609|    {
  809|    609|        return ERROR_INV_SPS_PPS_T;
  810|    609|    }
  811|       |
  812|       |    /*--------------------------------------------------------------------*/
  813|       |    /* All initializations to ps_dec are beyond this point                */
  814|       |    /*--------------------------------------------------------------------*/
  815|  17.6k|    {
  816|  17.6k|        WORD32 reorder_depth = ih264d_get_dpb_size(ps_seq);
  817|  17.6k|        if((1 == ps_seq->u1_vui_parameters_present_flag) &&
  ------------------
  |  Branch (817:12): [True: 3.40k, False: 14.2k]
  ------------------
  818|  3.40k|           (1 == ps_seq->s_vui.u1_bitstream_restriction_flag))
  ------------------
  |  Branch (818:12): [True: 250, False: 3.15k]
  ------------------
  819|    250|        {
  820|    250|            reorder_depth = ps_seq->s_vui.u4_num_reorder_frames + 1;
  821|    250|        }
  822|       |
  823|  17.6k|        if(reorder_depth > H264_MAX_REF_PICS)
  ------------------
  |  |  534|  17.6k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (823:12): [True: 66, False: 17.5k]
  ------------------
  824|     66|        {
  825|     66|            return ERROR_INV_SPS_PPS_T;
  826|     66|        }
  827|       |
  828|  17.5k|        if(ps_seq->u1_frame_mbs_only_flag != 1) reorder_depth *= 2;
  ------------------
  |  Branch (828:12): [True: 1.34k, False: 16.2k]
  ------------------
  829|  17.5k|        ps_subset_seq->i4_reorder_depth = reorder_depth + DISPLAY_LATENCY;
  ------------------
  |  |   51|  17.5k|#define DISPLAY_LATENCY         2
  ------------------
  830|  17.5k|    }
  831|      0|    ps_subset_seq->u2_disp_height = i4_cropped_ht;
  832|  17.5k|    ps_subset_seq->u2_disp_width = i4_cropped_wd;
  833|  17.5k|    ps_subset_seq->u2_pic_wd = u2_pic_wd;
  834|  17.5k|    ps_subset_seq->u2_pic_ht = u2_pic_ht;
  835|       |
  836|       |    /* Assuming 8k is the maximum resolution svc dec supports*/
  837|  17.5k|    if(u2_frm_wd_y > H264_MAX_FRAME_WIDTH) return (NOT_OK);
  ------------------
  |  |   39|  17.5k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                  if(u2_frm_wd_y > H264_MAX_FRAME_WIDTH) return (NOT_OK);
  ------------------
  |  |  116|    180|#define NOT_OK    -1
  ------------------
  |  Branch (837:8): [True: 180, False: 17.4k]
  ------------------
  838|  17.4k|    if(u2_frm_ht_y > H264_MAX_FRAME_HEIGHT) return (NOT_OK);
  ------------------
  |  |   40|  17.4k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                  if(u2_frm_ht_y > H264_MAX_FRAME_HEIGHT) return (NOT_OK);
  ------------------
  |  |  116|    111|#define NOT_OK    -1
  ------------------
  |  Branch (838:8): [True: 111, False: 17.3k]
  ------------------
  839|  17.3k|    if(u2_frm_wd_uv > H264_MAX_FRAME_WIDTH) return (NOT_OK);
  ------------------
  |  |   39|  17.3k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
                  if(u2_frm_wd_uv > H264_MAX_FRAME_WIDTH) return (NOT_OK);
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (839:8): [True: 0, False: 17.3k]
  ------------------
  840|  17.3k|    if(u2_frm_ht_uv > H264_MAX_FRAME_HEIGHT) return (NOT_OK);
  ------------------
  |  |   40|  17.3k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
                  if(u2_frm_ht_uv > H264_MAX_FRAME_HEIGHT) return (NOT_OK);
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  |  Branch (840:8): [True: 0, False: 17.3k]
  ------------------
  841|       |
  842|       |    /* Determining the Width and Height of Frame from that of Picture */
  843|  17.3k|    ps_subset_seq->u2_frm_wd_y = u2_frm_wd_y;
  844|  17.3k|    ps_subset_seq->u2_frm_ht_y = u2_frm_ht_y;
  845|  17.3k|    ps_subset_seq->u2_frm_wd_uv = u2_frm_wd_uv;
  846|  17.3k|    ps_subset_seq->u2_frm_ht_uv = u2_frm_ht_uv;
  847|       |
  848|  17.3k|    ps_subset_seq->u1_pad_len_y_v = (UWORD8) (PAD_LEN_Y_V << (1 - u1_frm));
  ------------------
  |  |  572|  17.3k|#define PAD_LEN_Y_V                   20
  ------------------
  849|  17.3k|    ps_subset_seq->u1_pad_len_cr_v = (UWORD8) (PAD_LEN_UV_V << (1 - u1_frm));
  ------------------
  |  |  574|  17.3k|#define PAD_LEN_UV_V                  8
  ------------------
  850|       |
  851|  17.3k|    ps_subset_seq->u2_crop_offset_y = u2_crop_offset_y;
  852|  17.3k|    ps_subset_seq->u2_crop_offset_uv = u2_crop_offset_uv;
  853|       |
  854|  17.3k|    ps_seq->u1_is_valid = TRUE;
  ------------------
  |  |  591|  17.3k|#define TRUE    1
  ------------------
  855|  17.3k|    ps_dec->ps_sps[u1_seq_parameter_set_id] = *ps_seq;
  856|  17.3k|    if(NULL != ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].s_sps_svc_ext.ps_svc_vui_ext)
  ------------------
  |  Branch (856:8): [True: 733, False: 16.5k]
  ------------------
  857|    733|    {
  858|    733|        ps_seq_svc_ext->ps_svc_vui_ext =
  859|    733|            ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].s_sps_svc_ext.ps_svc_vui_ext;
  860|    733|    }
  861|  17.3k|    ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id] = *ps_subset_seq;
  862|  17.3k|    ps_svc_lyr_dec->ps_cur_subset_sps = &ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id];
  863|       |
  864|  17.3k|    return OK;
  ------------------
  |  |  114|  17.3k|#define OK        0
  ------------------
  865|  17.3k|}
isvcd_parse_nal_unit:
  943|   172k|{
  944|   172k|    dec_bit_stream_t *ps_bitstrm;
  945|       |
  946|   172k|    dec_struct_t *ps_dec;
  947|   172k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec;
  948|   172k|    UWORD8 u1_nal_unit_type;
  949|   172k|    WORD32 i_status = OK;
  ------------------
  |  |  114|   172k|#define OK        0
  ------------------
  950|       |
  951|   172k|    ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) dec_svc_hdl;
  952|   172k|    ps_dec = &ps_svc_lyr_dec->s_dec;
  953|       |
  954|   172k|    {
  955|   172k|        SWITCHOFFTRACE;
  956|   172k|        u1_nal_unit_type = ps_dec->u1_nal_unit_type;
  957|       |
  958|   172k|        ps_bitstrm = ps_dec->ps_bitstrm;
  959|       |
  960|       |        // Skip all NALUs if SPS and PPS are not decoded
  961|   172k|        switch(u1_nal_unit_type)
  962|   172k|        {
  963|      0|            case SLICE_DATA_PARTITION_A_NAL:
  ------------------
  |  |  325|      0|#define SLICE_DATA_PARTITION_A_NAL      2
  ------------------
  |  Branch (963:13): [True: 0, False: 172k]
  ------------------
  964|      0|            case SLICE_DATA_PARTITION_B_NAL:
  ------------------
  |  |  326|      0|#define SLICE_DATA_PARTITION_B_NAL      3
  ------------------
  |  Branch (964:13): [True: 0, False: 172k]
  ------------------
  965|      0|            case SLICE_DATA_PARTITION_C_NAL:
  ------------------
  |  |  327|      0|#define SLICE_DATA_PARTITION_C_NAL      4
  ------------------
  |  Branch (965:13): [True: 0, False: 172k]
  ------------------
  966|      0|                if(!ps_dec->i4_decode_header) ih264d_parse_slice_partition(ps_dec, ps_bitstrm);
  ------------------
  |  Branch (966:20): [True: 0, False: 0]
  ------------------
  967|      0|                break;
  968|       |
  969|   107k|            case IDR_SLICE_NAL:
  ------------------
  |  |  328|   107k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (969:13): [True: 107k, False: 65.8k]
  ------------------
  970|   131k|            case SLICE_NAL:
  ------------------
  |  |  324|   131k|#define SLICE_NAL                       1
  ------------------
  |  Branch (970:13): [True: 23.9k, False: 149k]
  ------------------
  971|       |
  972|   131k|                if(ps_svc_lyr_dec->u1_base_res_flag != 1)
  ------------------
  |  Branch (972:20): [True: 66, False: 130k]
  ------------------
  973|     66|                {
  974|     66|                    return NOT_OK;
  ------------------
  |  |  116|     66|#define NOT_OK    -1
  ------------------
  975|     66|                }
  976|   130k|                if(!ps_dec->i4_decode_header)
  ------------------
  |  Branch (976:20): [True: 130k, False: 0]
  ------------------
  977|   130k|                {
  978|   130k|                    if(ps_dec->i4_header_decoded == 3)
  ------------------
  |  Branch (978:24): [True: 130k, False: 0]
  ------------------
  979|   130k|                    {
  980|       |                        /* ! */
  981|   130k|                        DEBUG_THREADS_PRINTF("Decoding  a slice NAL\n");
  982|   130k|                        {
  983|   130k|                            ih264d_get_pre_sei_params(ps_dec, u1_nal_unit_type);
  984|       |                            /* ! */
  985|   130k|                            ps_dec->u4_slice_start_code_found = 1;
  986|       |
  987|   130k|                            i_status = isvcd_parse_decode_slice(
  988|   130k|                                (UWORD8) (u1_nal_unit_type == IDR_SLICE_NAL), u1_nal_ref_idc,
  ------------------
  |  |  328|   130k|#define IDR_SLICE_NAL                   5
  ------------------
  989|   130k|                                ps_svc_lyr_dec);
  990|       |
  991|   130k|                            if(i_status != OK)
  ------------------
  |  |  114|   130k|#define OK        0
  ------------------
  |  Branch (991:32): [True: 77.1k, False: 53.8k]
  ------------------
  992|  77.1k|                            {
  993|  77.1k|                                return i_status;
  994|  77.1k|                            }
  995|   130k|                        }
  996|   130k|                    }
  997|   130k|                }
  998|  53.8k|                break;
  999|       |
 1000|  53.8k|            case SEI_NAL:
  ------------------
  |  |  329|      0|#define SEI_NAL                         6
  ------------------
  |  Branch (1000:13): [True: 0, False: 172k]
  ------------------
 1001|      0|            case PREFIX_UNIT_NAL:
  ------------------
  |  |   64|      0|#define PREFIX_UNIT_NAL 14
  ------------------
  |  Branch (1001:13): [True: 0, False: 172k]
  ------------------
 1002|      0|            case SEQ_PARAM_NAL:
  ------------------
  |  |  330|      0|#define SEQ_PARAM_NAL                   7
  ------------------
  |  Branch (1002:13): [True: 0, False: 172k]
  ------------------
 1003|      0|            case PIC_PARAM_NAL:
  ------------------
  |  |  331|      0|#define PIC_PARAM_NAL                   8
  ------------------
  |  Branch (1003:13): [True: 0, False: 172k]
  ------------------
 1004|      0|            case SUBSET_SPS_NAL:
  ------------------
  |  |   65|      0|#define SUBSET_SPS_NAL 15
  ------------------
  |  Branch (1004:13): [True: 0, False: 172k]
  ------------------
 1005|      0|                H264_DEC_DEBUG_PRINT("\nUnknown NAL type %d\n", u1_nal_unit_type);
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 1006|      0|                break;
 1007|       |
 1008|      0|            case ACCESS_UNIT_DELIMITER_RBSP:
  ------------------
  |  |  332|      0|#define ACCESS_UNIT_DELIMITER_RBSP      9
  ------------------
  |  Branch (1008:13): [True: 0, False: 172k]
  ------------------
 1009|      0|                if(!ps_dec->i4_decode_header)
  ------------------
  |  Branch (1009:20): [True: 0, False: 0]
  ------------------
 1010|      0|                {
 1011|      0|                    ih264d_access_unit_delimiter_rbsp(ps_dec);
 1012|      0|                }
 1013|      0|                break;
 1014|       |                // ignore the END_OF_SEQ_RBSP NAL and decode even after this NAL
 1015|      0|            case END_OF_STREAM_RBSP:
  ------------------
  |  |  334|      0|#define END_OF_STREAM_RBSP              11
  ------------------
  |  Branch (1015:13): [True: 0, False: 172k]
  ------------------
 1016|      0|                if(!ps_dec->i4_decode_header)
  ------------------
  |  Branch (1016:20): [True: 0, False: 0]
  ------------------
 1017|      0|                {
 1018|      0|                    ih264d_parse_end_of_stream(ps_dec);
 1019|      0|                }
 1020|      0|                break;
 1021|      0|            case FILLER_DATA_NAL:
  ------------------
  |  |  335|      0|#define FILLER_DATA_NAL                 12
  ------------------
  |  Branch (1021:13): [True: 0, False: 172k]
  ------------------
 1022|      0|                if(!ps_dec->i4_decode_header)
  ------------------
  |  Branch (1022:20): [True: 0, False: 0]
  ------------------
 1023|      0|                {
 1024|      0|                    ih264d_parse_filler_data(ps_dec, ps_bitstrm);
 1025|      0|                }
 1026|      0|                break;
 1027|  41.9k|            case CODED_SLICE_EXTENSION_NAL:
  ------------------
  |  |   66|  41.9k|#define CODED_SLICE_EXTENSION_NAL 20
  ------------------
  |  Branch (1027:13): [True: 41.9k, False: 131k]
  ------------------
 1028|       |
 1029|  41.9k|                if(ps_svc_lyr_dec->u1_base_res_flag == 1)
  ------------------
  |  Branch (1029:20): [True: 3.28k, False: 38.6k]
  ------------------
 1030|  3.28k|                {
 1031|  3.28k|                    return NOT_OK;
  ------------------
  |  |  116|  3.28k|#define NOT_OK    -1
  ------------------
 1032|  3.28k|                }
 1033|  38.6k|                if(!ps_dec->i4_decode_header)
  ------------------
  |  Branch (1033:20): [True: 38.6k, False: 0]
  ------------------
 1034|  38.6k|                {
 1035|  38.6k|                    if(ps_dec->i4_header_decoded == 3)
  ------------------
  |  Branch (1035:24): [True: 38.6k, False: 0]
  ------------------
 1036|  38.6k|                    {
 1037|       |                        /* ! */
 1038|  38.6k|                        DEBUG_THREADS_PRINTF("Decoding  an SVC slice NAL\n");
 1039|  38.6k|                        {
 1040|  38.6k|                            {
 1041|  38.6k|                                ih264d_get_pre_sei_params(ps_dec, u1_nal_unit_type);
 1042|       |                                /* ! */
 1043|  38.6k|                                ps_dec->u4_slice_start_code_found = 1;
 1044|       |
 1045|  38.6k|                                i_status = isvcd_parse_decode_slice_ext_nal(
 1046|  38.6k|                                    (UWORD8) (ps_svc_lyr_dec->ps_nal_svc_ext->u1_idr_flag),
 1047|  38.6k|                                    u1_nal_ref_idc, ps_svc_lyr_dec);
 1048|       |
 1049|  38.6k|                                if(i_status != OK)
  ------------------
  |  |  114|  38.6k|#define OK        0
  ------------------
  |  Branch (1049:36): [True: 26.8k, False: 11.8k]
  ------------------
 1050|  26.8k|                                {
 1051|  26.8k|                                    return i_status;
 1052|  26.8k|                                }
 1053|  38.6k|                            }
 1054|  38.6k|                        }
 1055|  38.6k|                    }
 1056|  38.6k|                }
 1057|  11.8k|                break;
 1058|       |
 1059|  11.8k|            default:
  ------------------
  |  Branch (1059:13): [True: 0, False: 172k]
  ------------------
 1060|      0|                H264_DEC_DEBUG_PRINT("\nUnknown NAL type %d\n", u1_nal_unit_type);
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
 1061|      0|                break;
 1062|   172k|        }
 1063|   172k|    }
 1064|  65.6k|    return i_status;
 1065|   172k|}
isvcd_parse_sps:
 1079|  92.4k|{
 1080|  92.4k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1081|  92.4k|    UWORD8 i;
 1082|  92.4k|    dec_seq_params_t *ps_seq = NULL;
 1083|  92.4k|    dec_svc_seq_params_t *ps_subset_seq = NULL;
 1084|  92.4k|    UWORD8 u1_profile_idc, u1_level_idc, u1_seq_parameter_set_id, u1_mb_aff_flag = 0;
 1085|  92.4k|    UWORD16 i2_max_frm_num;
 1086|  92.4k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1087|  92.4k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 1088|  92.4k|    UWORD8 u1_frm, uc_constraint_set0_flag, uc_constraint_set1_flag, uc_constraint_set2_flag;
 1089|  92.4k|    WORD32 i4_cropped_ht, i4_cropped_wd;
 1090|  92.4k|    UWORD32 u4_temp;
 1091|  92.4k|    UWORD64 u8_temp;
 1092|  92.4k|    UWORD32 u4_pic_height_in_map_units, u4_pic_width_in_mbs;
 1093|  92.4k|    UWORD32 u2_pic_wd = 0;
 1094|  92.4k|    UWORD32 u2_pic_ht = 0;
 1095|  92.4k|    UWORD32 u2_frm_wd_y = 0;
 1096|  92.4k|    UWORD32 u2_frm_ht_y = 0;
 1097|  92.4k|    UWORD32 u2_frm_wd_uv = 0;
 1098|  92.4k|    UWORD32 u2_frm_ht_uv = 0;
 1099|  92.4k|    UWORD32 u2_crop_offset_y = 0;
 1100|  92.4k|    UWORD32 u2_crop_offset_uv = 0;
 1101|  92.4k|    WORD32 ret;
 1102|  92.4k|    WORD32 num_reorder_frames;
 1103|       |    /* High profile related syntax element */
 1104|  92.4k|    WORD32 i4_i;
 1105|       |    /* G050 */
 1106|  92.4k|    UWORD8 u1_frame_cropping_flag,
 1107|  92.4k|        u1_frame_cropping_rect_left_ofst = 0, u1_frame_cropping_rect_right_ofst = 0,
 1108|  92.4k|        u1_frame_cropping_rect_top_ofst = 0, u1_frame_cropping_rect_bottom_ofst = 0;
 1109|       |    /* G050 */
 1110|       |    /*--------------------------------------------------------------------*/
 1111|       |    /* Decode seq_parameter_set_id and profile and level values           */
 1112|       |    /*--------------------------------------------------------------------*/
 1113|  92.4k|    SWITCHONTRACE;
 1114|  92.4k|    u1_profile_idc = ih264d_get_bits_h264(ps_bitstrm, 8);
 1115|  92.4k|    COPYTHECONTEXT("SPS: profile_idc", u1_profile_idc);
 1116|       |
 1117|       |    /* G050 */
 1118|  92.4k|    uc_constraint_set0_flag = ih264d_get_bit_h264(ps_bitstrm);
 1119|  92.4k|    uc_constraint_set1_flag = ih264d_get_bit_h264(ps_bitstrm);
 1120|  92.4k|    uc_constraint_set2_flag = ih264d_get_bit_h264(ps_bitstrm);
 1121|  92.4k|    UNUSED(uc_constraint_set2_flag);
  ------------------
  |  |   45|  92.4k|#define UNUSED(x) ((void)(x))
  ------------------
 1122|       |    /*****************************************************/
 1123|       |    /* Read 5 bits for uc_constraint_set3_flag (1 bit)   */
 1124|       |    /* and reserved_zero_4bits (4 bits) - Sushant        */
 1125|       |    /*****************************************************/
 1126|  92.4k|    ih264d_get_bits_h264(ps_bitstrm, 5);
 1127|       |    /* G050 */
 1128|       |    /* Check whether particular profile is suported or not */
 1129|       |    /* Check whether particular profile is suported or not */
 1130|  92.4k|    if((u1_profile_idc != MAIN_PROFILE_IDC) && (u1_profile_idc != BASE_PROFILE_IDC) &&
  ------------------
  |  |  276|  92.4k|#define MAIN_PROFILE_IDC    77
  ------------------
                  if((u1_profile_idc != MAIN_PROFILE_IDC) && (u1_profile_idc != BASE_PROFILE_IDC) &&
  ------------------
  |  |  275|  60.0k|#define BASE_PROFILE_IDC    66
  ------------------
  |  Branch (1130:8): [True: 60.0k, False: 32.4k]
  |  Branch (1130:48): [True: 49.5k, False: 10.4k]
  ------------------
 1131|  49.5k|       (u1_profile_idc != HIGH_PROFILE_IDC))
  ------------------
  |  |  278|  49.5k|#define HIGH_PROFILE_IDC   100
  ------------------
  |  Branch (1131:8): [True: 45.3k, False: 4.13k]
  ------------------
 1132|  45.3k|    {
 1133|       |        /* Apart from Baseline, main and high profile,
 1134|       |         * only extended profile is supported provided
 1135|       |         * uc_constraint_set0_flag or uc_constraint_set1_flag are set to 1
 1136|       |         */
 1137|  45.3k|        if((u1_profile_idc != EXTENDED_PROFILE_IDC) ||
  ------------------
  |  |  277|  45.3k|#define EXTENDED_PROFILE_IDC    88
  ------------------
  |  Branch (1137:12): [True: 540, False: 44.8k]
  ------------------
 1138|  44.8k|           ((uc_constraint_set1_flag != 1) && (uc_constraint_set0_flag != 1)))
  ------------------
  |  Branch (1138:13): [True: 22.0k, False: 22.7k]
  |  Branch (1138:47): [True: 136, False: 21.9k]
  ------------------
 1139|    676|        {
 1140|    676|            return (ERROR_FEATURE_UNAVAIL);
 1141|    676|        }
 1142|  45.3k|    }
 1143|       |
 1144|  91.7k|    u1_level_idc = ih264d_get_bits_h264(ps_bitstrm, 8);
 1145|  91.7k|    COPYTHECONTEXT("SPS: u4_level_idc", u1_level_idc);
 1146|       |
 1147|  91.7k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1148|  91.7k|    if(u4_temp & MASK_ERR_SEQ_SET_ID) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  |  526|  91.7k|#define MASK_ERR_SEQ_SET_ID   (0xFFFFFFE0)
  ------------------
  |  Branch (1148:8): [True: 383, False: 91.3k]
  ------------------
 1149|  91.3k|    u1_seq_parameter_set_id = u4_temp;
 1150|  91.3k|    COPYTHECONTEXT("SPS: seq_parameter_set_id", u1_seq_parameter_set_id);
 1151|       |
 1152|       |    /*--------------------------------------------------------------------*/
 1153|       |    /* Find an seq param entry in seqparam array of decStruct             */
 1154|       |    /*--------------------------------------------------------------------*/
 1155|  91.3k|    ps_subset_seq = ps_svc_lyr_dec->pv_scratch_subset_sps;
 1156|  91.3k|    memset(ps_subset_seq, 0, sizeof(dec_svc_seq_params_t));
 1157|  91.3k|    ps_seq = ps_dec->pv_scratch_sps_pps;
 1158|  91.3k|    memset(ps_seq, 0, sizeof(dec_seq_params_t));
 1159|       |
 1160|  91.3k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1160:8): [True: 69.7k, False: 21.6k]
  ------------------
 1161|  69.7k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1161:8): [True: 63.4k, False: 6.26k]
  ------------------
 1162|  63.4k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_profile_idc != u1_profile_idc))
  ------------------
  |  Branch (1162:8): [True: 436, False: 63.0k]
  ------------------
 1163|    436|    {
 1164|    436|        ps_dec->u1_res_changed = 1;
 1165|    436|        return IVD_RES_CHANGED;
 1166|    436|    }
 1167|       |
 1168|  90.9k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1168:8): [True: 69.3k, False: 21.6k]
  ------------------
 1169|  69.3k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1169:8): [True: 63.0k, False: 6.26k]
  ------------------
 1170|  63.0k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_level_idc != u1_level_idc))
  ------------------
  |  Branch (1170:8): [True: 1.29k, False: 61.7k]
  ------------------
 1171|  1.29k|    {
 1172|  1.29k|        ps_dec->u1_res_changed = 1;
 1173|  1.29k|        return IVD_RES_CHANGED;
 1174|  1.29k|    }
 1175|       |
 1176|  89.6k|    ps_seq->u1_profile_idc = u1_profile_idc;
 1177|  89.6k|    ps_seq->u1_level_idc = u1_level_idc;
 1178|  89.6k|    ps_seq->u1_seq_parameter_set_id = u1_seq_parameter_set_id;
 1179|  89.6k|    ps_subset_seq->ps_seq = &ps_dec->ps_sps[u1_seq_parameter_set_id];
 1180|       |
 1181|       |    /*******************************************************************/
 1182|       |    /* Initializations for high profile - Sushant                      */
 1183|       |    /*******************************************************************/
 1184|  89.6k|    ps_seq->i4_chroma_format_idc = 1;
 1185|  89.6k|    ps_seq->i4_bit_depth_luma_minus8 = 0;
 1186|  89.6k|    ps_seq->i4_bit_depth_chroma_minus8 = 0;
 1187|  89.6k|    ps_seq->i4_qpprime_y_zero_transform_bypass_flag = 0;
 1188|  89.6k|    ps_seq->i4_seq_scaling_matrix_present_flag = 0;
 1189|  89.6k|    if(u1_profile_idc == HIGH_PROFILE_IDC || u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC ||
  ------------------
  |  |  278|   179k|#define HIGH_PROFILE_IDC   100
  ------------------
                  if(u1_profile_idc == HIGH_PROFILE_IDC || u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC ||
  ------------------
  |  |   59|   175k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (1189:8): [True: 4.04k, False: 85.6k]
  |  Branch (1189:46): [True: 0, False: 85.6k]
  ------------------
 1190|  85.6k|       u1_profile_idc == SCALABLE_HIGH_PROFILE_IDC)
  ------------------
  |  |   60|  85.6k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (1190:8): [True: 0, False: 85.6k]
  ------------------
 1191|  4.04k|    {
 1192|       |        /* reading chroma_format_idc   */
 1193|  4.04k|        ps_seq->i4_chroma_format_idc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1194|       |
 1195|       |        /* Monochrome is not supported */
 1196|  4.04k|        if(ps_seq->i4_chroma_format_idc != 1)
  ------------------
  |  Branch (1196:12): [True: 121, False: 3.91k]
  ------------------
 1197|    121|        {
 1198|    121|            return ERROR_FEATURE_UNAVAIL;
 1199|    121|        }
 1200|       |
 1201|       |        /* reading bit_depth_luma_minus8   */
 1202|  3.91k|        ps_seq->i4_bit_depth_luma_minus8 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1203|       |
 1204|  3.91k|        if(ps_seq->i4_bit_depth_luma_minus8 != 0)
  ------------------
  |  Branch (1204:12): [True: 117, False: 3.80k]
  ------------------
 1205|    117|        {
 1206|    117|            return ERROR_FEATURE_UNAVAIL;
 1207|    117|        }
 1208|       |
 1209|       |        /* reading bit_depth_chroma_minus8   */
 1210|  3.80k|        ps_seq->i4_bit_depth_chroma_minus8 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1211|       |
 1212|  3.80k|        if(ps_seq->i4_bit_depth_chroma_minus8 != 0)
  ------------------
  |  Branch (1212:12): [True: 113, False: 3.68k]
  ------------------
 1213|    113|        {
 1214|    113|            return ERROR_FEATURE_UNAVAIL;
 1215|    113|        }
 1216|       |
 1217|       |        /* reading qpprime_y_zero_transform_bypass_flag   */
 1218|  3.68k|        ps_seq->i4_qpprime_y_zero_transform_bypass_flag = (WORD32) ih264d_get_bit_h264(ps_bitstrm);
 1219|       |
 1220|  3.68k|        if(ps_seq->i4_qpprime_y_zero_transform_bypass_flag != 0)
  ------------------
  |  Branch (1220:12): [True: 76, False: 3.61k]
  ------------------
 1221|     76|        {
 1222|     76|            return ERROR_INV_SPS_PPS_T;
 1223|     76|        }
 1224|       |
 1225|       |        /* reading seq_scaling_matrix_present_flag   */
 1226|  3.61k|        ps_seq->i4_seq_scaling_matrix_present_flag = (WORD32) ih264d_get_bit_h264(ps_bitstrm);
 1227|       |
 1228|  3.61k|        if(ps_seq->i4_seq_scaling_matrix_present_flag)
  ------------------
  |  Branch (1228:12): [True: 1.16k, False: 2.44k]
  ------------------
 1229|  1.16k|        {
 1230|  9.90k|            for(i4_i = 0; i4_i < 8; i4_i++)
  ------------------
  |  Branch (1230:27): [True: 8.98k, False: 921]
  ------------------
 1231|  8.98k|            {
 1232|  8.98k|                ps_seq->u1_seq_scaling_list_present_flag[i4_i] = ih264d_get_bit_h264(ps_bitstrm);
 1233|       |
 1234|       |                /* initialize u1_use_default_scaling_matrix_flag[i4_i] to zero */
 1235|       |                /* before calling scaling list                             */
 1236|  8.98k|                ps_seq->u1_use_default_scaling_matrix_flag[i4_i] = 0;
 1237|       |
 1238|  8.98k|                if(ps_seq->u1_seq_scaling_list_present_flag[i4_i])
  ------------------
  |  Branch (1238:20): [True: 2.36k, False: 6.62k]
  ------------------
 1239|  2.36k|                {
 1240|  2.36k|                    if(i4_i < 6)
  ------------------
  |  Branch (1240:24): [True: 1.83k, False: 536]
  ------------------
 1241|  1.83k|                    {
 1242|  1.83k|                        ret = ih264d_scaling_list(ps_seq->i2_scalinglist4x4[i4_i], 16,
 1243|  1.83k|                                                  &ps_seq->u1_use_default_scaling_matrix_flag[i4_i],
 1244|  1.83k|                                                  ps_bitstrm);
 1245|  1.83k|                    }
 1246|    536|                    else
 1247|    536|                    {
 1248|    536|                        ret = ih264d_scaling_list(ps_seq->i2_scalinglist8x8[i4_i - 6], 64,
 1249|    536|                                                  &ps_seq->u1_use_default_scaling_matrix_flag[i4_i],
 1250|    536|                                                  ps_bitstrm);
 1251|    536|                    }
 1252|  2.36k|                    if(ret != OK)
  ------------------
  |  |  114|  2.36k|#define OK        0
  ------------------
  |  Branch (1252:24): [True: 243, False: 2.12k]
  ------------------
 1253|    243|                    {
 1254|    243|                        return ret;
 1255|    243|                    }
 1256|  2.36k|                }
 1257|  8.98k|            }
 1258|  1.16k|        }
 1259|  3.61k|    }
 1260|       |    /*--------------------------------------------------------------------*/
 1261|       |    /* Decode MaxFrameNum                                                 */
 1262|       |    /*--------------------------------------------------------------------*/
 1263|  88.9k|    u8_temp = (UWORD64) 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1264|  88.9k|    if(u8_temp > MAX_BITS_IN_FRAME_NUM)
  ------------------
  |  |  531|  88.9k|#define MAX_BITS_IN_FRAME_NUM     16
  ------------------
  |  Branch (1264:8): [True: 228, False: 88.7k]
  ------------------
 1265|    228|    {
 1266|    228|        return ERROR_INV_SPS_PPS_T;
 1267|    228|    }
 1268|  88.7k|    ps_seq->u1_bits_in_frm_num = (UWORD8) u8_temp;
 1269|  88.7k|    COPYTHECONTEXT("SPS: log2_max_frame_num_minus4", (ps_seq->u1_bits_in_frm_num - 4));
 1270|       |
 1271|  88.7k|    i2_max_frm_num = (1 << (ps_seq->u1_bits_in_frm_num));
 1272|  88.7k|    ps_seq->u2_u4_max_pic_num_minus1 = i2_max_frm_num - 1;
 1273|       |    /*--------------------------------------------------------------------*/
 1274|       |    /* Decode picture order count and related values                      */
 1275|       |    /*--------------------------------------------------------------------*/
 1276|  88.7k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1277|  88.7k|    if(u4_temp > MAX_PIC_ORDER_CNT_TYPE)
  ------------------
  |  |  529|  88.7k|#define MAX_PIC_ORDER_CNT_TYPE    2
  ------------------
  |  Branch (1277:8): [True: 175, False: 88.5k]
  ------------------
 1278|    175|    {
 1279|    175|        return ERROR_INV_POC_TYPE_T;
 1280|    175|    }
 1281|  88.5k|    ps_seq->u1_pic_order_cnt_type = u4_temp;
 1282|  88.5k|    COPYTHECONTEXT("SPS: pic_order_cnt_type", ps_seq->u1_pic_order_cnt_type);
 1283|       |
 1284|  88.5k|    ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = 1;
 1285|  88.5k|    if(ps_seq->u1_pic_order_cnt_type == 0)
  ------------------
  |  Branch (1285:8): [True: 75.2k, False: 13.3k]
  ------------------
 1286|  75.2k|    {
 1287|  75.2k|        u8_temp = (UWORD64) 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1288|  75.2k|        if(u8_temp > MAX_BITS_IN_POC_LSB)
  ------------------
  |  |  532|  75.2k|#define MAX_BITS_IN_POC_LSB       16
  ------------------
  |  Branch (1288:12): [True: 78, False: 75.1k]
  ------------------
 1289|     78|        {
 1290|     78|            return ERROR_INV_SPS_PPS_T;
 1291|     78|        }
 1292|  75.1k|        ps_seq->u1_log2_max_pic_order_cnt_lsb_minus = (UWORD8) u8_temp;
 1293|  75.1k|        ps_seq->i4_max_pic_order_cntLsb = (1 << u8_temp);
 1294|  75.1k|        COPYTHECONTEXT("SPS: log2_max_pic_order_cnt_lsb_minus4", (u8_temp - 4));
 1295|  75.1k|    }
 1296|  13.3k|    else if(ps_seq->u1_pic_order_cnt_type == 1)
  ------------------
  |  Branch (1296:13): [True: 12.2k, False: 1.15k]
  ------------------
 1297|  12.2k|    {
 1298|  12.2k|        ps_seq->u1_delta_pic_order_always_zero_flag = ih264d_get_bit_h264(ps_bitstrm);
 1299|  12.2k|        COPYTHECONTEXT("SPS: delta_pic_order_always_zero_flag",
 1300|  12.2k|                       ps_seq->u1_delta_pic_order_always_zero_flag);
 1301|       |
 1302|  12.2k|        ps_seq->i4_ofst_for_non_ref_pic = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1303|  12.2k|        COPYTHECONTEXT("SPS: offset_for_non_ref_pic", ps_seq->i4_ofst_for_non_ref_pic);
 1304|       |
 1305|  12.2k|        ps_seq->i4_ofst_for_top_to_bottom_field = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1306|  12.2k|        COPYTHECONTEXT("SPS: offset_for_top_to_bottom_field",
 1307|  12.2k|                       ps_seq->i4_ofst_for_top_to_bottom_field);
 1308|       |
 1309|  12.2k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1310|  12.2k|        if(u4_temp > 255) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  Branch (1310:12): [True: 176, False: 12.0k]
  ------------------
 1311|  12.0k|        ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = u4_temp;
 1312|  12.0k|        COPYTHECONTEXT("SPS: num_ref_frames_in_pic_order_cnt_cycle",
 1313|  12.0k|                       ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle);
 1314|       |
 1315|  41.7k|        for(i = 0; i < ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle; i++)
  ------------------
  |  Branch (1315:20): [True: 29.6k, False: 12.0k]
  ------------------
 1316|  29.6k|        {
 1317|  29.6k|            ps_seq->i4_ofst_for_ref_frame[i] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1318|  29.6k|            COPYTHECONTEXT("SPS: offset_for_ref_frame", ps_seq->i4_ofst_for_ref_frame[i]);
 1319|  29.6k|        }
 1320|  12.0k|    }
 1321|       |
 1322|  88.3k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1323|       |
 1324|  88.3k|    if((u4_temp > H264_MAX_REF_PICS))
  ------------------
  |  |  534|  88.3k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (1324:8): [True: 283, False: 88.0k]
  ------------------
 1325|    283|    {
 1326|    283|        return ERROR_NUM_REF;
 1327|    283|    }
 1328|       |
 1329|       |    /* Compare with older num_ref_frames is header is already once */
 1330|  88.0k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1330:8): [True: 67.8k, False: 20.2k]
  ------------------
 1331|  67.8k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1331:8): [True: 61.7k, False: 6.11k]
  ------------------
 1332|  61.7k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_num_ref_frames != u4_temp))
  ------------------
  |  Branch (1332:8): [True: 275, False: 61.4k]
  ------------------
 1333|    275|    {
 1334|    275|        ps_dec->u1_res_changed = 1;
 1335|    275|        return IVD_RES_CHANGED;
 1336|    275|    }
 1337|       |
 1338|  87.7k|    ps_seq->u1_num_ref_frames = u4_temp;
 1339|  87.7k|    COPYTHECONTEXT("SPS: num_ref_frames", ps_seq->u1_num_ref_frames);
 1340|       |
 1341|  87.7k|    ps_seq->u1_gaps_in_frame_num_value_allowed_flag = ih264d_get_bit_h264(ps_bitstrm);
 1342|  87.7k|    COPYTHECONTEXT("SPS: gaps_in_frame_num_value_allowed_flag",
 1343|  87.7k|                   ps_seq->u1_gaps_in_frame_num_value_allowed_flag);
 1344|       |
 1345|  87.7k|    ps_seq->u1_gaps_in_frame_num_value_allowed_flag = 0;
 1346|       |
 1347|       |    /*--------------------------------------------------------------------*/
 1348|       |    /* Decode FrameWidth and FrameHeight and related values               */
 1349|       |    /*--------------------------------------------------------------------*/
 1350|  87.7k|    u8_temp = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1351|       |    /* Check  for unsupported resolutions*/
 1352|  87.7k|    if(u8_temp > (H264_MAX_FRAME_WIDTH >> 4))
  ------------------
  |  |   39|  87.7k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (1352:8): [True: 142, False: 87.6k]
  ------------------
 1353|    142|    {
 1354|    142|        return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
 1355|    142|    }
 1356|  87.6k|    u4_pic_width_in_mbs = (UWORD32) u8_temp;
 1357|  87.6k|    COPYTHECONTEXT("SPS: pic_width_in_mbs_minus1", u4_pic_width_in_mbs - 1);
 1358|       |
 1359|  87.6k|    u8_temp = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1360|  87.6k|    if(u8_temp > (H264_MAX_FRAME_HEIGHT >> 4))
  ------------------
  |  |   40|  87.6k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (1360:8): [True: 124, False: 87.5k]
  ------------------
 1361|    124|    {
 1362|    124|        return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
 1363|    124|    }
 1364|  87.5k|    u4_pic_height_in_map_units = (UWORD32) u8_temp;
 1365|       |
 1366|  87.5k|    ps_seq->u2_frm_wd_in_mbs = u4_pic_width_in_mbs;
 1367|  87.5k|    ps_seq->u2_frm_ht_in_mbs = u4_pic_height_in_map_units;
 1368|  87.5k|    u2_pic_wd = (u4_pic_width_in_mbs << 4);
 1369|  87.5k|    u2_pic_ht = (u4_pic_height_in_map_units << 4);
 1370|  87.5k|    if(ps_svc_lyr_dec->pic_width < u2_pic_wd)
  ------------------
  |  Branch (1370:8): [True: 13.2k, False: 74.2k]
  ------------------
 1371|  13.2k|    {
 1372|  13.2k|        ps_svc_lyr_dec->pic_width = u2_pic_wd;
 1373|  13.2k|    }
 1374|  87.5k|    if(ps_svc_lyr_dec->pic_height < u2_pic_ht)
  ------------------
  |  Branch (1374:8): [True: 13.3k, False: 74.1k]
  ------------------
 1375|  13.3k|    {
 1376|  13.3k|        ps_svc_lyr_dec->pic_height = u2_pic_ht;
 1377|  13.3k|    }
 1378|       |
 1379|       |    /*--------------------------------------------------------------------*/
 1380|       |    /* Get the value of MaxMbAddress and Number of bits needed for it     */
 1381|       |    /*--------------------------------------------------------------------*/
 1382|  87.5k|    ps_seq->u4_max_mb_addr = ((UWORD32)ps_seq->u2_frm_wd_in_mbs * (UWORD32)ps_seq->u2_frm_ht_in_mbs) - 1;
 1383|  87.5k|    ps_seq->u4_total_num_of_mbs = ps_seq->u4_max_mb_addr + 1;
 1384|  87.5k|    ps_seq->u1_level_idc = ih264d_correct_level_idc(u1_level_idc, ps_seq->u4_total_num_of_mbs);
 1385|       |
 1386|  87.5k|    u1_frm = ih264d_get_bit_h264(ps_bitstrm);
 1387|  87.5k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1387:8): [True: 67.5k, False: 19.9k]
  ------------------
 1388|  67.5k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1388:8): [True: 61.4k, False: 6.10k]
  ------------------
 1389|  61.4k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_frame_mbs_only_flag != u1_frm))
  ------------------
  |  Branch (1389:8): [True: 3.00k, False: 58.4k]
  ------------------
 1390|  3.00k|    {
 1391|  3.00k|        ps_dec->u1_res_changed = 1;
 1392|  3.00k|        return IVD_RES_CHANGED;
 1393|  3.00k|    }
 1394|  84.5k|    ps_seq->u1_frame_mbs_only_flag = u1_frm;
 1395|  84.5k|    COPYTHECONTEXT("SPS: frame_mbs_only_flag", u1_frm);
 1396|       |
 1397|  84.5k|    if(!u1_frm) u1_mb_aff_flag = ih264d_get_bit_h264(ps_bitstrm);
  ------------------
  |  Branch (1397:8): [True: 22.8k, False: 61.7k]
  ------------------
 1398|       |
 1399|  84.5k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1399:8): [True: 64.5k, False: 19.9k]
  ------------------
 1400|  64.5k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1400:8): [True: 58.4k, False: 6.10k]
  ------------------
 1401|  58.4k|       (ps_dec->ps_sps[u1_seq_parameter_set_id].u1_mb_aff_flag != u1_mb_aff_flag))
  ------------------
  |  Branch (1401:8): [True: 773, False: 57.6k]
  ------------------
 1402|    773|    {
 1403|    773|        ps_dec->u1_res_changed = 1;
 1404|    773|        return IVD_RES_CHANGED;
 1405|    773|    }
 1406|       |
 1407|  83.7k|    if(!u1_frm)
  ------------------
  |  Branch (1407:8): [True: 22.0k, False: 61.7k]
  ------------------
 1408|  22.0k|    {
 1409|  22.0k|        u2_pic_ht <<= 1;
 1410|  22.0k|        ps_seq->u1_mb_aff_flag = u1_mb_aff_flag;
 1411|  22.0k|        COPYTHECONTEXT("SPS: mb_adaptive_frame_field_flag", ps_seq->u1_mb_aff_flag);
 1412|  22.0k|    }
 1413|  61.7k|    else
 1414|  61.7k|        ps_seq->u1_mb_aff_flag = 0;
 1415|       |
 1416|  83.7k|    ps_seq->u1_direct_8x8_inference_flag = ih264d_get_bit_h264(ps_bitstrm);
 1417|  83.7k|    COPYTHECONTEXT("SPS: direct_8x8_inference_flag", ps_seq->u1_direct_8x8_inference_flag);
 1418|       |
 1419|       |    /* G050 */
 1420|  83.7k|    u1_frame_cropping_flag = ih264d_get_bit_h264(ps_bitstrm);
 1421|  83.7k|    COPYTHECONTEXT("SPS: frame_cropping_flag", u1_frame_cropping_flag);
 1422|       |
 1423|  83.7k|    if(u1_frame_cropping_flag)
  ------------------
  |  Branch (1423:8): [True: 15.1k, False: 68.5k]
  ------------------
 1424|  15.1k|    {
 1425|  15.1k|        u1_frame_cropping_rect_left_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1426|  15.1k|        COPYTHECONTEXT("SPS: frame_cropping_rect_left_offset", u1_frame_cropping_rect_left_ofst);
 1427|  15.1k|        u1_frame_cropping_rect_right_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1428|  15.1k|        COPYTHECONTEXT("SPS: frame_cropping_rect_right_offset", u1_frame_cropping_rect_right_ofst);
 1429|  15.1k|        u1_frame_cropping_rect_top_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1430|  15.1k|        COPYTHECONTEXT("SPS: frame_cropping_rect_top_offset", u1_frame_cropping_rect_top_ofst);
 1431|  15.1k|        u1_frame_cropping_rect_bottom_ofst = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1432|  15.1k|        COPYTHECONTEXT("SPS: frame_cropping_rect_bottom_offset",
 1433|  15.1k|                       u1_frame_cropping_rect_bottom_ofst);
 1434|  15.1k|    }
 1435|       |    /* G050 */
 1436|  83.7k|    ps_seq->u1_vui_parameters_present_flag = ih264d_get_bit_h264(ps_bitstrm);
 1437|  83.7k|    COPYTHECONTEXT("SPS: vui_parameters_present_flag", ps_seq->u1_vui_parameters_present_flag);
 1438|       |
 1439|  83.7k|    u2_frm_wd_y = u2_pic_wd + (UWORD8) (PAD_LEN_Y_H << 1);
  ------------------
  |  |  571|  83.7k|#define PAD_LEN_Y_H                   32
  ------------------
 1440|       |
 1441|  83.7k|    if(1 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (1441:8): [True: 0, False: 83.7k]
  ------------------
 1442|      0|    {
 1443|      0|        if(ps_dec->u4_app_disp_width > u2_frm_wd_y) u2_frm_wd_y = ps_dec->u4_app_disp_width;
  ------------------
  |  Branch (1443:12): [True: 0, False: 0]
  ------------------
 1444|      0|    }
 1445|       |
 1446|  83.7k|    u2_frm_ht_y = u2_pic_ht + (UWORD8) (PAD_LEN_Y_V << 2);
  ------------------
  |  |  572|  83.7k|#define PAD_LEN_Y_V                   20
  ------------------
 1447|  83.7k|    u2_frm_wd_uv = u2_pic_wd + (UWORD8) (PAD_LEN_UV_H << 2);
  ------------------
  |  |  573|  83.7k|#define PAD_LEN_UV_H                  16
  ------------------
 1448|  83.7k|    u2_frm_wd_uv = MAX(u2_frm_wd_uv, u2_frm_wd_y);
  ------------------
  |  |   60|  83.7k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 83.7k]
  |  |  ------------------
  ------------------
 1449|  83.7k|    u2_frm_ht_uv = (u2_pic_ht >> 1) + (UWORD8) (PAD_LEN_UV_V << 2);
  ------------------
  |  |  574|  83.7k|#define PAD_LEN_UV_V                  8
  ------------------
 1450|  83.7k|    u2_frm_ht_uv = MAX(u2_frm_ht_uv, (u2_frm_ht_y >> 1));
  ------------------
  |  |   60|  83.7k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 83.7k]
  |  |  ------------------
  ------------------
 1451|       |
 1452|       |    /* Calculate display picture width, height and start u4_ofst from YUV420 */
 1453|       |    /* pictute buffers as per cropping information parsed above             */
 1454|  83.7k|    {
 1455|  83.7k|        UWORD16 u2_rgt_ofst = 0;
 1456|  83.7k|        UWORD16 u2_lft_ofst = 0;
 1457|  83.7k|        UWORD16 u2_top_ofst = 0;
 1458|  83.7k|        UWORD16 u2_btm_ofst = 0;
 1459|  83.7k|        UWORD8 u1_frm_mbs_flag;
 1460|  83.7k|        UWORD8 u1_vert_mult_factor;
 1461|       |
 1462|  83.7k|        if(u1_frame_cropping_flag)
  ------------------
  |  Branch (1462:12): [True: 15.1k, False: 68.5k]
  ------------------
 1463|  15.1k|        {
 1464|       |            /* Calculate right and left u4_ofst for cropped picture           */
 1465|  15.1k|            u2_rgt_ofst = u1_frame_cropping_rect_right_ofst << 1;
 1466|  15.1k|            u2_lft_ofst = u1_frame_cropping_rect_left_ofst << 1;
 1467|       |
 1468|       |            /* Know frame MBs only u4_flag                                      */
 1469|  15.1k|            u1_frm_mbs_flag = (1 == ps_seq->u1_frame_mbs_only_flag);
 1470|       |
 1471|       |            /* Simplify the vertical u4_ofst calculation from field/frame     */
 1472|  15.1k|            u1_vert_mult_factor = (2 - u1_frm_mbs_flag);
 1473|       |
 1474|       |            /* Calculate bottom and top u4_ofst for cropped  picture          */
 1475|  15.1k|            u2_btm_ofst = (u1_frame_cropping_rect_bottom_ofst << u1_vert_mult_factor);
 1476|  15.1k|            u2_top_ofst = (u1_frame_cropping_rect_top_ofst << u1_vert_mult_factor);
 1477|  15.1k|        }
 1478|       |
 1479|       |        /* Calculate u4_ofst from start of YUV 420 picture buffer to start of*/
 1480|       |        /* cropped picture buffer                                           */
 1481|  83.7k|        u2_crop_offset_y = (u2_frm_wd_y * u2_top_ofst) + (u2_lft_ofst);
 1482|  83.7k|        u2_crop_offset_uv =
 1483|  83.7k|            (u2_frm_wd_uv * (u2_top_ofst >> 1)) + (u2_lft_ofst >> 1) * YUV420SP_FACTOR;
  ------------------
  |  |  119|  83.7k|#define YUV420SP_FACTOR 2
  ------------------
 1484|       |        /* Calculate the display picture width and height based on crop      */
 1485|       |        /* information                                                       */
 1486|  83.7k|        i4_cropped_ht = (WORD32) u2_pic_ht - (WORD32) (u2_btm_ofst + u2_top_ofst);
 1487|  83.7k|        i4_cropped_wd = (WORD32) u2_pic_wd - (WORD32) (u2_rgt_ofst + u2_lft_ofst);
 1488|       |
 1489|  83.7k|        if((i4_cropped_ht < MB_SIZE) || (i4_cropped_wd < MB_SIZE))
  ------------------
  |  |  554|  83.7k|#define MB_SIZE             16
  ------------------
                      if((i4_cropped_ht < MB_SIZE) || (i4_cropped_wd < MB_SIZE))
  ------------------
  |  |  554|  83.5k|#define MB_SIZE             16
  ------------------
  |  Branch (1489:12): [True: 178, False: 83.5k]
  |  Branch (1489:41): [True: 131, False: 83.4k]
  ------------------
 1490|    309|        {
 1491|    309|            return ERROR_INV_SPS_PPS_T;
 1492|    309|        }
 1493|       |
 1494|  83.4k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1494:12): [True: 63.7k, False: 19.7k]
  ------------------
 1495|  63.7k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1495:12): [True: 57.6k, False: 6.07k]
  ------------------
 1496|  57.6k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_pic_wd != u2_pic_wd))
  ------------------
  |  Branch (1496:12): [True: 260, False: 57.3k]
  ------------------
 1497|    260|        {
 1498|    260|            ps_dec->u1_res_changed = 1;
 1499|    260|            return IVD_RES_CHANGED;
 1500|    260|        }
 1501|       |
 1502|  83.1k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1502:12): [True: 63.4k, False: 19.7k]
  ------------------
 1503|  63.4k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1503:12): [True: 57.3k, False: 6.07k]
  ------------------
 1504|  57.3k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_disp_width != i4_cropped_wd))
  ------------------
  |  Branch (1504:12): [True: 96, False: 57.2k]
  ------------------
 1505|     96|        {
 1506|     96|            ps_dec->u1_res_changed = 1;
 1507|     96|            return IVD_RES_CHANGED;
 1508|     96|        }
 1509|       |
 1510|  83.0k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1510:12): [True: 63.3k, False: 19.7k]
  ------------------
 1511|  63.3k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1511:12): [True: 57.2k, False: 6.07k]
  ------------------
 1512|  57.2k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_pic_ht != u2_pic_ht))
  ------------------
  |  Branch (1512:12): [True: 369, False: 56.9k]
  ------------------
 1513|    369|        {
 1514|    369|            ps_dec->u1_res_changed = 1;
 1515|    369|            return IVD_RES_CHANGED;
 1516|    369|        }
 1517|       |
 1518|  82.7k|        if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1518:12): [True: 62.9k, False: 19.7k]
  ------------------
 1519|  62.9k|           (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) &&
  ------------------
  |  Branch (1519:12): [True: 56.9k, False: 6.07k]
  ------------------
 1520|  56.9k|           (ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id].u2_disp_height != i4_cropped_ht))
  ------------------
  |  Branch (1520:12): [True: 100, False: 56.8k]
  ------------------
 1521|    100|        {
 1522|    100|            ps_dec->u1_res_changed = 1;
 1523|    100|            return IVD_RES_CHANGED;
 1524|    100|        }
 1525|       |        /* Check again for unsupported resolutions with updated values*/
 1526|  82.6k|        if((u2_pic_wd > SVCD_MAX_FRAME_WIDTH) || (u2_pic_ht > SVCD_MAX_FRAME_HEIGHT) ||
  ------------------
  |  |   52|  82.6k|#define SVCD_MAX_FRAME_WIDTH 4096
  ------------------
                      if((u2_pic_wd > SVCD_MAX_FRAME_WIDTH) || (u2_pic_ht > SVCD_MAX_FRAME_HEIGHT) ||
  ------------------
  |  |   53|  82.6k|#define SVCD_MAX_FRAME_HEIGHT 4096
  ------------------
  |  Branch (1526:12): [True: 0, False: 82.6k]
  |  Branch (1526:50): [True: 93, False: 82.5k]
  ------------------
 1527|  82.5k|           (u2_pic_wd < SVCD_MIN_FRAME_WIDTH) || (u2_pic_ht < SVCD_MIN_FRAME_HEIGHT) ||
  ------------------
  |  |   56|  82.5k|#define SVCD_MIN_FRAME_WIDTH 32
  ------------------
                         (u2_pic_wd < SVCD_MIN_FRAME_WIDTH) || (u2_pic_ht < SVCD_MIN_FRAME_HEIGHT) ||
  ------------------
  |  |   57|  81.9k|#define SVCD_MIN_FRAME_HEIGHT 32
  ------------------
  |  Branch (1527:12): [True: 511, False: 81.9k]
  |  Branch (1527:50): [True: 98, False: 81.9k]
  ------------------
 1528|  81.9k|           (u2_pic_wd * (UWORD32) u2_pic_ht > SVCD_MAX_FRAME_SIZE))
  ------------------
  |  |   54|  81.9k|#define SVCD_MAX_FRAME_SIZE (4096 * 4096)
  ------------------
  |  Branch (1528:12): [True: 0, False: 81.9k]
  ------------------
 1529|    702|        {
 1530|    702|            return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
 1531|    702|        }
 1532|       |
 1533|       |        /* If MBAff is enabled, decoder support is limited to streams with
 1534|       |         * width less than half of H264_MAX_FRAME_WIDTH.
 1535|       |         * In case of MBAff decoder processes two rows at a time
 1536|       |         */
 1537|  81.9k|        if((u2_pic_wd << ps_seq->u1_mb_aff_flag) > H264_MAX_FRAME_WIDTH)
  ------------------
  |  |   39|  81.9k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (1537:12): [True: 73, False: 81.8k]
  ------------------
 1538|     73|        {
 1539|     73|            return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
 1540|     73|        }
 1541|  81.9k|    }
 1542|       |
 1543|       |    /* Backup num_reorder_frames if header is already decoded */
 1544|  81.8k|    if((ps_dec->i4_header_decoded & 1) && (1 == ps_seq->u1_vui_parameters_present_flag) &&
  ------------------
  |  Branch (1544:8): [True: 62.5k, False: 19.2k]
  |  Branch (1544:43): [True: 5.28k, False: 57.2k]
  ------------------
 1545|  5.28k|       (1 == ps_seq->s_vui.u1_bitstream_restriction_flag))
  ------------------
  |  Branch (1545:8): [True: 0, False: 5.28k]
  ------------------
 1546|      0|    {
 1547|      0|        num_reorder_frames = (WORD32) ps_seq->s_vui.u4_num_reorder_frames;
 1548|      0|    }
 1549|  81.8k|    else
 1550|  81.8k|    {
 1551|  81.8k|        num_reorder_frames = -1;
 1552|  81.8k|    }
 1553|  81.8k|    if(1 == ps_seq->u1_vui_parameters_present_flag)
  ------------------
  |  Branch (1553:8): [True: 6.36k, False: 75.4k]
  ------------------
 1554|  6.36k|    {
 1555|  6.36k|        ret = ih264d_parse_vui_parametres(&ps_seq->s_vui, ps_bitstrm);
 1556|  6.36k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  6.36k|#define OK        0
  ------------------
  |  Branch (1556:12): [True: 163, False: 6.20k]
  ------------------
 1557|  6.36k|    }
 1558|       |
 1559|       |    /* Compare older num_reorder_frames with the new one if header is already
 1560|       |     * decoded */
 1561|  81.6k|    if((ps_dec->i4_header_decoded & 1) &&
  ------------------
  |  Branch (1561:8): [True: 62.5k, False: 19.1k]
  ------------------
 1562|  62.5k|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_is_valid) && (-1 != num_reorder_frames) &&
  ------------------
  |  Branch (1562:8): [True: 56.7k, False: 5.73k]
  |  Branch (1562:70): [True: 0, False: 56.7k]
  ------------------
 1563|      0|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].u1_vui_parameters_present_flag) &&
  ------------------
  |  Branch (1563:8): [True: 0, False: 0]
  ------------------
 1564|      0|       (1 == ps_dec->ps_sps[u1_seq_parameter_set_id].s_vui.u1_bitstream_restriction_flag) &&
  ------------------
  |  Branch (1564:8): [True: 0, False: 0]
  ------------------
 1565|      0|       ((WORD32) ps_dec->ps_sps[u1_seq_parameter_set_id].s_vui.u4_num_reorder_frames !=
  ------------------
  |  Branch (1565:8): [True: 0, False: 0]
  ------------------
 1566|      0|        num_reorder_frames))
 1567|      0|    {
 1568|      0|        ps_dec->u1_res_changed = 1;
 1569|      0|        return IVD_RES_CHANGED;
 1570|      0|    }
 1571|       |
 1572|       |    /* In case bitstream read has exceeded the filled size, then return an error */
 1573|  81.6k|    if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  81.6k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 346, False: 81.3k]
  |  |  ------------------
  ------------------
 1574|    346|    {
 1575|    346|        return ERROR_INV_SPS_PPS_T;
 1576|    346|    }
 1577|       |
 1578|       |    /*--------------------------------------------------------------------*/
 1579|       |    /* All initializations to ps_dec are beyond this point                */
 1580|       |    /*--------------------------------------------------------------------*/
 1581|  81.3k|    {
 1582|  81.3k|        WORD32 reorder_depth = ih264d_get_dpb_size(ps_seq);
 1583|  81.3k|        if((1 == ps_seq->u1_vui_parameters_present_flag) &&
  ------------------
  |  Branch (1583:12): [True: 5.95k, False: 75.3k]
  ------------------
 1584|  5.95k|           (1 == ps_seq->s_vui.u1_bitstream_restriction_flag))
  ------------------
  |  Branch (1584:12): [True: 1.24k, False: 4.71k]
  ------------------
 1585|  1.24k|        {
 1586|  1.24k|            reorder_depth = ps_seq->s_vui.u4_num_reorder_frames + 1;
 1587|  1.24k|        }
 1588|       |
 1589|  81.3k|        if(reorder_depth > H264_MAX_REF_PICS)
  ------------------
  |  |  534|  81.3k|#define H264_MAX_REF_PICS         16
  ------------------
  |  Branch (1589:12): [True: 66, False: 81.2k]
  ------------------
 1590|     66|        {
 1591|     66|            return ERROR_INV_SPS_PPS_T;
 1592|     66|        }
 1593|       |
 1594|  81.2k|        if(ps_seq->u1_frame_mbs_only_flag != 1) reorder_depth *= 2;
  ------------------
  |  Branch (1594:12): [True: 20.9k, False: 60.2k]
  ------------------
 1595|  81.2k|        ps_subset_seq->i4_reorder_depth = reorder_depth + DISPLAY_LATENCY;
  ------------------
  |  |   51|  81.2k|#define DISPLAY_LATENCY         2
  ------------------
 1596|  81.2k|    }
 1597|      0|    ps_subset_seq->u2_disp_height = i4_cropped_ht;
 1598|  81.2k|    ps_subset_seq->u2_disp_width = i4_cropped_wd;
 1599|  81.2k|    ps_subset_seq->u2_pic_wd = u2_pic_wd;
 1600|  81.2k|    ps_subset_seq->u2_pic_ht = u2_pic_ht;
 1601|       |
 1602|       |    /* Determining the Width and Height of Frame from that of Picture */
 1603|  81.2k|    ps_subset_seq->u2_frm_wd_y = u2_frm_wd_y;
 1604|  81.2k|    ps_subset_seq->u2_frm_ht_y = u2_frm_ht_y;
 1605|  81.2k|    ps_subset_seq->u2_frm_wd_uv = u2_frm_wd_uv;
 1606|  81.2k|    ps_subset_seq->u2_frm_ht_uv = u2_frm_ht_uv;
 1607|       |
 1608|  81.2k|    ps_subset_seq->u1_pad_len_y_v = (UWORD8) (PAD_LEN_Y_V << (1 - u1_frm));
  ------------------
  |  |  572|  81.2k|#define PAD_LEN_Y_V                   20
  ------------------
 1609|  81.2k|    ps_subset_seq->u1_pad_len_cr_v = (UWORD8) (PAD_LEN_UV_V << (1 - u1_frm));
  ------------------
  |  |  574|  81.2k|#define PAD_LEN_UV_V                  8
  ------------------
 1610|       |
 1611|  81.2k|    ps_subset_seq->u2_crop_offset_y = u2_crop_offset_y;
 1612|  81.2k|    ps_subset_seq->u2_crop_offset_uv = u2_crop_offset_uv;
 1613|       |
 1614|  81.2k|    ps_seq->u1_is_valid = TRUE;
  ------------------
  |  |  591|  81.2k|#define TRUE    1
  ------------------
 1615|  81.2k|    ps_dec->ps_sps[u1_seq_parameter_set_id] = *ps_seq;
 1616|  81.2k|    ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id] = *ps_subset_seq;
 1617|  81.2k|    ps_svc_lyr_dec->ps_cur_subset_sps = &ps_svc_lyr_dec->ps_subset_sps[u1_seq_parameter_set_id];
 1618|       |
 1619|  81.2k|    return OK;
  ------------------
  |  |  114|  81.2k|#define OK        0
  ------------------
 1620|  81.3k|}
isvcd_parse_pps:
 1634|  72.7k|{
 1635|  72.7k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1636|  72.7k|    UWORD8 uc_temp;
 1637|  72.7k|    dec_seq_params_t *ps_sps = NULL;
 1638|  72.7k|    dec_pic_params_t *ps_pps = NULL;
 1639|  72.7k|    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
 1640|  72.7k|    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
 1641|       |
 1642|       |    /* Variables used for error resilience checks */
 1643|  72.7k|    UWORD64 u8_temp;
 1644|  72.7k|    UWORD32 u4_temp;
 1645|  72.7k|    WORD32 i_temp;
 1646|       |
 1647|       |    /* For High profile related syntax elements */
 1648|  72.7k|    UWORD8 u1_more_data_flag;
 1649|  72.7k|    WORD32 i4_i;
 1650|       |
 1651|       |    /*--------------------------------------------------------------------*/
 1652|       |    /* Decode pic_parameter_set_id and find corresponding pic params      */
 1653|       |    /*--------------------------------------------------------------------*/
 1654|  72.7k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1655|  72.7k|    if(u4_temp & MASK_ERR_PIC_SET_ID) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  |  527|  72.7k|#define MASK_ERR_PIC_SET_ID   (0xFFFFFF00)
  ------------------
  |  Branch (1655:8): [True: 174, False: 72.6k]
  ------------------
 1656|  72.6k|    ps_pps = ps_dec->pv_scratch_sps_pps;
 1657|  72.6k|    *ps_pps = ps_dec->ps_pps[u4_temp];
 1658|  72.6k|    ps_pps->u1_pic_parameter_set_id = (UWORD8) u4_temp;
 1659|  72.6k|    COPYTHECONTEXT("PPS: pic_parameter_set_id", ps_pps->u1_pic_parameter_set_id);
 1660|       |
 1661|       |    /************************************************/
 1662|       |    /* initilization of High profile syntax element */
 1663|       |    /************************************************/
 1664|  72.6k|    ps_pps->i4_transform_8x8_mode_flag = 0;
 1665|  72.6k|    ps_pps->i4_pic_scaling_matrix_present_flag = 0;
 1666|       |
 1667|       |    /*--------------------------------------------------------------------*/
 1668|       |    /* Decode seq_parameter_set_id and map it to a seq_parameter_set      */
 1669|       |    /*--------------------------------------------------------------------*/
 1670|  72.6k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1671|  72.6k|    if(u4_temp & MASK_ERR_SEQ_SET_ID) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  |  526|  72.6k|#define MASK_ERR_SEQ_SET_ID   (0xFFFFFFE0)
  ------------------
  |  Branch (1671:8): [True: 217, False: 72.3k]
  ------------------
 1672|  72.3k|    COPYTHECONTEXT("PPS: seq_parameter_set_id", u4_temp);
 1673|  72.3k|    ps_sps = &ps_dec->ps_sps[u4_temp];
 1674|  72.3k|    ps_pps->ps_sps = ps_sps;
 1675|       |
 1676|       |    /*--------------------------------------------------------------------*/
 1677|       |    /* Decode entropy_coding_mode                                         */
 1678|       |    /*--------------------------------------------------------------------*/
 1679|  72.3k|    ps_pps->u1_entropy_coding_mode = ih264d_get_bit_h264(ps_bitstrm);
 1680|  72.3k|    COPYTHECONTEXT("PPS: entropy_coding_mode_flag", ps_pps->u1_entropy_coding_mode);
 1681|       |
 1682|  72.3k|    ps_pps->u1_pic_order_present_flag = ih264d_get_bit_h264(ps_bitstrm);
 1683|  72.3k|    COPYTHECONTEXT("PPS: pic_order_present_flag", ps_pps->u1_pic_order_present_flag);
 1684|       |
 1685|       |    /*--------------------------------------------------------------------*/
 1686|       |    /* Decode num_slice_groups_minus1                                     */
 1687|       |    /*--------------------------------------------------------------------*/
 1688|  72.3k|    u8_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + (UWORD64) 1;
 1689|  72.3k|    if(u8_temp != 1)
  ------------------
  |  Branch (1689:8): [True: 419, False: 71.9k]
  ------------------
 1690|    419|    {
 1691|    419|        return ERROR_FEATURE_UNAVAIL;
 1692|    419|    }
 1693|  71.9k|    ps_pps->u1_num_slice_groups = (UWORD8) u8_temp;
 1694|  71.9k|    COPYTHECONTEXT("PPS: num_slice_groups_minus1", ps_pps->u1_num_slice_groups - 1);
 1695|       |
 1696|       |    /*--------------------------------------------------------------------*/
 1697|       |    /* Other parameter set values                                         */
 1698|       |    /*--------------------------------------------------------------------*/
 1699|  71.9k|    u8_temp = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1700|  71.9k|    if(u8_temp >= H264_MAX_REF_IDX) return ERROR_REF_IDX;
  ------------------
  |  |  535|  71.9k|#define H264_MAX_REF_IDX          32
  ------------------
  |  Branch (1700:8): [True: 116, False: 71.8k]
  ------------------
 1701|  71.8k|    ps_pps->u1_num_ref_idx_lx_active[0] = (UWORD8) u8_temp;
 1702|  71.8k|    COPYTHECONTEXT("PPS: num_ref_idx_l0_active_minus1", ps_pps->u1_num_ref_idx_lx_active[0] - 1);
 1703|       |
 1704|  71.8k|    u8_temp = (UWORD64) 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1705|  71.8k|    if(u8_temp >= H264_MAX_REF_IDX) return ERROR_REF_IDX;
  ------------------
  |  |  535|  71.8k|#define H264_MAX_REF_IDX          32
  ------------------
  |  Branch (1705:8): [True: 291, False: 71.5k]
  ------------------
 1706|  71.5k|    ps_pps->u1_num_ref_idx_lx_active[1] = (UWORD8) u8_temp;
 1707|  71.5k|    COPYTHECONTEXT("PPS: num_ref_idx_l1_active_minus1", ps_pps->u1_num_ref_idx_lx_active[1] - 1);
 1708|       |
 1709|  71.5k|    ps_pps->u1_wted_pred_flag = ih264d_get_bit_h264(ps_bitstrm);
 1710|  71.5k|    COPYTHECONTEXT("PPS: weighted prediction u4_flag", ps_pps->u1_wted_pred_flag);
 1711|  71.5k|    uc_temp = (UWORD8) ih264d_get_bits_h264(ps_bitstrm, 2);
 1712|  71.5k|    COPYTHECONTEXT("PPS: weighted_bipred_idc", uc_temp);
 1713|  71.5k|    ps_pps->u1_wted_bipred_idc = uc_temp;
 1714|       |
 1715|  71.5k|    if(ps_pps->u1_wted_bipred_idc > MAX_WEIGHT_BIPRED_IDC) return ERROR_INV_SPS_PPS_T;
  ------------------
  |  |  536|  71.5k|#define MAX_WEIGHT_BIPRED_IDC     2
  ------------------
  |  Branch (1715:8): [True: 176, False: 71.3k]
  ------------------
 1716|       |
 1717|  71.3k|    {
 1718|  71.3k|        WORD64 i8_temp = (WORD64) 26 + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1719|       |
 1720|  71.3k|        if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  629|  71.3k|#define MIN_H264_QP 0
  ------------------
                      if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  634|  71.2k|#define MAX_H264_QP 51
  ------------------
  |  Branch (1720:12): [True: 124, False: 71.2k]
  |  Branch (1720:39): [True: 175, False: 71.0k]
  ------------------
 1721|       |
 1722|  71.0k|        ps_pps->u1_pic_init_qp = (UWORD8) i8_temp;
 1723|  71.0k|        COPYTHECONTEXT("PPS: pic_init_qp_minus26", ps_pps->u1_pic_init_qp - 26);
 1724|       |
 1725|  71.0k|        i8_temp = (WORD64) 26 + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1726|  71.0k|        if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  629|  71.0k|#define MIN_H264_QP 0
  ------------------
                      if((i8_temp < MIN_H264_QP) || (i8_temp > MAX_H264_QP)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  |  634|  70.9k|#define MAX_H264_QP 51
  ------------------
  |  Branch (1726:12): [True: 109, False: 70.9k]
  |  Branch (1726:39): [True: 105, False: 70.8k]
  ------------------
 1727|       |
 1728|  70.8k|        ps_pps->u1_pic_init_qs = (UWORD8) i8_temp;
 1729|  70.8k|        COPYTHECONTEXT("PPS: pic_init_qs_minus26", ps_pps->u1_pic_init_qs - 26);
 1730|  70.8k|    }
 1731|       |
 1732|      0|    i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1733|  70.8k|    if((i_temp < -12) || (i_temp > 12)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  Branch (1733:8): [True: 114, False: 70.7k]
  |  Branch (1733:26): [True: 124, False: 70.6k]
  ------------------
 1734|  70.6k|    ps_pps->i1_chroma_qp_index_offset = i_temp;
 1735|  70.6k|    COPYTHECONTEXT("PPS: chroma_qp_index_offset", ps_pps->i1_chroma_qp_index_offset);
 1736|       |
 1737|       |    /***************************************************************************/
 1738|       |    /* initialize second_chroma_qp_index_offset to i1_chroma_qp_index_offset if */
 1739|       |    /* second_chroma_qp_index_offset is not present in bit-ps_bitstrm */
 1740|       |    /***************************************************************************/
 1741|  70.6k|    ps_pps->i1_second_chroma_qp_index_offset = ps_pps->i1_chroma_qp_index_offset;
 1742|       |
 1743|  70.6k|    ps_pps->u1_deblocking_filter_parameters_present_flag = ih264d_get_bit_h264(ps_bitstrm);
 1744|  70.6k|    COPYTHECONTEXT("PPS: deblocking_filter_control_present_flag",
 1745|  70.6k|                   ps_pps->u1_deblocking_filter_parameters_present_flag);
 1746|  70.6k|    ps_pps->u1_constrained_intra_pred_flag = ih264d_get_bit_h264(ps_bitstrm);
 1747|  70.6k|    COPYTHECONTEXT("PPS: constrained_intra_pred_flag", ps_pps->u1_constrained_intra_pred_flag);
 1748|  70.6k|    ps_pps->u1_redundant_pic_cnt_present_flag = ih264d_get_bit_h264(ps_bitstrm);
 1749|  70.6k|    COPYTHECONTEXT("PPS: redundant_pic_cnt_present_flag",
 1750|  70.6k|                   ps_pps->u1_redundant_pic_cnt_present_flag);
 1751|       |
 1752|       |    /* High profile related syntax elements */
 1753|  70.6k|    u1_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
  ------------------
  |  |   97|  70.6k|    CHECK_BITS_SUFFICIENT(ps_bitstrm, 1)
  |  |  ------------------
  |  |  |  |   95|  70.6k|  (ps_bitstrm->u4_ofst + bits_to_read <= ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  ------------------
 1754|       |
 1755|  70.6k|    if(u1_more_data_flag)
  ------------------
  |  Branch (1755:8): [True: 67.4k, False: 3.19k]
  ------------------
 1756|  67.4k|    {
 1757|       |        /* read transform_8x8_mode_flag  */
 1758|  67.4k|        ps_pps->i4_transform_8x8_mode_flag = (WORD32) ih264d_get_bit_h264(ps_bitstrm);
 1759|       |
 1760|       |        /* read pic_scaling_matrix_present_flag */
 1761|  67.4k|        ps_pps->i4_pic_scaling_matrix_present_flag = (WORD32) ih264d_get_bit_h264(ps_bitstrm);
 1762|       |
 1763|  67.4k|        if(ps_pps->i4_pic_scaling_matrix_present_flag)
  ------------------
  |  Branch (1763:12): [True: 2.51k, False: 64.9k]
  ------------------
 1764|  2.51k|        {
 1765|       |            /* read the scaling matrices */
 1766|  18.2k|            for(i4_i = 0; i4_i < (6 + (ps_pps->i4_transform_8x8_mode_flag << 1)); i4_i++)
  ------------------
  |  Branch (1766:27): [True: 16.0k, False: 2.17k]
  ------------------
 1767|  16.0k|            {
 1768|  16.0k|                ps_pps->u1_pic_scaling_list_present_flag[i4_i] = ih264d_get_bit_h264(ps_bitstrm);
 1769|       |
 1770|  16.0k|                if(ps_pps->u1_pic_scaling_list_present_flag[i4_i])
  ------------------
  |  Branch (1770:20): [True: 3.28k, False: 12.7k]
  ------------------
 1771|  3.28k|                {
 1772|  3.28k|                    WORD32 ret;
 1773|  3.28k|                    if(i4_i < 6)
  ------------------
  |  Branch (1773:24): [True: 2.65k, False: 630]
  ------------------
 1774|  2.65k|                    {
 1775|  2.65k|                        ret = ih264d_scaling_list(
 1776|  2.65k|                            ps_pps->i2_pic_scalinglist4x4[i4_i], 16,
 1777|  2.65k|                            &ps_pps->u1_pic_use_default_scaling_matrix_flag[i4_i], ps_bitstrm);
 1778|  2.65k|                    }
 1779|    630|                    else
 1780|    630|                    {
 1781|    630|                        ret = ih264d_scaling_list(
 1782|    630|                            ps_pps->i2_pic_scalinglist8x8[i4_i - 6], 64,
 1783|    630|                            &ps_pps->u1_pic_use_default_scaling_matrix_flag[i4_i], ps_bitstrm);
 1784|    630|                    }
 1785|       |
 1786|  3.28k|                    if(ret != OK)
  ------------------
  |  |  114|  3.28k|#define OK        0
  ------------------
  |  Branch (1786:24): [True: 340, False: 2.94k]
  ------------------
 1787|    340|                    {
 1788|    340|                        return ret;
 1789|    340|                    }
 1790|  3.28k|                }
 1791|  16.0k|            }
 1792|  2.51k|        }
 1793|       |
 1794|       |        /* read second_chroma_qp_index_offset syntax element */
 1795|  67.0k|        i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1796|  67.0k|        if((i_temp < -12) || (i_temp > 12)) return ERROR_INV_RANGE_QP_T;
  ------------------
  |  Branch (1796:12): [True: 130, False: 66.9k]
  |  Branch (1796:30): [True: 212, False: 66.7k]
  ------------------
 1797|       |
 1798|  66.7k|        ps_pps->i1_second_chroma_qp_index_offset = i_temp;
 1799|  66.7k|    }
 1800|       |
 1801|  69.9k|    if(SCALABLE_BASELINE_PROFILE_IDC == ps_sps->u1_profile_idc)
  ------------------
  |  |   59|  69.9k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (1801:8): [True: 0, False: 69.9k]
  ------------------
 1802|       |
 1803|      0|    {
 1804|      0|        if(ps_pps->u1_num_slice_groups > 7)
  ------------------
  |  Branch (1804:12): [True: 0, False: 0]
  ------------------
 1805|      0|        {
 1806|      0|            return ERROR_INV_SPS_PPS_T;
 1807|      0|        }
 1808|      0|    }
 1809|       |
 1810|       |    /* In case bitstream read has exceeded the filled size, then return an error */
 1811|  69.9k|    if(EXCEED_OFFSET(ps_bitstrm))
  ------------------
  |  |   93|  69.9k|  (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
  |  |  ------------------
  |  |  |  Branch (93:3): [True: 90, False: 69.8k]
  |  |  ------------------
  ------------------
 1812|     90|    {
 1813|     90|        return ERROR_INV_SPS_PPS_T;
 1814|     90|    }
 1815|  69.8k|    ps_pps->u1_is_valid = TRUE;
  ------------------
  |  |  591|  69.8k|#define TRUE    1
  ------------------
 1816|  69.8k|    ps_dec->ps_pps[ps_pps->u1_pic_parameter_set_id] = *ps_pps;
 1817|  69.8k|    return OK;
  ------------------
  |  |  114|  69.8k|#define OK        0
  ------------------
 1818|  69.9k|}

isvcd_verify_level:
   95|   143k|{
   96|   143k|    switch(u1_level_idc)
   97|   143k|    {
   98|    497|        case H264_LEVEL_1_0:
  ------------------
  |  |  291|    497|#define H264_LEVEL_1_0     10
  ------------------
  |  Branch (98:9): [True: 497, False: 143k]
  ------------------
   99|  35.8k|        case H264_LEVEL_1_1:
  ------------------
  |  |  292|  35.8k|#define H264_LEVEL_1_1     11
  ------------------
  |  Branch (99:9): [True: 35.3k, False: 108k]
  ------------------
  100|  39.8k|        case H264_LEVEL_1_2:
  ------------------
  |  |  293|  39.8k|#define H264_LEVEL_1_2     12
  ------------------
  |  Branch (100:9): [True: 4.03k, False: 139k]
  ------------------
  101|  41.5k|        case H264_LEVEL_1_3:
  ------------------
  |  |  294|  41.5k|#define H264_LEVEL_1_3     13
  ------------------
  |  Branch (101:9): [True: 1.69k, False: 141k]
  ------------------
  102|  92.0k|        case H264_LEVEL_2_0:
  ------------------
  |  |  295|  92.0k|#define H264_LEVEL_2_0     20
  ------------------
  |  Branch (102:9): [True: 50.5k, False: 93.1k]
  ------------------
  103|  99.9k|        case H264_LEVEL_2_1:
  ------------------
  |  |  296|  99.9k|#define H264_LEVEL_2_1     21
  ------------------
  |  Branch (103:9): [True: 7.87k, False: 135k]
  ------------------
  104|   100k|        case H264_LEVEL_2_2:
  ------------------
  |  |  297|   100k|#define H264_LEVEL_2_2     22
  ------------------
  |  Branch (104:9): [True: 1.03k, False: 142k]
  ------------------
  105|   104k|        case H264_LEVEL_3_0:
  ------------------
  |  |  298|   104k|#define H264_LEVEL_3_0     30
  ------------------
  |  Branch (105:9): [True: 3.42k, False: 140k]
  ------------------
  106|   105k|        case H264_LEVEL_3_1:
  ------------------
  |  |  299|   105k|#define H264_LEVEL_3_1     31
  ------------------
  |  Branch (106:9): [True: 1.56k, False: 142k]
  ------------------
  107|   123k|        case H264_LEVEL_3_2:
  ------------------
  |  |  300|   123k|#define H264_LEVEL_3_2     32
  ------------------
  |  Branch (107:9): [True: 17.4k, False: 126k]
  ------------------
  108|   137k|        case H264_LEVEL_4_0:
  ------------------
  |  |  301|   137k|#define H264_LEVEL_4_0     40
  ------------------
  |  Branch (108:9): [True: 14.0k, False: 129k]
  ------------------
  109|   139k|        case H264_LEVEL_4_1:
  ------------------
  |  |  302|   139k|#define H264_LEVEL_4_1     41
  ------------------
  |  Branch (109:9): [True: 1.91k, False: 141k]
  ------------------
  110|   143k|        case H264_LEVEL_4_2:
  ------------------
  |  |  303|   143k|#define H264_LEVEL_4_2     42
  ------------------
  |  Branch (110:9): [True: 4.03k, False: 139k]
  ------------------
  111|   143k|        case H264_LEVEL_5_0:
  ------------------
  |  |  304|   143k|#define H264_LEVEL_5_0     50
  ------------------
  |  Branch (111:9): [True: 0, False: 143k]
  ------------------
  112|   143k|        case H264_LEVEL_5_1:
  ------------------
  |  |  305|   143k|#define H264_LEVEL_5_1     51
  ------------------
  |  Branch (112:9): [True: 0, False: 143k]
  ------------------
  113|   143k|            return OK;
  ------------------
  |  |  114|   143k|#define OK        0
  ------------------
  114|    243|        default:
  ------------------
  |  Branch (114:9): [True: 243, False: 143k]
  ------------------
  115|    243|            return NOT_OK;
  ------------------
  |  |  116|    243|#define NOT_OK    -1
  ------------------
  116|   143k|    }
  117|   143k|}
isvcd_start_of_pic:
  133|   135k|{
  134|   135k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  135|   135k|    pocstruct_t *ps_prev_poc = &ps_dec->s_cur_pic_poc;
  136|   135k|    pocstruct_t *ps_cur_poc = ps_temp_poc;
  137|       |
  138|   135k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
  139|   135k|    dec_seq_params_t *ps_seq = ps_dec->ps_cur_sps;
  140|   135k|    UWORD8 u1_bottom_field_flag = ps_cur_slice->u1_bottom_field_flag;
  141|   135k|    UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
  142|       |    /* high profile related declarations */
  143|   135k|    WORD32 ret;
  144|       |
  145|   135k|    H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
  146|       |
  147|   135k|    if(u1_field_pic_flag == 1)
  ------------------
  |  Branch (147:8): [True: 0, False: 135k]
  ------------------
  148|      0|    {
  149|      0|        ps_dec->i4_error_code = ERROR_SVC_FIELD_PIC_UNSUPPORTED;
  150|      0|        return ERROR_SVC_FIELD_PIC_UNSUPPORTED;
  151|      0|    }
  152|       |
  153|       |    /* check output buffer size given by the application */
  154|   135k|    if(check_app_out_buf_size(ps_dec) != IV_SUCCESS) return IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
  ------------------
  |  Branch (154:8): [True: 1.37k, False: 134k]
  ------------------
  155|       |
  156|   134k|    ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb;
  157|   134k|    ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb;
  158|   134k|    ps_prev_poc->i4_delta_pic_order_cnt_bottom = ps_cur_poc->i4_delta_pic_order_cnt_bottom;
  159|   134k|    ps_prev_poc->i4_delta_pic_order_cnt[0] = ps_cur_poc->i4_delta_pic_order_cnt[0];
  160|   134k|    ps_prev_poc->i4_delta_pic_order_cnt[1] = ps_cur_poc->i4_delta_pic_order_cnt[1];
  161|   134k|    ps_prev_poc->u1_bot_field = ps_dec->ps_cur_slice->u1_bottom_field_flag;
  162|   134k|    ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst;
  163|   134k|    ps_prev_poc->u2_frame_num = u2_frame_num;
  164|   134k|    ps_dec->i1_prev_mb_qp_delta = 0;
  165|   134k|    ps_dec->i1_next_ctxt_idx = 0;
  166|       |
  167|   134k|    ps_dec->u4_nmb_deblk = 0;
  168|   134k|    if(ps_dec->u4_num_cores == 1) ps_dec->u4_nmb_deblk = 1;
  ------------------
  |  Branch (168:8): [True: 82.5k, False: 51.5k]
  ------------------
  169|       |
  170|   134k|    if(ps_seq->u1_mb_aff_flag == 1)
  ------------------
  |  Branch (170:8): [True: 0, False: 134k]
  ------------------
  171|      0|    {
  172|      0|        ps_dec->u4_nmb_deblk = 0;
  173|      0|        if(ps_dec->u4_num_cores > 2) ps_dec->u4_num_cores = 2;
  ------------------
  |  Branch (173:12): [True: 0, False: 0]
  ------------------
  174|      0|    }
  175|       |
  176|   134k|    ps_dec->u4_use_intrapred_line_copy = 0;
  177|       |
  178|   134k|    if(ps_seq->u1_mb_aff_flag == 0)
  ------------------
  |  Branch (178:8): [True: 134k, False: 0]
  ------------------
  179|   134k|    {
  180|   134k|        ps_dec->u4_use_intrapred_line_copy = 1;
  181|   134k|    }
  182|       |
  183|   134k|    ps_dec->u4_app_disable_deblk_frm = 0;
  184|       |    /* If degrade is enabled, set the degrade flags appropriately */
  185|   134k|    if(ps_dec->i4_degrade_type && ps_dec->i4_degrade_pics)
  ------------------
  |  Branch (185:8): [True: 0, False: 134k]
  |  Branch (185:35): [True: 0, False: 0]
  ------------------
  186|      0|    {
  187|      0|        WORD32 degrade_pic;
  188|      0|        ps_dec->i4_degrade_pic_cnt++;
  189|      0|        degrade_pic = 0;
  190|       |
  191|       |        /* If degrade is to be done in all frames, then do not check further */
  192|      0|        switch(ps_dec->i4_degrade_pics)
  ------------------
  |  Branch (192:16): [True: 0, False: 0]
  ------------------
  193|      0|        {
  194|      0|            case 4:
  ------------------
  |  Branch (194:13): [True: 0, False: 0]
  ------------------
  195|      0|            {
  196|      0|                degrade_pic = 1;
  197|      0|                break;
  198|      0|            }
  199|      0|            case 3:
  ------------------
  |  Branch (199:13): [True: 0, False: 0]
  ------------------
  200|      0|            {
  201|      0|                if(ps_cur_slice->u1_slice_type != I_SLICE) degrade_pic = 1;
  ------------------
  |  |  370|      0|#define I_SLICE  2
  ------------------
  |  Branch (201:20): [True: 0, False: 0]
  ------------------
  202|       |
  203|      0|                break;
  204|      0|            }
  205|      0|            case 2:
  ------------------
  |  Branch (205:13): [True: 0, False: 0]
  ------------------
  206|      0|            {
  207|       |                /* If pic count hits non-degrade interval or it is an islice, then do not
  208|       |                 * degrade */
  209|      0|                if((ps_cur_slice->u1_slice_type != I_SLICE) &&
  ------------------
  |  |  370|      0|#define I_SLICE  2
  ------------------
  |  Branch (209:20): [True: 0, False: 0]
  ------------------
  210|      0|                   (ps_dec->i4_degrade_pic_cnt != ps_dec->i4_nondegrade_interval))
  ------------------
  |  Branch (210:20): [True: 0, False: 0]
  ------------------
  211|      0|                    degrade_pic = 1;
  212|       |
  213|      0|                break;
  214|      0|            }
  215|      0|            case 1:
  ------------------
  |  Branch (215:13): [True: 0, False: 0]
  ------------------
  216|      0|            {
  217|       |                /* Check if the current picture is non-ref */
  218|      0|                if(0 == ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (218:20): [True: 0, False: 0]
  ------------------
  219|      0|                {
  220|      0|                    degrade_pic = 1;
  221|      0|                }
  222|      0|                break;
  223|      0|            }
  224|      0|        }
  225|      0|        if(degrade_pic)
  ------------------
  |  Branch (225:12): [True: 0, False: 0]
  ------------------
  226|      0|        {
  227|      0|            if(ps_dec->i4_degrade_type & 0x2) ps_dec->u4_app_disable_deblk_frm = 1;
  ------------------
  |  Branch (227:16): [True: 0, False: 0]
  ------------------
  228|       |
  229|       |            /* MC degrading is done only for non-ref pictures */
  230|      0|            if(0 == ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (230:16): [True: 0, False: 0]
  ------------------
  231|      0|            {
  232|      0|                if(ps_dec->i4_degrade_type & 0x4) ps_dec->i4_mv_frac_mask = 0;
  ------------------
  |  Branch (232:20): [True: 0, False: 0]
  ------------------
  233|       |
  234|      0|                if(ps_dec->i4_degrade_type & 0x8) ps_dec->i4_mv_frac_mask = 0;
  ------------------
  |  Branch (234:20): [True: 0, False: 0]
  ------------------
  235|      0|            }
  236|      0|        }
  237|      0|        else
  238|      0|            ps_dec->i4_degrade_pic_cnt = 0;
  239|      0|    }
  240|       |
  241|   134k|    {
  242|   134k|        dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
  243|   134k|        if((ps_cur_slice->u1_slice_type == I_SLICE) || (ps_cur_slice->u1_slice_type == SI_SLICE))
  ------------------
  |  |  370|   134k|#define I_SLICE  2
  ------------------
                      if((ps_cur_slice->u1_slice_type == I_SLICE) || (ps_cur_slice->u1_slice_type == SI_SLICE))
  ------------------
  |  |  372|   121k|#define SI_SLICE 4
  ------------------
  |  Branch (243:12): [True: 12.1k, False: 121k]
  |  Branch (243:56): [True: 285, False: 121k]
  ------------------
  244|  12.4k|            ps_err->u1_cur_pic_type = PIC_TYPE_I;
  ------------------
  |  |  609|  12.4k|#define PIC_TYPE_I        (0x00)
  ------------------
  245|   121k|        else
  246|   121k|            ps_err->u1_cur_pic_type = PIC_TYPE_UNKNOWN;
  ------------------
  |  |  608|   121k|#define PIC_TYPE_UNKNOWN  (0xFF)
  ------------------
  247|       |
  248|   134k|        if(ps_err->u1_pic_aud_i == PIC_TYPE_I)
  ------------------
  |  |  609|   134k|#define PIC_TYPE_I        (0x00)
  ------------------
  |  Branch (248:12): [True: 0, False: 134k]
  ------------------
  249|      0|        {
  250|      0|            ps_err->u1_cur_pic_type = PIC_TYPE_I;
  ------------------
  |  |  609|      0|#define PIC_TYPE_I        (0x00)
  ------------------
  251|      0|            ps_err->u1_pic_aud_i = PIC_TYPE_UNKNOWN;
  ------------------
  |  |  608|      0|#define PIC_TYPE_UNKNOWN  (0xFF)
  ------------------
  252|      0|        }
  253|       |
  254|   134k|        if(ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL)
  ------------------
  |  |  328|   134k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (254:12): [True: 111k, False: 22.4k]
  ------------------
  255|   111k|        {
  256|   111k|            if(ps_err->u1_err_flag) ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
  ------------------
  |  Branch (256:16): [True: 0, False: 111k]
  ------------------
  257|   111k|            ps_err->u1_err_flag = ACCEPT_ALL_PICS;
  ------------------
  |  |  601|   111k|#define ACCEPT_ALL_PICS   (0x00)
  ------------------
  258|   111k|        }
  259|   134k|    }
  260|       |
  261|   134k|    if(ps_dec->u1_init_dec_flag && ps_dec->s_prev_seq_params.u1_eoseq_pending)
  ------------------
  |  Branch (261:8): [True: 109k, False: 24.4k]
  |  Branch (261:36): [True: 0, False: 109k]
  ------------------
  262|      0|    {
  263|       |        /* Reset the decoder picture buffers */
  264|      0|        WORD32 j;
  265|      0|        for(j = 0; j < MAX_DISP_BUFS_NEW; j++)
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (265:20): [True: 0, False: 0]
  ------------------
  266|      0|        {
  267|      0|            ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, j, BUF_MGR_REF);
  ------------------
  |  |   50|      0|#define BUF_MGR_REF          (1 << 2)
  ------------------
  268|      0|            ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_mv_buf_mgr,
  269|      0|                                  ps_dec->as_buf_id_info_map[j].mv_buf_id, BUF_MGR_REF);
  ------------------
  |  |   50|      0|#define BUF_MGR_REF          (1 << 2)
  ------------------
  270|      0|            ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, j, BUF_MGR_IO);
  ------------------
  |  |   53|      0|#define BUF_MGR_IO           (1 << 3)
  ------------------
  271|      0|        }
  272|       |
  273|       |        /* reset the decoder structure parameters related to buffer handling */
  274|      0|        ps_dec->u1_second_field = 0;
  275|      0|        ps_dec->i4_cur_display_seq = 0;
  276|       |
  277|       |        /********************************************************************/
  278|       |        /* indicate in the decoder output i4_status that some frames are being */
  279|       |        /* dropped, so that it resets timestamp and wait for a new sequence */
  280|       |        /********************************************************************/
  281|      0|        ps_dec->s_prev_seq_params.u1_eoseq_pending = 0;
  282|      0|    }
  283|   134k|    ret = isvcd_init_pic(ps_svc_lyr_dec, u2_frame_num, i4_poc, ps_pps);
  284|   134k|    if(ret != OK) return ret;
  ------------------
  |  |  114|   134k|#define OK        0
  ------------------
  |  Branch (284:8): [True: 0, False: 134k]
  ------------------
  285|       |
  286|   134k|    ps_dec->pv_parse_tu_coeff_data = ps_dec->pv_pic_tu_coeff_data;
  287|   134k|    ps_dec->pv_proc_tu_coeff_data = ps_dec->pv_pic_tu_coeff_data;
  288|   134k|    ps_dec->ps_nmb_info = ps_dec->ps_frm_mb_info;
  289|   134k|    ps_svc_lyr_dec->ps_svc_nmb_info = ps_svc_lyr_dec->ps_svc_frm_mb_info;
  290|   134k|    if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (290:8): [True: 51.5k, False: 82.5k]
  ------------------
  291|  51.5k|    {
  292|  51.5k|        UWORD32 num_mbs;
  293|  51.5k|        num_mbs = ps_dec->ps_cur_sps->u4_total_num_of_mbs
  294|  51.5k|                  << (1 - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag);
  295|       |
  296|  51.5k|        if(ps_dec->pu1_dec_mb_map)
  ------------------
  |  Branch (296:12): [True: 51.5k, False: 0]
  ------------------
  297|  51.5k|        {
  298|  51.5k|            memset((void *) ps_dec->pu1_dec_mb_map, 0, num_mbs);
  299|  51.5k|        }
  300|       |
  301|  51.5k|        if(ps_dec->pu1_recon_mb_map)
  ------------------
  |  Branch (301:12): [True: 51.5k, False: 0]
  ------------------
  302|  51.5k|        {
  303|  51.5k|            memset((void *) ps_dec->pu1_recon_mb_map, 0, num_mbs);
  304|  51.5k|        }
  305|       |
  306|  51.5k|        if(ps_dec->pu2_slice_num_map)
  ------------------
  |  Branch (306:12): [True: 51.5k, False: 0]
  ------------------
  307|  51.5k|        {
  308|  51.5k|            memset((void *) ps_dec->pu2_slice_num_map, 0, (num_mbs * sizeof(UWORD16)));
  309|  51.5k|        }
  310|  51.5k|    }
  311|       |
  312|   134k|    ps_dec->ps_parse_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
  313|   134k|    ps_dec->ps_decode_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
  314|   134k|    ps_dec->ps_computebs_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
  315|   134k|    ps_dec->u2_cur_slice_num = 0;
  316|       |
  317|       |    /* Initialize all the HP toolsets to zero */
  318|   134k|    ps_dec->s_high_profile.u1_scaling_present = 0;
  319|   134k|    ps_dec->s_high_profile.u1_transform8x8_present = 0;
  320|       |
  321|       |    /* Get Next Free Picture */
  322|   134k|    if(1 == ps_dec->u4_share_disp_buf)
  ------------------
  |  Branch (322:8): [True: 0, False: 134k]
  ------------------
  323|      0|    {
  324|      0|        UWORD32 i;
  325|       |        /* Free any buffer that is in the queue to be freed */
  326|      0|        for(i = 0; i < MAX_DISP_BUFS_NEW; i++)
  ------------------
  |  |   76|      0|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (326:20): [True: 0, False: 0]
  ------------------
  327|      0|        {
  328|      0|            if(0 == ps_dec->u4_disp_buf_to_be_freed[i]) continue;
  ------------------
  |  Branch (328:16): [True: 0, False: 0]
  ------------------
  329|      0|            ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, i, BUF_MGR_IO);
  ------------------
  |  |   53|      0|#define BUF_MGR_IO           (1 << 3)
  ------------------
  330|      0|            ps_dec->u4_disp_buf_to_be_freed[i] = 0;
  331|      0|            ps_dec->u4_disp_buf_mapping[i] = 0;
  332|      0|        }
  333|      0|    }
  334|   134k|    if(!(u1_field_pic_flag && 0 != ps_dec->u1_top_bottom_decoded))
  ------------------
  |  Branch (334:10): [True: 0, False: 134k]
  |  Branch (334:31): [True: 0, False: 0]
  ------------------
  335|   134k|    {
  336|   134k|        pic_buffer_t *ps_cur_pic;
  337|   134k|        WORD32 cur_pic_buf_id, cur_mv_buf_id;
  338|   134k|        col_mv_buf_t *ps_col_mv;
  339|   134k|        while(1)
  ------------------
  |  Branch (339:15): [True: 134k, Folded]
  ------------------
  340|   134k|        {
  341|   134k|            ps_cur_pic = (pic_buffer_t *) ih264_buf_mgr_get_next_free(
  342|   134k|                (buf_mgr_t *) ps_dec->pv_pic_buf_mgr, &cur_pic_buf_id);
  343|       |
  344|       |            /* In case of IDR slices, if there is no free picture buffer, then release
  345|       |             * all buffers from display and reference
  346|       |             */
  347|   134k|            if((ps_cur_pic == NULL) && (ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL))
  ------------------
  |  |  328|  1.02k|#define IDR_SLICE_NAL                   5
  ------------------
  |  Branch (347:16): [True: 1.02k, False: 133k]
  |  Branch (347:40): [True: 905, False: 121]
  ------------------
  348|    905|            {
  349|    905|                WORD32 j;
  350|       |
  351|  58.8k|                for(j = 0; j < MAX_DISP_BUFS_NEW; j++)
  ------------------
  |  |   76|  58.8k|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (351:28): [True: 57.9k, False: 905]
  ------------------
  352|  57.9k|                {
  353|  57.9k|                    ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, j, BUF_MGR_REF);
  ------------------
  |  |   50|  57.9k|#define BUF_MGR_REF          (1 << 2)
  ------------------
  354|  57.9k|                    ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_mv_buf_mgr,
  355|  57.9k|                                          ps_dec->as_buf_id_info_map[j].mv_buf_id, BUF_MGR_REF);
  ------------------
  |  |   50|  57.9k|#define BUF_MGR_REF          (1 << 2)
  ------------------
  356|       |
  357|  57.9k|                    ih264_buf_mgr_release((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, j, BUF_MGR_IO);
  ------------------
  |  |   53|  57.9k|#define BUF_MGR_IO           (1 << 3)
  ------------------
  358|  57.9k|                }
  359|    905|                ps_cur_pic = (pic_buffer_t *) ih264_buf_mgr_get_next_free(
  360|    905|                    (buf_mgr_t *) ps_dec->pv_pic_buf_mgr, &cur_pic_buf_id);
  361|    905|            }
  362|   134k|            if(ps_cur_pic == NULL)
  ------------------
  |  Branch (362:16): [True: 121, False: 133k]
  ------------------
  363|    121|            {
  364|    121|                ps_dec->i4_error_code = ERROR_UNAVAIL_PICBUF_T;
  365|    121|                ps_dec->ps_dec_err_status->u1_err_flag |= REJECT_CUR_PIC;
  ------------------
  |  |  602|    121|#define REJECT_CUR_PIC    (0x01)
  ------------------
  366|    121|                return ERROR_UNAVAIL_PICBUF_T;
  367|    121|            }
  368|   133k|            if(0 == ps_dec->u4_disp_buf_mapping[cur_pic_buf_id])
  ------------------
  |  Branch (368:16): [True: 133k, False: 0]
  ------------------
  369|   133k|            {
  370|   133k|                break;
  371|   133k|            }
  372|   133k|        }
  373|   133k|        ps_col_mv = (col_mv_buf_t *) ih264_buf_mgr_get_next_free(
  374|   133k|            (buf_mgr_t *) ps_dec->pv_mv_buf_mgr, &cur_mv_buf_id);
  375|   133k|        if(ps_col_mv == NULL)
  ------------------
  |  Branch (375:12): [True: 580, False: 133k]
  ------------------
  376|    580|        {
  377|    580|            ps_dec->i4_error_code = ERROR_UNAVAIL_MVBUF_T;
  378|    580|            ps_dec->ps_dec_err_status->u1_err_flag |= REJECT_CUR_PIC;
  ------------------
  |  |  602|    580|#define REJECT_CUR_PIC    (0x01)
  ------------------
  379|    580|            return ERROR_UNAVAIL_MVBUF_T;
  380|    580|        }
  381|       |
  382|   133k|        ps_dec->ps_cur_pic = ps_cur_pic;
  383|   133k|        ps_dec->u1_pic_buf_id = cur_pic_buf_id;
  384|   133k|        ps_cur_pic->u4_ts = ps_dec->u4_ts;
  385|   133k|        memcpy(&ps_cur_pic->s_sei_pic, ps_dec->ps_sei, sizeof(sei));
  386|       |
  387|   133k|        ps_cur_pic->u1_mv_buf_id = cur_mv_buf_id;
  388|   133k|        ps_dec->as_buf_id_info_map[cur_pic_buf_id].mv_buf_id = cur_mv_buf_id;
  389|       |
  390|   133k|        if(ps_dec->u1_enable_mb_info)
  ------------------
  |  Branch (390:12): [True: 0, False: 133k]
  ------------------
  391|      0|        {
  392|      0|            UWORD32 mb_info_map_size = ps_dec->u4_total_mbs << 2;
  393|      0|            ps_dec->as_buf_id_info_map[cur_pic_buf_id].pu1_qp_map =
  394|      0|                ps_dec->pu1_qp_map_base + cur_pic_buf_id * mb_info_map_size;
  395|      0|            ps_dec->as_buf_id_info_map[cur_pic_buf_id].pu1_mb_type_map =
  396|      0|                ps_dec->pu1_mb_type_map_base + cur_pic_buf_id * mb_info_map_size;
  397|      0|            memset(ps_dec->as_buf_id_info_map[cur_pic_buf_id].pu1_qp_map, 0, mb_info_map_size);
  398|      0|            memset(ps_dec->as_buf_id_info_map[cur_pic_buf_id].pu1_mb_type_map, 0, mb_info_map_size);
  399|      0|        }
  400|   133k|        ps_cur_pic->pu1_col_zero_flag = (UWORD8 *) ps_col_mv->pv_col_zero_flag;
  401|   133k|        ps_cur_pic->ps_mv = (mv_pred_t *) ps_col_mv->pv_mv;
  402|   133k|        ps_dec->au1_pic_buf_ref_flag[cur_pic_buf_id] = 0;
  403|       |
  404|   133k|        {
  405|       |            /*make first entry of list0 and list1 point to cur pic,
  406|       |             *so that if first slice is in error, ref pic struct will have valid
  407|       |             *entries*/
  408|   133k|            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_init_dpb[0];
  409|   133k|            ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_init_dpb[1];
  410|   133k|            *(ps_dec->ps_dpb_mgr->ps_init_dpb[0][0]) = *ps_cur_pic;
  411|       |            /* Initialize for field reference as well */
  412|   133k|            *(ps_dec->ps_dpb_mgr->ps_init_dpb[0][MAX_REF_BUFS]) = *ps_cur_pic;
  ------------------
  |  |   75|   133k|#define MAX_REF_BUFS    32
  ------------------
  413|       |
  414|   133k|            *(ps_dec->ps_dpb_mgr->ps_mod_dpb[0][0]) = *ps_cur_pic;
  415|       |            /* Initialize for field reference as well */
  416|   133k|            *(ps_dec->ps_dpb_mgr->ps_mod_dpb[0][MAX_REF_BUFS]) = *ps_cur_pic;
  ------------------
  |  |   75|   133k|#define MAX_REF_BUFS    32
  ------------------
  417|   133k|            *(ps_dec->ps_dpb_mgr->ps_init_dpb[1][0]) = *ps_cur_pic;
  418|       |            /* Initialize for field reference as well */
  419|   133k|            *(ps_dec->ps_dpb_mgr->ps_init_dpb[1][MAX_REF_BUFS]) = *ps_cur_pic;
  ------------------
  |  |   75|   133k|#define MAX_REF_BUFS    32
  ------------------
  420|   133k|            *(ps_dec->ps_dpb_mgr->ps_mod_dpb[1][0]) = *ps_cur_pic;
  421|       |            /* Initialize for field reference as well */
  422|   133k|            *(ps_dec->ps_dpb_mgr->ps_mod_dpb[1][MAX_REF_BUFS]) = *ps_cur_pic;
  ------------------
  |  |   75|   133k|#define MAX_REF_BUFS    32
  ------------------
  423|   133k|        }
  424|       |
  425|   133k|        ps_dec->ps_cur_pic->u1_picturetype = u1_field_pic_flag;
  426|   133k|        ps_dec->ps_cur_pic->u4_pack_slc_typ = SKIP_NONE;
  ------------------
  |  |  375|   133k|#define SKIP_NONE  (0x0)
  ------------------
  427|   133k|        H264_DEC_DEBUG_PRINT("got a buffer\n");
  ------------------
  |  |   39|   133k|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  428|   133k|    }
  429|      0|    else
  430|      0|    {
  431|      0|        H264_DEC_DEBUG_PRINT("did not get a buffer\n");
  ------------------
  |  |   39|      0|#define H264_DEC_DEBUG_PRINT(...) {}
  ------------------
  432|      0|    }
  433|       |
  434|   133k|    ps_dec->u4_pic_buf_got = 1;
  435|       |
  436|   133k|    ps_dec->ps_cur_pic->i4_poc = i4_poc;
  437|   133k|    ps_dec->ps_cur_pic->i4_frame_num = u2_frame_num;
  438|   133k|    ps_dec->ps_cur_pic->i4_pic_num = u2_frame_num;
  439|   133k|    ps_dec->ps_cur_pic->i4_top_field_order_cnt = ps_pps->i4_top_field_order_cnt;
  440|   133k|    ps_dec->ps_cur_pic->i4_bottom_field_order_cnt = ps_pps->i4_bottom_field_order_cnt;
  441|   133k|    ps_dec->ps_cur_pic->i4_avg_poc = ps_pps->i4_avg_poc;
  442|   133k|    ps_dec->ps_cur_pic->u4_time_stamp = ps_dec->u4_pts;
  443|       |
  444|   133k|    ps_dec->s_cur_pic = *(ps_dec->ps_cur_pic);
  445|   133k|    if(u1_field_pic_flag && u1_bottom_field_flag)
  ------------------
  |  Branch (445:8): [True: 0, False: 133k]
  |  Branch (445:29): [True: 0, False: 0]
  ------------------
  446|      0|    {
  447|      0|        WORD32 i4_temp_poc;
  448|      0|        WORD32 i4_top_field_order_poc, i4_bot_field_order_poc;
  449|       |        /* Point to odd lines, since it's bottom field */
  450|      0|        ps_dec->s_cur_pic.pu1_buf1 += ps_dec->s_cur_pic.u2_frm_wd_y;
  451|      0|        ps_dec->s_cur_pic.pu1_buf2 += ps_dec->s_cur_pic.u2_frm_wd_uv;
  452|      0|        ps_dec->s_cur_pic.pu1_buf3 += ps_dec->s_cur_pic.u2_frm_wd_uv;
  453|      0|        ps_dec->s_cur_pic.ps_mv += ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5);
  454|      0|        ps_dec->s_cur_pic.pu1_col_zero_flag += ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5);
  455|      0|        ps_dec->ps_cur_pic->u1_picturetype |= BOT_FLD;
  ------------------
  |  |  354|      0|#define BOT_FLD         0x02
  ------------------
  456|      0|        i4_top_field_order_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
  457|      0|        i4_bot_field_order_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
  458|      0|        i4_temp_poc = MIN(i4_top_field_order_poc, i4_bot_field_order_poc);
  ------------------
  |  |   61|      0|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  459|      0|        ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc;
  460|      0|    }
  461|       |
  462|   133k|    ps_cur_slice->u1_mbaff_frame_flag = ps_seq->u1_mb_aff_flag && (!u1_field_pic_flag);
  ------------------
  |  Branch (462:41): [True: 0, False: 133k]
  |  Branch (462:67): [True: 0, False: 0]
  ------------------
  463|   133k|    ps_dec->ps_cur_pic->u1_picturetype |= (ps_cur_slice->u1_mbaff_frame_flag << 2);
  464|       |
  465|   133k|    ps_dec->ps_cur_mb_row = ps_dec->ps_nbr_mb_row;
  466|   133k|    ps_dec->ps_cur_mb_row += 2;
  467|   133k|    ps_dec->ps_top_mb_row = ps_dec->ps_nbr_mb_row;
  468|   133k|    ps_dec->ps_top_mb_row +=
  469|   133k|        ((ps_dec->u2_frm_wd_in_mbs + 2) << (1 - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag));
  470|       |    // Increment by 2 ,so that left mb (mbaff decrements by 2)  will always be valid
  471|   133k|    ps_dec->ps_top_mb_row += 2;
  472|   133k|    ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv;
  473|   133k|    ps_dec->ps_mv_top = ps_dec->ps_mv_top_p[0];
  474|   133k|    ps_dec->u1_mv_top_p = 0;
  475|   133k|    ps_dec->u4_mb_idx = 0;
  476|   133k|    ps_dec->ps_mv_left = ps_dec->s_cur_pic.ps_mv;
  477|   133k|    ps_dec->u4_total_mbs_coded = 0;
  478|   133k|    ps_dec->i4_submb_ofst = -(SUB_BLK_SIZE);
  ------------------
  |  |  562|   133k|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   133k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   133k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
  479|   133k|    ps_dec->u4_pred_info_idx = 0;
  480|   133k|    ps_dec->u4_pred_info_pkd_idx = 0;
  481|   133k|    ps_dec->u4_dma_buf_idx = 0;
  482|   133k|    ps_dec->ps_mv = ps_dec->s_cur_pic.ps_mv;
  483|   133k|    ps_dec->ps_mv_bank_cur = ps_dec->s_cur_pic.ps_mv;
  484|   133k|    ps_dec->pu1_col_zero_flag = ps_dec->s_cur_pic.pu1_col_zero_flag;
  485|   133k|    ps_dec->ps_part = ps_dec->ps_parse_part_params;
  486|   133k|    ps_dec->i2_prev_slice_mbx = -1;
  487|   133k|    ps_dec->i2_prev_slice_mby = 0;
  488|   133k|    ps_dec->u2_mv_2mb[0] = 0;
  489|   133k|    ps_dec->u2_mv_2mb[1] = 0;
  490|   133k|    ps_dec->u1_last_pic_not_decoded = 0;
  491|       |
  492|   133k|    ps_dec->u2_cur_slice_num_dec_thread = 0;
  493|   133k|    ps_dec->u2_cur_slice_num_bs = 0;
  494|   133k|    ps_dec->u4_intra_pred_line_ofst = 0;
  495|   133k|    ps_dec->pu1_cur_y_intra_pred_line = ps_dec->pu1_y_intra_pred_line;
  496|   133k|    ps_dec->pu1_cur_u_intra_pred_line = ps_dec->pu1_u_intra_pred_line;
  497|   133k|    ps_dec->pu1_cur_v_intra_pred_line = ps_dec->pu1_v_intra_pred_line;
  498|       |
  499|   133k|    ps_dec->pu1_cur_y_intra_pred_line_base = ps_dec->pu1_y_intra_pred_line;
  500|   133k|    ps_dec->pu1_cur_u_intra_pred_line_base = ps_dec->pu1_u_intra_pred_line;
  501|   133k|    ps_dec->pu1_cur_v_intra_pred_line_base = ps_dec->pu1_v_intra_pred_line;
  502|       |
  503|   133k|    ps_dec->pu1_prev_y_intra_pred_line =
  504|   133k|        ps_dec->pu1_y_intra_pred_line + (ps_dec->u2_frm_wd_in_mbs * MB_SIZE);
  ------------------
  |  |  554|   133k|#define MB_SIZE             16
  ------------------
  505|       |
  506|   133k|    ps_dec->pu1_prev_u_intra_pred_line =
  507|   133k|        ps_dec->pu1_u_intra_pred_line + ps_dec->u2_frm_wd_in_mbs * BLK8x8SIZE * YUV420SP_FACTOR;
  ------------------
  |  |  555|   133k|#define BLK8x8SIZE          8
  ------------------
                      ps_dec->pu1_u_intra_pred_line + ps_dec->u2_frm_wd_in_mbs * BLK8x8SIZE * YUV420SP_FACTOR;
  ------------------
  |  |  119|   133k|#define YUV420SP_FACTOR 2
  ------------------
  508|   133k|    ps_dec->pu1_prev_v_intra_pred_line =
  509|   133k|        ps_dec->pu1_v_intra_pred_line + ps_dec->u2_frm_wd_in_mbs * BLK8x8SIZE;
  ------------------
  |  |  555|   133k|#define BLK8x8SIZE          8
  ------------------
  510|       |
  511|   133k|    ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic;
  512|       |    /* Initialize The Function Pointer Depending Upon the Entropy and MbAff Flag
  513|       |     */
  514|   133k|    {
  515|   133k|        if(ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (515:12): [True: 0, False: 133k]
  ------------------
  516|      0|        {
  517|      0|            ps_dec->pf_compute_bs = ih264d_compute_bs_mbaff;
  518|      0|            ps_dec->pf_mvpred = ih264d_mvpred_mbaff;
  519|      0|            ps_svc_lyr_dec->pf_svc_compute_bs = isvcd_compute_bs_non_mbaff;
  520|      0|        }
  521|   133k|        else
  522|   133k|        {
  523|   133k|            ps_dec->pf_compute_bs = ih264d_compute_bs_non_mbaff;
  524|   133k|            ps_svc_lyr_dec->pf_svc_compute_bs = isvcd_compute_bs_non_mbaff;
  525|   133k|            ps_dec->u1_cur_mb_fld_dec_flag = ps_cur_slice->u1_field_pic_flag;
  526|       |
  527|   133k|            if((ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER) &&
  ------------------
  |  |  110|   133k|#define TARGET_LAYER 2
  ------------------
  |  Branch (527:16): [True: 88.9k, False: 44.4k]
  ------------------
  528|  88.9k|               (0 == ps_svc_lyr_dec->u1_base_res_flag))
  ------------------
  |  Branch (528:16): [True: 34.5k, False: 54.4k]
  ------------------
  529|  34.5k|            {
  530|  34.5k|                ps_svc_lyr_dec->pf_svc_compute_bs = isvcd_compute_bs_non_mbaff_target_lyr;
  531|  34.5k|            }
  532|       |
  533|   133k|            if((ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER) &&
  ------------------
  |  |  110|   133k|#define TARGET_LAYER 2
  ------------------
  |  Branch (533:16): [True: 88.9k, False: 44.4k]
  ------------------
  534|  88.9k|               (1 == ps_svc_lyr_dec->u1_base_res_flag))
  ------------------
  |  Branch (534:16): [True: 54.4k, False: 34.5k]
  ------------------
  535|  54.4k|            {
  536|  54.4k|                ps_svc_lyr_dec->pf_svc_compute_bs =
  537|  54.4k|                    isvcd_compute_bs_non_mbaff_target_lyr_no_inter_layer;
  538|  54.4k|            }
  539|       |
  540|   133k|            if((ps_svc_lyr_dec->u1_layer_identifier == MEDIAL_ENHANCEMENT_LAYER) &&
  ------------------
  |  |  109|   133k|#define MEDIAL_ENHANCEMENT_LAYER 1
  ------------------
  |  Branch (540:16): [True: 0, False: 133k]
  ------------------
  541|      0|               (0 == ps_svc_lyr_dec->u1_base_res_flag))
  ------------------
  |  Branch (541:16): [True: 0, False: 0]
  ------------------
  542|      0|            {
  543|      0|                ps_svc_lyr_dec->pf_svc_compute_bs = isvcd_compute_bs_non_mbaff_medial_lyr;
  544|      0|            }
  545|   133k|        }
  546|   133k|    }
  547|       |    /* Set up the Parameter for DMA transfer */
  548|   133k|    {
  549|   133k|        UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
  550|   133k|        UWORD8 u1_mbaff = ps_cur_slice->u1_mbaff_frame_flag;
  551|   133k|        UWORD16 uc_lastmbs = (((ps_dec->u2_pic_wd) >> 4) % (ps_dec->u4_recon_mb_grp >> u1_mbaff));
  552|   133k|        UWORD16 ui16_lastmbs_widthY =
  553|   133k|            (uc_lastmbs ? (uc_lastmbs << 4) : ((ps_dec->u4_recon_mb_grp >> u1_mbaff) << 4));
  ------------------
  |  Branch (553:14): [True: 0, False: 133k]
  ------------------
  554|   133k|        UWORD16 ui16_lastmbs_widthUV =
  555|   133k|            uc_lastmbs ? (uc_lastmbs << 3) : ((ps_dec->u4_recon_mb_grp >> u1_mbaff) << 3);
  ------------------
  |  Branch (555:13): [True: 0, False: 133k]
  ------------------
  556|       |
  557|   133k|        ps_dec->s_tran_addrecon.pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1;
  558|   133k|        ps_dec->s_tran_addrecon.pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2;
  559|   133k|        ps_dec->s_tran_addrecon.pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3;
  560|       |
  561|   133k|        ps_dec->s_tran_addrecon.u2_frm_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
  562|   133k|        ps_dec->s_tran_addrecon.u2_frm_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
  563|       |
  564|   133k|        if(u1_field_pic_flag)
  ------------------
  |  Branch (564:12): [True: 0, False: 133k]
  ------------------
  565|      0|        {
  566|      0|            ui16_lastmbs_widthY += ps_dec->u2_frm_wd_y;
  567|      0|            ui16_lastmbs_widthUV += ps_dec->u2_frm_wd_uv;
  568|      0|        }
  569|       |
  570|       |        /* Normal Increment of Pointer */
  571|   133k|        ps_dec->s_tran_addrecon.u4_inc_y[0] = ((ps_dec->u4_recon_mb_grp << 4) >> u1_mbaff);
  572|   133k|        ps_dec->s_tran_addrecon.u4_inc_uv[0] = ((ps_dec->u4_recon_mb_grp << 4) >> u1_mbaff);
  573|       |
  574|       |        /* End of Row Increment */
  575|   133k|        ps_dec->s_tran_addrecon.u4_inc_y[1] =
  576|   133k|            (ui16_lastmbs_widthY + (PAD_LEN_Y_H << 1) +
  ------------------
  |  |  571|   133k|#define PAD_LEN_Y_H                   32
  ------------------
  577|   133k|             ps_dec->s_tran_addrecon.u2_frm_wd_y * ((15 << u1_mbaff) + u1_mbaff));
  578|   133k|        ps_dec->s_tran_addrecon.u4_inc_uv[1] =
  579|   133k|            (ui16_lastmbs_widthUV + (PAD_LEN_UV_H << 2) +
  ------------------
  |  |  573|   133k|#define PAD_LEN_UV_H                  16
  ------------------
  580|   133k|             ps_dec->s_tran_addrecon.u2_frm_wd_uv * ((15 << u1_mbaff) + u1_mbaff));
  581|       |
  582|       |        /* Assign picture numbers to each frame/field  */
  583|       |        /* only once per picture.                      */
  584|   133k|        ih264d_assign_pic_num(ps_dec);
  585|   133k|        ps_dec->s_tran_addrecon.u2_mv_top_left_inc =
  586|   133k|            (ps_dec->u4_recon_mb_grp << 2) - 1 - (u1_mbaff << 2);
  587|   133k|        ps_dec->s_tran_addrecon.u2_mv_left_inc = ((ps_dec->u4_recon_mb_grp >> u1_mbaff) - 1)
  588|   133k|                                                 << (4 + u1_mbaff);
  589|   133k|    }
  590|       |    /**********************************************************************/
  591|       |    /* High profile related initialization at pictrue level               */
  592|       |    /**********************************************************************/
  593|   133k|    if((ps_seq->u1_profile_idc == HIGH_PROFILE_IDC) ||
  ------------------
  |  |  278|   133k|#define HIGH_PROFILE_IDC   100
  ------------------
  |  Branch (593:8): [True: 7.69k, False: 125k]
  ------------------
  594|   125k|       (ps_seq->u1_profile_idc == SCALABLE_HIGH_PROFILE_IDC) ||
  ------------------
  |  |   60|   125k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (594:8): [True: 1.96k, False: 123k]
  ------------------
  595|   123k|       (ps_seq->u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC))
  ------------------
  |  |   59|   123k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (595:8): [True: 3.92k, False: 119k]
  ------------------
  596|  13.5k|    {
  597|  13.5k|        if((ps_seq->i4_seq_scaling_matrix_present_flag) ||
  ------------------
  |  Branch (597:12): [True: 8.81k, False: 4.76k]
  ------------------
  598|  4.76k|           (ps_pps->i4_pic_scaling_matrix_present_flag))
  ------------------
  |  Branch (598:12): [True: 990, False: 3.77k]
  ------------------
  599|  9.80k|        {
  600|  9.80k|            ret = ih264d_form_scaling_matrix_picture(ps_seq, ps_pps, ps_dec);
  601|  9.80k|            ps_dec->s_high_profile.u1_scaling_present = 1;
  602|  9.80k|        }
  603|  3.77k|        else
  604|  3.77k|        {
  605|  3.77k|            ret = ih264d_form_default_scaling_matrix(ps_dec);
  606|  3.77k|        }
  607|       |
  608|  13.5k|        if(ps_pps->i4_transform_8x8_mode_flag)
  ------------------
  |  Branch (608:12): [True: 8.04k, False: 5.54k]
  ------------------
  609|  8.04k|        {
  610|  8.04k|            ps_dec->s_high_profile.u1_transform8x8_present = 1;
  611|  8.04k|        }
  612|  13.5k|    }
  613|   119k|    else
  614|   119k|    {
  615|   119k|        ret = ih264d_form_default_scaling_matrix(ps_dec);
  616|   119k|    }
  617|       |
  618|   133k|    if(ret != OK) return ret;
  ------------------
  |  |  114|   133k|#define OK        0
  ------------------
  |  Branch (618:8): [True: 0, False: 133k]
  ------------------
  619|       |
  620|       |    /* required while reading the transform_size_8x8 u4_flag */
  621|   133k|    ps_dec->s_high_profile.u1_direct_8x8_inference_flag = ps_seq->u1_direct_8x8_inference_flag;
  622|   133k|    ps_dec->s_high_profile.s_cavlc_ctxt = ps_dec->s_cavlc_ctxt;
  623|       |
  624|   133k|    ps_dec->i1_recon_in_thread3_flag = 1;
  625|   133k|    ps_dec->ps_frame_buf_ip_recon = &ps_dec->s_tran_addrecon;
  626|   133k|    if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (626:8): [True: 51.1k, False: 82.2k]
  ------------------
  627|  51.1k|    {
  628|  51.1k|        memcpy(&ps_dec->s_tran_addrecon_parse, &ps_dec->s_tran_addrecon, sizeof(tfr_ctxt_t));
  629|  51.1k|    }
  630|       |
  631|   133k|    ih264d_init_deblk_tfr_ctxt(ps_dec, &(ps_dec->s_pad_mgr), &(ps_dec->s_tran_addrecon),
  632|   133k|                               ps_dec->u2_frm_wd_in_mbs, 0);
  633|       |
  634|   133k|    ps_dec->ps_cur_deblk_mb = ps_dec->ps_deblk_pic;
  635|   133k|    ps_dec->u4_cur_deblk_mb_num = 0;
  636|   133k|    ps_dec->u4_deblk_mb_x = 0;
  637|   133k|    ps_dec->u4_deblk_mb_y = 0;
  638|   133k|    ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
  639|       |
  640|   133k|    ps_dec->u4_first_slice_in_pic = 0;
  641|   133k|    H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
  642|   133k|    return OK;
  ------------------
  |  |  114|   133k|#define OK        0
  ------------------
  643|   133k|}
isvcd_parse_decode_slice_ext_nal:
  657|  38.6k|{
  658|  38.6k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  659|  38.6k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
  660|  38.6k|    dec_pic_params_t *ps_pps;
  661|  38.6k|    dec_seq_params_t *ps_seq;
  662|  38.6k|    dec_svc_seq_params_t *ps_subset_seq;
  663|  38.6k|    dec_slice_params_t *ps_cur_slice = NULL;
  664|  38.6k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
  665|       |
  666|  38.6k|    pocstruct_t s_tmp_poc = {0};
  667|  38.6k|    WORD32 i_delta_poc[2] = {0};
  668|  38.6k|    WORD32 i4_poc = 0;
  669|  38.6k|    UWORD16 u2_first_mb_in_slice, u2_frame_num;
  670|  38.6k|    UWORD8 u1_field_pic_flag, u1_redundant_pic_cnt = 0, u1_slice_type;
  671|  38.6k|    UWORD32 u4_idr_pic_id = 0;
  672|  38.6k|    UWORD8 u1_bottom_field_flag, u1_pic_order_cnt_type;
  673|       |
  674|  38.6k|    UWORD8 u1_nal_unit_type;
  675|  38.6k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
  676|  38.6k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
  677|  38.6k|    WORD8 i1_is_end_of_poc;
  678|  38.6k|    WORD32 ret;
  679|  38.6k|    WORD32 prev_slice_err, num_mb_skipped;
  680|  38.6k|    UWORD8 u1_mbaff;
  681|  38.6k|    pocstruct_t *ps_cur_poc;
  682|  38.6k|    UWORD32 u4_temp;
  683|  38.6k|    WORD32 i_temp;
  684|  38.6k|    svc_dec_ctxt_t *psvcd_dec_ctxt;
  685|  38.6k|    dec_struct_t *ps_dec_cur_lyr_minus_1;
  686|  38.6k|    svc_dec_lyr_struct_t *ps_svc_cur_lyr_dec_minus_1;
  687|       |
  688|  38.6k|    ps_cur_slice = ps_dec->ps_cur_slice;
  689|  38.6k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
  690|       |
  691|       |    /* read FirstMbInSlice  and slice type*/
  692|  38.6k|    ps_dec->ps_dpb_cmds->u1_dpb_commands_read_slc = 0;
  693|  38.6k|    u2_first_mb_in_slice = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  694|  38.6k|    if(u2_first_mb_in_slice > (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs))
  ------------------
  |  Branch (694:8): [True: 171, False: 38.4k]
  ------------------
  695|    171|    {
  696|    171|        return ERROR_CORRUPTED_SLICE;
  697|    171|    }
  698|       |
  699|       |    /*we currently don not support ASO*/
  700|  38.4k|    if(((u2_first_mb_in_slice << ps_cur_slice->u1_mbaff_frame_flag) <= ps_dec->u4_cur_mb_addr) &&
  ------------------
  |  Branch (700:8): [True: 37.5k, False: 930]
  ------------------
  701|  37.5k|       (ps_dec->u4_first_slice_in_pic == 0))
  ------------------
  |  Branch (701:8): [True: 118, False: 37.4k]
  ------------------
  702|    118|    {
  703|    118|        return ERROR_CORRUPTED_SLICE;
  704|    118|    }
  705|       |
  706|  38.3k|    if(ps_dec->u4_first_slice_in_pic == 1)
  ------------------
  |  Branch (706:8): [True: 37.5k, False: 845]
  ------------------
  707|  37.5k|    {
  708|  37.5k|        if(u2_first_mb_in_slice != 0)
  ------------------
  |  Branch (708:12): [True: 85, False: 37.4k]
  ------------------
  709|     85|        {
  710|     85|            return ERROR_CORRUPTED_SLICE;
  711|     85|        }
  712|  37.5k|    }
  713|       |
  714|  38.2k|    COPYTHECONTEXT("Slice Header SVC ext: first_mb_in_slice", u2_first_mb_in_slice);
  715|       |
  716|  38.2k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  717|       |
  718|  38.2k|    if(u4_temp > 9) return ERROR_INV_SLC_TYPE_T;
  ------------------
  |  Branch (718:8): [True: 93, False: 38.1k]
  ------------------
  719|       |
  720|  38.1k|    u1_slice_type = u4_temp;
  721|  38.1k|    COPYTHECONTEXT("Slice Header SVC ext: slice_type", (u1_slice_type));
  722|       |    /* Find Out the Slice Type is 5 to 9 or not then Set the Flag   */
  723|       |    /* u1_sl_typ_5_9 = 1 .Which tells that all the slices in the Pic*/
  724|       |    /* will be of same type of current                            */
  725|  38.1k|    if(u1_slice_type > 4)
  ------------------
  |  Branch (725:8): [True: 432, False: 37.7k]
  ------------------
  726|    432|    {
  727|    432|        u1_slice_type -= 5;
  728|    432|    }
  729|       |
  730|  38.1k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  731|  38.1k|    if(u4_temp & MASK_ERR_PIC_SET_ID) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  527|  38.1k|#define MASK_ERR_PIC_SET_ID   (0xFFFFFF00)
  ------------------
  |  Branch (731:8): [True: 68, False: 38.1k]
  ------------------
  732|       |    /* discard slice if pic param is invalid */
  733|  38.1k|    COPYTHECONTEXT("Slice Header SVC ext: pic_parameter_set_id", u4_temp);
  734|  38.1k|    ps_pps = &ps_dec->ps_pps[u4_temp];
  735|  38.1k|    if(FALSE == ps_pps->u1_is_valid)
  ------------------
  |  |  592|  38.1k|#define FALSE   0
  ------------------
  |  Branch (735:8): [True: 81, False: 38.0k]
  ------------------
  736|     81|    {
  737|     81|        return ERROR_INV_SLICE_HDR_T;
  738|     81|    }
  739|       |    /* slices in a layer should have same PPS id*/
  740|  38.0k|    if(UINT32_MAX == ps_svc_lyr_dec->u4_pps_id_for_layer)
  ------------------
  |  Branch (740:8): [True: 37.2k, False: 831]
  ------------------
  741|  37.2k|    {
  742|  37.2k|        ps_svc_lyr_dec->u4_pps_id_for_layer = u4_temp;
  743|  37.2k|    }
  744|    831|    else if(u4_temp != ps_svc_lyr_dec->u4_pps_id_for_layer)
  ------------------
  |  Branch (744:13): [True: 81, False: 750]
  ------------------
  745|     81|    {
  746|     81|        return ERROR_INV_SLICE_HDR_T;
  747|     81|    }
  748|  37.9k|    ps_seq = ps_pps->ps_sps;
  749|  37.9k|    ps_seq += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  37.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  750|  37.9k|    ps_subset_seq =
  751|  37.9k|        &ps_svc_lyr_dec->ps_subset_sps[MAX_NUM_SEQ_PARAMS + ps_seq->u1_seq_parameter_set_id];
  ------------------
  |  |  521|  37.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
  752|       |
  753|  37.9k|    ps_dec->ps_cur_sps = ps_seq;
  754|  37.9k|    ps_svc_lyr_dec->ps_cur_subset_sps = ps_subset_seq;
  755|       |
  756|  37.9k|    if(!ps_seq) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (756:8): [True: 0, False: 37.9k]
  ------------------
  757|  37.9k|    if(FALSE == ps_seq->u1_is_valid) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  592|  37.9k|#define FALSE   0
  ------------------
  |  Branch (757:8): [True: 252, False: 37.7k]
  ------------------
  758|  37.7k|    if(ps_seq->u1_mb_aff_flag) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (758:8): [True: 67, False: 37.6k]
  ------------------
  759|  37.6k|    if(ps_seq->u1_level_idc > H264_LEVEL_4_2) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  303|  37.6k|#define H264_LEVEL_4_2     42
  ------------------
  |  Branch (759:8): [True: 75, False: 37.5k]
  ------------------
  760|  37.5k|    if(!ps_seq->u1_frame_mbs_only_flag) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (760:8): [True: 67, False: 37.5k]
  ------------------
  761|  37.5k|    if(OK != isvcd_verify_level(ps_seq->u1_level_idc)) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  114|  37.5k|#define OK        0
  ------------------
  |  Branch (761:8): [True: 73, False: 37.4k]
  ------------------
  762|       |
  763|  37.4k|    if(ps_dec->u1_init_dec_flag == 1)
  ------------------
  |  Branch (763:8): [True: 30.1k, False: 7.26k]
  ------------------
  764|  30.1k|    {
  765|  30.1k|        if(ps_dec->u2_frm_wd_in_mbs != ps_seq->u2_frm_wd_in_mbs) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (765:12): [True: 68, False: 30.0k]
  ------------------
  766|  30.0k|        if(ps_dec->u2_frm_ht_in_mbs != ps_seq->u2_frm_ht_in_mbs) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (766:12): [True: 70, False: 30.0k]
  ------------------
  767|  30.0k|    }
  768|       |
  769|  37.2k|    if(ps_dec->u1_init_dec_flag == 1)
  ------------------
  |  Branch (769:8): [True: 30.0k, False: 7.26k]
  ------------------
  770|  30.0k|    {
  771|  30.0k|        if(ps_dec->u2_disp_height != ps_subset_seq->u2_disp_height) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (771:12): [True: 66, False: 29.9k]
  ------------------
  772|  29.9k|        if(ps_dec->u2_disp_width != ps_subset_seq->u2_disp_width) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (772:12): [True: 67, False: 29.8k]
  ------------------
  773|  29.9k|    }
  774|       |
  775|  37.1k|    ps_dec->i4_reorder_depth = ps_subset_seq->i4_reorder_depth;
  776|       |
  777|  37.1k|    ps_dec->u2_disp_height = ps_subset_seq->u2_disp_height;
  778|  37.1k|    ps_dec->u2_disp_width = ps_subset_seq->u2_disp_width;
  779|       |
  780|  37.1k|    if(ps_svc_lyr_dec->u1_layer_id > 0)
  ------------------
  |  Branch (780:8): [True: 37.1k, False: 0]
  ------------------
  781|  37.1k|    {
  782|  37.1k|        psvcd_dec_ctxt = ps_svc_lyr_dec->ps_svcd_ctxt;
  783|  37.1k|        ps_svc_cur_lyr_dec_minus_1 =
  784|  37.1k|            &psvcd_dec_ctxt->ps_svc_dec_lyr[ps_svc_lyr_dec->u1_layer_id - 1];
  785|       |
  786|  37.1k|        ps_dec_cur_lyr_minus_1 = &ps_svc_cur_lyr_dec_minus_1->s_dec;
  787|       |
  788|  37.1k|        if((ps_dec_cur_lyr_minus_1->u2_pic_wd > ps_subset_seq->u2_pic_wd) ||
  ------------------
  |  Branch (788:12): [True: 75, False: 37.0k]
  ------------------
  789|  37.0k|           (ps_dec_cur_lyr_minus_1->u2_pic_ht > ps_subset_seq->u2_pic_ht))
  ------------------
  |  Branch (789:12): [True: 71, False: 37.0k]
  ------------------
  790|    146|        {
  791|    146|            return ERROR_CORRUPTED_SLICE;
  792|    146|        }
  793|  37.1k|    }
  794|       |
  795|  37.0k|    ps_dec->u2_pic_wd = ps_subset_seq->u2_pic_wd;
  796|  37.0k|    ps_dec->u2_pic_ht = ps_subset_seq->u2_pic_ht;
  797|  37.0k|    ps_dec->u4_total_mbs = ps_seq->u4_total_num_of_mbs << (1 - ps_seq->u1_frame_mbs_only_flag);
  798|       |
  799|       |    /* Determining the Width and Height of Frame from that of Picture */
  800|  37.0k|    ps_dec->u2_frm_wd_y = ps_subset_seq->u2_frm_wd_y;
  801|  37.0k|    ps_dec->u2_frm_ht_y = ps_subset_seq->u2_frm_ht_y;
  802|       |
  803|  37.0k|    ps_dec->u2_frm_wd_uv = ps_subset_seq->u2_frm_wd_uv;
  804|  37.0k|    ps_dec->u2_frm_ht_uv = ps_subset_seq->u2_frm_ht_uv;
  805|       |
  806|  37.0k|    ps_dec->s_pad_mgr.u1_pad_len_y_v = ps_subset_seq->u1_pad_len_y_v;
  807|  37.0k|    ps_dec->s_pad_mgr.u1_pad_len_cr_v = ps_subset_seq->u1_pad_len_cr_v;
  808|       |
  809|  37.0k|    ps_dec->u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
  810|  37.0k|    ps_dec->u2_frm_ht_in_mbs = ps_seq->u2_frm_ht_in_mbs;
  811|       |
  812|  37.0k|    ps_dec->u2_crop_offset_y = ps_subset_seq->u2_crop_offset_y;
  813|  37.0k|    ps_dec->u2_crop_offset_uv = ps_subset_seq->u2_crop_offset_uv;
  814|       |
  815|       |    /* Get the frame num */
  816|  37.0k|    u2_frame_num = ih264d_get_bits_h264(ps_bitstrm, ps_seq->u1_bits_in_frm_num);
  817|       |
  818|  37.0k|    COPYTHECONTEXT("Slice Header SVC ext: frame_num", u2_frame_num);
  819|  37.0k|    if(!ps_dec->u1_first_slice_in_stream && ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (819:8): [True: 23.6k, False: 13.3k]
  |  Branch (819:45): [True: 22.8k, False: 750]
  ------------------
  820|  22.8k|    {
  821|  22.8k|        pocstruct_t *ps_prev_poc = &ps_dec->s_prev_pic_poc;
  822|  22.8k|        pocstruct_t *ps_cur_poc = &ps_dec->s_cur_pic_poc;
  823|       |
  824|  22.8k|        ps_dec->u2_mbx = 0xffff;
  825|  22.8k|        ps_dec->u2_mby = 0;
  826|       |
  827|  22.8k|        if((0 == u1_is_idr_slice) && ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (827:12): [True: 1.58k, False: 21.2k]
  |  Branch (827:38): [True: 1.44k, False: 147]
  ------------------
  828|  1.44k|            ps_dec->u2_prev_ref_frame_num = ps_cur_slice->u2_frame_num;
  829|       |
  830|  22.8k|        if(u1_is_idr_slice || ps_cur_slice->u1_mmco_equalto5) ps_dec->u2_prev_ref_frame_num = 0;
  ------------------
  |  Branch (830:12): [True: 21.2k, False: 1.58k]
  |  Branch (830:31): [True: 543, False: 1.04k]
  ------------------
  831|       |
  832|  22.8k|        if(ps_dec->ps_cur_sps->u1_gaps_in_frame_num_value_allowed_flag)
  ------------------
  |  Branch (832:12): [True: 0, False: 22.8k]
  ------------------
  833|      0|        {
  834|      0|            isvcd_decode_gaps_in_frame_num(ps_dec, u2_frame_num);
  835|      0|        }
  836|       |
  837|  22.8k|        ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst;
  838|  22.8k|        ps_prev_poc->u2_frame_num = ps_cur_poc->u2_frame_num;
  839|  22.8k|        ps_prev_poc->u1_mmco_equalto5 = ps_cur_slice->u1_mmco_equalto5;
  840|  22.8k|        if(ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (840:12): [True: 19.4k, False: 3.40k]
  ------------------
  841|  19.4k|        {
  842|  19.4k|            ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb;
  843|  19.4k|            ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb;
  844|  19.4k|            ps_prev_poc->i4_delta_pic_order_cnt_bottom = ps_cur_poc->i4_delta_pic_order_cnt_bottom;
  845|  19.4k|            ps_prev_poc->i4_delta_pic_order_cnt[0] = ps_cur_poc->i4_delta_pic_order_cnt[0];
  846|  19.4k|            ps_prev_poc->i4_delta_pic_order_cnt[1] = ps_cur_poc->i4_delta_pic_order_cnt[1];
  847|  19.4k|            ps_prev_poc->u1_bot_field = ps_cur_poc->u1_bot_field;
  848|  19.4k|        }
  849|       |
  850|  22.8k|        ps_dec->u4_total_mbs_coded = 0;
  851|  22.8k|    }
  852|       |    /* Get the field related flags  */
  853|  37.0k|    if(!ps_seq->u1_frame_mbs_only_flag)
  ------------------
  |  Branch (853:8): [True: 0, False: 37.0k]
  ------------------
  854|      0|    {
  855|      0|        u1_field_pic_flag = ih264d_get_bit_h264(ps_bitstrm);
  856|      0|        COPYTHECONTEXT("Slice Header SVC ext: field_pic_flag", u1_field_pic_flag);
  857|      0|        u1_bottom_field_flag = 0;
  858|       |
  859|      0|        if(u1_field_pic_flag)
  ------------------
  |  Branch (859:12): [True: 0, False: 0]
  ------------------
  860|      0|        {
  861|      0|            ps_dec->pu1_inv_scan = (UWORD8 *) gau1_ih264d_inv_scan_fld;
  862|      0|            u1_bottom_field_flag = ih264d_get_bit_h264(ps_bitstrm);
  863|      0|            COPYTHECONTEXT("Slice Header SVC ext: bottom_field_flag", u1_bottom_field_flag);
  864|      0|        }
  865|      0|        else
  866|      0|        {
  867|      0|            ps_dec->pu1_inv_scan = (UWORD8 *) gau1_ih264d_inv_scan;
  868|      0|        }
  869|      0|    }
  870|  37.0k|    else
  871|  37.0k|    {
  872|  37.0k|        u1_field_pic_flag = 0;
  873|  37.0k|        u1_bottom_field_flag = 0;
  874|  37.0k|        ps_dec->pu1_inv_scan = (UWORD8 *) gau1_ih264d_inv_scan;
  875|  37.0k|    }
  876|       |
  877|  37.0k|    u1_nal_unit_type = SLICE_NAL;
  ------------------
  |  |  324|  37.0k|#define SLICE_NAL                       1
  ------------------
  878|  37.0k|    if(u1_is_idr_slice)
  ------------------
  |  Branch (878:8): [True: 33.3k, False: 3.62k]
  ------------------
  879|  33.3k|    {
  880|  33.3k|        u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  33.3k|#define IDR_SLICE_NAL                   5
  ------------------
  881|  33.3k|        u4_idr_pic_id = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  882|  33.3k|        if(u4_idr_pic_id > 65535) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (882:12): [True: 246, False: 33.1k]
  ------------------
  883|  33.1k|        COPYTHECONTEXT("Slice Header SVC ext:  ", u4_idr_pic_id);
  884|  33.1k|    }
  885|       |
  886|       |    /* read delta pic order count information*/
  887|  36.7k|    i_delta_poc[0] = i_delta_poc[1] = 0;
  888|  36.7k|    s_tmp_poc.i4_pic_order_cnt_lsb = 0;
  889|  36.7k|    s_tmp_poc.i4_delta_pic_order_cnt_bottom = 0;
  890|  36.7k|    u1_pic_order_cnt_type = ps_seq->u1_pic_order_cnt_type;
  891|  36.7k|    if(u1_pic_order_cnt_type == 0)
  ------------------
  |  Branch (891:8): [True: 35.2k, False: 1.50k]
  ------------------
  892|  35.2k|    {
  893|  35.2k|        i_temp = ih264d_get_bits_h264(ps_bitstrm, ps_seq->u1_log2_max_pic_order_cnt_lsb_minus);
  894|  35.2k|        if(i_temp < 0 || i_temp >= ps_seq->i4_max_pic_order_cntLsb) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (894:12): [True: 0, False: 35.2k]
  |  Branch (894:26): [True: 0, False: 35.2k]
  ------------------
  895|  35.2k|        s_tmp_poc.i4_pic_order_cnt_lsb = i_temp;
  896|  35.2k|        COPYTHECONTEXT("Slice Header SVC ext: pic_order_cnt_lsb", s_tmp_poc.i4_pic_order_cnt_lsb);
  897|       |
  898|  35.2k|        if((ps_pps->u1_pic_order_present_flag == 1) && (!u1_field_pic_flag))
  ------------------
  |  Branch (898:12): [True: 3.91k, False: 31.3k]
  |  Branch (898:56): [True: 3.91k, False: 0]
  ------------------
  899|  3.91k|        {
  900|  3.91k|            s_tmp_poc.i4_delta_pic_order_cnt_bottom = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  901|  3.91k|            COPYTHECONTEXT("Slice Header SVC ext: delta_pic_order_cnt_bottom",
  902|  3.91k|                           s_tmp_poc.i4_delta_pic_order_cnt_bottom);
  903|  3.91k|        }
  904|  35.2k|    }
  905|       |
  906|  36.7k|    s_tmp_poc.i4_delta_pic_order_cnt[0] = 0;
  907|  36.7k|    s_tmp_poc.i4_delta_pic_order_cnt[1] = 0;
  908|  36.7k|    if(u1_pic_order_cnt_type == 1 && (!ps_seq->u1_delta_pic_order_always_zero_flag))
  ------------------
  |  Branch (908:8): [True: 1.21k, False: 35.5k]
  |  Branch (908:38): [True: 613, False: 606]
  ------------------
  909|    613|    {
  910|    613|        s_tmp_poc.i4_delta_pic_order_cnt[0] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  911|    613|        COPYTHECONTEXT("Slice Header SVC ext: delta_pic_order_cnt[0]",
  912|    613|                       s_tmp_poc.i4_delta_pic_order_cnt[0]);
  913|       |
  914|    613|        if(ps_pps->u1_pic_order_present_flag && !u1_field_pic_flag)
  ------------------
  |  Branch (914:12): [True: 416, False: 197]
  |  Branch (914:49): [True: 416, False: 0]
  ------------------
  915|    416|        {
  916|    416|            s_tmp_poc.i4_delta_pic_order_cnt[1] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  917|    416|            COPYTHECONTEXT("Slice Header SVC ext: delta_pic_order_cnt[1]",
  918|    416|                           s_tmp_poc.i4_delta_pic_order_cnt[1]);
  919|    416|        }
  920|    613|    }
  921|       |
  922|  36.7k|    if(ps_pps->u1_redundant_pic_cnt_present_flag)
  ------------------
  |  Branch (922:8): [True: 13.6k, False: 23.0k]
  ------------------
  923|  13.6k|    {
  924|  13.6k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
  925|  13.6k|        if(u4_temp > MAX_REDUNDANT_PIC_CNT) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  611|  13.6k|#define MAX_REDUNDANT_PIC_CNT       127
  ------------------
  |  Branch (925:12): [True: 150, False: 13.5k]
  ------------------
  926|  13.5k|        u1_redundant_pic_cnt = u4_temp;
  927|  13.5k|        COPYTHECONTEXT("Slice Header SVC ext: redundant_pic_cnt", u1_redundant_pic_cnt);
  928|  13.5k|    }
  929|       |
  930|       |    /*--------------------------------------------------------------------*/
  931|       |    /* Check if the slice is part of new picture                          */
  932|       |    /*--------------------------------------------------------------------*/
  933|       |    /* First slice of a picture is always considered as part of new picture */
  934|  36.6k|    i1_is_end_of_poc = 1;
  935|  36.6k|    ps_dec->ps_dec_err_status->u1_err_flag &= MASK_REJECT_CUR_PIC;
  ------------------
  |  |  605|  36.6k|#define MASK_REJECT_CUR_PIC (0xFE)
  ------------------
  936|       |
  937|  36.6k|    if(ps_dec->u4_first_slice_in_pic == 0)
  ------------------
  |  Branch (937:8): [True: 750, False: 35.8k]
  ------------------
  938|    750|    {
  939|    750|        i1_is_end_of_poc =
  940|    750|            ih264d_is_end_of_pic(u2_frame_num, u1_nal_ref_idc, &s_tmp_poc, &ps_dec->s_cur_pic_poc,
  941|    750|                                 ps_cur_slice, u1_pic_order_cnt_type, u1_nal_unit_type,
  942|    750|                                 u4_idr_pic_id, u1_field_pic_flag, u1_bottom_field_flag);
  943|    750|        if(i1_is_end_of_poc)
  ------------------
  |  Branch (943:12): [True: 71, False: 679]
  ------------------
  944|     71|        {
  945|     71|            ps_dec->u1_first_slice_in_stream = 0;
  946|     71|            return ERROR_INCOMPLETE_FRAME;
  947|     71|        }
  948|    750|    }
  949|       |
  950|       |    /*--------------------------------------------------------------------*/
  951|       |    /* Check for error in slice and parse the missing/corrupted MB's      */
  952|       |    /* as skip-MB's in an inserted P-slice                                */
  953|       |    /*--------------------------------------------------------------------*/
  954|  36.5k|    u1_mbaff = ps_seq->u1_mb_aff_flag && (!u1_field_pic_flag);
  ------------------
  |  Branch (954:16): [True: 0, False: 36.5k]
  |  Branch (954:42): [True: 0, False: 0]
  ------------------
  955|  36.5k|    prev_slice_err = 0;
  956|       |
  957|  36.5k|    if(i1_is_end_of_poc || ps_dec->u1_first_slice_in_stream)
  ------------------
  |  Branch (957:8): [True: 35.8k, False: 679]
  |  Branch (957:28): [True: 0, False: 679]
  ------------------
  958|  35.8k|    {
  959|       |        /* If the current slice is not a field or frame number of the current
  960|       |         * slice doesn't match with previous slice, and decoder is expecting
  961|       |         * to decode a field i.e. ps_dec->u1_top_bottom_decoded is not 0 and
  962|       |         * is not (TOP_FIELD_ONLY | BOT_FIELD_ONLY), treat it as a dangling
  963|       |         * field */
  964|  35.8k|        if((u1_field_pic_flag == 0 || u2_frame_num != ps_dec->u2_prv_frame_num) &&
  ------------------
  |  Branch (964:13): [True: 35.8k, False: 0]
  |  Branch (964:39): [True: 0, False: 0]
  ------------------
  965|  35.8k|           ps_dec->u1_top_bottom_decoded != 0 &&
  ------------------
  |  Branch (965:12): [True: 0, False: 35.8k]
  ------------------
  966|      0|           ps_dec->u1_top_bottom_decoded != (TOP_FIELD_ONLY | BOT_FIELD_ONLY))
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
                         ps_dec->u1_top_bottom_decoded != (TOP_FIELD_ONLY | BOT_FIELD_ONLY))
  ------------------
  |  |   66|      0|#define BOT_FIELD_ONLY      0x01
  ------------------
  |  Branch (966:12): [True: 0, False: 0]
  ------------------
  967|      0|        {
  968|      0|            ps_dec->u1_dangling_field = 1;
  969|      0|            if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (969:16): [True: 0, False: 0]
  ------------------
  970|      0|            {
  971|       |                // first slice - dangling field
  972|      0|                prev_slice_err = 1;
  973|      0|            }
  974|      0|            else
  975|      0|            {
  976|       |                // last slice - dangling field
  977|      0|                prev_slice_err = 2;
  978|      0|            }
  979|       |
  980|      0|            if(ps_dec->u1_top_bottom_decoded == TOP_FIELD_ONLY)
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
  |  Branch (980:16): [True: 0, False: 0]
  ------------------
  981|      0|                ps_cur_slice->u1_bottom_field_flag = 1;
  982|      0|            else
  983|      0|                ps_cur_slice->u1_bottom_field_flag = 0;
  984|       |
  985|      0|            num_mb_skipped =
  986|      0|                (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs) - ps_dec->u4_total_mbs_coded;
  987|      0|            ps_cur_poc = &ps_dec->s_cur_pic_poc;
  988|       |
  989|      0|            u1_is_idr_slice = ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL;
  ------------------
  |  |  328|      0|#define IDR_SLICE_NAL                   5
  ------------------
  990|      0|        }
  991|  35.8k|        else if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (991:17): [True: 35.8k, False: 0]
  ------------------
  992|  35.8k|        {
  993|  35.8k|            if(u2_first_mb_in_slice > 0)
  ------------------
  |  Branch (993:16): [True: 0, False: 35.8k]
  ------------------
  994|      0|            {
  995|       |                // first slice - missing/header corruption
  996|      0|                prev_slice_err = 1;
  997|      0|                num_mb_skipped = u2_first_mb_in_slice << u1_mbaff;
  998|      0|                ps_cur_poc = &s_tmp_poc;
  999|       |
 1000|       |                // initializing slice parameters
 1001|      0|                ps_cur_slice->u4_idr_pic_id = u4_idr_pic_id;
 1002|      0|                ps_cur_slice->u1_field_pic_flag = u1_field_pic_flag;
 1003|      0|                ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
 1004|      0|                ps_cur_slice->i4_pic_order_cnt_lsb = s_tmp_poc.i4_pic_order_cnt_lsb;
 1005|      0|                ps_cur_slice->u1_nal_unit_type = u1_nal_unit_type;
 1006|      0|                ps_cur_slice->u1_redundant_pic_cnt = u1_redundant_pic_cnt;
 1007|      0|                ps_cur_slice->u1_nal_ref_idc = u1_nal_ref_idc;
 1008|      0|                ps_cur_slice->u1_pic_order_cnt_type = u1_pic_order_cnt_type;
 1009|      0|                ps_cur_slice->u1_mbaff_frame_flag = ps_seq->u1_mb_aff_flag && (!u1_field_pic_flag);
  ------------------
  |  Branch (1009:53): [True: 0, False: 0]
  |  Branch (1009:79): [True: 0, False: 0]
  ------------------
 1010|      0|            }
 1011|  35.8k|        }
 1012|      0|        else
 1013|      0|        {
 1014|       |            /* since i1_is_end_of_poc is set ,means new frame num is encountered. so
 1015|       |             * conceal the current frame completely */
 1016|      0|            prev_slice_err = 2;
 1017|      0|            num_mb_skipped =
 1018|      0|                (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs) - ps_dec->u4_total_mbs_coded;
 1019|      0|            ps_cur_poc = &s_tmp_poc;
 1020|      0|        }
 1021|  35.8k|    }
 1022|    679|    else
 1023|    679|    {
 1024|    679|        if((u2_first_mb_in_slice << u1_mbaff) > ps_dec->u4_total_mbs_coded)
  ------------------
  |  Branch (1024:12): [True: 109, False: 570]
  ------------------
 1025|    109|        {
 1026|       |            // previous slice - missing/corruption
 1027|    109|            prev_slice_err = 2;
 1028|    109|            num_mb_skipped = (u2_first_mb_in_slice << u1_mbaff) - ps_dec->u4_total_mbs_coded;
 1029|    109|            ps_cur_poc = &s_tmp_poc;
 1030|    109|        }
 1031|    570|        else if((u2_first_mb_in_slice << u1_mbaff) < ps_dec->u4_total_mbs_coded)
  ------------------
  |  Branch (1031:17): [True: 0, False: 570]
  ------------------
 1032|      0|        {
 1033|      0|            return ERROR_CORRUPTED_SLICE;
 1034|      0|        }
 1035|    679|    }
 1036|  36.5k|    if(prev_slice_err)
  ------------------
  |  Branch (1036:8): [True: 109, False: 36.4k]
  ------------------
 1037|    109|    {
 1038|    109|        ret = isvcd_mark_err_slice_skip((svc_dec_lyr_struct_t *) ps_dec, num_mb_skipped,
 1039|    109|                                        u1_is_idr_slice, u2_frame_num, ps_cur_poc, prev_slice_err);
 1040|       |
 1041|    109|        if(ps_dec->u1_dangling_field == 1)
  ------------------
  |  Branch (1041:12): [True: 0, False: 109]
  ------------------
 1042|      0|        {
 1043|      0|            ps_dec->u1_second_field = 1 - ps_dec->u1_second_field;
 1044|      0|            ps_dec->u1_first_slice_in_stream = 0;
 1045|      0|            ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY;
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
                          ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY;
  ------------------
  |  |   66|      0|#define BOT_FIELD_ONLY      0x01
  ------------------
 1046|      0|            return ERROR_DANGLING_FIELD_IN_PIC;
 1047|      0|        }
 1048|       |
 1049|    109|        if(prev_slice_err == 2)
  ------------------
  |  Branch (1049:12): [True: 109, False: 0]
  ------------------
 1050|    109|        {
 1051|    109|            ps_dec->u1_first_slice_in_stream = 0;
 1052|    109|            return ERROR_INCOMPLETE_FRAME;
 1053|    109|        }
 1054|       |
 1055|      0|        if(ps_dec->u4_total_mbs_coded >= ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
  ------------------
  |  Branch (1055:12): [True: 0, False: 0]
  ------------------
 1056|      0|        {
 1057|       |            /* return if all MBs in frame are parsed*/
 1058|      0|            ps_dec->u1_first_slice_in_stream = 0;
 1059|      0|            return ERROR_IN_LAST_SLICE_OF_PIC;
 1060|      0|        }
 1061|       |
 1062|      0|        if(ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC)
  ------------------
  |  |  602|      0|#define REJECT_CUR_PIC    (0x01)
  ------------------
  |  Branch (1062:12): [True: 0, False: 0]
  ------------------
 1063|      0|        {
 1064|      0|            ih264d_err_pic_dispbuf_mgr(ps_dec);
 1065|      0|            return ERROR_NEW_FRAME_EXPECTED;
 1066|      0|        }
 1067|       |
 1068|      0|        if(ret != OK) return ret;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1068:12): [True: 0, False: 0]
  ------------------
 1069|       |
 1070|      0|        i1_is_end_of_poc = 0;
 1071|      0|    }
 1072|       |
 1073|  36.4k|    if(u1_field_pic_flag)
  ------------------
  |  Branch (1073:8): [True: 0, False: 36.4k]
  ------------------
 1074|      0|    {
 1075|      0|        ps_dec->u2_prv_frame_num = u2_frame_num;
 1076|      0|    }
 1077|       |
 1078|  36.4k|    if(ps_cur_slice->u1_mmco_equalto5)
  ------------------
  |  Branch (1078:8): [True: 972, False: 35.4k]
  ------------------
 1079|    972|    {
 1080|    972|        WORD32 i4_temp_poc;
 1081|    972|        WORD32 i4_top_field_order_poc, i4_bot_field_order_poc;
 1082|    972|        WORD64 i8_result;
 1083|    972|        if(!ps_cur_slice->u1_field_pic_flag)  // or a complementary field pair
  ------------------
  |  Branch (1083:12): [True: 972, False: 0]
  ------------------
 1084|    972|        {
 1085|    972|            i4_top_field_order_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
 1086|    972|            i4_bot_field_order_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
 1087|    972|            i4_temp_poc = MIN(i4_top_field_order_poc, i4_bot_field_order_poc);
  ------------------
  |  |   61|    972|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 193, False: 779]
  |  |  ------------------
  ------------------
 1088|    972|        }
 1089|      0|        else if(!ps_cur_slice->u1_bottom_field_flag)
  ------------------
  |  Branch (1089:17): [True: 0, False: 0]
  ------------------
 1090|      0|            i4_temp_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
 1091|      0|        else
 1092|      0|            i4_temp_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
 1093|       |
 1094|    972|        i8_result = (WORD64) i4_temp_poc - ps_dec->ps_cur_pic->i4_top_field_order_cnt;
 1095|    972|        if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|    972|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 972]
  |  |  |  Branch (58:54): [True: 0, False: 972]
  |  |  ------------------
  ------------------
 1096|      0|        {
 1097|      0|            return ERROR_INV_POC;
 1098|      0|        }
 1099|    972|        ps_dec->ps_cur_pic->i4_top_field_order_cnt = (WORD32) i8_result;
 1100|    972|        i8_result = (WORD64) i4_temp_poc - ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
 1101|    972|        if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|    972|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 972]
  |  |  |  Branch (58:54): [True: 0, False: 972]
  |  |  ------------------
  ------------------
 1102|      0|        {
 1103|      0|            return ERROR_INV_POC;
 1104|      0|        }
 1105|    972|        ps_dec->ps_cur_pic->i4_bottom_field_order_cnt = (WORD32) i8_result;
 1106|    972|        ps_dec->ps_cur_pic->i4_poc = i4_temp_poc;
 1107|    972|        ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc;
 1108|    972|    }
 1109|  36.4k|    if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (1109:8): [True: 35.8k, False: 570]
  ------------------
 1110|  35.8k|    {
 1111|  35.8k|        ret = isvcd_decode_pic_order_cnt(u1_is_idr_slice, u2_frame_num, &ps_dec->s_prev_pic_poc,
 1112|  35.8k|                                         &s_tmp_poc, ps_cur_slice, ps_pps, u1_nal_ref_idc,
 1113|  35.8k|                                         u1_bottom_field_flag, u1_field_pic_flag, &i4_poc, ps_dec);
 1114|  35.8k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  35.8k|#define OK        0
  ------------------
  |  Branch (1114:12): [True: 78, False: 35.7k]
  ------------------
 1115|       |        /* Display seq no calculations */
 1116|  35.7k|        if(i4_poc >= ps_dec->i4_max_poc) ps_dec->i4_max_poc = i4_poc;
  ------------------
  |  Branch (1116:12): [True: 14.8k, False: 20.9k]
  ------------------
 1117|       |        /* IDR Picture or POC wrap around */
 1118|  35.7k|        if(i4_poc == 0)
  ------------------
  |  Branch (1118:12): [True: 4.28k, False: 31.5k]
  ------------------
 1119|  4.28k|        {
 1120|  4.28k|            WORD64 i8_temp;
 1121|  4.28k|            i8_temp = (WORD64) ps_dec->i4_prev_max_display_seq + ps_dec->i4_max_poc +
 1122|  4.28k|                      ps_dec->u1_max_dec_frame_buffering + 1;
 1123|       |            /*If i4_prev_max_display_seq overflows integer range, reset it */
 1124|  4.28k|            ps_dec->i4_prev_max_display_seq = IS_OUT_OF_RANGE_S32(i8_temp) ? 0 : i8_temp;
  ------------------
  |  |   58|  4.28k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 4.28k]
  |  |  |  Branch (58:54): [True: 49, False: 4.23k]
  |  |  ------------------
  ------------------
 1125|  4.28k|            ps_dec->i4_max_poc = 0;
 1126|  4.28k|        }
 1127|  35.7k|    }
 1128|       |
 1129|       |    /* Increment only if the current slice has atleast 1 more MB */
 1130|  36.3k|    if(ps_dec->u4_first_slice_in_pic == 0 &&
  ------------------
  |  Branch (1130:8): [True: 570, False: 35.7k]
  ------------------
 1131|    570|       (ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice <
  ------------------
  |  Branch (1131:8): [True: 570, False: 0]
  ------------------
 1132|    570|        (UWORD32) (ps_dec->u4_total_mbs_coded >> ps_dec->ps_cur_slice->u1_mbaff_frame_flag)))
 1133|    570|    {
 1134|    570|        ps_dec->ps_parse_cur_slice++;
 1135|    570|        ps_dec->u2_cur_slice_num++;
 1136|       |        // in the case of single core increment ps_decode_cur_slice
 1137|    570|        if(ps_dec->u1_separate_parse == 0)
  ------------------
  |  Branch (1137:12): [True: 343, False: 227]
  ------------------
 1138|    343|        {
 1139|    343|            ps_dec->ps_decode_cur_slice++;
 1140|    343|        }
 1141|    570|    }
 1142|       |
 1143|  36.3k|    ps_dec->u1_slice_header_done = 0;
 1144|       |
 1145|       |    /*--------------------------------------------------------------------*/
 1146|       |    /* Copy the values read from the bitstream to the slice header and then*/
 1147|       |    /* If the slice is first slice in picture, then do Start of Picture   */
 1148|       |    /* processing.                                                        */
 1149|       |    /*--------------------------------------------------------------------*/
 1150|  36.3k|    ps_cur_slice->i4_delta_pic_order_cnt[0] = i_delta_poc[0];
 1151|  36.3k|    ps_cur_slice->i4_delta_pic_order_cnt[1] = i_delta_poc[1];
 1152|  36.3k|    ps_cur_slice->u4_idr_pic_id = u4_idr_pic_id;
 1153|  36.3k|    ps_cur_slice->u2_first_mb_in_slice = u2_first_mb_in_slice;
 1154|  36.3k|    ps_cur_slice->u1_field_pic_flag = u1_field_pic_flag;
 1155|  36.3k|    ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
 1156|  36.3k|    ps_cur_slice->u1_slice_type = u1_slice_type;
 1157|  36.3k|    ps_cur_slice->i4_pic_order_cnt_lsb = s_tmp_poc.i4_pic_order_cnt_lsb;
 1158|       |
 1159|  36.3k|    ps_cur_slice->u1_nal_unit_type = u1_nal_unit_type;
 1160|  36.3k|    ps_cur_slice->u1_redundant_pic_cnt = u1_redundant_pic_cnt;
 1161|  36.3k|    ps_cur_slice->u1_nal_ref_idc = u1_nal_ref_idc;
 1162|  36.3k|    ps_cur_slice->u1_pic_order_cnt_type = u1_pic_order_cnt_type;
 1163|       |
 1164|  36.3k|    if(ps_seq->u1_frame_mbs_only_flag)
  ------------------
  |  Branch (1164:8): [True: 36.3k, False: 0]
  ------------------
 1165|  36.3k|        ps_cur_slice->u1_direct_8x8_inference_flag = ps_seq->u1_direct_8x8_inference_flag;
 1166|      0|    else
 1167|      0|        ps_cur_slice->u1_direct_8x8_inference_flag = 1;
 1168|       |
 1169|  36.3k|    if(0 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id)
  ------------------
  |  Branch (1169:8): [True: 36.3k, False: 0]
  ------------------
 1170|  36.3k|    {
 1171|  36.3k|        if(B_SLICE == u1_slice_type)
  ------------------
  |  |  369|  36.3k|#define B_SLICE  1
  ------------------
  |  Branch (1171:12): [True: 14.2k, False: 22.1k]
  ------------------
 1172|  14.2k|        {
 1173|  14.2k|            ps_cur_slice->u1_direct_spatial_mv_pred_flag = ih264d_get_bit_h264(ps_bitstrm);
 1174|  14.2k|            COPYTHECONTEXT("Slice Header SVC ext: direct_spatial_mv_pred_flag",
 1175|  14.2k|                           ps_cur_slice->u1_direct_spatial_mv_pred_flag);
 1176|       |
 1177|  14.2k|            if(ps_cur_slice->u1_direct_spatial_mv_pred_flag)
  ------------------
  |  Branch (1177:16): [True: 5.49k, False: 8.74k]
  ------------------
 1178|  5.49k|                ps_cur_slice->pf_decodeDirect = isvcd_decode_spatial_direct;
 1179|  8.74k|            else
 1180|  8.74k|                ps_cur_slice->pf_decodeDirect = ih264d_decode_temporal_direct;
 1181|  14.2k|            if(!((ps_seq->u1_mb_aff_flag) && (!u1_field_pic_flag)))
  ------------------
  |  Branch (1181:18): [True: 0, False: 14.2k]
  |  Branch (1181:46): [True: 0, False: 0]
  ------------------
 1182|  14.2k|                ps_dec->pf_mvpred = ih264d_mvpred_nonmbaffB;
 1183|  14.2k|        }
 1184|  22.1k|        else
 1185|  22.1k|        {
 1186|  22.1k|            if(!((ps_seq->u1_mb_aff_flag) && (!u1_field_pic_flag))) /*check if this is valid here */
  ------------------
  |  Branch (1186:18): [True: 0, False: 22.1k]
  |  Branch (1186:46): [True: 0, False: 0]
  ------------------
 1187|  22.1k|                ps_dec->pf_mvpred = ih264d_mvpred_nonmbaff;
 1188|  22.1k|        }
 1189|  36.3k|    }
 1190|       |
 1191|  36.3k|    if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (1191:8): [True: 35.7k, False: 570]
  ------------------
 1192|  35.7k|    {
 1193|  35.7k|        if(u2_first_mb_in_slice == 0)
  ------------------
  |  Branch (1193:12): [True: 35.7k, False: 0]
  ------------------
 1194|  35.7k|        {
 1195|  35.7k|            ret = isvcd_start_of_pic(ps_svc_lyr_dec, i4_poc, &s_tmp_poc, u2_frame_num, ps_pps);
 1196|  35.7k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  35.7k|#define OK        0
  ------------------
  |  Branch (1196:16): [True: 1.27k, False: 34.5k]
  ------------------
 1197|       |            /*inter layer buffer intialization */
 1198|  34.5k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 1199|  34.5k|                ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start;
 1200|  34.5k|            ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_cur_mb =
 1201|  34.5k|                ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_base;
 1202|  34.5k|        }
 1203|       |
 1204|  34.5k|        ps_dec->u4_output_present = 0;
 1205|       |
 1206|  34.5k|        {
 1207|  34.5k|            ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer, &(ps_dec->s_disp_op));
 1208|       |            /* If error code is non-zero then there is no buffer available for
 1209|       |            display, hence avoid format conversion */
 1210|       |
 1211|  34.5k|            if(0 != ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (1211:16): [True: 10.9k, False: 23.5k]
  ------------------
 1212|  10.9k|            {
 1213|  10.9k|                ps_dec->u4_output_present = 0;
 1214|  10.9k|                ps_dec->u4_fmt_conv_cur_row = ps_dec->s_disp_frame_info.u4_y_ht;
 1215|  10.9k|            }
 1216|  23.5k|            else
 1217|  23.5k|                ps_dec->u4_output_present = 1;
 1218|  34.5k|        }
 1219|  34.5k|        ret = isvcd_parse_interlayer_resamp_func_init(ps_svc_lyr_dec, u2_first_mb_in_slice);
 1220|  34.5k|        if(ret != OK)
  ------------------
  |  |  114|  34.5k|#define OK        0
  ------------------
  |  Branch (1220:12): [True: 508, False: 34.0k]
  ------------------
 1221|    508|        {
 1222|    508|            return ERROR_CORRUPTED_SLICE;
 1223|    508|        }
 1224|  34.0k|        if((ps_dec->u1_separate_parse == 1) &&
  ------------------
  |  Branch (1224:12): [True: 21.3k, False: 12.6k]
  ------------------
 1225|  21.3k|           (ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER) && (ps_svc_lyr_dec->u1_res_init_done == 1))
  ------------------
  |  |  110|  21.3k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1225:12): [True: 21.3k, False: 0]
  |  Branch (1225:69): [True: 21.3k, False: 0]
  ------------------
 1226|  21.3k|        {
 1227|  21.3k|            if(ps_dec->u4_dec_thread_created == 0)
  ------------------
  |  Branch (1227:16): [True: 21.3k, False: 0]
  ------------------
 1228|  21.3k|            {
 1229|  21.3k|                ithread_create(ps_dec->pv_dec_thread_handle, NULL,
 1230|  21.3k|                               (void *) isvcd_decode_picture_thread, (void *) ps_dec);
 1231|       |
 1232|  21.3k|                ps_dec->u4_dec_thread_created = 1;
 1233|  21.3k|            }
 1234|       |#ifdef KEEP_THREADS_ACTIVE
 1235|       |            ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
 1236|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 1237|       |
 1238|       |            ps_dec->ai4_process_start[0] = PROC_START;
 1239|       |            ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[0]);
 1240|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 1241|       |
 1242|       |            ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
 1243|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 1244|       |#endif
 1245|       |#ifdef KEEP_THREADS_ACTIVE
 1246|       |            if(ps_dec->u4_bs_deblk_thread_created)
 1247|       |            {
 1248|       |                ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
 1249|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 1250|       |
 1251|       |                ps_dec->ai4_process_start[1] = PROC_START;
 1252|       |                ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[1]);
 1253|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 1254|       |
 1255|       |                ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
 1256|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 1257|       |            }
 1258|       |#endif
 1259|  21.3k|        }
 1260|  34.0k|    }
 1261|       |
 1262|       |    /* INITIALIZATION of fn ptrs for MC and formMbPartInfo functions */
 1263|  34.5k|    {
 1264|  34.5k|        UWORD8 uc_nofield_nombaff;
 1265|       |
 1266|  34.5k|        uc_nofield_nombaff =
 1267|  34.5k|            ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0) &&
  ------------------
  |  Branch (1267:14): [True: 34.5k, False: 0]
  ------------------
 1268|  34.5k|             (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0) && (u1_slice_type != B_SLICE) &&
  ------------------
  |  |  369|  34.5k|#define B_SLICE  1
  ------------------
  |  Branch (1268:14): [True: 34.5k, False: 0]
  |  Branch (1268:66): [True: 20.8k, False: 13.6k]
  ------------------
 1269|  20.8k|             (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0));
  ------------------
  |  Branch (1269:14): [True: 17.1k, False: 3.72k]
  ------------------
 1270|       |
 1271|       |        /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */
 1272|       |
 1273|  34.5k|        if(uc_nofield_nombaff)
  ------------------
  |  Branch (1273:12): [True: 17.1k, False: 17.4k]
  ------------------
 1274|  17.1k|        {
 1275|  17.1k|            ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_bp;
 1276|  17.1k|            ps_dec->p_motion_compensate = ih264d_motion_compensate_bp;
 1277|  17.1k|        }
 1278|  17.4k|        else
 1279|  17.4k|        {
 1280|  17.4k|            ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_mp;
 1281|  17.4k|            ps_dec->p_motion_compensate = ih264d_motion_compensate_mp;
 1282|  17.4k|        }
 1283|  34.5k|    }
 1284|       |
 1285|       |    /*
 1286|       |     * Decide whether to decode the current picture or not
 1287|       |     */
 1288|  34.5k|    {
 1289|  34.5k|        dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
 1290|  34.5k|        if(ps_err->u4_frm_sei_sync == u2_frame_num)
  ------------------
  |  Branch (1290:12): [True: 0, False: 34.5k]
  ------------------
 1291|      0|        {
 1292|      0|            ps_err->u1_err_flag = ACCEPT_ALL_PICS;
  ------------------
  |  |  601|      0|#define ACCEPT_ALL_PICS   (0x00)
  ------------------
 1293|      0|            ps_err->u4_frm_sei_sync = SYNC_FRM_DEFAULT;
  ------------------
  |  |  610|      0|#define SYNC_FRM_DEFAULT  (0xFFFFFFFF)
  ------------------
 1294|      0|        }
 1295|  34.5k|        ps_err->u4_cur_frm = u2_frame_num;
 1296|  34.5k|    }
 1297|       |
 1298|       |    /* Decision for decoding if the picture is to be skipped */
 1299|  34.5k|    {
 1300|  34.5k|        WORD32 i4_skip_b_pic, i4_skip_p_pic;
 1301|       |
 1302|  34.5k|        i4_skip_b_pic = (ps_dec->u4_skip_frm_mask & B_SLC_BIT) && (B_SLICE == u1_slice_type) &&
  ------------------
  |  |  378|  34.5k|#define B_SLC_BIT  (0x4)
  ------------------
                      i4_skip_b_pic = (ps_dec->u4_skip_frm_mask & B_SLC_BIT) && (B_SLICE == u1_slice_type) &&
  ------------------
  |  |  369|      0|#define B_SLICE  1
  ------------------
  |  Branch (1302:25): [True: 0, False: 34.5k]
  |  Branch (1302:67): [True: 0, False: 0]
  ------------------
 1303|      0|                        (0 == u1_nal_ref_idc);
  ------------------
  |  Branch (1303:25): [True: 0, False: 0]
  ------------------
 1304|       |
 1305|  34.5k|        i4_skip_p_pic = (ps_dec->u4_skip_frm_mask & P_SLC_BIT) && (P_SLICE == u1_slice_type) &&
  ------------------
  |  |  377|  34.5k|#define P_SLC_BIT  (0x2)
  ------------------
                      i4_skip_p_pic = (ps_dec->u4_skip_frm_mask & P_SLC_BIT) && (P_SLICE == u1_slice_type) &&
  ------------------
  |  |  368|      0|#define P_SLICE  0
  ------------------
  |  Branch (1305:25): [True: 0, False: 34.5k]
  |  Branch (1305:67): [True: 0, False: 0]
  ------------------
 1306|      0|                        (0 == u1_nal_ref_idc);
  ------------------
  |  Branch (1306:25): [True: 0, False: 0]
  ------------------
 1307|       |
 1308|       |        /**************************************************************/
 1309|       |        /* Skip the B picture if skip mask is set for B picture and   */
 1310|       |        /* Current B picture is a non reference B picture or there is */
 1311|       |        /* no user for reference B picture                            */
 1312|       |        /**************************************************************/
 1313|  34.5k|        if(i4_skip_b_pic)
  ------------------
  |  Branch (1313:12): [True: 0, False: 34.5k]
  ------------------
 1314|      0|        {
 1315|      0|            ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT;
  ------------------
  |  |  378|      0|#define B_SLC_BIT  (0x4)
  ------------------
 1316|       |            /* Don't decode the picture in SKIP-B mode if that picture is B */
 1317|       |            /* and also it is not to be used as a reference picture         */
 1318|      0|            ps_dec->u1_last_pic_not_decoded = 1;
 1319|       |
 1320|      0|            return OK;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
 1321|      0|        }
 1322|       |        /**************************************************************/
 1323|       |        /* Skip the P picture if skip mask is set for P picture and   */
 1324|       |        /* Current P picture is a non reference P picture or there is */
 1325|       |        /* no user for reference P picture                            */
 1326|       |        /**************************************************************/
 1327|  34.5k|        if(i4_skip_p_pic)
  ------------------
  |  Branch (1327:12): [True: 0, False: 34.5k]
  ------------------
 1328|      0|        {
 1329|      0|            ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT;
  ------------------
  |  |  377|      0|#define P_SLC_BIT  (0x2)
  ------------------
 1330|       |            /* Don't decode the picture in SKIP-P mode if that picture is P */
 1331|       |            /* and also it is not to be used as a reference picture         */
 1332|      0|            ps_dec->u1_last_pic_not_decoded = 1;
 1333|       |
 1334|      0|            return OK;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
 1335|      0|        }
 1336|  34.5k|    }
 1337|       |
 1338|  34.5k|    {
 1339|  34.5k|        UWORD16 u2_mb_x, u2_mb_y;
 1340|       |
 1341|  34.5k|        ps_dec->i4_submb_ofst =
 1342|  34.5k|            ((u2_first_mb_in_slice << ps_cur_slice->u1_mbaff_frame_flag) * SUB_BLK_SIZE) -
  ------------------
  |  |  562|  34.5k|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  34.5k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  34.5k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
 1343|  34.5k|            SUB_BLK_SIZE;
  ------------------
  |  |  562|  34.5k|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  34.5k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  34.5k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
 1344|  34.5k|        if(u2_first_mb_in_slice)
  ------------------
  |  Branch (1344:12): [True: 570, False: 34.0k]
  ------------------
 1345|    570|        {
 1346|    570|            UWORD8 u1_mb_aff;
 1347|    570|            UWORD8 u1_field_pic;
 1348|    570|            UWORD16 u2_frm_wd_in_mbs;
 1349|    570|            u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
 1350|    570|            u1_mb_aff = ps_cur_slice->u1_mbaff_frame_flag;
 1351|    570|            u1_field_pic = ps_cur_slice->u1_field_pic_flag;
 1352|       |
 1353|    570|            {
 1354|    570|                UWORD32 x_offset;
 1355|    570|                UWORD32 y_offset;
 1356|    570|                UWORD32 u4_frame_stride;
 1357|    570|                tfr_ctxt_t *ps_trns_addr;  // = &ps_dec->s_tran_addrecon_parse;
 1358|       |
 1359|    570|                if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1359:20): [True: 227, False: 343]
  ------------------
 1360|    227|                {
 1361|    227|                    ps_trns_addr = &ps_dec->s_tran_addrecon_parse;
 1362|    227|                }
 1363|    343|                else
 1364|    343|                {
 1365|    343|                    ps_trns_addr = &ps_dec->s_tran_addrecon;
 1366|    343|                }
 1367|    570|                u2_mb_x = MOD(u2_first_mb_in_slice, u2_frm_wd_in_mbs);
  ------------------
  |  |   64|    570|#define MOD(x,y) ((x)%(y))
  ------------------
 1368|    570|                u2_mb_y = DIV(u2_first_mb_in_slice, u2_frm_wd_in_mbs);
  ------------------
  |  |   65|    570|#define DIV(x,y) ((x)/(y))
  ------------------
 1369|       |
 1370|    570|                u2_mb_y <<= u1_mb_aff;
 1371|       |
 1372|    570|                if((u2_mb_x > u2_frm_wd_in_mbs - 1) || (u2_mb_y > ps_dec->u2_frm_ht_in_mbs - 1))
  ------------------
  |  Branch (1372:20): [True: 0, False: 570]
  |  Branch (1372:56): [True: 0, False: 570]
  ------------------
 1373|      0|                {
 1374|      0|                    return ERROR_CORRUPTED_SLICE;
 1375|      0|                }
 1376|       |
 1377|    570|                u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic;
 1378|    570|                x_offset = u2_mb_x << 4;
 1379|    570|                y_offset = (u2_mb_y * u4_frame_stride) << 4;
 1380|       |
 1381|    570|                ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset;
 1382|       |
 1383|    570|                u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic;
 1384|    570|                x_offset >>= 1;
 1385|    570|                y_offset = (u2_mb_y * u4_frame_stride) << 3;
 1386|       |
 1387|    570|                x_offset *= YUV420SP_FACTOR;
  ------------------
  |  |  119|    570|#define YUV420SP_FACTOR 2
  ------------------
 1388|       |
 1389|    570|                ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset;
 1390|    570|                ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset;
 1391|       |
 1392|    570|                ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
 1393|    570|                ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
 1394|    570|                ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
 1395|       |
 1396|       |                // assign the deblock structure pointers to start of slice
 1397|    570|                if(ps_dec->u1_separate_parse == 1)
  ------------------
  |  Branch (1397:20): [True: 227, False: 343]
  ------------------
 1398|    227|                {
 1399|    227|                    ps_dec->ps_deblk_mbn =
 1400|    227|                        ps_dec->ps_deblk_pic + (u2_first_mb_in_slice << u1_mb_aff);
 1401|    227|                }
 1402|    343|                else
 1403|    343|                {
 1404|    343|                    ps_dec->ps_deblk_mbn =
 1405|    343|                        ps_dec->ps_deblk_pic + (u2_first_mb_in_slice << u1_mb_aff);
 1406|    343|                }
 1407|       |
 1408|    570|                ps_dec->u4_cur_mb_addr = (u2_first_mb_in_slice << u1_mb_aff);
 1409|       |
 1410|    570|                ps_dec->ps_mv_cur =
 1411|    570|                    ps_dec->s_cur_pic.ps_mv + ((u2_first_mb_in_slice << u1_mb_aff) << 4);
 1412|    570|            }
 1413|    570|        }
 1414|  34.0k|        else
 1415|  34.0k|        {
 1416|  34.0k|            tfr_ctxt_t *ps_trns_addr;
 1417|       |
 1418|  34.0k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1418:16): [True: 21.3k, False: 12.6k]
  ------------------
 1419|  21.3k|            {
 1420|  21.3k|                ps_trns_addr = &ps_dec->s_tran_addrecon_parse;
 1421|  21.3k|            }
 1422|  12.6k|            else
 1423|  12.6k|            {
 1424|  12.6k|                ps_trns_addr = &ps_dec->s_tran_addrecon;
 1425|  12.6k|            }
 1426|       |
 1427|  34.0k|            u2_mb_x = 0xffff;
 1428|  34.0k|            u2_mb_y = 0;
 1429|       |            // assign the deblock structure pointers to start of slice
 1430|  34.0k|            ps_dec->u4_cur_mb_addr = 0;
 1431|  34.0k|            ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic;
 1432|  34.0k|            ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv;
 1433|  34.0k|            ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1;
 1434|  34.0k|            ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2;
 1435|  34.0k|            ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3;
 1436|       |
 1437|  34.0k|            ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
 1438|  34.0k|            ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
 1439|  34.0k|            ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
 1440|  34.0k|        }
 1441|       |
 1442|  34.5k|        ps_dec->ps_part = ps_dec->ps_parse_part_params;
 1443|       |
 1444|  34.5k|        ps_dec->u2_mbx = (MOD(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs));
  ------------------
  |  |   64|  34.5k|#define MOD(x,y) ((x)%(y))
  ------------------
 1445|  34.5k|        ps_dec->u2_mby = (DIV(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs));
  ------------------
  |  |   65|  34.5k|#define DIV(x,y) ((x)/(y))
  ------------------
 1446|  34.5k|        ps_dec->u2_mby <<= ps_cur_slice->u1_mbaff_frame_flag;
 1447|  34.5k|        ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
 1448|  34.5k|        ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
 1449|  34.5k|    }
 1450|       |
 1451|       |    /* RBSP stop bit is used for CABAC decoding*/
 1452|      0|    ps_bitstrm->u4_max_ofst += ps_dec->ps_cur_pps->u1_entropy_coding_mode;
 1453|       |
 1454|  34.5k|    ps_dec->u1_B = (u1_slice_type == B_SLICE);
  ------------------
  |  |  369|  34.5k|#define B_SLICE  1
  ------------------
 1455|  34.5k|    ps_dec->u4_next_mb_skip = 0;
 1456|       |
 1457|  34.5k|    ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice = ps_dec->ps_cur_slice->u2_first_mb_in_slice;
 1458|  34.5k|    ps_dec->ps_parse_cur_slice->slice_type = ps_dec->ps_cur_slice->u1_slice_type;
 1459|       |
 1460|  34.5k|    ps_dec->u4_start_recon_deblk = 1;
 1461|  34.5k|    {
 1462|  34.5k|        WORD32 num_entries;
 1463|  34.5k|        WORD32 size;
 1464|  34.5k|        UWORD8 *pu1_buf;
 1465|       |
 1466|  34.5k|        num_entries = MAX_FRAMES;
  ------------------
  |  |  600|  34.5k|#define MAX_FRAMES              16
  ------------------
 1467|  34.5k|        if((1 >= ps_dec->ps_cur_sps->u1_num_ref_frames) && (0 == ps_dec->i4_display_delay))
  ------------------
  |  Branch (1467:12): [True: 24.0k, False: 10.5k]
  |  Branch (1467:60): [True: 0, False: 24.0k]
  ------------------
 1468|      0|        {
 1469|      0|            num_entries = 1;
 1470|      0|        }
 1471|  34.5k|        num_entries = ((2 * num_entries) + 1);
 1472|  34.5k|        num_entries *= 2;
 1473|       |
 1474|  34.5k|        size = num_entries * sizeof(void *);
 1475|  34.5k|        size += PAD_MAP_IDX_POC * sizeof(void *);
  ------------------
  |  |  100|  34.5k|#define PAD_MAP_IDX_POC             (1)
  ------------------
 1476|       |
 1477|  34.5k|        pu1_buf = (UWORD8 *) ps_dec->pv_map_ref_idx_to_poc_buf;
 1478|  34.5k|        pu1_buf += size * ps_dec->u2_cur_slice_num;
 1479|  34.5k|        ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc = (void *) pu1_buf;
 1480|  34.5k|    }
 1481|       |
 1482|  34.5k|    if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (1482:8): [True: 21.5k, False: 12.9k]
  ------------------
 1483|  21.5k|    {
 1484|  21.5k|        ps_dec->ps_parse_cur_slice->pv_tu_coeff_data_start = ps_dec->pv_parse_tu_coeff_data;
 1485|  21.5k|    }
 1486|  12.9k|    else
 1487|  12.9k|    {
 1488|  12.9k|        ps_dec->pv_proc_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
 1489|  12.9k|    }
 1490|       |
 1491|  34.5k|    ret = ih264d_fix_error_in_dpb(ps_dec);
 1492|  34.5k|    if(ret < 0) return ERROR_DBP_MANAGER_T;
  ------------------
  |  Branch (1492:8): [True: 0, False: 34.5k]
  ------------------
 1493|       |
 1494|       |    /*Default initializing default values for some parameters*/
 1495|  34.5k|    ps_svc_slice_params->u1_slice_skip_flag = 0;
 1496|  34.5k|    ps_svc_slice_params->u1_adaptive_base_mode_flag = 0;
 1497|  34.5k|    ps_svc_slice_params->u1_default_base_mode_flag = 0;
 1498|  34.5k|    ps_svc_slice_params->u1_adaptive_motion_prediction_flag = 0;
 1499|  34.5k|    ps_svc_slice_params->u1_default_motion_prediction_flag = 0;
 1500|  34.5k|    ps_svc_slice_params->u1_adaptive_residual_prediction_flag = 0;
 1501|  34.5k|    ps_svc_slice_params->u1_default_residual_prediction_flag = 0;
 1502|       |
 1503|  34.5k|    if(u1_slice_type == I_SLICE)
  ------------------
  |  |  370|  34.5k|#define I_SLICE  2
  ------------------
  |  Branch (1503:8): [True: 5.79k, False: 28.7k]
  ------------------
 1504|  5.79k|    {
 1505|  5.79k|        ps_dec->ps_cur_pic->u4_pack_slc_typ |= I_SLC_BIT;
  ------------------
  |  |  376|  5.79k|#define I_SLC_BIT  (0x1)
  ------------------
 1506|       |
 1507|  5.79k|        ret = isvcd_parse_eislice(ps_svc_lyr_dec, u2_first_mb_in_slice);
 1508|  5.79k|        ps_dec->u1_pr_sl_type = u1_slice_type;
 1509|  5.79k|        if(ps_dec->i4_pic_type != B_SLICE && ps_dec->i4_pic_type != P_SLICE)
  ------------------
  |  |  369|  11.5k|#define B_SLICE  1
  ------------------
                      if(ps_dec->i4_pic_type != B_SLICE && ps_dec->i4_pic_type != P_SLICE)
  ------------------
  |  |  368|  5.62k|#define P_SLICE  0
  ------------------
  |  Branch (1509:12): [True: 5.62k, False: 168]
  |  Branch (1509:46): [True: 5.62k, False: 0]
  ------------------
 1510|  5.62k|            ps_dec->i4_pic_type = I_SLICE;
  ------------------
  |  |  370|  5.62k|#define I_SLICE  2
  ------------------
 1511|  5.79k|    }
 1512|  28.7k|    else if(u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  28.7k|#define P_SLICE  0
  ------------------
  |  Branch (1512:13): [True: 15.0k, False: 13.7k]
  ------------------
 1513|  15.0k|    {
 1514|  15.0k|        ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT;
  ------------------
  |  |  377|  15.0k|#define P_SLC_BIT  (0x2)
  ------------------
 1515|  15.0k|        ret = isvcd_parse_epslice(ps_svc_lyr_dec, u2_first_mb_in_slice);
 1516|  15.0k|        ps_dec->u1_pr_sl_type = u1_slice_type;
 1517|  15.0k|        if(ps_dec->i4_pic_type != B_SLICE) ps_dec->i4_pic_type = P_SLICE;
  ------------------
  |  |  369|  15.0k|#define B_SLICE  1
  ------------------
                      if(ps_dec->i4_pic_type != B_SLICE) ps_dec->i4_pic_type = P_SLICE;
  ------------------
  |  |  368|  14.9k|#define P_SLICE  0
  ------------------
  |  Branch (1517:12): [True: 14.9k, False: 66]
  ------------------
 1518|  15.0k|    }
 1519|  13.7k|    else if(u1_slice_type == B_SLICE)
  ------------------
  |  |  369|  13.7k|#define B_SLICE  1
  ------------------
  |  Branch (1519:13): [True: 13.6k, False: 69]
  ------------------
 1520|  13.6k|    {
 1521|  13.6k|        ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT;
  ------------------
  |  |  378|  13.6k|#define B_SLC_BIT  (0x4)
  ------------------
 1522|  13.6k|        ret = isvcd_parse_ebslice(ps_svc_lyr_dec, u2_first_mb_in_slice);
 1523|  13.6k|        ps_dec->u1_pr_sl_type = u1_slice_type;
 1524|  13.6k|        ps_dec->i4_pic_type = B_SLICE;
  ------------------
  |  |  369|  13.6k|#define B_SLICE  1
  ------------------
 1525|  13.6k|    }
 1526|     69|    else
 1527|     69|        return ERROR_INV_SLC_TYPE_T;
 1528|       |
 1529|  34.5k|    if(ps_dec->u1_slice_header_done)
  ------------------
  |  Branch (1529:8): [True: 25.2k, False: 9.24k]
  ------------------
 1530|  25.2k|    {
 1531|       |        /* set to zero to indicate a valid slice has been decoded */
 1532|  25.2k|        ps_dec->u1_first_slice_in_stream = 0;
 1533|  25.2k|    }
 1534|       |
 1535|  34.5k|    if(ret != OK) return ret;
  ------------------
  |  |  114|  34.5k|#define OK        0
  ------------------
  |  Branch (1535:8): [True: 22.6k, False: 11.8k]
  ------------------
 1536|       |
 1537|  11.8k|    if(u1_nal_ref_idc != 0)
  ------------------
  |  Branch (1537:8): [True: 10.7k, False: 1.13k]
  ------------------
 1538|  10.7k|    {
 1539|  10.7k|        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (1539:12): [True: 10.4k, False: 227]
  ------------------
 1540|  10.4k|        {
 1541|  10.4k|            memcpy((void *) ps_dec->ps_dpb_cmds, (void *) (&(ps_dec->s_dpb_cmds_scratch)),
 1542|  10.4k|                   sizeof(dpb_commands_t));
 1543|  10.4k|        }
 1544|  10.7k|    }
 1545|       |
 1546|       |    /* storing last Mb X and MbY of the slice */
 1547|  11.8k|    ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
 1548|  11.8k|    ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
 1549|       |
 1550|       |    /* End of Picture detection */
 1551|  11.8k|    if(ps_dec->u4_total_mbs_coded >= (ps_seq->u4_max_mb_addr + 1))
  ------------------
  |  Branch (1551:8): [True: 7.29k, False: 4.54k]
  ------------------
 1552|  7.29k|    {
 1553|  7.29k|        ps_dec->u1_pic_decode_done = 1;
 1554|  7.29k|    }
 1555|       |
 1556|  11.8k|    {
 1557|  11.8k|        dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
 1558|  11.8k|        if((ps_err->u1_err_flag & REJECT_PB_PICS) && (ps_err->u1_cur_pic_type == PIC_TYPE_I))
  ------------------
  |  |  603|  11.8k|#define REJECT_PB_PICS    (0x02)
  ------------------
                      if((ps_err->u1_err_flag & REJECT_PB_PICS) && (ps_err->u1_cur_pic_type == PIC_TYPE_I))
  ------------------
  |  |  609|      0|#define PIC_TYPE_I        (0x00)
  ------------------
  |  Branch (1558:12): [True: 0, False: 11.8k]
  |  Branch (1558:54): [True: 0, False: 0]
  ------------------
 1559|      0|        {
 1560|      0|            ps_err->u1_err_flag = ACCEPT_ALL_PICS;
  ------------------
  |  |  601|      0|#define ACCEPT_ALL_PICS   (0x00)
  ------------------
 1561|      0|        }
 1562|  11.8k|    }
 1563|       |
 1564|  11.8k|    PRINT_BIN_BIT_RATIO(ps_dec)
 1565|       |
 1566|  11.8k|    return ret;
 1567|  34.5k|}
isvcd_set_default_slice_header_ext:
 1581|  26.9k|{
 1582|  26.9k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1583|  26.9k|    WORD32 i_status = OK;
  ------------------
  |  |  114|  26.9k|#define OK        0
  ------------------
 1584|  26.9k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1585|  26.9k|    dec_seq_params_t *ps_seq;
 1586|  26.9k|    dec_svc_seq_params_t *ps_subset_seq;
 1587|  26.9k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1588|  26.9k|    dec_subset_seq_params_t *ps_sps_svc_ext = NULL;
 1589|  26.9k|    ps_seq = ps_pps->ps_sps;
 1590|  26.9k|    ps_seq += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  26.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 1591|  26.9k|    ps_subset_seq =
 1592|  26.9k|        &ps_svc_lyr_dec->ps_subset_sps[MAX_NUM_SEQ_PARAMS + ps_seq->u1_seq_parameter_set_id];
  ------------------
  |  |  521|  26.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 1593|  26.9k|    ps_sps_svc_ext = &ps_subset_seq->s_sps_svc_ext;
 1594|  26.9k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1595|       |
 1596|  26.9k|    if(0 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id)
  ------------------
  |  Branch (1596:8): [True: 26.9k, False: 0]
  ------------------
 1597|  26.9k|    {
 1598|  26.9k|        ps_svc_slice_params->u1_ref_layer_chroma_phase_y_plus1 =
 1599|  26.9k|            ps_sps_svc_ext->u1_seq_ref_layer_chroma_phase_y_plus1;
 1600|       |
 1601|  26.9k|        ps_svc_slice_params->u1_ref_layer_chroma_phase_x_plus1_flag =
 1602|  26.9k|            ps_sps_svc_ext->u1_seq_ref_layer_chroma_phase_x_plus1_flag;
 1603|  26.9k|    }
 1604|       |
 1605|  26.9k|    ps_svc_slice_params->u4_ref_layer_dq_id = UINT32_MAX;
 1606|  26.9k|    ps_svc_slice_params->u4_disable_inter_layer_deblk_filter_idc = 0;
 1607|  26.9k|    ps_svc_slice_params->u1_scan_idx_start = 0;
 1608|  26.9k|    ps_svc_slice_params->u1_scan_idx_end = 15;
 1609|  26.9k|    ps_svc_slice_params->i4_inter_layer_slice_alpha_c0_offset_div2 = 0;
 1610|  26.9k|    ps_svc_slice_params->i4_inter_layer_slice_beta_offset_div2 = 0;
 1611|  26.9k|    ps_svc_slice_params->u1_constrained_intra_resampling_flag = 0;
 1612|       |
 1613|  26.9k|    return i_status;
 1614|  26.9k|}
isvcd_parse_slice_header:
 1628|  26.9k|{
 1629|  26.9k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1630|  26.9k|    dec_pic_params_t *ps_pps = ps_dec->ps_cur_pps;
 1631|  26.9k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1632|  26.9k|    dec_seq_params_t *ps_seq;
 1633|  26.9k|    dec_svc_seq_params_t *ps_subset_seq;
 1634|  26.9k|    dec_slice_svc_ext_params_t *ps_svc_slice_params = NULL;
 1635|  26.9k|    dec_subset_seq_params_t *ps_sps_svc_ext = NULL;
 1636|  26.9k|    svc_dec_ctxt_t *ps_svcd_ctxt;
 1637|  26.9k|    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
 1638|  26.9k|    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
 1639|  26.9k|    ps_svcd_ctxt = ps_svc_lyr_dec->ps_svcd_ctxt;
 1640|  26.9k|    ps_seq = ps_pps->ps_sps;
 1641|  26.9k|    ps_seq += MAX_NUM_SEQ_PARAMS;
  ------------------
  |  |  521|  26.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 1642|  26.9k|    ps_subset_seq =
 1643|  26.9k|        &ps_svc_lyr_dec->ps_subset_sps[MAX_NUM_SEQ_PARAMS + ps_seq->u1_seq_parameter_set_id];
  ------------------
  |  |  521|  26.9k|#define MAX_NUM_SEQ_PARAMS 32
  ------------------
 1644|  26.9k|    ps_sps_svc_ext = &ps_subset_seq->s_sps_svc_ext;
 1645|  26.9k|    ps_svc_slice_params = &ps_svc_lyr_dec->s_svc_slice_params;
 1646|       |
 1647|  26.9k|    if(!ps_svc_lyr_dec->ps_nal_svc_ext->u1_no_inter_layer_pred_flag &&
  ------------------
  |  Branch (1647:8): [True: 26.9k, False: 0]
  ------------------
 1648|  26.9k|       (0 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id))
  ------------------
  |  Branch (1648:8): [True: 26.9k, False: 0]
  ------------------
 1649|  26.9k|    {
 1650|  26.9k|        ps_svc_slice_params->u4_ref_layer_dq_id = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1651|  26.9k|        COPYTHECONTEXT("Slice Header SVC ext: u4_ref_layer_dq_id",
 1652|  26.9k|                       ps_svc_slice_params->u4_ref_layer_dq_id);
 1653|  26.9k|        if(ps_svc_slice_params->u4_ref_layer_dq_id > MAX_REF_DEP_ID)
  ------------------
  |  |  106|  26.9k|#define MAX_REF_DEP_ID ((MAX_DEPENDENCY_ID << 4) | MAX_QUALITY_ID)
  |  |  ------------------
  |  |  |  |  103|  26.9k|#define MAX_DEPENDENCY_ID 4
  |  |  ------------------
  |  |               #define MAX_REF_DEP_ID ((MAX_DEPENDENCY_ID << 4) | MAX_QUALITY_ID)
  |  |  ------------------
  |  |  |  |  102|  26.9k|#define MAX_QUALITY_ID 0
  |  |  ------------------
  ------------------
  |  Branch (1653:12): [True: 256, False: 26.7k]
  ------------------
 1654|    256|        {
 1655|    256|            return ERROR_INV_SLICE_HDR_T;
 1656|    256|        }
 1657|       |        /* Reference layer id update is taken care during resolution init */
 1658|       |        /*
 1659|       |        ps_svc_lyr_dec->u1_ref_layer_id = ps_svc_slice_params->u4_ref_layer_dq_id >> 4;
 1660|       |        if(ps_svc_lyr_dec->u1_ref_layer_id >= ps_svc_lyr_dec->u1_layer_id)
 1661|       |        {
 1662|       |            return ERROR_INV_SLICE_HDR_T;
 1663|       |        }
 1664|       |        */
 1665|  26.7k|        ps_svc_lyr_dec->ps_dec_svc_ref_layer =
 1666|  26.7k|            &ps_svcd_ctxt->ps_svc_dec_lyr[ps_svc_lyr_dec->u1_ref_layer_id];
 1667|       |
 1668|  26.7k|        if(ps_sps_svc_ext->u1_inter_layer_deblocking_filter_control_present_flag)
  ------------------
  |  Branch (1668:12): [True: 1.49k, False: 25.2k]
  ------------------
 1669|  1.49k|        {
 1670|  1.49k|            ps_svc_slice_params->u4_disable_inter_layer_deblk_filter_idc =
 1671|  1.49k|                ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1672|  1.49k|            COPYTHECONTEXT("Slice Header SVC ext: u4_disable_inter_layer_deblk_filter_idc",
 1673|  1.49k|                           ps_svc_slice_params->u4_disable_inter_layer_deblk_filter_idc);
 1674|       |
 1675|  1.49k|            if(ps_svc_slice_params->u4_disable_inter_layer_deblk_filter_idc > 6)
  ------------------
  |  Branch (1675:16): [True: 90, False: 1.40k]
  ------------------
 1676|     90|            {
 1677|     90|                return ERROR_INV_SLICE_HDR_T;
 1678|     90|            }
 1679|       |
 1680|  1.40k|            if(1 != ps_svc_slice_params->u4_disable_inter_layer_deblk_filter_idc)
  ------------------
  |  Branch (1680:16): [True: 1.30k, False: 102]
  ------------------
 1681|  1.30k|            {
 1682|  1.30k|                ps_svc_slice_params->i4_inter_layer_slice_alpha_c0_offset_div2 =
 1683|  1.30k|                    ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1684|  1.30k|                COPYTHECONTEXT("Slice Header SVC ext: i4_inter_layer_slice_alpha_c0_offset_div2",
 1685|  1.30k|                               ps_svc_slice_params->i4_inter_layer_slice_alpha_c0_offset_div2);
 1686|       |
 1687|  1.30k|                if(ps_svc_slice_params->i4_inter_layer_slice_alpha_c0_offset_div2 > 6 ||
  ------------------
  |  Branch (1687:20): [True: 74, False: 1.23k]
  ------------------
 1688|  1.23k|                   ps_svc_slice_params->i4_inter_layer_slice_alpha_c0_offset_div2 < -6)
  ------------------
  |  Branch (1688:20): [True: 92, False: 1.14k]
  ------------------
 1689|    166|                {
 1690|    166|                    return ERROR_INV_SLICE_HDR_T;
 1691|    166|                }
 1692|       |
 1693|  1.14k|                ps_svc_slice_params->i4_inter_layer_slice_beta_offset_div2 =
 1694|  1.14k|                    ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1695|  1.14k|                COPYTHECONTEXT("Slice Header SVC ext: i4_inter_layer_slice_beta_offset_div2",
 1696|  1.14k|                               ps_svc_slice_params->i4_inter_layer_slice_beta_offset_div2);
 1697|       |
 1698|  1.14k|                if(ps_svc_slice_params->i4_inter_layer_slice_beta_offset_div2 > 6 ||
  ------------------
  |  Branch (1698:20): [True: 121, False: 1.02k]
  ------------------
 1699|  1.02k|                   ps_svc_slice_params->i4_inter_layer_slice_beta_offset_div2 < -6)
  ------------------
  |  Branch (1699:20): [True: 128, False: 892]
  ------------------
 1700|    249|                {
 1701|    249|                    return ERROR_INV_SLICE_HDR_T;
 1702|    249|                }
 1703|  1.14k|            }
 1704|  1.40k|        }
 1705|       |
 1706|  26.2k|        ps_svc_slice_params->u1_constrained_intra_resampling_flag = ih264d_get_bit_h264(ps_bitstrm);
 1707|  26.2k|        COPYTHECONTEXT("Slice Header SVC ext: u1_constrained_intra_resampling_flag",
 1708|  26.2k|                       ps_svc_slice_params->u1_constrained_intra_resampling_flag);
 1709|       |
 1710|  26.2k|        ps_svc_lyr_dec->s_res_prms.i1_constrained_intra_rsmpl_flag =
 1711|  26.2k|            ps_svc_lyr_dec->s_svc_slice_params.u1_constrained_intra_resampling_flag;
 1712|  26.2k|        isvcd_intra_resamp_res_init_update_flags(ps_svc_lyr_dec);
 1713|       |
 1714|  26.2k|        if(2 == ps_sps_svc_ext->u1_extended_spatial_scalability_idc)
  ------------------
  |  Branch (1714:12): [True: 0, False: 26.2k]
  ------------------
 1715|      0|        {
 1716|       |            /* ChromaArrayType = i4_chroma_format_idc  if  separate_colour_plane_flag
 1717|       |             * = 0 for all chroma format except 4:4:4 */
 1718|      0|            if(ps_dec->ps_cur_sps->i4_chroma_format_idc >= 0)
  ------------------
  |  Branch (1718:16): [True: 0, False: 0]
  ------------------
 1719|      0|            {
 1720|      0|                ps_svc_slice_params->u1_ref_layer_chroma_phase_x_plus1_flag =
 1721|      0|                    ih264d_get_bit_h264(ps_bitstrm);
 1722|      0|                COPYTHECONTEXT("Slice Header SVC ext: u1_ref_layer_chroma_phase_x_plus1_flag",
 1723|      0|                               ps_svc_slice_params->u1_ref_layer_chroma_phase_x_plus1_flag);
 1724|       |
 1725|      0|                ps_svc_slice_params->u1_ref_layer_chroma_phase_y_plus1 =
 1726|      0|                    ih264d_get_bits_h264(ps_bitstrm, 2);
 1727|      0|                COPYTHECONTEXT("Slice Header SVC ext: u1_ref_layer_chroma_phase_y_plus1",
 1728|      0|                               ps_svc_slice_params->u1_ref_layer_chroma_phase_y_plus1);
 1729|       |
 1730|      0|                if(ps_svc_slice_params->u1_ref_layer_chroma_phase_y_plus1 > 2)
  ------------------
  |  Branch (1730:20): [True: 0, False: 0]
  ------------------
 1731|      0|                {
 1732|      0|                    return ERROR_INV_SLICE_HDR_T;
 1733|      0|                }
 1734|      0|            }
 1735|      0|            else
 1736|      0|            {
 1737|      0|                if(0 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_quality_id)
  ------------------
  |  Branch (1737:20): [True: 0, False: 0]
  ------------------
 1738|      0|                {
 1739|      0|                    ps_svc_slice_params->u1_ref_layer_chroma_phase_y_plus1 =
 1740|      0|                        ps_sps_svc_ext->u1_seq_ref_layer_chroma_phase_y_plus1;
 1741|      0|                }
 1742|      0|            }
 1743|       |
 1744|      0|            ps_svc_slice_params->i4_scaled_ref_layer_left_offset =
 1745|      0|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1746|      0|            COPYTHECONTEXT("Slice Header SVC ext: i4_scaled_ref_layer_left_offset",
 1747|      0|                           ps_svc_slice_params->i4_scaled_ref_layer_left_offset);
 1748|       |
 1749|      0|            if(ps_svc_slice_params->i4_scaled_ref_layer_left_offset != 0)
  ------------------
  |  Branch (1749:16): [True: 0, False: 0]
  ------------------
 1750|      0|            {
 1751|      0|                return ERROR_INV_SLICE_HDR_T;
 1752|      0|            }
 1753|       |
 1754|      0|            if(ps_svc_slice_params->i4_scaled_ref_layer_left_offset >= MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|      0|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
  |  Branch (1754:16): [True: 0, False: 0]
  ------------------
 1755|      0|               ps_svc_slice_params->i4_scaled_ref_layer_left_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|      0|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (1755:16): [True: 0, False: 0]
  ------------------
 1756|      0|            {
 1757|      0|                return ERROR_INV_SLICE_HDR_T;
 1758|      0|            }
 1759|       |
 1760|      0|            ps_svc_slice_params->i4_scaled_ref_layer_top_offset =
 1761|      0|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1762|      0|            COPYTHECONTEXT("Slice Header SVC ext: i4_scaled_ref_layer_top_offset",
 1763|      0|                           ps_svc_slice_params->i4_scaled_ref_layer_top_offset);
 1764|       |
 1765|      0|            if(ps_svc_slice_params->i4_scaled_ref_layer_top_offset != 0)
  ------------------
  |  Branch (1765:16): [True: 0, False: 0]
  ------------------
 1766|      0|            {
 1767|      0|                return ERROR_INV_SLICE_HDR_T;
 1768|      0|            }
 1769|       |
 1770|      0|            if(ps_svc_slice_params->i4_scaled_ref_layer_top_offset >= MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|      0|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
  |  Branch (1770:16): [True: 0, False: 0]
  ------------------
 1771|      0|               ps_svc_slice_params->i4_scaled_ref_layer_top_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|      0|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (1771:16): [True: 0, False: 0]
  ------------------
 1772|      0|            {
 1773|      0|                return ERROR_INV_SLICE_HDR_T;
 1774|      0|            }
 1775|       |
 1776|      0|            ps_svc_slice_params->i4_scaled_ref_layer_right_offset =
 1777|      0|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1778|      0|            COPYTHECONTEXT("Slice Header SVC ext: i4_scaled_ref_layer_right_offset",
 1779|      0|                           ps_svc_slice_params->i4_scaled_ref_layer_right_offset);
 1780|       |
 1781|      0|            if(ps_svc_slice_params->i4_scaled_ref_layer_right_offset >= MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|      0|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
  |  Branch (1781:16): [True: 0, False: 0]
  ------------------
 1782|      0|               ps_svc_slice_params->i4_scaled_ref_layer_right_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|      0|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (1782:16): [True: 0, False: 0]
  ------------------
 1783|      0|            {
 1784|      0|                return ERROR_INV_SLICE_HDR_T;
 1785|      0|            }
 1786|       |
 1787|      0|            ps_svc_slice_params->i4_scaled_ref_layer_bottom_offset =
 1788|      0|                ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1789|      0|            COPYTHECONTEXT("Slice Header SVC ext: i4_scaled_ref_layer_bottom_offset",
 1790|      0|                           ps_svc_slice_params->i4_scaled_ref_layer_bottom_offset);
 1791|       |
 1792|      0|            if(ps_svc_slice_params->i4_scaled_ref_layer_bottom_offset >=
  ------------------
  |  Branch (1792:16): [True: 0, False: 0]
  ------------------
 1793|      0|                   MAX_SCLD_REF_LAYER_OFFSET ||
  ------------------
  |  |   71|      0|#define MAX_SCLD_REF_LAYER_OFFSET 32768
  ------------------
 1794|      0|               ps_svc_slice_params->i4_scaled_ref_layer_bottom_offset < MIN_SCLD_REF_LAYER_OFFSET)
  ------------------
  |  |   72|      0|#define MIN_SCLD_REF_LAYER_OFFSET -32768
  ------------------
  |  Branch (1794:16): [True: 0, False: 0]
  ------------------
 1795|      0|            {
 1796|      0|                return ERROR_INV_SLICE_HDR_T;
 1797|      0|            }
 1798|      0|        }
 1799|  26.2k|        else
 1800|  26.2k|        {
 1801|  26.2k|            ps_svc_slice_params->i4_scaled_ref_layer_left_offset =
 1802|  26.2k|                ps_sps_svc_ext->i4_seq_scaled_ref_layer_left_offset;
 1803|  26.2k|            ps_svc_slice_params->i4_scaled_ref_layer_top_offset =
 1804|  26.2k|                ps_sps_svc_ext->i4_seq_scaled_ref_layer_top_offset;
 1805|  26.2k|            ps_svc_slice_params->i4_scaled_ref_layer_right_offset =
 1806|  26.2k|                ps_sps_svc_ext->i4_seq_scaled_ref_layer_right_offset;
 1807|  26.2k|            ps_svc_slice_params->i4_scaled_ref_layer_bottom_offset =
 1808|  26.2k|                ps_sps_svc_ext->i4_seq_scaled_ref_layer_bottom_offset;
 1809|  26.2k|        }
 1810|  26.2k|    }
 1811|       |
 1812|  26.2k|    if(!ps_svc_lyr_dec->ps_nal_svc_ext->u1_no_inter_layer_pred_flag)
  ------------------
  |  Branch (1812:8): [True: 26.2k, False: 0]
  ------------------
 1813|  26.2k|    {
 1814|  26.2k|        ps_svc_slice_params->u1_slice_skip_flag = ih264d_get_bit_h264(ps_bitstrm);
 1815|  26.2k|        COPYTHECONTEXT("Slice Header SVC ext: u1_slice_skip_flag",
 1816|  26.2k|                       ps_svc_slice_params->u1_slice_skip_flag);
 1817|       |
 1818|  26.2k|        if(ps_svc_slice_params->u1_slice_skip_flag)
  ------------------
  |  Branch (1818:12): [True: 1.23k, False: 24.9k]
  ------------------
 1819|  1.23k|        {
 1820|  1.23k|            ps_svc_slice_params->u4_num_mbs_in_slice_minus1 =
 1821|  1.23k|                ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1822|  1.23k|            COPYTHECONTEXT("Slice Header SVC ext: u4_num_mbs_in_slice_minus1",
 1823|  1.23k|                           ps_svc_slice_params->u4_num_mbs_in_slice_minus1);
 1824|  1.23k|        }
 1825|  24.9k|        else
 1826|  24.9k|        {
 1827|  24.9k|            ps_svc_slice_params->u1_adaptive_base_mode_flag = ih264d_get_bit_h264(ps_bitstrm);
 1828|  24.9k|            COPYTHECONTEXT("Slice Header SVC ext: u1_adaptive_base_mode_flag",
 1829|  24.9k|                           ps_svc_slice_params->u1_adaptive_base_mode_flag);
 1830|       |
 1831|  24.9k|            if(!ps_svc_slice_params->u1_adaptive_base_mode_flag)
  ------------------
  |  Branch (1831:16): [True: 13.8k, False: 11.1k]
  ------------------
 1832|  13.8k|            {
 1833|  13.8k|                ps_svc_slice_params->u1_default_base_mode_flag = ih264d_get_bit_h264(ps_bitstrm);
 1834|  13.8k|                COPYTHECONTEXT("Slice Header SVC ext: u1_default_base_mode_flag",
 1835|  13.8k|                               ps_svc_slice_params->u1_default_base_mode_flag);
 1836|  13.8k|            }
 1837|  24.9k|            if(!ps_svc_slice_params->u1_default_base_mode_flag)
  ------------------
  |  Branch (1837:16): [True: 19.1k, False: 5.89k]
  ------------------
 1838|  19.1k|            {
 1839|  19.1k|                ps_svc_slice_params->u1_adaptive_motion_prediction_flag =
 1840|  19.1k|                    ih264d_get_bit_h264(ps_bitstrm);
 1841|  19.1k|                COPYTHECONTEXT("Slice Header SVC ext: u1_adaptive_motion_prediction_flag",
 1842|  19.1k|                               ps_svc_slice_params->u1_adaptive_motion_prediction_flag);
 1843|       |
 1844|  19.1k|                if(!ps_svc_slice_params->u1_adaptive_motion_prediction_flag)
  ------------------
  |  Branch (1844:20): [True: 12.6k, False: 6.40k]
  ------------------
 1845|  12.6k|                {
 1846|  12.6k|                    ps_svc_slice_params->u1_default_motion_prediction_flag =
 1847|  12.6k|                        ih264d_get_bit_h264(ps_bitstrm);
 1848|  12.6k|                    COPYTHECONTEXT("Slice Header SVC ext: u1_default_motion_prediction_flag",
 1849|  12.6k|                                   ps_svc_slice_params->u1_default_motion_prediction_flag);
 1850|  12.6k|                }
 1851|  19.1k|            }
 1852|  24.9k|            ps_svc_slice_params->u1_adaptive_residual_prediction_flag =
 1853|  24.9k|                ih264d_get_bit_h264(ps_bitstrm);
 1854|  24.9k|            COPYTHECONTEXT("Slice Header SVC ext: u1_adaptive_residual_prediction_flag",
 1855|  24.9k|                           ps_svc_slice_params->u1_adaptive_residual_prediction_flag);
 1856|       |
 1857|  24.9k|            if(!ps_svc_slice_params->u1_adaptive_residual_prediction_flag)
  ------------------
  |  Branch (1857:16): [True: 17.2k, False: 7.76k]
  ------------------
 1858|  17.2k|            {
 1859|  17.2k|                ps_svc_slice_params->u1_default_residual_prediction_flag =
 1860|  17.2k|                    ih264d_get_bit_h264(ps_bitstrm);
 1861|  17.2k|                COPYTHECONTEXT("Slice Header SVC ext: u1_default_residual_prediction_flag",
 1862|  17.2k|                               ps_svc_slice_params->u1_default_residual_prediction_flag);
 1863|  17.2k|            }
 1864|  24.9k|        }
 1865|       |
 1866|  26.2k|        if(ps_sps_svc_ext->u1_adaptive_tcoeff_level_prediction_flag)
  ------------------
  |  Branch (1866:12): [True: 1.33k, False: 24.8k]
  ------------------
 1867|  1.33k|        {
 1868|  1.33k|            ps_svc_slice_params->u1_tcoeff_level_prediction_flag = ih264d_get_bit_h264(ps_bitstrm);
 1869|  1.33k|            COPYTHECONTEXT("Slice Header SVC ext: u1_tcoeff_level_prediction_flag",
 1870|  1.33k|                           ps_svc_slice_params->u1_tcoeff_level_prediction_flag);
 1871|       |
 1872|  1.33k|            if(ps_svc_slice_params->u1_tcoeff_level_prediction_flag != 0)
  ------------------
  |  Branch (1872:16): [True: 147, False: 1.18k]
  ------------------
 1873|    147|            {
 1874|    147|                return ERROR_INV_SPS_PPS_T;
 1875|    147|            }
 1876|  1.33k|        }
 1877|  26.2k|    }
 1878|       |
 1879|  26.0k|    if(!ps_sps_svc_ext->u1_slice_header_restriction_flag &&
  ------------------
  |  Branch (1879:8): [True: 21.9k, False: 4.13k]
  ------------------
 1880|  21.9k|       !ps_svc_slice_params->u1_slice_skip_flag)
  ------------------
  |  Branch (1880:8): [True: 20.9k, False: 951]
  ------------------
 1881|  20.9k|    {
 1882|  20.9k|        ps_svc_slice_params->u1_scan_idx_start = ih264d_get_bits_h264(ps_bitstrm, 4);
 1883|  20.9k|        COPYTHECONTEXT("Slice Header SVC ext: u1_scan_idx_start",
 1884|  20.9k|                       ps_svc_slice_params->u1_scan_idx_start);
 1885|  20.9k|        ps_svc_slice_params->u1_scan_idx_end = ih264d_get_bits_h264(ps_bitstrm, 4);
 1886|  20.9k|        COPYTHECONTEXT("Slice Header SVC ext: u1_scan_idx_end",
 1887|  20.9k|                       ps_svc_slice_params->u1_scan_idx_end);
 1888|       |
 1889|  20.9k|        if(0 != ps_svc_slice_params->u1_scan_idx_start &&
  ------------------
  |  Branch (1889:12): [True: 12.3k, False: 8.68k]
  ------------------
 1890|  12.3k|           15 != ps_svc_slice_params->u1_scan_idx_end)
  ------------------
  |  Branch (1890:12): [True: 816, False: 11.4k]
  ------------------
 1891|    816|            return ERROR_SVC_INV_SCAN_IDX;
 1892|  20.9k|    }
 1893|  25.2k|    return OK;
  ------------------
  |  |  114|  25.2k|#define OK        0
  ------------------
 1894|  26.0k|}
isvcd_parse_decode_slice:
 1911|   130k|{
 1912|   130k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1913|   130k|    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
 1914|   130k|    dec_pic_params_t *ps_pps;
 1915|   130k|    dec_seq_params_t *ps_seq;
 1916|   130k|    dec_svc_seq_params_t *ps_subset_seq;
 1917|   130k|    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
 1918|   130k|    pocstruct_t s_tmp_poc = {0};
 1919|   130k|    WORD32 i_delta_poc[2] = {0};
 1920|   130k|    WORD32 i4_poc = 0;
 1921|   130k|    UWORD16 u2_first_mb_in_slice, u2_frame_num;
 1922|   130k|    UWORD8 u1_field_pic_flag, u1_redundant_pic_cnt = 0, u1_slice_type;
 1923|   130k|    UWORD32 u4_idr_pic_id = 0;
 1924|   130k|    UWORD8 u1_bottom_field_flag, u1_pic_order_cnt_type;
 1925|   130k|    UWORD8 u1_nal_unit_type;
 1926|   130k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
 1927|   130k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
 1928|   130k|    WORD8 i1_is_end_of_poc;
 1929|       |
 1930|   130k|    WORD32 ret;
 1931|   130k|    WORD32 prev_slice_err, num_mb_skipped;
 1932|   130k|    UWORD32 u4_mbaff;
 1933|   130k|    pocstruct_t *ps_cur_poc;
 1934|       |
 1935|   130k|    UWORD32 u4_temp;
 1936|   130k|    WORD32 i_temp;
 1937|   130k|    svc_dec_ctxt_t *psvcd_dec_ctxt;
 1938|   130k|    dec_struct_t *ps_dec_cur_lyr_minus_1;
 1939|   130k|    svc_dec_lyr_struct_t *ps_svc_cur_lyr_dec_minus_1;
 1940|       |
 1941|       |    /* read FirstMbInSlice  and slice type*/
 1942|   130k|    ps_dec->ps_dpb_cmds->u1_dpb_commands_read_slc = 0;
 1943|   130k|    u2_first_mb_in_slice = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1944|   130k|    if(u2_first_mb_in_slice > (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs))
  ------------------
  |  Branch (1944:8): [True: 13.8k, False: 117k]
  ------------------
 1945|  13.8k|    {
 1946|  13.8k|        return ERROR_CORRUPTED_SLICE;
 1947|  13.8k|    }
 1948|       |
 1949|       |    /*we currently don not support ASO*/
 1950|   117k|    if(((u2_first_mb_in_slice << ps_cur_slice->u1_mbaff_frame_flag) <= ps_dec->u4_cur_mb_addr) &&
  ------------------
  |  Branch (1950:8): [True: 109k, False: 7.84k]
  ------------------
 1951|   109k|       (ps_dec->u4_first_slice_in_pic == 0))
  ------------------
  |  Branch (1951:8): [True: 832, False: 108k]
  ------------------
 1952|    832|    {
 1953|    832|        return ERROR_CORRUPTED_SLICE;
 1954|    832|    }
 1955|       |
 1956|   116k|    if(ps_dec->u4_first_slice_in_pic == 1)
  ------------------
  |  Branch (1956:8): [True: 112k, False: 3.42k]
  ------------------
 1957|   112k|    {
 1958|   112k|        if(u2_first_mb_in_slice != 0)
  ------------------
  |  Branch (1958:12): [True: 4.42k, False: 108k]
  ------------------
 1959|  4.42k|        {
 1960|  4.42k|            return ERROR_CORRUPTED_SLICE;
 1961|  4.42k|        }
 1962|   112k|    }
 1963|       |
 1964|   111k|    COPYTHECONTEXT("SH: first_mb_in_slice", u2_first_mb_in_slice);
 1965|       |
 1966|   111k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1967|   111k|    if(u4_temp > 9) return ERROR_INV_SLC_TYPE_T;
  ------------------
  |  Branch (1967:8): [True: 739, False: 111k]
  ------------------
 1968|       |
 1969|   111k|    u1_slice_type = u4_temp;
 1970|   111k|    COPYTHECONTEXT("SH: slice_type", (u1_slice_type));
 1971|       |    /* Find Out the Slice Type is 5 to 9 or not then Set the Flag   */
 1972|       |    /* u1_sl_typ_5_9 = 1 .Which tells that all the slices in the Pic*/
 1973|       |    /* will be of same type of current                            */
 1974|   111k|    if(u1_slice_type > 4)
  ------------------
  |  Branch (1974:8): [True: 4.37k, False: 106k]
  ------------------
 1975|  4.37k|    {
 1976|  4.37k|        u1_slice_type -= 5;
 1977|  4.37k|    }
 1978|       |
 1979|   111k|    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 1980|   111k|    if(u4_temp & MASK_ERR_PIC_SET_ID) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  527|   111k|#define MASK_ERR_PIC_SET_ID   (0xFFFFFF00)
  ------------------
  |  Branch (1980:8): [True: 131, False: 110k]
  ------------------
 1981|       |    /* discard slice if pic param is invalid */
 1982|   110k|    COPYTHECONTEXT("SH: pic_parameter_set_id", u4_temp);
 1983|   110k|    ps_pps = &ps_dec->ps_pps[u4_temp];
 1984|   110k|    if(FALSE == ps_pps->u1_is_valid)
  ------------------
  |  |  592|   110k|#define FALSE   0
  ------------------
  |  Branch (1984:8): [True: 3.38k, False: 107k]
  ------------------
 1985|  3.38k|    {
 1986|  3.38k|        return ERROR_INV_SLICE_HDR_T;
 1987|  3.38k|    }
 1988|       |    /* slices in a layer should have same PPS id*/
 1989|   107k|    if(UINT32_MAX == ps_svc_lyr_dec->u4_pps_id_for_layer)
  ------------------
  |  Branch (1989:8): [True: 104k, False: 3.42k]
  ------------------
 1990|   104k|    {
 1991|   104k|        ps_svc_lyr_dec->u4_pps_id_for_layer = u4_temp;
 1992|   104k|    }
 1993|  3.42k|    else if(u4_temp != ps_svc_lyr_dec->u4_pps_id_for_layer)
  ------------------
  |  Branch (1993:13): [True: 76, False: 3.34k]
  ------------------
 1994|     76|    {
 1995|     76|        return ERROR_INV_SLICE_HDR_T;
 1996|     76|    }
 1997|   107k|    ps_seq = ps_pps->ps_sps;
 1998|   107k|    ps_dec->ps_cur_sps = ps_seq;
 1999|   107k|    ps_subset_seq = &ps_svc_lyr_dec->ps_subset_sps[ps_seq->u1_seq_parameter_set_id];
 2000|   107k|    ps_svc_lyr_dec->ps_cur_subset_sps = ps_subset_seq;
 2001|   107k|    if(!ps_seq) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2001:8): [True: 0, False: 107k]
  ------------------
 2002|   107k|    if(FALSE == ps_seq->u1_is_valid) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  592|   107k|#define FALSE   0
  ------------------
  |  Branch (2002:8): [True: 786, False: 106k]
  ------------------
 2003|   106k|    if(ps_seq->u1_mb_aff_flag) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2003:8): [True: 95, False: 106k]
  ------------------
 2004|   106k|    if(ps_seq->u1_level_idc > H264_LEVEL_4_2) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  303|   106k|#define H264_LEVEL_4_2     42
  ------------------
  |  Branch (2004:8): [True: 228, False: 106k]
  ------------------
 2005|   106k|    if(!ps_seq->u1_frame_mbs_only_flag) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2005:8): [True: 195, False: 106k]
  ------------------
 2006|   106k|    if(OK != isvcd_verify_level(ps_seq->u1_level_idc)) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  114|   106k|#define OK        0
  ------------------
  |  Branch (2006:8): [True: 170, False: 106k]
  ------------------
 2007|   106k|    if(ps_dec->u1_init_dec_flag == 1)
  ------------------
  |  Branch (2007:8): [True: 87.1k, False: 18.8k]
  ------------------
 2008|  87.1k|    {
 2009|  87.1k|        if(ps_dec->u2_frm_wd_in_mbs != ps_seq->u2_frm_wd_in_mbs) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2009:12): [True: 141, False: 87.0k]
  ------------------
 2010|  87.0k|        if(ps_dec->u2_frm_ht_in_mbs != ps_seq->u2_frm_ht_in_mbs) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2010:12): [True: 68, False: 86.9k]
  ------------------
 2011|  87.0k|    }
 2012|       |
 2013|   105k|    if(ps_dec->u1_init_dec_flag == 1)
  ------------------
  |  Branch (2013:8): [True: 86.9k, False: 18.8k]
  ------------------
 2014|  86.9k|    {
 2015|  86.9k|        if(ps_dec->u2_disp_height != ps_subset_seq->u2_disp_height) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2015:12): [True: 70, False: 86.8k]
  ------------------
 2016|  86.8k|        if(ps_dec->u2_disp_width != ps_subset_seq->u2_disp_width) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2016:12): [True: 68, False: 86.8k]
  ------------------
 2017|  86.8k|    }
 2018|       |
 2019|   105k|    if(ps_seq->u1_profile_idc == BASE_PROFILE_IDC)
  ------------------
  |  |  275|   105k|#define BASE_PROFILE_IDC    66
  ------------------
  |  Branch (2019:8): [True: 18.9k, False: 86.7k]
  ------------------
 2020|  18.9k|    {
 2021|  18.9k|        if(ps_pps->u1_entropy_coding_mode != 0)
  ------------------
  |  Branch (2021:12): [True: 88, False: 18.8k]
  ------------------
 2022|     88|        {
 2023|     88|            return ERROR_INV_SPS_PPS_T;
 2024|     88|        }
 2025|  18.9k|    }
 2026|       |
 2027|   105k|    ps_dec->i4_reorder_depth = ps_subset_seq->i4_reorder_depth;
 2028|   105k|    ps_dec->u2_disp_height = ps_subset_seq->u2_disp_height;
 2029|   105k|    ps_dec->u2_disp_width = ps_subset_seq->u2_disp_width;
 2030|       |
 2031|   105k|    if(ps_svc_lyr_dec->u1_layer_id > 0)
  ------------------
  |  Branch (2031:8): [True: 0, False: 105k]
  ------------------
 2032|      0|    {
 2033|      0|        psvcd_dec_ctxt = ps_svc_lyr_dec->ps_svcd_ctxt;
 2034|      0|        ps_svc_cur_lyr_dec_minus_1 =
 2035|      0|            &psvcd_dec_ctxt->ps_svc_dec_lyr[ps_svc_lyr_dec->u1_layer_id - 1];
 2036|       |
 2037|      0|        ps_dec_cur_lyr_minus_1 = &ps_svc_cur_lyr_dec_minus_1->s_dec;
 2038|       |
 2039|      0|        if((ps_dec_cur_lyr_minus_1->u2_pic_wd > ps_subset_seq->u2_pic_wd) ||
  ------------------
  |  Branch (2039:12): [True: 0, False: 0]
  ------------------
 2040|      0|           (ps_dec_cur_lyr_minus_1->u2_pic_ht > ps_subset_seq->u2_pic_ht))
  ------------------
  |  Branch (2040:12): [True: 0, False: 0]
  ------------------
 2041|      0|        {
 2042|      0|            return ERROR_CORRUPTED_SLICE;
 2043|      0|        }
 2044|      0|    }
 2045|       |
 2046|   105k|    ps_dec->u2_pic_wd = ps_subset_seq->u2_pic_wd;
 2047|   105k|    ps_dec->u2_pic_ht = ps_subset_seq->u2_pic_ht;
 2048|   105k|    ps_dec->u4_total_mbs = ps_seq->u4_total_num_of_mbs << (1 - ps_seq->u1_frame_mbs_only_flag);
 2049|       |
 2050|       |    /* Determining the Width and Height of Frame from that of Picture */
 2051|   105k|    ps_dec->u2_frm_wd_y = ps_subset_seq->u2_frm_wd_y;
 2052|   105k|    ps_dec->u2_frm_ht_y = ps_subset_seq->u2_frm_ht_y;
 2053|       |
 2054|   105k|    ps_dec->u2_frm_wd_uv = ps_subset_seq->u2_frm_wd_uv;
 2055|   105k|    ps_dec->u2_frm_ht_uv = ps_subset_seq->u2_frm_ht_uv;
 2056|       |
 2057|   105k|    ps_dec->s_pad_mgr.u1_pad_len_y_v = ps_subset_seq->u1_pad_len_y_v;
 2058|   105k|    ps_dec->s_pad_mgr.u1_pad_len_cr_v = ps_subset_seq->u1_pad_len_cr_v;
 2059|   105k|    ps_dec->u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
 2060|   105k|    ps_dec->u2_frm_ht_in_mbs = ps_seq->u2_frm_ht_in_mbs;
 2061|       |
 2062|   105k|    ps_dec->u2_crop_offset_y = ps_subset_seq->u2_crop_offset_y;
 2063|   105k|    ps_dec->u2_crop_offset_uv = ps_subset_seq->u2_crop_offset_uv;
 2064|       |
 2065|       |    /* Get the frame num */
 2066|   105k|    u2_frame_num = ih264d_get_bits_h264(ps_bitstrm, ps_seq->u1_bits_in_frm_num);
 2067|   105k|    COPYTHECONTEXT("SH: frame_num", u2_frame_num);
 2068|       |
 2069|   105k|    if(!ps_dec->u1_first_slice_in_stream && ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (2069:8): [True: 79.6k, False: 25.9k]
  |  Branch (2069:45): [True: 76.3k, False: 3.34k]
  ------------------
 2070|  76.3k|    {
 2071|  76.3k|        pocstruct_t *ps_prev_poc = &ps_dec->s_prev_pic_poc;
 2072|  76.3k|        pocstruct_t *ps_cur_poc = &ps_dec->s_cur_pic_poc;
 2073|       |
 2074|  76.3k|        ps_dec->u2_mbx = 0xffff;
 2075|  76.3k|        ps_dec->u2_mby = 0;
 2076|       |
 2077|  76.3k|        if((0 == u1_is_idr_slice) && ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (2077:12): [True: 17.5k, False: 58.7k]
  |  Branch (2077:38): [True: 10.2k, False: 7.29k]
  ------------------
 2078|  10.2k|            ps_dec->u2_prev_ref_frame_num = ps_cur_slice->u2_frame_num;
 2079|       |
 2080|  76.3k|        if(u1_is_idr_slice || ps_cur_slice->u1_mmco_equalto5) ps_dec->u2_prev_ref_frame_num = 0;
  ------------------
  |  Branch (2080:12): [True: 58.7k, False: 17.5k]
  |  Branch (2080:31): [True: 1.33k, False: 16.2k]
  ------------------
 2081|       |
 2082|  76.3k|        if(ps_dec->ps_cur_sps->u1_gaps_in_frame_num_value_allowed_flag)
  ------------------
  |  Branch (2082:12): [True: 0, False: 76.3k]
  ------------------
 2083|      0|        {
 2084|      0|            isvcd_decode_gaps_in_frame_num(ps_dec, u2_frame_num);
 2085|      0|        }
 2086|       |
 2087|  76.3k|        ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst;
 2088|  76.3k|        ps_prev_poc->u2_frame_num = ps_cur_poc->u2_frame_num;
 2089|  76.3k|        ps_prev_poc->u1_mmco_equalto5 = ps_cur_slice->u1_mmco_equalto5;
 2090|  76.3k|        if(ps_cur_slice->u1_nal_ref_idc)
  ------------------
  |  Branch (2090:12): [True: 66.8k, False: 9.47k]
  ------------------
 2091|  66.8k|        {
 2092|  66.8k|            ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb;
 2093|  66.8k|            ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb;
 2094|  66.8k|            ps_prev_poc->i4_delta_pic_order_cnt_bottom = ps_cur_poc->i4_delta_pic_order_cnt_bottom;
 2095|  66.8k|            ps_prev_poc->i4_delta_pic_order_cnt[0] = ps_cur_poc->i4_delta_pic_order_cnt[0];
 2096|  66.8k|            ps_prev_poc->i4_delta_pic_order_cnt[1] = ps_cur_poc->i4_delta_pic_order_cnt[1];
 2097|  66.8k|            ps_prev_poc->u1_bot_field = ps_cur_poc->u1_bot_field;
 2098|  66.8k|        }
 2099|       |
 2100|  76.3k|        ps_dec->u4_total_mbs_coded = 0;
 2101|  76.3k|    }
 2102|       |    /* Get the field related flags  */
 2103|   105k|    if(!ps_seq->u1_frame_mbs_only_flag)
  ------------------
  |  Branch (2103:8): [True: 0, False: 105k]
  ------------------
 2104|      0|    {
 2105|      0|        u1_field_pic_flag = ih264d_get_bit_h264(ps_bitstrm);
 2106|      0|        COPYTHECONTEXT("SH: field_pic_flag", u1_field_pic_flag);
 2107|      0|        u1_bottom_field_flag = 0;
 2108|       |
 2109|      0|        if(u1_field_pic_flag)
  ------------------
  |  Branch (2109:12): [True: 0, False: 0]
  ------------------
 2110|      0|        {
 2111|      0|            ps_dec->pu1_inv_scan = (UWORD8 *) gau1_ih264d_inv_scan_fld;
 2112|      0|            u1_bottom_field_flag = ih264d_get_bit_h264(ps_bitstrm);
 2113|      0|            COPYTHECONTEXT("SH: bottom_field_flag", u1_bottom_field_flag);
 2114|      0|        }
 2115|      0|        else
 2116|      0|        {
 2117|      0|            ps_dec->pu1_inv_scan = (UWORD8 *) gau1_ih264d_inv_scan;
 2118|      0|        }
 2119|      0|    }
 2120|   105k|    else
 2121|   105k|    {
 2122|   105k|        u1_field_pic_flag = 0;
 2123|   105k|        u1_bottom_field_flag = 0;
 2124|       |
 2125|   105k|        ps_dec->pu1_inv_scan = (UWORD8 *) gau1_ih264d_inv_scan;
 2126|   105k|    }
 2127|       |
 2128|   105k|    u1_nal_unit_type = SLICE_NAL;
  ------------------
  |  |  324|   105k|#define SLICE_NAL                       1
  ------------------
 2129|   105k|    if(u1_is_idr_slice)
  ------------------
  |  Branch (2129:8): [True: 83.2k, False: 22.3k]
  ------------------
 2130|  83.2k|    {
 2131|  83.2k|        u1_nal_unit_type = IDR_SLICE_NAL;
  ------------------
  |  |  328|  83.2k|#define IDR_SLICE_NAL                   5
  ------------------
 2132|  83.2k|        u4_idr_pic_id = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2133|  83.2k|        if(u4_idr_pic_id > 65535) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2133:12): [True: 833, False: 82.3k]
  ------------------
 2134|  82.3k|        COPYTHECONTEXT("SH:  ", u4_idr_pic_id);
 2135|  82.3k|    }
 2136|       |
 2137|       |    /* read delta pic order count information*/
 2138|   104k|    i_delta_poc[0] = i_delta_poc[1] = 0;
 2139|   104k|    s_tmp_poc.i4_pic_order_cnt_lsb = 0;
 2140|   104k|    s_tmp_poc.i4_delta_pic_order_cnt_bottom = 0;
 2141|   104k|    u1_pic_order_cnt_type = ps_seq->u1_pic_order_cnt_type;
 2142|   104k|    if(u1_pic_order_cnt_type == 0)
  ------------------
  |  Branch (2142:8): [True: 78.3k, False: 26.3k]
  ------------------
 2143|  78.3k|    {
 2144|  78.3k|        i_temp = ih264d_get_bits_h264(ps_bitstrm, ps_seq->u1_log2_max_pic_order_cnt_lsb_minus);
 2145|  78.3k|        if(i_temp < 0 || i_temp >= ps_seq->i4_max_pic_order_cntLsb) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  Branch (2145:12): [True: 0, False: 78.3k]
  |  Branch (2145:26): [True: 0, False: 78.3k]
  ------------------
 2146|  78.3k|        s_tmp_poc.i4_pic_order_cnt_lsb = i_temp;
 2147|  78.3k|        COPYTHECONTEXT("SH: pic_order_cnt_lsb", s_tmp_poc.i4_pic_order_cnt_lsb);
 2148|       |
 2149|  78.3k|        if((ps_pps->u1_pic_order_present_flag == 1) && (!u1_field_pic_flag))
  ------------------
  |  Branch (2149:12): [True: 33.1k, False: 45.2k]
  |  Branch (2149:56): [True: 33.1k, False: 0]
  ------------------
 2150|  33.1k|        {
 2151|  33.1k|            s_tmp_poc.i4_delta_pic_order_cnt_bottom = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2152|  33.1k|            COPYTHECONTEXT("SH: delta_pic_order_cnt_bottom",
 2153|  33.1k|                           s_tmp_poc.i4_delta_pic_order_cnt_bottom);
 2154|  33.1k|        }
 2155|  78.3k|    }
 2156|       |
 2157|   104k|    s_tmp_poc.i4_delta_pic_order_cnt[0] = 0;
 2158|   104k|    s_tmp_poc.i4_delta_pic_order_cnt[1] = 0;
 2159|   104k|    if(u1_pic_order_cnt_type == 1 && (!ps_seq->u1_delta_pic_order_always_zero_flag))
  ------------------
  |  Branch (2159:8): [True: 23.2k, False: 81.5k]
  |  Branch (2159:38): [True: 22.1k, False: 1.08k]
  ------------------
 2160|  22.1k|    {
 2161|  22.1k|        s_tmp_poc.i4_delta_pic_order_cnt[0] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2162|  22.1k|        COPYTHECONTEXT("SH: delta_pic_order_cnt[0]", s_tmp_poc.i4_delta_pic_order_cnt[0]);
 2163|       |
 2164|  22.1k|        if(ps_pps->u1_pic_order_present_flag && !u1_field_pic_flag)
  ------------------
  |  Branch (2164:12): [True: 19.6k, False: 2.43k]
  |  Branch (2164:49): [True: 19.6k, False: 0]
  ------------------
 2165|  19.6k|        {
 2166|  19.6k|            s_tmp_poc.i4_delta_pic_order_cnt[1] = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2167|  19.6k|            COPYTHECONTEXT("SH: delta_pic_order_cnt[1]", s_tmp_poc.i4_delta_pic_order_cnt[1]);
 2168|  19.6k|        }
 2169|  22.1k|    }
 2170|       |
 2171|   104k|    if(ps_pps->u1_redundant_pic_cnt_present_flag)
  ------------------
  |  Branch (2171:8): [True: 31.5k, False: 73.1k]
  ------------------
 2172|  31.5k|    {
 2173|  31.5k|        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
 2174|  31.5k|        if(u4_temp > MAX_REDUNDANT_PIC_CNT) return ERROR_INV_SLICE_HDR_T;
  ------------------
  |  |  611|  31.5k|#define MAX_REDUNDANT_PIC_CNT       127
  ------------------
  |  Branch (2174:12): [True: 599, False: 30.9k]
  ------------------
 2175|  30.9k|        u1_redundant_pic_cnt = u4_temp;
 2176|  30.9k|        COPYTHECONTEXT("SH: redundant_pic_cnt", u1_redundant_pic_cnt);
 2177|  30.9k|    }
 2178|       |
 2179|       |    /*--------------------------------------------------------------------*/
 2180|       |    /* Check if the slice is part of new picture                          */
 2181|       |    /*--------------------------------------------------------------------*/
 2182|       |    /* First slice of a picture is always considered as part of new picture */
 2183|   104k|    i1_is_end_of_poc = 1;
 2184|   104k|    ps_dec->ps_dec_err_status->u1_err_flag &= MASK_REJECT_CUR_PIC;
  ------------------
  |  |  605|   104k|#define MASK_REJECT_CUR_PIC (0xFE)
  ------------------
 2185|       |
 2186|   104k|    if(ps_dec->u4_first_slice_in_pic == 0)
  ------------------
  |  Branch (2186:8): [True: 3.34k, False: 100k]
  ------------------
 2187|  3.34k|    {
 2188|  3.34k|        i1_is_end_of_poc =
 2189|  3.34k|            ih264d_is_end_of_pic(u2_frame_num, u1_nal_ref_idc, &s_tmp_poc, &ps_dec->s_cur_pic_poc,
 2190|  3.34k|                                 ps_cur_slice, u1_pic_order_cnt_type, u1_nal_unit_type,
 2191|  3.34k|                                 u4_idr_pic_id, u1_field_pic_flag, u1_bottom_field_flag);
 2192|  3.34k|        if(i1_is_end_of_poc)
  ------------------
  |  Branch (2192:12): [True: 301, False: 3.04k]
  ------------------
 2193|    301|        {
 2194|    301|            ps_dec->u1_first_slice_in_stream = 0;
 2195|    301|            return ERROR_INCOMPLETE_FRAME;
 2196|    301|        }
 2197|  3.34k|    }
 2198|       |
 2199|       |    /*--------------------------------------------------------------------*/
 2200|       |    /* Check for error in slice and parse the missing/corrupted MB's      */
 2201|       |    /* as skip-MB's in an inserted P-slice                                */
 2202|       |    /*--------------------------------------------------------------------*/
 2203|   103k|    u4_mbaff = ps_seq->u1_mb_aff_flag && (!u1_field_pic_flag);
  ------------------
  |  Branch (2203:16): [True: 0, False: 103k]
  |  Branch (2203:42): [True: 0, False: 0]
  ------------------
 2204|   103k|    prev_slice_err = 0;
 2205|       |
 2206|   103k|    if(i1_is_end_of_poc || ps_dec->u1_first_slice_in_stream)
  ------------------
  |  Branch (2206:8): [True: 100k, False: 3.04k]
  |  Branch (2206:28): [True: 0, False: 3.04k]
  ------------------
 2207|   100k|    {
 2208|       |        /* If the current slice is not a field or frame number of the current
 2209|       |         * slice doesn't match with previous slice, and decoder is expecting
 2210|       |         * to decode a field i.e. ps_dec->u1_top_bottom_decoded is not 0 and
 2211|       |         * is not (TOP_FIELD_ONLY | BOT_FIELD_ONLY), treat it as a dangling
 2212|       |         * field */
 2213|   100k|        if((u1_field_pic_flag == 0 || u2_frame_num != ps_dec->u2_prv_frame_num) &&
  ------------------
  |  Branch (2213:13): [True: 100k, False: 0]
  |  Branch (2213:39): [True: 0, False: 0]
  ------------------
 2214|   100k|           ps_dec->u1_top_bottom_decoded != 0 &&
  ------------------
  |  Branch (2214:12): [True: 0, False: 100k]
  ------------------
 2215|      0|           ps_dec->u1_top_bottom_decoded != (TOP_FIELD_ONLY | BOT_FIELD_ONLY))
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
                         ps_dec->u1_top_bottom_decoded != (TOP_FIELD_ONLY | BOT_FIELD_ONLY))
  ------------------
  |  |   66|      0|#define BOT_FIELD_ONLY      0x01
  ------------------
  |  Branch (2215:12): [True: 0, False: 0]
  ------------------
 2216|      0|        {
 2217|      0|            ps_dec->u1_dangling_field = 1;
 2218|      0|            if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (2218:16): [True: 0, False: 0]
  ------------------
 2219|      0|            {
 2220|       |                // first slice - dangling field
 2221|      0|                prev_slice_err = 1;
 2222|      0|            }
 2223|      0|            else
 2224|      0|            {
 2225|       |                // last slice - dangling field
 2226|      0|                prev_slice_err = 2;
 2227|      0|            }
 2228|       |
 2229|      0|            if(ps_dec->u1_top_bottom_decoded == TOP_FIELD_ONLY)
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
  |  Branch (2229:16): [True: 0, False: 0]
  ------------------
 2230|      0|                ps_cur_slice->u1_bottom_field_flag = 1;
 2231|      0|            else
 2232|      0|                ps_cur_slice->u1_bottom_field_flag = 0;
 2233|       |
 2234|      0|            num_mb_skipped =
 2235|      0|                (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs) - ps_dec->u4_total_mbs_coded;
 2236|      0|            ps_cur_poc = &ps_dec->s_cur_pic_poc;
 2237|       |
 2238|      0|            u1_is_idr_slice = ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL;
  ------------------
  |  |  328|      0|#define IDR_SLICE_NAL                   5
  ------------------
 2239|      0|        }
 2240|   100k|        else if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (2240:17): [True: 100k, False: 0]
  ------------------
 2241|   100k|        {
 2242|   100k|            if(u2_first_mb_in_slice > 0)
  ------------------
  |  Branch (2242:16): [True: 0, False: 100k]
  ------------------
 2243|      0|            {
 2244|       |                /* first slice - missing/header corruption */
 2245|      0|                prev_slice_err = 1;
 2246|      0|                num_mb_skipped = u2_first_mb_in_slice << u4_mbaff;
 2247|      0|                ps_cur_poc = &s_tmp_poc;
 2248|       |
 2249|       |                /* initializing slice parameters */
 2250|      0|                ps_cur_slice->u4_idr_pic_id = u4_idr_pic_id;
 2251|      0|                ps_cur_slice->u1_field_pic_flag = u1_field_pic_flag;
 2252|      0|                ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
 2253|      0|                ps_cur_slice->i4_pic_order_cnt_lsb = s_tmp_poc.i4_pic_order_cnt_lsb;
 2254|      0|                ps_cur_slice->u1_nal_unit_type = u1_nal_unit_type;
 2255|      0|                ps_cur_slice->u1_redundant_pic_cnt = u1_redundant_pic_cnt;
 2256|      0|                ps_cur_slice->u1_nal_ref_idc = u1_nal_ref_idc;
 2257|      0|                ps_cur_slice->u1_pic_order_cnt_type = u1_pic_order_cnt_type;
 2258|      0|                ps_cur_slice->u1_mbaff_frame_flag = ps_seq->u1_mb_aff_flag && (!u1_field_pic_flag);
  ------------------
  |  Branch (2258:53): [True: 0, False: 0]
  |  Branch (2258:79): [True: 0, False: 0]
  ------------------
 2259|      0|            }
 2260|   100k|        }
 2261|      0|        else
 2262|      0|        {
 2263|       |            /* since i1_is_end_of_poc is set ,means new frame num is encountered. so
 2264|       |             * conceal the current frame completely */
 2265|      0|            prev_slice_err = 2;
 2266|      0|            num_mb_skipped =
 2267|      0|                (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs) - ps_dec->u4_total_mbs_coded;
 2268|      0|            ps_cur_poc = &s_tmp_poc;
 2269|      0|        }
 2270|   100k|    }
 2271|  3.04k|    else
 2272|  3.04k|    {
 2273|  3.04k|        if((u2_first_mb_in_slice << u4_mbaff) > ps_dec->u4_total_mbs_coded)
  ------------------
  |  Branch (2273:12): [True: 93, False: 2.95k]
  ------------------
 2274|     93|        {
 2275|       |            // previous slice - missing/corruption
 2276|     93|            prev_slice_err = 2;
 2277|     93|            num_mb_skipped = (u2_first_mb_in_slice << u4_mbaff) - ps_dec->u4_total_mbs_coded;
 2278|     93|            ps_cur_poc = &s_tmp_poc;
 2279|     93|        }
 2280|  2.95k|        else if((u2_first_mb_in_slice << u4_mbaff) < ps_dec->u4_total_mbs_coded)
  ------------------
  |  Branch (2280:17): [True: 0, False: 2.95k]
  ------------------
 2281|      0|        {
 2282|      0|            return ERROR_CORRUPTED_SLICE;
 2283|      0|        }
 2284|  3.04k|    }
 2285|   103k|    if(prev_slice_err)
  ------------------
  |  Branch (2285:8): [True: 93, False: 103k]
  ------------------
 2286|     93|    {
 2287|     93|        ret = isvcd_mark_err_slice_skip((svc_dec_lyr_struct_t *) ps_dec, num_mb_skipped,
 2288|     93|                                        u1_is_idr_slice, u2_frame_num, ps_cur_poc, prev_slice_err);
 2289|       |
 2290|     93|        if(ps_dec->u1_dangling_field == 1)
  ------------------
  |  Branch (2290:12): [True: 0, False: 93]
  ------------------
 2291|      0|        {
 2292|      0|            ps_dec->u1_second_field = 1 - ps_dec->u1_second_field;
 2293|      0|            ps_dec->u1_first_slice_in_stream = 0;
 2294|      0|            ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY;
  ------------------
  |  |   65|      0|#define TOP_FIELD_ONLY      0x02
  ------------------
                          ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY;
  ------------------
  |  |   66|      0|#define BOT_FIELD_ONLY      0x01
  ------------------
 2295|      0|            return ERROR_DANGLING_FIELD_IN_PIC;
 2296|      0|        }
 2297|       |
 2298|     93|        if(prev_slice_err == 2)
  ------------------
  |  Branch (2298:12): [True: 93, False: 0]
  ------------------
 2299|     93|        {
 2300|     93|            ps_dec->u1_first_slice_in_stream = 0;
 2301|     93|            return ERROR_INCOMPLETE_FRAME;
 2302|     93|        }
 2303|       |
 2304|      0|        if(ps_dec->u4_total_mbs_coded >= ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
  ------------------
  |  Branch (2304:12): [True: 0, False: 0]
  ------------------
 2305|      0|        {
 2306|       |            /* return if all MBs in frame are parsed*/
 2307|      0|            ps_dec->u1_first_slice_in_stream = 0;
 2308|      0|            return ERROR_IN_LAST_SLICE_OF_PIC;
 2309|      0|        }
 2310|       |
 2311|      0|        if(ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC)
  ------------------
  |  |  602|      0|#define REJECT_CUR_PIC    (0x01)
  ------------------
  |  Branch (2311:12): [True: 0, False: 0]
  ------------------
 2312|      0|        {
 2313|      0|            ih264d_err_pic_dispbuf_mgr(ps_dec);
 2314|      0|            return ERROR_NEW_FRAME_EXPECTED;
 2315|      0|        }
 2316|       |
 2317|      0|        if(ret != OK) return ret;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (2317:12): [True: 0, False: 0]
  ------------------
 2318|       |
 2319|      0|        i1_is_end_of_poc = 0;
 2320|      0|    }
 2321|       |
 2322|   103k|    if(u1_field_pic_flag)
  ------------------
  |  Branch (2322:8): [True: 0, False: 103k]
  ------------------
 2323|      0|    {
 2324|      0|        ps_dec->u2_prv_frame_num = u2_frame_num;
 2325|      0|    }
 2326|       |
 2327|   103k|    if(ps_cur_slice->u1_mmco_equalto5 && NULL != ps_dec->ps_cur_pic)
  ------------------
  |  Branch (2327:8): [True: 2.33k, False: 101k]
  |  Branch (2327:42): [True: 2.20k, False: 121]
  ------------------
 2328|  2.20k|    {
 2329|  2.20k|        WORD32 i4_temp_poc;
 2330|  2.20k|        WORD32 i4_top_field_order_poc, i4_bot_field_order_poc;
 2331|  2.20k|        WORD64 i8_result;
 2332|  2.20k|        if(!ps_cur_slice->u1_field_pic_flag)
  ------------------
  |  Branch (2332:12): [True: 2.20k, False: 0]
  ------------------
 2333|  2.20k|        {
 2334|  2.20k|            i4_top_field_order_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
 2335|  2.20k|            i4_bot_field_order_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
 2336|  2.20k|            i4_temp_poc = MIN(i4_top_field_order_poc, i4_bot_field_order_poc);
  ------------------
  |  |   61|  2.20k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 1.08k, False: 1.12k]
  |  |  ------------------
  ------------------
 2337|  2.20k|        }
 2338|      0|        else if(!ps_cur_slice->u1_bottom_field_flag)
  ------------------
  |  Branch (2338:17): [True: 0, False: 0]
  ------------------
 2339|      0|            i4_temp_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
 2340|      0|        else
 2341|      0|            i4_temp_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
 2342|       |
 2343|  2.20k|        i8_result = (WORD64) i4_temp_poc - ps_dec->ps_cur_pic->i4_top_field_order_cnt;
 2344|  2.20k|        if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|  2.20k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 72, False: 2.13k]
  |  |  |  Branch (58:54): [True: 0, False: 2.13k]
  |  |  ------------------
  ------------------
 2345|     72|        {
 2346|     72|            return ERROR_INV_POC;
 2347|     72|        }
 2348|  2.13k|        ps_dec->ps_cur_pic->i4_top_field_order_cnt = (WORD32) i8_result;
 2349|  2.13k|        i8_result = (WORD64) i4_temp_poc - ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
 2350|  2.13k|        if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|  2.13k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 101, False: 2.03k]
  |  |  |  Branch (58:54): [True: 0, False: 2.03k]
  |  |  ------------------
  ------------------
 2351|    101|        {
 2352|    101|            return ERROR_INV_POC;
 2353|    101|        }
 2354|  2.03k|        ps_dec->ps_cur_pic->i4_bottom_field_order_cnt = (WORD32) i8_result;
 2355|  2.03k|        ps_dec->ps_cur_pic->i4_poc = i4_temp_poc;
 2356|  2.03k|        ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc;
 2357|  2.03k|    }
 2358|   103k|    if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (2358:8): [True: 100k, False: 2.95k]
  ------------------
 2359|   100k|    {
 2360|   100k|        ret = isvcd_decode_pic_order_cnt(u1_is_idr_slice, u2_frame_num, &ps_dec->s_prev_pic_poc,
 2361|   100k|                                         &s_tmp_poc, ps_cur_slice, ps_pps, u1_nal_ref_idc,
 2362|   100k|                                         u1_bottom_field_flag, u1_field_pic_flag, &i4_poc, ps_dec);
 2363|   100k|        if(ret != OK) return ret;
  ------------------
  |  |  114|   100k|#define OK        0
  ------------------
  |  Branch (2363:12): [True: 986, False: 99.6k]
  ------------------
 2364|       |        /* Display seq no calculations */
 2365|  99.6k|        if(i4_poc >= ps_dec->i4_max_poc) ps_dec->i4_max_poc = i4_poc;
  ------------------
  |  Branch (2365:12): [True: 50.6k, False: 49.0k]
  ------------------
 2366|       |        /* IDR Picture or POC wrap around */
 2367|  99.6k|        if(i4_poc == 0)
  ------------------
  |  Branch (2367:12): [True: 22.0k, False: 77.5k]
  ------------------
 2368|  22.0k|        {
 2369|  22.0k|            WORD64 i8_temp;
 2370|  22.0k|            i8_temp = (WORD64) ps_dec->i4_prev_max_display_seq + ps_dec->i4_max_poc +
 2371|  22.0k|                      ps_dec->u1_max_dec_frame_buffering + 1;
 2372|       |            /*If i4_prev_max_display_seq overflows integer range, reset it */
 2373|  22.0k|            ps_dec->i4_prev_max_display_seq = IS_OUT_OF_RANGE_S32(i8_temp) ? 0 : i8_temp;
  ------------------
  |  |   58|  22.0k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 22.0k]
  |  |  |  Branch (58:54): [True: 105, False: 21.9k]
  |  |  ------------------
  ------------------
 2374|  22.0k|            ps_dec->i4_max_poc = 0;
 2375|  22.0k|        }
 2376|  99.6k|    }
 2377|       |
 2378|       |    /* Increment only if the current slice has atleast 1 more MB */
 2379|   102k|    if(ps_dec->u4_first_slice_in_pic == 0 &&
  ------------------
  |  Branch (2379:8): [True: 2.95k, False: 99.6k]
  ------------------
 2380|  2.95k|       (ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice <
  ------------------
  |  Branch (2380:8): [True: 2.95k, False: 0]
  ------------------
 2381|  2.95k|        (UWORD32) (ps_dec->u4_total_mbs_coded >> ps_dec->ps_cur_slice->u1_mbaff_frame_flag)))
 2382|  2.95k|    {
 2383|  2.95k|        ps_dec->ps_parse_cur_slice++;
 2384|  2.95k|        ps_dec->u2_cur_slice_num++;
 2385|       |        // in the case of single core increment ps_decode_cur_slice
 2386|  2.95k|        if(ps_dec->u1_separate_parse == 0)
  ------------------
  |  Branch (2386:12): [True: 1.64k, False: 1.30k]
  ------------------
 2387|  1.64k|        {
 2388|  1.64k|            ps_dec->ps_decode_cur_slice++;
 2389|  1.64k|        }
 2390|  2.95k|    }
 2391|       |
 2392|   102k|    ps_dec->u1_slice_header_done = 0;
 2393|       |
 2394|       |    /*--------------------------------------------------------------------*/
 2395|       |    /* Copy the values read from the bitstream to the slice header and then*/
 2396|       |    /* If the slice is first slice in picture, then do Start of Picture   */
 2397|       |    /* processing.                                                        */
 2398|       |    /*--------------------------------------------------------------------*/
 2399|   102k|    ps_cur_slice->i4_delta_pic_order_cnt[0] = i_delta_poc[0];
 2400|   102k|    ps_cur_slice->i4_delta_pic_order_cnt[1] = i_delta_poc[1];
 2401|   102k|    ps_cur_slice->u4_idr_pic_id = u4_idr_pic_id;
 2402|   102k|    ps_cur_slice->u2_first_mb_in_slice = u2_first_mb_in_slice;
 2403|   102k|    ps_cur_slice->u1_field_pic_flag = u1_field_pic_flag;
 2404|   102k|    ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
 2405|   102k|    ps_cur_slice->u1_slice_type = u1_slice_type;
 2406|   102k|    ps_cur_slice->i4_pic_order_cnt_lsb = s_tmp_poc.i4_pic_order_cnt_lsb;
 2407|       |
 2408|   102k|    ps_cur_slice->u1_nal_unit_type = u1_nal_unit_type;
 2409|   102k|    ps_cur_slice->u1_redundant_pic_cnt = u1_redundant_pic_cnt;
 2410|   102k|    ps_cur_slice->u1_nal_ref_idc = u1_nal_ref_idc;
 2411|   102k|    ps_cur_slice->u1_pic_order_cnt_type = u1_pic_order_cnt_type;
 2412|       |
 2413|   102k|    if(ps_seq->u1_frame_mbs_only_flag)
  ------------------
  |  Branch (2413:8): [True: 102k, False: 0]
  ------------------
 2414|   102k|        ps_cur_slice->u1_direct_8x8_inference_flag = ps_seq->u1_direct_8x8_inference_flag;
 2415|      0|    else
 2416|      0|        ps_cur_slice->u1_direct_8x8_inference_flag = 1;
 2417|       |
 2418|   102k|    if(u1_slice_type == B_SLICE)
  ------------------
  |  |  369|   102k|#define B_SLICE  1
  ------------------
  |  Branch (2418:8): [True: 30.9k, False: 71.6k]
  ------------------
 2419|  30.9k|    {
 2420|  30.9k|        ps_cur_slice->u1_direct_spatial_mv_pred_flag = ih264d_get_bit_h264(ps_bitstrm);
 2421|  30.9k|        COPYTHECONTEXT("SH: direct_spatial_mv_pred_flag",
 2422|  30.9k|                       ps_cur_slice->u1_direct_spatial_mv_pred_flag);
 2423|       |
 2424|  30.9k|        if(ps_cur_slice->u1_direct_spatial_mv_pred_flag)
  ------------------
  |  Branch (2424:12): [True: 16.1k, False: 14.8k]
  ------------------
 2425|  16.1k|            ps_cur_slice->pf_decodeDirect = ih264d_decode_spatial_direct;
 2426|  14.8k|        else
 2427|  14.8k|            ps_cur_slice->pf_decodeDirect = ih264d_decode_temporal_direct;
 2428|  30.9k|        if(!((ps_seq->u1_mb_aff_flag) && (!u1_field_pic_flag)))
  ------------------
  |  Branch (2428:14): [True: 0, False: 30.9k]
  |  Branch (2428:42): [True: 0, False: 0]
  ------------------
 2429|  30.9k|            ps_dec->pf_mvpred = ih264d_mvpred_nonmbaffB;
 2430|  30.9k|    }
 2431|  71.6k|    else
 2432|  71.6k|    {
 2433|  71.6k|        if(!((ps_seq->u1_mb_aff_flag) && (!u1_field_pic_flag)))
  ------------------
  |  Branch (2433:14): [True: 0, False: 71.6k]
  |  Branch (2433:42): [True: 0, False: 0]
  ------------------
 2434|  71.6k|            ps_dec->pf_mvpred = ih264d_mvpred_nonmbaff;
 2435|  71.6k|    }
 2436|       |
 2437|   102k|    if(ps_dec->u4_first_slice_in_pic)
  ------------------
  |  Branch (2437:8): [True: 99.6k, False: 2.95k]
  ------------------
 2438|  99.6k|    {
 2439|  99.6k|        if(u2_first_mb_in_slice == 0)
  ------------------
  |  Branch (2439:12): [True: 99.6k, False: 0]
  ------------------
 2440|  99.6k|        {
 2441|  99.6k|            ret = isvcd_start_of_pic(ps_svc_lyr_dec, i4_poc, &s_tmp_poc, u2_frame_num, ps_pps);
 2442|  99.6k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  99.6k|#define OK        0
  ------------------
  |  Branch (2442:16): [True: 804, False: 98.8k]
  ------------------
 2443|       |            /*inter layer buffer intialization */
 2444|  98.8k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 2445|  98.8k|                ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start;
 2446|  98.8k|            ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_cur_mb =
 2447|  98.8k|                ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_base;
 2448|  98.8k|        }
 2449|       |
 2450|  98.8k|        ps_dec->u4_output_present = 0;
 2451|       |
 2452|  98.8k|        {
 2453|  98.8k|            ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer, &(ps_dec->s_disp_op));
 2454|       |            /* If error code is non-zero then there is no buffer available for
 2455|       |            display, hence avoid format conversion */
 2456|       |
 2457|  98.8k|            if(0 != ps_dec->s_disp_op.u4_error_code)
  ------------------
  |  Branch (2457:16): [True: 41.1k, False: 57.6k]
  ------------------
 2458|  41.1k|            {
 2459|  41.1k|                ps_dec->u4_output_present = 0;
 2460|  41.1k|                ps_dec->u4_fmt_conv_cur_row = ps_dec->s_disp_frame_info.u4_y_ht;
 2461|  41.1k|            }
 2462|  57.6k|            else
 2463|  57.6k|                ps_dec->u4_output_present = 1;
 2464|  98.8k|        }
 2465|  98.8k|        ret = isvcd_parse_interlayer_resamp_func_init(ps_svc_lyr_dec, u2_first_mb_in_slice);
 2466|  98.8k|        if(ret != OK)
  ------------------
  |  |  114|  98.8k|#define OK        0
  ------------------
  |  Branch (2466:12): [True: 0, False: 98.8k]
  ------------------
 2467|      0|        {
 2468|      0|            return ERROR_CORRUPTED_SLICE;
 2469|      0|        }
 2470|  98.8k|        if((ps_dec->u1_separate_parse == 1) && (ps_svc_lyr_dec->u1_res_init_done == 1))
  ------------------
  |  Branch (2470:12): [True: 29.5k, False: 69.3k]
  |  Branch (2470:48): [True: 29.5k, False: 0]
  ------------------
 2471|  29.5k|        {
 2472|  29.5k|            if(ps_dec->u4_dec_thread_created == 0)
  ------------------
  |  Branch (2472:16): [True: 29.5k, False: 0]
  ------------------
 2473|  29.5k|            {
 2474|  29.5k|                if(ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER)
  ------------------
  |  |  110|  29.5k|#define TARGET_LAYER 2
  ------------------
  |  Branch (2474:20): [True: 0, False: 29.5k]
  ------------------
 2475|      0|                {
 2476|      0|                    ithread_create(ps_dec->pv_dec_thread_handle, NULL,
 2477|      0|                                   (void *) isvcd_decode_picture_thread, (void *) ps_dec);
 2478|       |
 2479|      0|                    ps_dec->u4_dec_thread_created = 1;
 2480|      0|                }
 2481|  29.5k|                else
 2482|  29.5k|                {
 2483|  29.5k|                    ithread_create(ps_dec->pv_dec_thread_handle, NULL,
 2484|  29.5k|                                   (void *) ih264d_decode_picture_thread, (void *) ps_dec);
 2485|       |
 2486|  29.5k|                    ps_dec->u4_dec_thread_created = 1;
 2487|  29.5k|                }
 2488|  29.5k|            }
 2489|       |#ifdef KEEP_THREADS_ACTIVE
 2490|       |            ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
 2491|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 2492|       |
 2493|       |            ps_dec->ai4_process_start[0] = PROC_START;
 2494|       |            ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[0]);
 2495|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 2496|       |
 2497|       |            ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
 2498|       |            RETURN_IF((ret != IV_SUCCESS), ret);
 2499|       |#endif
 2500|       |#ifdef KEEP_THREADS_ACTIVE
 2501|       |            if(ps_dec->u4_bs_deblk_thread_created)
 2502|       |            {
 2503|       |                ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
 2504|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2505|       |
 2506|       |                ps_dec->ai4_process_start[1] = PROC_START;
 2507|       |                ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[1]);
 2508|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2509|       |
 2510|       |                ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
 2511|       |                RETURN_IF((ret != IV_SUCCESS), ret);
 2512|       |            }
 2513|       |#endif
 2514|  29.5k|        }
 2515|  98.8k|    }
 2516|       |
 2517|       |    /* INITIALIZATION of fn ptrs for MC and formMbPartInfo functions */
 2518|   101k|    {
 2519|   101k|        UWORD8 uc_nofield_nombaff;
 2520|       |
 2521|   101k|        uc_nofield_nombaff =
 2522|   101k|            ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0) &&
  ------------------
  |  Branch (2522:14): [True: 101k, False: 0]
  ------------------
 2523|   101k|             (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0) && (u1_slice_type != B_SLICE) &&
  ------------------
  |  |  369|   101k|#define B_SLICE  1
  ------------------
  |  Branch (2523:14): [True: 101k, False: 0]
  |  Branch (2523:66): [True: 71.2k, False: 30.5k]
  ------------------
 2524|  71.2k|             (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0));
  ------------------
  |  Branch (2524:14): [True: 48.4k, False: 22.8k]
  ------------------
 2525|       |
 2526|       |        /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */
 2527|       |
 2528|   101k|        if(uc_nofield_nombaff)
  ------------------
  |  Branch (2528:12): [True: 48.4k, False: 53.3k]
  ------------------
 2529|  48.4k|        {
 2530|  48.4k|            ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_bp;
 2531|  48.4k|            ps_dec->p_motion_compensate = ih264d_motion_compensate_bp;
 2532|  48.4k|        }
 2533|  53.3k|        else
 2534|  53.3k|        {
 2535|  53.3k|            ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_mp;
 2536|  53.3k|            ps_dec->p_motion_compensate = ih264d_motion_compensate_mp;
 2537|  53.3k|        }
 2538|   101k|    }
 2539|       |
 2540|       |    /*
 2541|       |     * Decide whether to decode the current picture or not
 2542|       |     */
 2543|   101k|    {
 2544|   101k|        dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
 2545|   101k|        if(ps_err->u4_frm_sei_sync == u2_frame_num)
  ------------------
  |  Branch (2545:12): [True: 68, False: 101k]
  ------------------
 2546|     68|        {
 2547|     68|            ps_err->u1_err_flag = ACCEPT_ALL_PICS;
  ------------------
  |  |  601|     68|#define ACCEPT_ALL_PICS   (0x00)
  ------------------
 2548|     68|            ps_err->u4_frm_sei_sync = SYNC_FRM_DEFAULT;
  ------------------
  |  |  610|     68|#define SYNC_FRM_DEFAULT  (0xFFFFFFFF)
  ------------------
 2549|     68|        }
 2550|   101k|        ps_err->u4_cur_frm = u2_frame_num;
 2551|   101k|    }
 2552|       |
 2553|       |    /* Decision for decoding if the picture is to be skipped */
 2554|   101k|    {
 2555|   101k|        WORD32 i4_skip_b_pic, i4_skip_p_pic;
 2556|       |
 2557|   101k|        i4_skip_b_pic = (ps_dec->u4_skip_frm_mask & B_SLC_BIT) && (B_SLICE == u1_slice_type) &&
  ------------------
  |  |  378|   101k|#define B_SLC_BIT  (0x4)
  ------------------
                      i4_skip_b_pic = (ps_dec->u4_skip_frm_mask & B_SLC_BIT) && (B_SLICE == u1_slice_type) &&
  ------------------
  |  |  369|      0|#define B_SLICE  1
  ------------------
  |  Branch (2557:25): [True: 0, False: 101k]
  |  Branch (2557:67): [True: 0, False: 0]
  ------------------
 2558|      0|                        (0 == u1_nal_ref_idc);
  ------------------
  |  Branch (2558:25): [True: 0, False: 0]
  ------------------
 2559|       |
 2560|   101k|        i4_skip_p_pic = (ps_dec->u4_skip_frm_mask & P_SLC_BIT) && (P_SLICE == u1_slice_type) &&
  ------------------
  |  |  377|   101k|#define P_SLC_BIT  (0x2)
  ------------------
                      i4_skip_p_pic = (ps_dec->u4_skip_frm_mask & P_SLC_BIT) && (P_SLICE == u1_slice_type) &&
  ------------------
  |  |  368|      0|#define P_SLICE  0
  ------------------
  |  Branch (2560:25): [True: 0, False: 101k]
  |  Branch (2560:67): [True: 0, False: 0]
  ------------------
 2561|      0|                        (0 == u1_nal_ref_idc);
  ------------------
  |  Branch (2561:25): [True: 0, False: 0]
  ------------------
 2562|       |
 2563|       |        /**************************************************************/
 2564|       |        /* Skip the B picture if skip mask is set for B picture and   */
 2565|       |        /* Current B picture is a non reference B picture or there is */
 2566|       |        /* no user for reference B picture                            */
 2567|       |        /**************************************************************/
 2568|   101k|        if(i4_skip_b_pic)
  ------------------
  |  Branch (2568:12): [True: 0, False: 101k]
  ------------------
 2569|      0|        {
 2570|      0|            ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT;
  ------------------
  |  |  378|      0|#define B_SLC_BIT  (0x4)
  ------------------
 2571|       |            /* Don't decode the picture in SKIP-B mode if that picture is B */
 2572|       |            /* and also it is not to be used as a reference picture         */
 2573|      0|            ps_dec->u1_last_pic_not_decoded = 1;
 2574|       |
 2575|      0|            return OK;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
 2576|      0|        }
 2577|       |        /**************************************************************/
 2578|       |        /* Skip the P picture if skip mask is set for P picture and   */
 2579|       |        /* Current P picture is a non reference P picture or there is */
 2580|       |        /* no user for reference P picture                            */
 2581|       |        /**************************************************************/
 2582|   101k|        if(i4_skip_p_pic)
  ------------------
  |  Branch (2582:12): [True: 0, False: 101k]
  ------------------
 2583|      0|        {
 2584|      0|            ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT;
  ------------------
  |  |  377|      0|#define P_SLC_BIT  (0x2)
  ------------------
 2585|       |            /* Don't decode the picture in SKIP-P mode if that picture is P */
 2586|       |            /* and also it is not to be used as a reference picture         */
 2587|      0|            ps_dec->u1_last_pic_not_decoded = 1;
 2588|       |
 2589|      0|            return OK;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
 2590|      0|        }
 2591|   101k|    }
 2592|       |
 2593|   101k|    {
 2594|   101k|        UWORD16 u2_mb_x, u2_mb_y;
 2595|       |
 2596|   101k|        ps_dec->i4_submb_ofst =
 2597|   101k|            ((u2_first_mb_in_slice << ps_cur_slice->u1_mbaff_frame_flag) * SUB_BLK_SIZE) -
  ------------------
  |  |  562|   101k|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   101k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   101k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
 2598|   101k|            SUB_BLK_SIZE;
  ------------------
  |  |  562|   101k|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   101k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|   101k|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
 2599|   101k|        if(u2_first_mb_in_slice)
  ------------------
  |  Branch (2599:12): [True: 2.95k, False: 98.8k]
  ------------------
 2600|  2.95k|        {
 2601|  2.95k|            UWORD8 u1_mb_aff;
 2602|  2.95k|            UWORD8 u1_field_pic;
 2603|  2.95k|            UWORD16 u2_frm_wd_in_mbs;
 2604|  2.95k|            u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
 2605|  2.95k|            u1_mb_aff = ps_cur_slice->u1_mbaff_frame_flag;
 2606|  2.95k|            u1_field_pic = ps_cur_slice->u1_field_pic_flag;
 2607|       |
 2608|  2.95k|            {
 2609|  2.95k|                UWORD32 x_offset;
 2610|  2.95k|                UWORD32 y_offset;
 2611|  2.95k|                UWORD32 u4_frame_stride;
 2612|  2.95k|                tfr_ctxt_t *ps_trns_addr;
 2613|       |
 2614|  2.95k|                if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (2614:20): [True: 1.30k, False: 1.64k]
  ------------------
 2615|  1.30k|                {
 2616|  1.30k|                    ps_trns_addr = &ps_dec->s_tran_addrecon_parse;
 2617|  1.30k|                }
 2618|  1.64k|                else
 2619|  1.64k|                {
 2620|  1.64k|                    ps_trns_addr = &ps_dec->s_tran_addrecon;
 2621|  1.64k|                }
 2622|  2.95k|                u2_mb_x = MOD(u2_first_mb_in_slice, u2_frm_wd_in_mbs);
  ------------------
  |  |   64|  2.95k|#define MOD(x,y) ((x)%(y))
  ------------------
 2623|  2.95k|                u2_mb_y = DIV(u2_first_mb_in_slice, u2_frm_wd_in_mbs);
  ------------------
  |  |   65|  2.95k|#define DIV(x,y) ((x)/(y))
  ------------------
 2624|       |
 2625|  2.95k|                u2_mb_y <<= u1_mb_aff;
 2626|       |
 2627|  2.95k|                if((u2_mb_x > u2_frm_wd_in_mbs - 1) || (u2_mb_y > ps_dec->u2_frm_ht_in_mbs - 1))
  ------------------
  |  Branch (2627:20): [True: 0, False: 2.95k]
  |  Branch (2627:56): [True: 80, False: 2.87k]
  ------------------
 2628|     80|                {
 2629|     80|                    return ERROR_CORRUPTED_SLICE;
 2630|     80|                }
 2631|       |
 2632|  2.87k|                u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic;
 2633|  2.87k|                x_offset = u2_mb_x << 4;
 2634|  2.87k|                y_offset = (u2_mb_y * u4_frame_stride) << 4;
 2635|       |
 2636|  2.87k|                ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset;
 2637|       |
 2638|  2.87k|                u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic;
 2639|  2.87k|                x_offset >>= 1;
 2640|  2.87k|                y_offset = (u2_mb_y * u4_frame_stride) << 3;
 2641|       |
 2642|  2.87k|                x_offset *= YUV420SP_FACTOR;
  ------------------
  |  |  119|  2.87k|#define YUV420SP_FACTOR 2
  ------------------
 2643|       |
 2644|  2.87k|                ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset;
 2645|  2.87k|                ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset;
 2646|       |
 2647|  2.87k|                ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
 2648|  2.87k|                ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
 2649|  2.87k|                ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
 2650|       |
 2651|       |                /* assign the deblock structure pointers to start of slice */
 2652|  2.87k|                if(ps_dec->u1_separate_parse == 1)
  ------------------
  |  Branch (2652:20): [True: 1.24k, False: 1.62k]
  ------------------
 2653|  1.24k|                {
 2654|  1.24k|                    ps_dec->ps_deblk_mbn =
 2655|  1.24k|                        ps_dec->ps_deblk_pic + (u2_first_mb_in_slice << u1_mb_aff);
 2656|  1.24k|                }
 2657|  1.62k|                else
 2658|  1.62k|                {
 2659|  1.62k|                    ps_dec->ps_deblk_mbn =
 2660|  1.62k|                        ps_dec->ps_deblk_pic + (u2_first_mb_in_slice << u1_mb_aff);
 2661|  1.62k|                }
 2662|       |
 2663|  2.87k|                ps_dec->u4_cur_mb_addr = (u2_first_mb_in_slice << u1_mb_aff);
 2664|       |
 2665|  2.87k|                ps_dec->ps_mv_cur =
 2666|  2.87k|                    ps_dec->s_cur_pic.ps_mv + ((u2_first_mb_in_slice << u1_mb_aff) << 4);
 2667|  2.87k|            }
 2668|  2.87k|        }
 2669|  98.8k|        else
 2670|  98.8k|        {
 2671|  98.8k|            tfr_ctxt_t *ps_trns_addr;
 2672|       |
 2673|  98.8k|            if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (2673:16): [True: 29.5k, False: 69.3k]
  ------------------
 2674|  29.5k|            {
 2675|  29.5k|                ps_trns_addr = &ps_dec->s_tran_addrecon_parse;
 2676|  29.5k|            }
 2677|  69.3k|            else
 2678|  69.3k|            {
 2679|  69.3k|                ps_trns_addr = &ps_dec->s_tran_addrecon;
 2680|  69.3k|            }
 2681|       |
 2682|  98.8k|            u2_mb_x = 0xffff;
 2683|  98.8k|            u2_mb_y = 0;
 2684|       |            // assign the deblock structure pointers to start of slice
 2685|  98.8k|            ps_dec->u4_cur_mb_addr = 0;
 2686|  98.8k|            ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic;
 2687|  98.8k|            ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv;
 2688|  98.8k|            ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1;
 2689|  98.8k|            ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2;
 2690|  98.8k|            ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3;
 2691|       |
 2692|  98.8k|            ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
 2693|  98.8k|            ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
 2694|  98.8k|            ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
 2695|  98.8k|        }
 2696|       |
 2697|   101k|        ps_dec->ps_part = ps_dec->ps_parse_part_params;
 2698|       |
 2699|   101k|        ps_dec->u2_mbx = (MOD(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs));
  ------------------
  |  |   64|   101k|#define MOD(x,y) ((x)%(y))
  ------------------
 2700|   101k|        ps_dec->u2_mby = (DIV(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs));
  ------------------
  |  |   65|   101k|#define DIV(x,y) ((x)/(y))
  ------------------
 2701|   101k|        ps_dec->u2_mby <<= ps_cur_slice->u1_mbaff_frame_flag;
 2702|   101k|        ps_dec->i2_prev_slice_mbx = (WORD16) ps_dec->u2_mbx;
 2703|   101k|        ps_dec->i2_prev_slice_mby = (WORD16) ps_dec->u2_mby;
 2704|   101k|    }
 2705|       |
 2706|       |    /* RBSP stop bit is used for CABAC decoding*/
 2707|      0|    ps_bitstrm->u4_max_ofst += ps_dec->ps_cur_pps->u1_entropy_coding_mode;
 2708|       |
 2709|   101k|    ps_dec->u1_B = (u1_slice_type == B_SLICE);
  ------------------
  |  |  369|   101k|#define B_SLICE  1
  ------------------
 2710|   101k|    ps_dec->u4_next_mb_skip = 0;
 2711|       |
 2712|   101k|    ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice = ps_dec->ps_cur_slice->u2_first_mb_in_slice;
 2713|   101k|    ps_dec->ps_parse_cur_slice->slice_type = ps_dec->ps_cur_slice->u1_slice_type;
 2714|       |
 2715|   101k|    ps_dec->u4_start_recon_deblk = 1;
 2716|   101k|    {
 2717|   101k|        WORD32 num_entries;
 2718|   101k|        WORD32 size;
 2719|   101k|        UWORD8 *pu1_buf;
 2720|       |
 2721|   101k|        num_entries = MAX_FRAMES;
  ------------------
  |  |  600|   101k|#define MAX_FRAMES              16
  ------------------
 2722|   101k|        if((1 >= ps_dec->ps_cur_sps->u1_num_ref_frames) && (0 == ps_dec->i4_display_delay))
  ------------------
  |  Branch (2722:12): [True: 81.5k, False: 20.1k]
  |  Branch (2722:60): [True: 0, False: 81.5k]
  ------------------
 2723|      0|        {
 2724|      0|            num_entries = 1;
 2725|      0|        }
 2726|   101k|        num_entries = ((2 * num_entries) + 1);
 2727|   101k|        num_entries *= 2;
 2728|       |
 2729|   101k|        size = num_entries * sizeof(void *);
 2730|   101k|        size += PAD_MAP_IDX_POC * sizeof(void *);
  ------------------
  |  |  100|   101k|#define PAD_MAP_IDX_POC             (1)
  ------------------
 2731|       |
 2732|   101k|        pu1_buf = (UWORD8 *) ps_dec->pv_map_ref_idx_to_poc_buf;
 2733|   101k|        pu1_buf += size * ps_dec->u2_cur_slice_num;
 2734|   101k|        ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc = (void *) pu1_buf;
 2735|   101k|    }
 2736|       |
 2737|   101k|    if(ps_dec->u1_separate_parse)
  ------------------
  |  Branch (2737:8): [True: 30.7k, False: 70.9k]
  ------------------
 2738|  30.7k|    {
 2739|  30.7k|        ps_dec->ps_parse_cur_slice->pv_tu_coeff_data_start = ps_dec->pv_parse_tu_coeff_data;
 2740|  30.7k|    }
 2741|  70.9k|    else
 2742|  70.9k|    {
 2743|  70.9k|        ps_dec->pv_proc_tu_coeff_data = ps_dec->pv_parse_tu_coeff_data;
 2744|  70.9k|    }
 2745|       |
 2746|   101k|    ret = ih264d_fix_error_in_dpb(ps_dec);
 2747|   101k|    if(ret < 0) return ERROR_DBP_MANAGER_T;
  ------------------
  |  Branch (2747:8): [True: 0, False: 101k]
  ------------------
 2748|       |
 2749|   101k|    if(u1_slice_type == I_SLICE)
  ------------------
  |  |  370|   101k|#define I_SLICE  2
  ------------------
  |  Branch (2749:8): [True: 7.35k, False: 94.3k]
  ------------------
 2750|  7.35k|    {
 2751|  7.35k|        ps_dec->ps_cur_pic->u4_pack_slc_typ |= I_SLC_BIT;
  ------------------
  |  |  376|  7.35k|#define I_SLC_BIT  (0x1)
  ------------------
 2752|       |
 2753|  7.35k|        ret = isvcd_parse_islice(ps_svc_lyr_dec, u2_first_mb_in_slice);
 2754|  7.35k|        ps_dec->u1_pr_sl_type = u1_slice_type;
 2755|  7.35k|        if(ps_dec->i4_pic_type != B_SLICE && ps_dec->i4_pic_type != P_SLICE)
  ------------------
  |  |  369|  14.7k|#define B_SLICE  1
  ------------------
                      if(ps_dec->i4_pic_type != B_SLICE && ps_dec->i4_pic_type != P_SLICE)
  ------------------
  |  |  368|  7.24k|#define P_SLICE  0
  ------------------
  |  Branch (2755:12): [True: 7.24k, False: 108]
  |  Branch (2755:46): [True: 6.42k, False: 821]
  ------------------
 2756|  6.42k|            ps_dec->i4_pic_type = I_SLICE;
  ------------------
  |  |  370|  6.42k|#define I_SLICE  2
  ------------------
 2757|  7.35k|    }
 2758|  94.3k|    else if(u1_slice_type == P_SLICE)
  ------------------
  |  |  368|  94.3k|#define P_SLICE  0
  ------------------
  |  Branch (2758:13): [True: 63.4k, False: 30.8k]
  ------------------
 2759|  63.4k|    {
 2760|  63.4k|        ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT;
  ------------------
  |  |  377|  63.4k|#define P_SLC_BIT  (0x2)
  ------------------
 2761|  63.4k|        ret = isvcd_parse_pslice(ps_svc_lyr_dec, u2_first_mb_in_slice);
 2762|  63.4k|        ps_dec->u1_pr_sl_type = u1_slice_type;
 2763|  63.4k|        if(ps_dec->i4_pic_type != B_SLICE) ps_dec->i4_pic_type = P_SLICE;
  ------------------
  |  |  369|  63.4k|#define B_SLICE  1
  ------------------
                      if(ps_dec->i4_pic_type != B_SLICE) ps_dec->i4_pic_type = P_SLICE;
  ------------------
  |  |  368|  63.2k|#define P_SLICE  0
  ------------------
  |  Branch (2763:12): [True: 63.2k, False: 259]
  ------------------
 2764|  63.4k|    }
 2765|  30.8k|    else if(u1_slice_type == B_SLICE)
  ------------------
  |  |  369|  30.8k|#define B_SLICE  1
  ------------------
  |  Branch (2765:13): [True: 30.4k, False: 408]
  ------------------
 2766|  30.4k|    {
 2767|  30.4k|        ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT;
  ------------------
  |  |  378|  30.4k|#define B_SLC_BIT  (0x4)
  ------------------
 2768|  30.4k|        ret = isvcd_parse_bslice(ps_svc_lyr_dec, u2_first_mb_in_slice);
 2769|  30.4k|        ps_dec->u1_pr_sl_type = u1_slice_type;
 2770|  30.4k|        ps_dec->i4_pic_type = B_SLICE;
  ------------------
  |  |  369|  30.4k|#define B_SLICE  1
  ------------------
 2771|  30.4k|    }
 2772|    408|    else
 2773|    408|        return ERROR_INV_SLC_TYPE_T;
 2774|       |
 2775|   101k|    if(ps_dec->u1_slice_header_done)
  ------------------
  |  Branch (2775:8): [True: 79.7k, False: 21.5k]
  ------------------
 2776|  79.7k|    {
 2777|       |        /* set to zero to indicate a valid slice has been decoded */
 2778|  79.7k|        ps_dec->u1_first_slice_in_stream = 0;
 2779|  79.7k|    }
 2780|       |
 2781|   101k|    if(ret != OK) return ret;
  ------------------
  |  |  114|   101k|#define OK        0
  ------------------
  |  Branch (2781:8): [True: 47.4k, False: 53.8k]
  ------------------
 2782|       |
 2783|  53.8k|    if(u1_nal_ref_idc != 0)
  ------------------
  |  Branch (2783:8): [True: 48.1k, False: 5.63k]
  ------------------
 2784|  48.1k|    {
 2785|  48.1k|        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
  ------------------
  |  Branch (2785:12): [True: 47.4k, False: 756]
  ------------------
 2786|  47.4k|        {
 2787|  47.4k|            memcpy((void *) ps_dec->ps_dpb_cmds, (void *) (&(ps_dec->s_dpb_cmds_scratch)),
 2788|  47.4k|                   sizeof(dpb_commands_t));
 2789|  47.4k|        }
 2790|  48.1k|    }
 2791|       |
 2792|       |    /* storing last Mb X and MbY of the slice */
 2793|  53.8k|    ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
 2794|  53.8k|    ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
 2795|       |
 2796|       |    /* End of Picture detection */
 2797|       |
 2798|  53.8k|    if(ps_dec->u4_total_mbs_coded >= (ps_seq->u4_max_mb_addr + 1))
  ------------------
  |  Branch (2798:8): [True: 25.8k, False: 27.9k]
  ------------------
 2799|  25.8k|    {
 2800|  25.8k|        ps_dec->u1_pic_decode_done = 1;
 2801|  25.8k|    }
 2802|       |
 2803|  53.8k|    {
 2804|  53.8k|        dec_err_status_t *ps_err = ps_dec->ps_dec_err_status;
 2805|  53.8k|        if((ps_err->u1_err_flag & REJECT_PB_PICS) && (ps_err->u1_cur_pic_type == PIC_TYPE_I))
  ------------------
  |  |  603|  53.8k|#define REJECT_PB_PICS    (0x02)
  ------------------
                      if((ps_err->u1_err_flag & REJECT_PB_PICS) && (ps_err->u1_cur_pic_type == PIC_TYPE_I))
  ------------------
  |  |  609|      0|#define PIC_TYPE_I        (0x00)
  ------------------
  |  Branch (2805:12): [True: 0, False: 53.8k]
  |  Branch (2805:54): [True: 0, False: 0]
  ------------------
 2806|      0|        {
 2807|      0|            ps_err->u1_err_flag = ACCEPT_ALL_PICS;
  ------------------
  |  |  601|      0|#define ACCEPT_ALL_PICS   (0x00)
  ------------------
 2808|      0|        }
 2809|  53.8k|    }
 2810|       |
 2811|  53.8k|    PRINT_BIN_BIT_RATIO(ps_dec)
 2812|       |
 2813|  53.8k|    return ret;
 2814|   101k|}

isvcd_pred_residual_recon_chroma_8x8:
   88|   160k|{
   89|   160k|    UWORD8 *pu1_pred_ptr = pu1_pred;
   90|   160k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
   91|   160k|    UWORD8 *pu1_out_ptr = pu1_out;
   92|   160k|    WORD16 i, j;
   93|   160k|    WORD16 i_macro;
   94|       |
   95|  1.44M|    for(i = 0; i < 8; i++)
  ------------------
  |  Branch (95:16): [True: 1.28M, False: 160k]
  ------------------
   96|  1.28M|    {
   97|  1.28M|        pu1_pred_ptr = pu1_pred;
   98|  1.28M|        pi2_rsd_ptr = pi2_rsd;
   99|  1.28M|        pu1_out = pu1_out_ptr;
  100|       |
  101|  11.5M|        for(j = 0; j < 8; j++)
  ------------------
  |  Branch (101:20): [True: 10.2M, False: 1.28M]
  ------------------
  102|  10.2M|        {
  103|  10.2M|            i_macro = *pu1_pred_ptr + *pi2_rsd_ptr;
  104|  10.2M|            *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  10.2M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  10.2M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 136k, False: 10.1M]
  |  |  |  |  |  Branch (77:54): [True: 2.78k, False: 10.1M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  105|  10.2M|            pu1_pred_ptr += pred_strd;
  106|  10.2M|            pi2_rsd_ptr += rsd_strd;
  107|  10.2M|            pu1_out += out_strd;
  108|  10.2M|        }
  109|       |
  110|  1.28M|        pu1_out_ptr += 2;  // Interleaved store for output
  111|  1.28M|        pu1_pred += 2;     // Interleaved load for pred buffer
  112|  1.28M|        pi2_rsd += 2;
  113|  1.28M|    }
  114|   160k|}
isvcd_pred_residual_recon_chroma_4x4:
  190|  20.3k|{
  191|  20.3k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  192|  20.3k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  193|  20.3k|    UWORD8 *pu1_out_ptr = pu1_out;
  194|  20.3k|    WORD16 i, j;
  195|  20.3k|    WORD16 i_macro;
  196|       |
  197|   101k|    for(i = 0; i < 4; i++)
  ------------------
  |  Branch (197:16): [True: 81.5k, False: 20.3k]
  ------------------
  198|  81.5k|    {
  199|  81.5k|        pu1_pred_ptr = pu1_pred;
  200|  81.5k|        pi2_rsd_ptr = pi2_rsd;
  201|  81.5k|        pu1_out = pu1_out_ptr;
  202|       |
  203|   407k|        for(j = 0; j < 4; j++)
  ------------------
  |  Branch (203:20): [True: 326k, False: 81.5k]
  ------------------
  204|   326k|        {
  205|   326k|            i_macro = *pu1_pred_ptr + *pi2_rsd_ptr;
  206|   326k|            *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   326k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   326k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 4.16k, False: 321k]
  |  |  |  |  |  Branch (77:54): [True: 236, False: 321k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  207|   326k|            pu1_pred_ptr += pred_strd;
  208|   326k|            pi2_rsd_ptr += rsd_strd;
  209|   326k|            pu1_out += out_strd;
  210|   326k|        }
  211|       |
  212|  81.5k|        pu1_out_ptr += 2;  // Interleaved store for output
  213|  81.5k|        pu1_pred += 2;     // Interleaved load for pred buffer
  214|  81.5k|        pi2_rsd += 2;
  215|  81.5k|    }
  216|  20.3k|}
isvcd_pred_residual_recon_16x16:
  242|  72.9k|{
  243|  72.9k|    WORD32 i4_nnz = 0, i4_nnz_blk[4][4] = {0};
  244|  72.9k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  245|  72.9k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  246|  72.9k|    UWORD8 *pu1_out_ptr = pu1_out;
  247|  72.9k|    WORD16 i, j;
  248|  72.9k|    WORD16 i_macro;
  249|       |
  250|  1.24M|    for(i = 0; i < 16; i++)
  ------------------
  |  Branch (250:16): [True: 1.16M, False: 72.9k]
  ------------------
  251|  1.16M|    {
  252|  1.16M|        pu1_pred_ptr = pu1_pred;
  253|  1.16M|        pi2_rsd_ptr = pi2_rsd;
  254|  1.16M|        pu1_out = pu1_out_ptr;
  255|       |
  256|  19.8M|        for(j = 0; j < 16; j++)
  ------------------
  |  Branch (256:20): [True: 18.6M, False: 1.16M]
  ------------------
  257|  18.6M|        {
  258|  18.6M|            i_macro = *pi2_rsd_ptr;
  259|  18.6M|            i4_nnz_blk[j >> 2][i >> 2] |= !!i_macro;
  260|  18.6M|            i_macro += *pu1_pred_ptr;
  261|  18.6M|            *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  18.6M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  18.6M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 71.6k, False: 18.6M]
  |  |  |  |  |  Branch (77:54): [True: 2.52k, False: 18.6M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  262|  18.6M|            pu1_pred_ptr += pred_strd;
  263|  18.6M|            pi2_rsd_ptr += rsd_strd;
  264|  18.6M|            pu1_out += out_strd;
  265|  18.6M|        }
  266|       |
  267|  1.16M|        pu1_out_ptr++;
  268|  1.16M|        pi2_rsd++;
  269|  1.16M|        pu1_pred++;
  270|  1.16M|    }
  271|       |
  272|   364k|    for(i = 0; i < 4; i++)
  ------------------
  |  Branch (272:16): [True: 291k, False: 72.9k]
  ------------------
  273|   291k|    {
  274|  1.45M|        for(j = 0; j < 4; j++)
  ------------------
  |  Branch (274:20): [True: 1.16M, False: 291k]
  ------------------
  275|  1.16M|        {
  276|  1.16M|            i4_nnz |= (i4_nnz_blk[j][i]) << (i + (j << 2));
  277|  1.16M|        }
  278|   291k|    }
  279|       |
  280|  72.9k|    return i4_nnz;
  281|  72.9k|}
isvcd_pred_residual_recon_4x4:
  307|   101k|{
  308|   101k|    WORD32 i4_nnz_blk[4][4] = {0};
  309|   101k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  310|   101k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  311|   101k|    UWORD8 *pu1_out_ptr = pu1_out;
  312|   101k|    WORD16 i, j;
  313|   101k|    WORD16 i_macro;
  314|       |
  315|   506k|    for(i = 0; i < 4; i++)
  ------------------
  |  Branch (315:16): [True: 405k, False: 101k]
  ------------------
  316|   405k|    {
  317|   405k|        pu1_pred_ptr = pu1_pred;
  318|   405k|        pi2_rsd_ptr = pi2_rsd;
  319|   405k|        pu1_out = pu1_out_ptr;
  320|       |
  321|  2.02M|        for(j = 0; j < 4; j++)
  ------------------
  |  Branch (321:20): [True: 1.62M, False: 405k]
  ------------------
  322|  1.62M|        {
  323|  1.62M|            i_macro = *pi2_rsd_ptr;
  324|  1.62M|            i4_nnz_blk[j >> 2][i >> 2] |= !!i_macro;
  325|  1.62M|            i_macro += *pu1_pred_ptr;
  326|  1.62M|            *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|  1.62M|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|  1.62M|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 15.8k, False: 1.60M]
  |  |  |  |  |  Branch (77:54): [True: 1.13k, False: 1.60M]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  327|  1.62M|            pu1_pred_ptr += pred_strd;
  328|  1.62M|            pi2_rsd_ptr += rsd_strd;
  329|  1.62M|            pu1_out += out_strd;
  330|  1.62M|        }
  331|       |
  332|   405k|        pu1_out_ptr++;
  333|   405k|        pi2_rsd++;
  334|   405k|        pu1_pred++;
  335|   405k|    }
  336|       |
  337|   101k|    return i4_nnz_blk[0][0];
  338|   101k|}
isvcd_pred_residual_recon_8x8:
  364|  9.84k|{
  365|  9.84k|    WORD32 i4_nnz = 0, i4_nnz_blk[4][4] = {0};
  366|  9.84k|    UWORD8 *pu1_pred_ptr = pu1_pred;
  367|  9.84k|    WORD16 *pi2_rsd_ptr = pi2_rsd;
  368|  9.84k|    UWORD8 *pu1_out_ptr = pu1_out;
  369|  9.84k|    WORD16 i, j;
  370|  9.84k|    WORD16 i_macro;
  371|       |
  372|  88.5k|    for(i = 0; i < 8; i++)
  ------------------
  |  Branch (372:16): [True: 78.7k, False: 9.84k]
  ------------------
  373|  78.7k|    {
  374|  78.7k|        pu1_pred_ptr = pu1_pred;
  375|  78.7k|        pi2_rsd_ptr = pi2_rsd;
  376|  78.7k|        pu1_out = pu1_out_ptr;
  377|       |
  378|   708k|        for(j = 0; j < 8; j++)
  ------------------
  |  Branch (378:20): [True: 629k, False: 78.7k]
  ------------------
  379|   629k|        {
  380|   629k|            i_macro = *pi2_rsd_ptr;
  381|   629k|            i4_nnz_blk[j >> 2][i >> 2] |= !!i_macro;
  382|   629k|            i_macro += *pu1_pred_ptr;
  383|   629k|            *pu1_out = CLIP_U8(i_macro);
  ------------------
  |  |   58|   629k|#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
  |  |  ------------------
  |  |  |  |   77|   629k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (77:31): [True: 15.0k, False: 614k]
  |  |  |  |  |  Branch (77:54): [True: 1.31k, False: 613k]
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  384|   629k|            pu1_pred_ptr += pred_strd;
  385|   629k|            pi2_rsd_ptr += rsd_strd;
  386|   629k|            pu1_out += out_strd;
  387|   629k|        }
  388|       |
  389|  78.7k|        pu1_out_ptr++;
  390|  78.7k|        pi2_rsd++;
  391|  78.7k|        pu1_pred++;
  392|  78.7k|    }
  393|       |
  394|  9.84k|    i4_nnz = i4_nnz_blk[0][0] | (i4_nnz_blk[1][0] << 4);
  395|  9.84k|    i4_nnz |= (i4_nnz_blk[0][1] << 1) | (i4_nnz_blk[1][1] << 5);
  396|       |
  397|  9.84k|    return i4_nnz;
  398|  9.84k|}

isvcd_one_to_one:
   72|  78.7k|{
   73|  78.7k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
   74|  78.7k|    UWORD8 *pu1_col_zero_flag_start, u1_col_mb_pred_mode, u1_num_blks, u1_sub_mb_num;
   75|  78.7k|    UWORD8 u1_init_colzero_flag;
   76|  78.7k|    UNUSED(ps_cur_mb_info);
  ------------------
  |  |   45|  78.7k|#define UNUSED(x) ((void)(x))
  ------------------
   77|  78.7k|    pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst;
   78|  78.7k|    u1_col_mb_pred_mode = pu1_col_zero_flag_start[ps_dec->u1_sub_mb_num];
   79|  78.7k|    u1_init_colzero_flag = u1_col_mb_pred_mode & 1;
   80|  78.7k|    u1_col_mb_pred_mode >>= 6;
   81|  78.7k|    ps_direct->u1_vert_mv_scale = ONE_TO_ONE;
  ------------------
  |  |   47|  78.7k|#define ONE_TO_ONE    0
  ------------------
   82|  78.7k|    ps_direct->u1_col_zeroflag_change = (ps_svc_lyr_dec->u1_base_res_flag) ? 0 : 1;
  ------------------
  |  Branch (82:41): [True: 0, False: 78.7k]
  ------------------
   83|       |
   84|  78.7k|    if(u1_wd_x == MB_SIZE)
  ------------------
  |  |  554|  78.7k|#define MB_SIZE             16
  ------------------
  |  Branch (84:8): [True: 74.5k, False: 4.22k]
  ------------------
   85|  74.5k|    {
   86|  74.5k|        ps_dec->u1_currB_type = (!!u1_col_mb_pred_mode);
   87|  74.5k|        if(u1_col_mb_pred_mode == PRED_16x16)
  ------------------
  |  |  450|  74.5k|#define PRED_16x16  0
  ------------------
  |  Branch (87:12): [True: 69.7k, False: 4.77k]
  ------------------
   88|  69.7k|        {
   89|  69.7k|            ps_direct->i1_num_partitions = 1;
   90|  69.7k|            ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
   91|  69.7k|            ps_direct->i1_submb_num[0] = 0;
   92|  69.7k|            ps_direct->i1_partitionsize[0] = PRED_16x16;
  ------------------
  |  |  450|  69.7k|#define PRED_16x16  0
  ------------------
   93|       |
   94|  69.7k|            return;
   95|  69.7k|        }
   96|  4.77k|        else if(u1_col_mb_pred_mode < PRED_8x8)
  ------------------
  |  |  453|  4.77k|#define PRED_8x8    3
  ------------------
  |  Branch (96:17): [True: 2.36k, False: 2.41k]
  ------------------
   97|  2.36k|        {
   98|  2.36k|            ps_direct->i1_num_partitions = 2;
   99|  2.36k|            ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
  100|  2.36k|            ps_direct->i1_submb_num[0] = 0;
  101|  2.36k|            ps_direct->i1_partitionsize[0] = u1_col_mb_pred_mode;
  102|  2.36k|            u1_sub_mb_num = (u1_col_mb_pred_mode == PRED_16x8) ? 8 : 2;
  ------------------
  |  |  451|  2.36k|#define PRED_16x8   1
  ------------------
  |  Branch (102:29): [True: 1.76k, False: 604]
  ------------------
  103|  2.36k|            ps_direct->i1_submb_num[1] = u1_sub_mb_num;
  104|  2.36k|            ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst + ps_direct->i1_submb_num[1];
  105|  2.36k|            ps_direct->i1_partitionsize[1] = u1_col_mb_pred_mode;
  106|  2.36k|            if((pu1_col_zero_flag_start[u1_sub_mb_num] & 1) != u1_init_colzero_flag)
  ------------------
  |  Branch (106:16): [True: 664, False: 1.70k]
  ------------------
  107|    664|                ps_direct->u1_col_zeroflag_change = 1;
  108|  2.36k|            return;
  109|  2.36k|        }
  110|  2.41k|        else
  111|  2.41k|        {
  112|  2.41k|            u1_num_blks = 4;
  113|  2.41k|        }
  114|  74.5k|    }
  115|  4.22k|    else
  116|  4.22k|    {
  117|  4.22k|        u1_num_blks = 1;
  118|  4.22k|    }
  119|       |
  120|  6.64k|    {
  121|  6.64k|        const UWORD8 *pu1_top_lt_mb_part_idx;
  122|  6.64k|        UWORD8 u1_col_sub_mb_pred_mode, uc_blk, u1_sub_blk, u1_submb_col = 0;
  123|  6.64k|        UWORD8 u1_num_sub_blks, uc_direct8x8inf, *pu1_col_zero_flag, u1_sub_mb_num;
  124|  6.64k|        const UWORD8 *pu1_num_sub_mb_part = (const UWORD8 *) gau1_ih264d_num_submb_part;
  125|  6.64k|        UWORD8 i1_num_partitions = 0, partition_size;
  126|  6.64k|        WORD32 mv_index;
  127|  6.64k|        const UWORD8 *pu1_top_lt_sub_mb_idx = gau1_ih264d_submb_indx_mod_sp_drct;
  128|       |
  129|  6.64k|        u1_sub_mb_num = ps_dec->u1_sub_mb_num;
  130|  6.64k|        uc_direct8x8inf = ps_dec->ps_cur_slice->u1_direct_8x8_inference_flag;
  131|  6.64k|        pu1_top_lt_mb_part_idx = gau1_ih264d_top_left_mb_part_indx_mod + (PRED_8x8 << 1) + 1;
  ------------------
  |  |  453|  6.64k|#define PRED_8x8    3
  ------------------
  132|       |
  133|  20.5k|        for(uc_blk = 0; uc_blk < u1_num_blks; uc_blk++)
  ------------------
  |  Branch (133:25): [True: 13.8k, False: 6.64k]
  ------------------
  134|  13.8k|        {
  135|  13.8k|            partition_size = PRED_8x8;
  ------------------
  |  |  453|  13.8k|#define PRED_8x8    3
  ------------------
  136|  13.8k|            pu1_top_lt_sub_mb_idx = gau1_ih264d_submb_indx_mod_sp_drct;
  137|  13.8k|            if(uc_direct8x8inf == 1)
  ------------------
  |  Branch (137:16): [True: 1.99k, False: 11.8k]
  ------------------
  138|  1.99k|            {
  139|  1.99k|                u1_submb_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
  140|  1.99k|                mv_index = u2_sub_mb_ofst + u1_submb_col;
  141|  1.99k|                u1_num_sub_blks = 1;
  142|  1.99k|            }
  143|  11.8k|            else
  144|  11.8k|            {
  145|       |                /* colMbPart is either 8x8, 8x4, 4x8, 4x4 */
  146|  11.8k|                pu1_col_zero_flag = pu1_col_zero_flag_start + u1_sub_mb_num;
  147|  11.8k|                u1_col_sub_mb_pred_mode = *pu1_col_zero_flag;
  148|  11.8k|                u1_col_sub_mb_pred_mode = (u1_col_sub_mb_pred_mode & 0x30) >> 4;
  149|  11.8k|                partition_size = (UWORD8) ((u1_col_sub_mb_pred_mode) | (PRED_8x8 << 2));
  ------------------
  |  |  453|  11.8k|#define PRED_8x8    3
  ------------------
  150|  11.8k|                mv_index = u2_sub_mb_ofst + u1_sub_mb_num;
  151|  11.8k|                pu1_top_lt_sub_mb_idx += (u1_col_sub_mb_pred_mode << 1);
  152|  11.8k|                u1_num_sub_blks = pu1_num_sub_mb_part[u1_col_sub_mb_pred_mode];
  153|  11.8k|            }
  154|       |
  155|  30.0k|            for(u1_sub_blk = 0; u1_sub_blk < u1_num_sub_blks; u1_sub_blk++, pu1_top_lt_sub_mb_idx++)
  ------------------
  |  Branch (155:33): [True: 16.1k, False: 13.8k]
  ------------------
  156|  16.1k|            {
  157|  16.1k|                u1_sub_mb_num += *pu1_top_lt_sub_mb_idx;
  158|  16.1k|                mv_index += *pu1_top_lt_sub_mb_idx;
  159|  16.1k|                ps_direct->i4_mv_indices[i1_num_partitions] = mv_index;
  160|  16.1k|                ps_direct->i1_submb_num[i1_num_partitions] = u1_sub_mb_num;
  161|  16.1k|                ps_direct->i1_partitionsize[i1_num_partitions] = partition_size;
  162|  16.1k|                i1_num_partitions++;
  163|  16.1k|                if(!uc_direct8x8inf) u1_submb_col = u1_sub_mb_num;
  ------------------
  |  Branch (163:20): [True: 14.1k, False: 1.99k]
  ------------------
  164|  16.1k|                if((pu1_col_zero_flag_start[u1_submb_col] & 1) != u1_init_colzero_flag)
  ------------------
  |  Branch (164:20): [True: 1.77k, False: 14.3k]
  ------------------
  165|  1.77k|                    ps_direct->u1_col_zeroflag_change = 1;
  166|  16.1k|            }
  167|  13.8k|            u1_sub_mb_num = *pu1_top_lt_mb_part_idx++;
  168|  13.8k|        }
  169|  6.64k|        ps_direct->i1_num_partitions = i1_num_partitions;
  170|  6.64k|    }
  171|  6.64k|}
isvcd_decode_spatial_direct:
  187|  78.7k|{
  188|  78.7k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) ps_dec;
  189|  78.7k|    mv_pred_t s_mv_pred = {0};
  190|  78.7k|    mv_pred_t *ps_mv;
  191|  78.7k|    UWORD8 u1_col_zero_flag, u1_direct_zero_pred_flag = 0;
  192|  78.7k|    UWORD32 u4_sub_mb_num;
  193|  78.7k|    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  194|  78.7k|    mv_pred_t *ps_mv_ntop_start;
  195|  78.7k|    mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u4_mb_num << 4);
  196|  78.7k|    UWORD8 partition_size, sub_partition, u1_mb_partw, u1_mb_parth;
  197|  78.7k|    UWORD8 i;
  198|  78.7k|    WORD8 i1_pred, i1_ref_frame0, i1_ref_frame1;
  199|  78.7k|    struct pic_buffer_t *ps_ref_frame = NULL, *ps_col_pic, *ps_pic_buff0 = NULL,
  200|  78.7k|                        *ps_pic_buff1 = NULL;
  201|       |
  202|  78.7k|    UWORD8 u1_zero_pred_cond_f, u1_zero_pred_cond_b;
  203|  78.7k|    WORD16 i2_spat_pred_mv[4] = {0};
  204|  78.7k|    WORD16 *pi2_final_mv0, *pi2_final_mv1;
  205|  78.7k|    UWORD16 ui2_mask_fwd = 0, ui2_mask_bwd = 0;
  206|  78.7k|    UWORD32 *pui32_weight_ofsts = NULL;
  207|  78.7k|    directmv_t s_mvdirect = {0};
  208|  78.7k|    UWORD8 u1_colz;
  209|  78.7k|    UWORD8 u1_final_ref_idx = 0;
  210|  78.7k|    const UWORD8 *pu1_mb_parth = (const UWORD8 *) gau1_ih264d_mb_parth;
  211|  78.7k|    const UWORD8 *pu1_mb_partw = (const UWORD8 *) gau1_ih264d_mb_partw;
  212|       |
  213|  78.7k|    mv_pred_t s_temp_mv_pred = {0};
  214|  78.7k|    ps_mv_ntop_start =
  215|  78.7k|        ps_dec->ps_mv_cur + (u4_mb_num << 4) - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
  216|       |
  217|  78.7k|    u1_direct_zero_pred_flag =
  218|  78.7k|        ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, (ps_mv_nmb_start + ps_dec->u1_sub_mb_num),
  219|  78.7k|                          ps_mv_ntop_start + (ps_dec->u1_sub_mb_num & 0x03), &s_mv_pred,
  220|  78.7k|                          ps_dec->u1_sub_mb_num, (u1_wd_x >> 2), 0, 1, B_DIRECT_SPATIAL);
  ------------------
  |  |  489|  78.7k|#define B_DIRECT_SPATIAL  26
  ------------------
  221|       |
  222|  78.7k|    i2_spat_pred_mv[0] = s_mv_pred.i2_mv[0];
  223|  78.7k|    i2_spat_pred_mv[1] = s_mv_pred.i2_mv[1];
  224|  78.7k|    i2_spat_pred_mv[2] = s_mv_pred.i2_mv[2];
  225|  78.7k|    i2_spat_pred_mv[3] = s_mv_pred.i2_mv[3];
  226|       |
  227|  78.7k|    i1_ref_frame0 = s_mv_pred.i1_ref_frame[0];
  228|  78.7k|    i1_ref_frame1 = s_mv_pred.i1_ref_frame[1];
  229|       |
  230|  78.7k|    i1_ref_frame0 = (i1_ref_frame0 < 0) ? -1 : i1_ref_frame0;
  ------------------
  |  Branch (230:21): [True: 3.72k, False: 75.0k]
  ------------------
  231|  78.7k|    i1_ref_frame1 = (i1_ref_frame1 < 0) ? -1 : i1_ref_frame1;
  ------------------
  |  Branch (231:21): [True: 8.20k, False: 70.5k]
  ------------------
  232|       |
  233|  78.7k|    i1_pred = 0;
  234|       |
  235|  78.7k|    {
  236|  78.7k|        WORD8 u1_ref_idx, u1_ref_idx1;
  237|  78.7k|        UWORD32 uc_Idx, uc_Idx1;
  238|  78.7k|        UWORD8 u1_scale_ref =
  239|  78.7k|            (ps_dec->ps_cur_slice->u1_mbaff_frame_flag && ps_cur_mb_info->u1_mb_field_decodingflag);
  ------------------
  |  Branch (239:14): [True: 0, False: 78.7k]
  |  Branch (239:59): [True: 0, False: 0]
  ------------------
  240|  78.7k|        u1_final_ref_idx = i1_ref_frame0;
  241|  78.7k|        if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (241:12): [True: 75.0k, False: 3.72k]
  ------------------
  242|  75.0k|        {
  243|       |            /* convert RefIdx if it is MbAff */
  244|  75.0k|            u1_ref_idx = i1_ref_frame0;
  245|  75.0k|            u1_ref_idx1 = i1_ref_frame0;
  246|  75.0k|            if(u1_scale_ref)
  ------------------
  |  Branch (246:16): [True: 0, False: 75.0k]
  ------------------
  247|      0|            {
  248|      0|                u1_ref_idx1 = u1_ref_idx >> 1;
  249|      0|                if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb))
  ------------------
  |  Branch (249:20): [True: 0, False: 0]
  ------------------
  250|      0|                    u1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  251|      0|            }
  252|       |            /* If i1_ref_frame0 < 0 then refIdxCol is obtained from ps_pic_buff1 */
  253|  75.0k|            ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u1_ref_idx1];
  254|  75.0k|            ps_ref_frame = ps_pic_buff0;
  255|  75.0k|            i1_pred = PRED_L0;
  ------------------
  |  |  483|  75.0k|#define PRED_L0   1
  ------------------
  256|  75.0k|        }
  257|       |
  258|  78.7k|        if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (258:12): [True: 70.5k, False: 8.20k]
  ------------------
  259|  70.5k|        {
  260|       |            /* convert RefIdx if it is MbAff */
  261|  70.5k|            u1_ref_idx = i1_ref_frame1;
  262|  70.5k|            u1_ref_idx1 = i1_ref_frame1;
  263|  70.5k|            if(u1_scale_ref)
  ------------------
  |  Branch (263:16): [True: 0, False: 70.5k]
  ------------------
  264|      0|            {
  265|      0|                u1_ref_idx1 = u1_ref_idx >> 1;
  266|      0|                if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb))
  ------------------
  |  Branch (266:20): [True: 0, False: 0]
  ------------------
  267|      0|                    u1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  268|      0|            }
  269|  70.5k|            ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][u1_ref_idx1];
  270|  70.5k|            i1_pred = i1_pred | PRED_L1;
  ------------------
  |  |  484|  70.5k|#define PRED_L1   2
  ------------------
  271|  70.5k|        }
  272|  78.7k|        if(i1_ref_frame0 < 0)
  ------------------
  |  Branch (272:12): [True: 3.72k, False: 75.0k]
  ------------------
  273|  3.72k|        {
  274|  3.72k|            ps_ref_frame = ps_pic_buff1;
  275|  3.72k|            u1_final_ref_idx = i1_ref_frame1;
  276|  3.72k|        }
  277|       |
  278|  78.7k|        u1_zero_pred_cond_f = (u1_direct_zero_pred_flag) || (i1_ref_frame0 < 0);
  ------------------
  |  Branch (278:31): [True: 4.71k, False: 74.0k]
  |  Branch (278:61): [True: 3.72k, False: 70.3k]
  ------------------
  279|  78.7k|        u1_zero_pred_cond_b = (u1_direct_zero_pred_flag) || (i1_ref_frame1 < 0);
  ------------------
  |  Branch (279:31): [True: 4.71k, False: 74.0k]
  |  Branch (279:61): [True: 8.20k, False: 65.8k]
  ------------------
  280|       |
  281|  78.7k|        if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
  ------------------
  |  Branch (281:12): [True: 45.4k, False: 33.3k]
  ------------------
  282|  45.4k|        {
  283|  45.4k|            uc_Idx = ((i1_ref_frame0 < 1) ? 0 : i1_ref_frame0) *
  ------------------
  |  Branch (283:23): [True: 44.8k, False: 628]
  ------------------
  284|  45.4k|                     ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
  285|  45.4k|            if(u1_scale_ref) uc_Idx >>= 1;
  ------------------
  |  Branch (285:16): [True: 0, False: 45.4k]
  ------------------
  286|  45.4k|            uc_Idx1 = (i1_ref_frame1 < 0) ? 0 : i1_ref_frame1;
  ------------------
  |  Branch (286:23): [True: 4.76k, False: 40.7k]
  ------------------
  287|  45.4k|            uc_Idx += (u1_scale_ref) ? (uc_Idx1 >> 1) : uc_Idx1;
  ------------------
  |  Branch (287:23): [True: 0, False: 45.4k]
  ------------------
  288|  45.4k|            pui32_weight_ofsts = (UWORD32 *) &ps_dec->pu4_wt_ofsts[2 * X3(uc_Idx)];
  ------------------
  |  |   92|  45.4k|#define X3(a)   (((a) << 1) + (a))
  ------------------
  289|       |
  290|  45.4k|            if(i1_ref_frame0 < 0) pui32_weight_ofsts += 1;
  ------------------
  |  Branch (290:16): [True: 2.46k, False: 43.0k]
  ------------------
  291|       |
  292|  45.4k|            if(u1_scale_ref && (ps_dec->ps_cur_pps->u1_wted_bipred_idc == 2))
  ------------------
  |  Branch (292:16): [True: 0, False: 45.4k]
  |  Branch (292:32): [True: 0, False: 0]
  ------------------
  293|      0|            {
  294|      0|                WORD16 i2_ref_idx;
  295|      0|                i2_ref_idx = MAX(i1_ref_frame0, 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  296|      0|                i2_ref_idx *= (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] << 1);
  297|      0|                i2_ref_idx += MAX(i1_ref_frame1, 0);
  ------------------
  |  |   60|      0|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  298|      0|                if(!ps_cur_mb_info->u1_topmb)
  ------------------
  |  Branch (298:20): [True: 0, False: 0]
  ------------------
  299|      0|                    i2_ref_idx += (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] << 1) *
  300|      0|                                  (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] << 1);
  301|      0|                pui32_weight_ofsts = (UWORD32 *) &ps_dec->pu4_mbaff_wt_mat[2 * X3(i2_ref_idx)];
  ------------------
  |  |   92|      0|#define X3(a)   (((a) << 1) + (a))
  ------------------
  302|      0|            }
  303|  45.4k|        }
  304|  78.7k|    }
  305|       |
  306|  78.7k|    s_temp_mv_pred.i1_ref_frame[0] = i1_ref_frame0;
  307|  78.7k|    s_temp_mv_pred.i1_ref_frame[1] = i1_ref_frame1;
  308|  78.7k|    s_temp_mv_pred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
  309|  78.7k|    s_temp_mv_pred.u1_pic_type = ps_ref_frame->u1_pic_type;
  310|       |
  311|       |    /**********************************************************************/
  312|       |    /* Call the function which gets the number of partitions and          */
  313|       |    /* partition info of colocated Mb                                     */
  314|       |    /**********************************************************************/
  315|       |
  316|  78.7k|    isvcd_one_to_one(ps_svc_lyr_dec, ps_dec->ps_col_pic, &s_mvdirect, u1_wd_x,
  317|  78.7k|                     ps_dec->i4_submb_ofst, ps_cur_mb_info);
  318|       |
  319|  78.7k|    ps_col_pic = ps_dec->ps_col_pic;
  320|  78.7k|    if((s_mvdirect.u1_col_zeroflag_change == 0) || u1_direct_zero_pred_flag)
  ------------------
  |  Branch (320:8): [True: 0, False: 78.7k]
  |  Branch (320:52): [True: 4.71k, False: 74.0k]
  ------------------
  321|  4.71k|    {
  322|  4.71k|        WORD16 i2_mv_x, i2_mv_y, i2_mvX1, i2_mvY1;
  323|       |        /* Most probable case */
  324|  4.71k|        u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag + s_mvdirect.i4_mv_indices[0]);
  325|  4.71k|        u1_col_zero_flag = u1_col_zero_flag & 0x01;
  326|       |
  327|  4.71k|        if(u1_zero_pred_cond_f || ((i1_ref_frame0 == 0) && (u1_col_zero_flag == 1)))
  ------------------
  |  Branch (327:12): [True: 4.71k, False: 0]
  |  Branch (327:36): [True: 0, False: 0]
  |  Branch (327:60): [True: 0, False: 0]
  ------------------
  328|  4.71k|        {
  329|  4.71k|            i2_mv_x = 0;
  330|  4.71k|            i2_mv_y = 0;
  331|  4.71k|        }
  332|      0|        else
  333|      0|        {
  334|      0|            i2_mv_x = i2_spat_pred_mv[0];
  335|      0|            i2_mv_y = i2_spat_pred_mv[1];
  336|      0|        }
  337|       |
  338|  4.71k|        if(u1_zero_pred_cond_b || ((i1_ref_frame1 == 0) && (u1_col_zero_flag == 1)))
  ------------------
  |  Branch (338:12): [True: 4.71k, False: 0]
  |  Branch (338:36): [True: 0, False: 0]
  |  Branch (338:60): [True: 0, False: 0]
  ------------------
  339|  4.71k|        {
  340|  4.71k|            i2_mvX1 = 0;
  341|  4.71k|            i2_mvY1 = 0;
  342|  4.71k|        }
  343|      0|        else
  344|      0|        {
  345|      0|            i2_mvX1 = i2_spat_pred_mv[2];
  346|      0|            i2_mvY1 = i2_spat_pred_mv[3];
  347|      0|        }
  348|       |
  349|  4.71k|        u4_sub_mb_num = ps_dec->u1_sub_mb_num;
  350|  4.71k|        u1_mb_partw = (u1_wd_x >> 2);
  351|       |
  352|  4.71k|        if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (352:12): [True: 4.71k, False: 0]
  ------------------
  353|  4.71k|        {
  354|  4.71k|            {
  355|  4.71k|                pred_info_pkd_t *ps_pred_pkd;
  356|  4.71k|                WORD16 i2_mv[2];
  357|  4.71k|                WORD8 i1_ref_idx = 0;
  358|       |
  359|  4.71k|                i2_mv[0] = i2_mv_x;
  360|  4.71k|                i2_mv[1] = i2_mv_y;
  361|       |
  362|  4.71k|                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  363|  4.71k|                ih264d_fill_pred_info(i2_mv, u1_mb_partw, u1_mb_partw, u4_sub_mb_num, i1_pred,
  364|  4.71k|                                      ps_pred_pkd, ps_pic_buff0->u1_pic_buf_id, i1_ref_idx,
  365|  4.71k|                                      pui32_weight_ofsts, ps_pic_buff0->u1_pic_type);
  366|  4.71k|                ps_dec->u4_pred_info_pkd_idx++;
  367|  4.71k|                ps_cur_mb_info->u1_num_pred_parts++;
  368|  4.71k|            }
  369|  4.71k|        }
  370|       |
  371|  4.71k|        if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (371:12): [True: 4.71k, False: 0]
  ------------------
  372|  4.71k|        {
  373|  4.71k|            {
  374|  4.71k|                pred_info_pkd_t *ps_pred_pkd;
  375|  4.71k|                WORD16 i2_mv[2];
  376|  4.71k|                WORD8 i1_ref_idx = 0;
  377|       |
  378|  4.71k|                i2_mv[0] = i2_mvX1;
  379|  4.71k|                i2_mv[1] = i2_mvY1;
  380|       |
  381|  4.71k|                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  382|  4.71k|                ih264d_fill_pred_info(i2_mv, u1_mb_partw, u1_mb_partw, u4_sub_mb_num, i1_pred,
  383|  4.71k|                                      ps_pred_pkd, ps_pic_buff1->u1_pic_buf_id, i1_ref_idx,
  384|  4.71k|                                      pui32_weight_ofsts, ps_pic_buff1->u1_pic_type);
  385|  4.71k|                ps_dec->u4_pred_info_pkd_idx++;
  386|  4.71k|                ps_cur_mb_info->u1_num_pred_parts++;
  387|  4.71k|            }
  388|  4.71k|        }
  389|       |
  390|       |        /* Replication optimisation */
  391|  4.71k|        s_temp_mv_pred.i2_mv[0] = i2_mv_x;
  392|  4.71k|        s_temp_mv_pred.i2_mv[1] = i2_mv_y;
  393|  4.71k|        s_temp_mv_pred.i2_mv[2] = i2_mvX1;
  394|  4.71k|        s_temp_mv_pred.i2_mv[3] = i2_mvY1;
  395|       |
  396|       |        /* Calculating colocated zero information */
  397|  4.71k|        {
  398|       |            /*************************************/
  399|       |            /* If(bit2 and bit3 set)             */
  400|       |            /* then                              */
  401|       |            /*  (bit0 and bit1) => submmbmode    */
  402|       |            /*  (bit2 and bit3) => mbmode        */
  403|       |            /* else                              */
  404|       |            /*  (bit0 and bit1) => mbmode        */
  405|       |            /*************************************/
  406|       |            /*UWORD8 u1_packed_mb_sub_mb_mode = sub_partition ?
  407|       |             (s_mvdirect.i1_partitionsize[0]) : ((s_mvdirect.i1_partitionsize[0]) <<
  408|       |             2);*/
  409|  4.71k|            UWORD8 u1_packed_mb_sub_mb_mode = (u1_mb_partw == 2) ? 0x03 : 0;
  ------------------
  |  Branch (409:47): [True: 184, False: 4.53k]
  ------------------
  410|       |
  411|  4.71k|            if(i1_ref_frame0 < 0)
  ------------------
  |  Branch (411:16): [True: 0, False: 4.71k]
  ------------------
  412|      0|            {
  413|      0|                i2_mv_x = i2_mvX1;
  414|      0|                i2_mv_y = i2_mvY1;
  415|      0|            }
  416|       |
  417|       |            /* Change from left shift 4 to 6 - Varun */
  418|  4.71k|            u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1) |
  419|  4.71k|                      ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1) && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|  4.71k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 0, False: 4.71k]
  |  |  ------------------
  ------------------
                                    ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1) && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|  4.71k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 0, False: 4.71k]
  |  |  ------------------
  ------------------
  |  Branch (419:24): [True: 4.71k, False: 0]
  |  Branch (419:51): [True: 4.71k, False: 0]
  |  Branch (419:74): [True: 4.71k, False: 0]
  ------------------
  420|  4.71k|            u1_colz |= (u1_packed_mb_sub_mb_mode << 6);
  421|  4.71k|        }
  422|  4.71k|        ps_mv = ps_mv_nmb_start + u4_sub_mb_num;
  423|  4.71k|        if(ps_mv)
  ------------------
  |  Branch (423:12): [True: 4.71k, False: 0]
  ------------------
  424|  4.71k|        {
  425|  4.71k|            ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u4_sub_mb_num, u1_colz, u1_mb_partw,
  426|  4.71k|                               u1_mb_partw);
  427|  4.71k|        }
  428|      0|        else
  429|      0|        {
  430|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  431|      0|        }
  432|       |
  433|  4.71k|        if(u1_wd_x == MB_SIZE) ps_dec->u1_currB_type = 0;
  ------------------
  |  |  554|  4.71k|#define MB_SIZE             16
  ------------------
  |  Branch (433:12): [True: 4.53k, False: 184]
  ------------------
  434|       |
  435|  4.71k|        return OK;
  ------------------
  |  |  114|  4.71k|#define OK        0
  ------------------
  436|  4.71k|    }
  437|       |
  438|       |    /***************************************************************************/
  439|       |    /* If present MB is 16x16 and the partition of colocated Mb is >= PRED_8x8 */
  440|       |    /* i.e 8x8 or less than 8x8 partitions then set up DMA for (0,0) and       */
  441|       |    /* spatially predicted motion vector and do the multiplexing after         */
  442|       |    /* motion compensation                                                     */
  443|       |    /***************************************************************************/
  444|       |
  445|  74.0k|    if((u1_wd_x == MB_SIZE) && (s_mvdirect.i1_num_partitions > 2))
  ------------------
  |  |  554|  74.0k|#define MB_SIZE             16
  ------------------
  |  Branch (445:8): [True: 70.0k, False: 4.04k]
  |  Branch (445:32): [True: 2.36k, False: 67.6k]
  ------------------
  446|  2.36k|    {
  447|  2.36k|        ps_cur_mb_info->u1_Mux = 1;
  448|  2.36k|        if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (448:12): [True: 2.15k, False: 207]
  ------------------
  449|  2.15k|        {
  450|  2.15k|            {
  451|  2.15k|                pred_info_pkd_t *ps_pred_pkd;
  452|  2.15k|                WORD8 i1_ref_idx = 0;
  453|       |
  454|  2.15k|                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  455|  2.15k|                ih264d_fill_pred_info(&(i2_spat_pred_mv[0]), 4, 4, 0, i1_pred, ps_pred_pkd,
  456|  2.15k|                                      ps_pic_buff0->u1_pic_buf_id, i1_ref_idx, pui32_weight_ofsts,
  457|  2.15k|                                      ps_pic_buff0->u1_pic_type);
  458|  2.15k|                ps_dec->u4_pred_info_pkd_idx++;
  459|  2.15k|                ps_cur_mb_info->u1_num_pred_parts++;
  460|  2.15k|            }
  461|       |
  462|       |            /******    (0,0) Motion vectors DMA     *****/
  463|  2.15k|            {
  464|  2.15k|                pred_info_pkd_t *ps_pred_pkd;
  465|  2.15k|                WORD16 i2_mv[2];
  466|  2.15k|                WORD8 i1_ref_idx = 0;
  467|       |
  468|  2.15k|                i2_mv[0] = 0;
  469|  2.15k|                i2_mv[1] = 0;
  470|       |
  471|  2.15k|                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  472|  2.15k|                ih264d_fill_pred_info(i2_mv, 4, 4, 0, i1_pred, ps_pred_pkd,
  473|  2.15k|                                      ps_pic_buff0->u1_pic_buf_id, i1_ref_idx, pui32_weight_ofsts,
  474|  2.15k|                                      ps_pic_buff0->u1_pic_type);
  475|  2.15k|                ps_dec->u4_pred_info_pkd_idx++;
  476|  2.15k|                ps_cur_mb_info->u1_num_pred_parts++;
  477|  2.15k|            }
  478|  2.15k|        }
  479|  2.36k|        if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (479:12): [True: 1.82k, False: 534]
  ------------------
  480|  1.82k|        {
  481|  1.82k|            {
  482|  1.82k|                pred_info_pkd_t *ps_pred_pkd;
  483|  1.82k|                WORD8 i1_ref_idx = 0;
  484|       |
  485|  1.82k|                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  486|  1.82k|                ih264d_fill_pred_info(&(i2_spat_pred_mv[2]), 4, 4, 0, i1_pred, ps_pred_pkd,
  487|  1.82k|                                      ps_pic_buff1->u1_pic_buf_id, i1_ref_idx, pui32_weight_ofsts,
  488|  1.82k|                                      ps_pic_buff1->u1_pic_type);
  489|  1.82k|                ps_dec->u4_pred_info_pkd_idx++;
  490|  1.82k|                ps_cur_mb_info->u1_num_pred_parts++;
  491|  1.82k|            }
  492|       |
  493|       |            /******    (0,0) Motion vectors DMA     *****/
  494|  1.82k|            {
  495|  1.82k|                pred_info_pkd_t *ps_pred_pkd;
  496|  1.82k|                WORD16 i2_mv[2];
  497|  1.82k|                WORD8 i1_ref_idx = 0;
  498|       |
  499|  1.82k|                i2_mv[0] = 0;
  500|  1.82k|                i2_mv[1] = 0;
  501|       |
  502|  1.82k|                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  503|  1.82k|                ih264d_fill_pred_info(i2_mv, 4, 4, 0, i1_pred, ps_pred_pkd,
  504|  1.82k|                                      ps_pic_buff1->u1_pic_buf_id, i1_ref_idx, pui32_weight_ofsts,
  505|  1.82k|                                      ps_pic_buff1->u1_pic_type);
  506|  1.82k|                ps_dec->u4_pred_info_pkd_idx++;
  507|  1.82k|                ps_cur_mb_info->u1_num_pred_parts++;
  508|  1.82k|            }
  509|  1.82k|        }
  510|  2.36k|    }
  511|       |
  512|   159k|    for(i = 0; i < s_mvdirect.i1_num_partitions; i++)
  ------------------
  |  Branch (512:16): [True: 85.6k, False: 74.0k]
  ------------------
  513|  85.6k|    {
  514|  85.6k|        partition_size = s_mvdirect.i1_partitionsize[i];
  515|  85.6k|        u4_sub_mb_num = s_mvdirect.i1_submb_num[i];
  516|       |
  517|  85.6k|        sub_partition = partition_size >> 2;
  518|  85.6k|        partition_size &= 0x3;
  519|  85.6k|        u1_mb_partw = pu1_mb_partw[partition_size];
  520|  85.6k|        u1_mb_parth = pu1_mb_parth[partition_size];
  521|  85.6k|        if(sub_partition != 0)
  ------------------
  |  Branch (521:12): [True: 13.6k, False: 71.9k]
  ------------------
  522|  13.6k|        {
  523|  13.6k|            u1_mb_partw >>= 1;
  524|  13.6k|            u1_mb_parth >>= 1;
  525|  13.6k|        }
  526|       |
  527|  85.6k|        u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag + s_mvdirect.i4_mv_indices[i]);
  528|  85.6k|        u1_col_zero_flag = u1_col_zero_flag & 0x01;
  529|       |
  530|       |        /*if(u1_col != u1_col_zero_flag)
  531|       |         u1_init = 1;*/
  532|       |
  533|  85.6k|        pi2_final_mv0 = &i2_spat_pred_mv[0];
  534|  85.6k|        pi2_final_mv1 = &i2_spat_pred_mv[2];
  535|       |
  536|  85.6k|        if(ps_cur_mb_info->u1_Mux != 1)
  ------------------
  |  Branch (536:12): [True: 74.0k, False: 11.6k]
  ------------------
  537|  74.0k|        {
  538|  74.0k|            if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (538:16): [True: 70.4k, False: 3.64k]
  ------------------
  539|  70.4k|            {
  540|  70.4k|                {
  541|  70.4k|                    pred_info_pkd_t *ps_pred_pkd;
  542|  70.4k|                    WORD8 i1_ref_idx = 0;
  543|       |
  544|  70.4k|                    ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  545|  70.4k|                    ih264d_fill_pred_info(pi2_final_mv0, u1_mb_partw, u1_mb_parth, u4_sub_mb_num,
  546|  70.4k|                                          i1_pred, ps_pred_pkd, ps_pic_buff0->u1_pic_buf_id,
  547|  70.4k|                                          i1_ref_idx, pui32_weight_ofsts,
  548|  70.4k|                                          ps_pic_buff0->u1_pic_type);
  549|  70.4k|                    ps_dec->u4_pred_info_pkd_idx++;
  550|  70.4k|                    ps_cur_mb_info->u1_num_pred_parts++;
  551|  70.4k|                }
  552|  70.4k|            }
  553|       |
  554|  74.0k|            if(i1_ref_frame1 >= 0)
  ------------------
  |  Branch (554:16): [True: 65.2k, False: 8.85k]
  ------------------
  555|  65.2k|            {
  556|  65.2k|                pred_info_pkd_t *ps_pred_pkd;
  557|  65.2k|                WORD8 i1_ref_idx = 0;
  558|       |
  559|  65.2k|                ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  560|  65.2k|                ih264d_fill_pred_info(pi2_final_mv1, u1_mb_partw, u1_mb_parth, u4_sub_mb_num,
  561|  65.2k|                                      i1_pred, ps_pred_pkd, ps_pic_buff1->u1_pic_buf_id, i1_ref_idx,
  562|  65.2k|                                      pui32_weight_ofsts, ps_pic_buff1->u1_pic_type);
  563|  65.2k|                ps_dec->u4_pred_info_pkd_idx++;
  564|  65.2k|                ps_cur_mb_info->u1_num_pred_parts++;
  565|  65.2k|            }
  566|  74.0k|        }
  567|       |
  568|       |        /* Replication optimisation */
  569|  85.6k|        s_temp_mv_pred.i2_mv[0] = pi2_final_mv0[0];
  570|  85.6k|        s_temp_mv_pred.i2_mv[1] = pi2_final_mv0[1];
  571|  85.6k|        s_temp_mv_pred.i2_mv[2] = pi2_final_mv1[0];
  572|  85.6k|        s_temp_mv_pred.i2_mv[3] = pi2_final_mv1[1];
  573|       |
  574|       |        /* Calculating colocated zero information */
  575|  85.6k|        {
  576|  85.6k|            WORD16 i2_mv_x = 0, i2_mv_y = 0;
  577|       |            /*************************************/
  578|       |            /* If(bit2 and bit3 set)             */
  579|       |            /* then                              */
  580|       |            /*  (bit0 and bit1) => submmbmode    */
  581|       |            /*  (bit2 and bit3) => mbmode        */
  582|       |            /* else                              */
  583|       |            /*  (bit0 and bit1) => mbmode        */
  584|       |            /*************************************/
  585|  85.6k|            UWORD8 u1_packed_mb_sub_mb_mode = sub_partition
  ------------------
  |  Branch (585:47): [True: 13.6k, False: 71.9k]
  ------------------
  586|  85.6k|                                                  ? (s_mvdirect.i1_partitionsize[i])
  587|  85.6k|                                                  : ((s_mvdirect.i1_partitionsize[i]) << 2);
  588|       |
  589|  85.6k|            if(i1_ref_frame0 >= 0)
  ------------------
  |  Branch (589:16): [True: 81.0k, False: 4.67k]
  ------------------
  590|  81.0k|            {
  591|  81.0k|                i2_mv_x = pi2_final_mv0[0];
  592|  81.0k|                i2_mv_y = pi2_final_mv0[1];
  593|  81.0k|            }
  594|  4.67k|            else
  595|  4.67k|            {
  596|  4.67k|                i2_mv_x = pi2_final_mv1[0];
  597|  4.67k|                i2_mv_y = pi2_final_mv1[1];
  598|  4.67k|            }
  599|       |
  600|  85.6k|            u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1) |
  601|  85.6k|                      ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1) && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|  84.0k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 5.14k, False: 78.9k]
  |  |  ------------------
  ------------------
                                    ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1) && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|  80.4k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 1.96k, False: 78.5k]
  |  |  ------------------
  ------------------
  |  Branch (601:24): [True: 84.0k, False: 1.58k]
  |  Branch (601:51): [True: 80.4k, False: 3.62k]
  |  Branch (601:74): [True: 76.1k, False: 4.32k]
  ------------------
  602|  85.6k|            u1_colz |= (u1_packed_mb_sub_mb_mode << 4);
  603|  85.6k|        }
  604|  85.6k|        ps_mv = ps_mv_nmb_start + u4_sub_mb_num;
  605|  85.6k|        if(ps_mv)
  ------------------
  |  Branch (605:12): [True: 85.6k, False: 0]
  ------------------
  606|  85.6k|        {
  607|  85.6k|            ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u4_sub_mb_num, u1_colz, u1_mb_parth,
  608|  85.6k|                               u1_mb_partw);
  609|  85.6k|        }
  610|      0|        else
  611|      0|        {
  612|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  613|      0|        }
  614|  85.6k|    }
  615|  74.0k|    i = 0;
  616|  74.0k|    if(i1_ref_frame0 >= 0) ps_cur_mb_info->u2_mask[i++] = ui2_mask_fwd;
  ------------------
  |  Branch (616:8): [True: 70.3k, False: 3.72k]
  ------------------
  617|  74.0k|    if(i1_ref_frame1 >= 0) ps_cur_mb_info->u2_mask[i] = ui2_mask_bwd;
  ------------------
  |  Branch (617:8): [True: 65.8k, False: 8.20k]
  ------------------
  618|       |
  619|  74.0k|    return OK;
  ------------------
  |  |  114|  74.0k|#define OK        0
  ------------------
  620|  74.0k|}

isvcd_retrive_infer_mode_mv:
   80|   291k|{
   81|   291k|    mode_motion_ctxt_t *ps_ctxt;
   82|   291k|    mv_pred_t *ps_motion_pred;
   83|   291k|    UWORD8 u1_tmp_lx = (u1_lx << 1);
   84|       |
   85|   291k|    ps_ctxt = (mode_motion_ctxt_t *) ps_svc_lyr_dec->pv_mode_mv_sample_ctxt;
   86|   291k|    ps_motion_pred = ps_ctxt->ps_motion_pred_struct;
   87|   291k|    ps_motion_pred += u1_sub_mb_num;
   88|   291k|    ps_mvpred->i2_mv[u1_tmp_lx] = ps_motion_pred->i2_mv[u1_tmp_lx];
   89|   291k|    ps_mvpred->i2_mv[u1_tmp_lx + 1] = ps_motion_pred->i2_mv[u1_tmp_lx + 1];
   90|       |
   91|   291k|    return;
   92|   291k|}
isvcd_interlyr_motion_mode_pred:
  109|  6.66M|{
  110|  6.66M|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  111|  6.66M|    WORD32 i4_inter_layer_pred_req_flag;
  112|  6.66M|    WORD32 i4_listx;
  113|  6.66M|    WORD32 i4_mb_mode = -1;
  114|  6.66M|    i4_inter_layer_pred_req_flag = SVCD_FALSE;
  ------------------
  |  |   45|  6.66M|#define SVCD_FALSE 0
  ------------------
  115|  6.66M|    i4_listx = (ps_dec->ps_cur_slice->u1_slice_type == B_SLICE) ? 2 : 1;
  ------------------
  |  |  369|  6.66M|#define B_SLICE  1
  ------------------
  |  Branch (115:16): [True: 139k, False: 6.52M]
  ------------------
  116|       |    /* check Base mode flag and motion predcition flags */
  117|  6.66M|    if(1 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (117:8): [True: 171k, False: 6.48M]
  ------------------
  118|   171k|    {
  119|   171k|        i4_inter_layer_pred_req_flag = SVCD_TRUE;
  ------------------
  |  |   46|   171k|#define SVCD_TRUE 1
  ------------------
  120|   171k|    }
  121|  6.48M|    else
  122|  6.48M|    {
  123|  6.48M|        UWORD8 u1_mot_pred_flag;
  124|       |
  125|       |        /* get the packed the motion pred flag of list 0 */
  126|  6.48M|        u1_mot_pred_flag = ps_svc_cur_mb_info->au1_motion_pred_flag[0];
  127|       |
  128|       |        /* extract the last 4 bits */
  129|  6.48M|        u1_mot_pred_flag &= 0x0F;
  130|       |
  131|  6.48M|        if(0 != u1_mot_pred_flag)
  ------------------
  |  Branch (131:12): [True: 22.8k, False: 6.46M]
  ------------------
  132|  22.8k|        {
  133|  22.8k|            i4_inter_layer_pred_req_flag = SVCD_TRUE;
  ------------------
  |  |   46|  22.8k|#define SVCD_TRUE 1
  ------------------
  134|  22.8k|        }
  135|       |
  136|       |        /* check for list 1 flags if required */
  137|  6.48M|        if((2 == i4_listx) && (SVCD_FALSE == i4_inter_layer_pred_req_flag))
  ------------------
  |  |   45|   100k|#define SVCD_FALSE 0
  ------------------
  |  Branch (137:12): [True: 100k, False: 6.38M]
  |  Branch (137:31): [True: 91.2k, False: 9.25k]
  ------------------
  138|  91.2k|        {
  139|       |            /* get the packed the motion pred flag of list 1 */
  140|  91.2k|            u1_mot_pred_flag = ps_svc_cur_mb_info->au1_motion_pred_flag[1];
  141|       |
  142|       |            /* extract the last 4 bits */
  143|  91.2k|            u1_mot_pred_flag &= 0x0F;
  144|       |
  145|  91.2k|            if(0 != u1_mot_pred_flag)
  ------------------
  |  Branch (145:16): [True: 4.67k, False: 86.5k]
  ------------------
  146|  4.67k|            {
  147|  4.67k|                i4_inter_layer_pred_req_flag = SVCD_TRUE;
  ------------------
  |  |   46|  4.67k|#define SVCD_TRUE 1
  ------------------
  148|  4.67k|            }
  149|  91.2k|        }
  150|  6.48M|    }
  151|       |
  152|  6.66M|    if(SVCD_TRUE == i4_inter_layer_pred_req_flag)
  ------------------
  |  |   46|  6.66M|#define SVCD_TRUE 1
  ------------------
  |  Branch (152:8): [True: 198k, False: 6.46M]
  ------------------
  153|   198k|    {
  154|   198k|        mode_motion_ctxt_t *ps_ctxt;
  155|   198k|        mode_motion_lyr_ctxt *ps_lyr_mem;
  156|       |
  157|   198k|        ps_ctxt = (mode_motion_ctxt_t *) ps_svc_lyr_dec->pv_mode_mv_sample_ctxt;
  158|       |        /* get the current layer ctxt */
  159|   198k|        ps_lyr_mem = &ps_ctxt->as_res_lyr_mem[ps_ctxt->i4_res_id];
  160|       |
  161|   198k|        {
  162|   198k|            ps_ctxt->i4_listx = i4_listx;
  163|       |
  164|   198k|            i4_mb_mode =
  165|   198k|                ps_lyr_mem->pf_inter_lyr_pred(ps_svc_lyr_dec->pv_mode_mv_sample_ctxt, ps_cur_mb_info,
  166|   198k|                                          ps_svc_cur_mb_info, ps_dec, ps_mb_part_info, ps_part);
  167|   198k|        }
  168|   198k|    }
  169|  6.66M|    return i4_mb_mode;
  170|  6.66M|}
isvcd_mv_pred_ref_tfr_nby2_epmb:
  182|  1.21M|{
  183|  1.21M|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) ps_dec;
  184|  1.21M|    parse_pmbarams_t *ps_mb_part_info;
  185|  1.21M|    parse_part_params_t *ps_part;
  186|  1.21M|    mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start;
  187|  1.21M|    UWORD32 i, j;
  188|  1.21M|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  189|  1.21M|    dec_mb_info_t *ps_cur_mb_info;
  190|  1.21M|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
  191|  1.21M|    WORD32 i2_mv_x, i2_mv_y;
  192|       |
  193|  1.21M|    ps_dec->i4_submb_ofst -= (u4_num_mbs - u4_mb_idx) << 4;
  194|  1.21M|    ps_mb_part_info = ps_dec->ps_parse_mb_data;
  195|  1.21M|    ps_part = ps_dec->ps_parse_part_params;
  196|       |
  197|       |    /* N/2 Mb MvPred and Transfer Setup Loop */
  198|  7.73M|    for(i = u4_mb_idx; i < u4_num_mbs; i++, ps_mb_part_info++)
  ------------------
  |  Branch (198:24): [True: 6.52M, False: 1.21M]
  ------------------
  199|  6.52M|    {
  200|  6.52M|        UWORD32 u1_colz;
  201|  6.52M|        UWORD32 u1_field;
  202|  6.52M|        mv_pred_t s_mvPred = {0};
  203|  6.52M|        mv_pred_t *ps_mv_pred = &s_mvPred;
  204|       |
  205|  6.52M|        *ps_mv_pred = ps_dec->s_default_mv_pred;
  206|       |
  207|  6.52M|        ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
  ------------------
  |  |  562|  6.52M|#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  6.52M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  |  |               #define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
  |  |  ------------------
  |  |  |  |  560|  6.52M|#define SUB_BLK_WIDTH                 4
  |  |  ------------------
  ------------------
  208|       |
  209|       |        /* Restore the slice scratch MbX and MbY context */
  210|  6.52M|        ps_cur_mb_info = ps_dec->ps_nmb_info + i;
  211|  6.52M|        ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + i;
  212|  6.52M|        u1_field = ps_cur_mb_info->u1_mb_field_decodingflag;
  213|       |
  214|  6.52M|        ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4);
  215|  6.52M|        ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
  216|  6.52M|        ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
  217|  6.52M|        ps_dec->u2_mv_2mb[i & 0x1] = 0;
  218|       |
  219|       |        /* Look for MV Prediction and Reference Transfer in Non-I Mbs */
  220|  6.52M|        if(!ps_mb_part_info->u4_isI_mb)
  ------------------
  |  Branch (220:12): [True: 6.52M, False: 3.03k]
  ------------------
  221|  6.52M|        {
  222|  6.52M|            UWORD32 u1_blk_no;
  223|  6.52M|            WORD32 i1_ref_idx, i1_ref_idx1;
  224|  6.52M|            UWORD32 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num;
  225|  6.52M|            UWORD32 u1_num_part, u1_num_ref, u1_wd, u1_ht;
  226|  6.52M|            UWORD32 *pu4_wt_offst, **ppu4_wt_ofst;
  227|  6.52M|            UWORD32 u1_scale_ref, u4_bot_mb;
  228|  6.52M|            WORD8 *pi1_ref_idx = ps_mb_part_info->i1_ref_idx[0];
  229|  6.52M|            pic_buffer_t *ps_ref_frame, **pps_ref_frame;
  230|  6.52M|            deblk_mb_t *ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i;
  231|  6.52M|            WORD32 i4_mb_mode_svc;
  232|  6.52M|            UWORD8 u1_motion_pred_flag_l0 = ps_svc_cur_mb_info->au1_motion_pred_flag[0];
  233|       |
  234|       |            /* MB Level initialisations */
  235|  6.52M|            ps_dec->u4_num_pmbair = i >> u1_mbaff;
  236|  6.52M|            ps_dec->u4_mb_idx_mv = i;
  237|  6.52M|            ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst;
  238|  6.52M|            pps_ref_frame = ps_dec->ps_ref_pic_buf_lx[0];
  239|       |
  240|  6.52M|            i4_mb_mode_svc = isvcd_interlyr_motion_mode_pred(
  241|  6.52M|                ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info, ps_mb_part_info, ps_part);
  242|       |
  243|  6.52M|            if((-1 == i4_mb_mode_svc) || (SVC_INTER_MB == i4_mb_mode_svc))
  ------------------
  |  |  114|   132k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (243:16): [True: 6.38M, False: 132k]
  |  Branch (243:42): [True: 112k, False: 20.1k]
  ------------------
  244|  6.50M|            {
  245|  6.50M|                ps_mv_ntop_start =
  246|  6.50M|                    ps_mv_nmb_start - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
  247|       |
  248|  6.50M|                u1_num_part = ps_mb_part_info->u1_num_part;
  249|  6.50M|                ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1;
  250|  6.50M|                ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx;
  251|  6.50M|                ps_cur_mb_info->u1_num_pred_parts = 0;
  252|       |
  253|       |                /****************************************************/
  254|       |                /* weighted u4_ofst pointer calculations, this loop  */
  255|       |                /* runs maximum 4 times, even in direct cases       */
  256|       |                /****************************************************/
  257|  6.50M|                u1_scale_ref = u1_mbaff & u1_field;
  258|       |
  259|  6.50M|                u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb;
  260|  6.50M|                if(ps_dec->ps_cur_pps->u1_wted_pred_flag)
  ------------------
  |  Branch (260:20): [True: 3.70M, False: 2.80M]
  ------------------
  261|  3.70M|                {
  262|  3.70M|                    u1_num_ref = MIN(u1_num_part, 4);
  ------------------
  |  |   61|  3.70M|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 3.69M, False: 4.48k]
  |  |  ------------------
  ------------------
  263|  7.41M|                    for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++)
  ------------------
  |  Branch (263:40): [True: 3.71M, False: 3.70M]
  ------------------
  264|  3.71M|                    {
  265|  3.71M|                        i1_ref_idx = pi1_ref_idx[u1_blk_no];
  266|  3.71M|                        if(u1_scale_ref) i1_ref_idx >>= 1;
  ------------------
  |  Branch (266:28): [True: 0, False: 3.71M]
  ------------------
  267|  3.71M|                        pu4_wt_offst = (UWORD32 *) &ps_dec->pu4_wt_ofsts[2 * X3(i1_ref_idx)];
  ------------------
  |  |   92|  3.71M|#define X3(a)   (((a) << 1) + (a))
  ------------------
  268|  3.71M|                        ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
  269|  3.71M|                    }
  270|  3.70M|                }
  271|  2.80M|                else
  272|  2.80M|                {
  273|  2.80M|                    ppu4_wt_ofst[0] = NULL;
  274|  2.80M|                    ppu4_wt_ofst[1] = NULL;
  275|  2.80M|                    ppu4_wt_ofst[2] = NULL;
  276|  2.80M|                    ppu4_wt_ofst[3] = NULL;
  277|  2.80M|                }
  278|       |
  279|       |                /**************************************************/
  280|       |                /* Loop on Partitions                             */
  281|       |                /**************************************************/
  282|  13.0M|                for(j = 0; j < u1_num_part; j++, ps_part++)
  ------------------
  |  Branch (282:28): [True: 6.59M, False: 6.50M]
  ------------------
  283|  6.59M|                {
  284|  6.59M|                    u1_sub_mb_num = ps_part->u1_sub_mb_num;
  285|  6.59M|                    ps_dec->u1_sub_mb_num = u1_sub_mb_num;
  286|       |
  287|  6.59M|                    if(PART_NOT_DIRECT != ps_part->u1_is_direct)
  ------------------
  |  |  570|  6.59M|#define PART_NOT_DIRECT                0
  ------------------
  |  Branch (287:24): [True: 6.35M, False: 240k]
  ------------------
  288|  6.35M|                    {
  289|       |                        /* Mb Skip Mode */
  290|       |                        /* Setting the default and other members of MvPred Structure */
  291|  6.35M|                        s_mvPred.i2_mv[2] = -1;
  292|  6.35M|                        s_mvPred.i2_mv[3] = -1;
  293|  6.35M|                        s_mvPred.i1_ref_frame[0] = 0;
  294|  6.35M|                        i1_ref_idx = (u1_scale_ref && u4_bot_mb) ? MAX_REF_BUFS : 0;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (294:39): [True: 0, False: 6.35M]
  |  Branch (294:55): [True: 0, False: 0]
  ------------------
  295|  6.35M|                        ps_ref_frame = pps_ref_frame[i1_ref_idx];
  296|  6.35M|                        s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
  297|  6.35M|                        s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
  298|  6.35M|                        pu4_wt_offst = (UWORD32 *) &ps_dec->pu4_wt_ofsts[0];
  299|       |
  300|  6.35M|                        ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb_start, ps_mv_ntop_start,
  301|  6.35M|                                          &s_mvPred, 0, 4, 0, 1, MB_SKIP);
  ------------------
  |  |   59|  6.35M|#define MB_SKIP 255
  ------------------
  302|       |
  303|  6.35M|                        {
  304|  6.35M|                            pred_info_pkd_t *ps_pred_pkd;
  305|  6.35M|                            ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  306|  6.35M|                            ih264d_fill_pred_info(s_mvPred.i2_mv, 4, 4, 0, PRED_L0, ps_pred_pkd,
  ------------------
  |  |  483|  6.35M|#define PRED_L0   1
  ------------------
  307|  6.35M|                                                  ps_ref_frame->u1_pic_buf_id,
  308|  6.35M|                                                  (i1_ref_idx >> u1_scale_ref), pu4_wt_offst,
  309|  6.35M|                                                  ps_ref_frame->u1_pic_type);
  310|       |
  311|  6.35M|                            ps_dec->u4_pred_info_pkd_idx++;
  312|  6.35M|                            ps_cur_mb_info->u1_num_pred_parts++;
  313|  6.35M|                        }
  314|       |
  315|       |                        /* Storing colocated zero information */
  316|  6.35M|                        u1_colz = ((ABS(s_mvPred.i2_mv[0]) <= 1) && (ABS(s_mvPred.i2_mv[1]) <= 1)) +
  ------------------
  |  |  100|  6.35M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 520, False: 6.35M]
  |  |  ------------------
  ------------------
                                      u1_colz = ((ABS(s_mvPred.i2_mv[0]) <= 1) && (ABS(s_mvPred.i2_mv[1]) <= 1)) +
  ------------------
  |  |  100|  6.35M|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 755, False: 6.35M]
  |  |  ------------------
  ------------------
  |  Branch (316:36): [True: 6.35M, False: 2.00k]
  |  Branch (316:69): [True: 6.35M, False: 706]
  ------------------
  317|  6.35M|                                  (u1_field << 1);
  318|       |
  319|  6.35M|                        if(ps_mv_nmb_start)
  ------------------
  |  Branch (319:28): [True: 6.35M, False: 0]
  ------------------
  320|  6.35M|                        {
  321|  6.35M|                            ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0, u1_colz, 4,
  322|  6.35M|                                               4);
  323|  6.35M|                        }
  324|      0|                        else
  325|      0|                        {
  326|      0|                            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  327|      0|                        }
  328|  6.35M|                    }
  329|   240k|                    else
  330|   240k|                    {
  331|   240k|                        u1_sub_mb_x = u1_sub_mb_num & 0x03;
  332|   240k|                        u1_sub_mb_y = u1_sub_mb_num >> 2;
  333|   240k|                        u1_blk_no = (u1_num_part < 4)
  ------------------
  |  Branch (333:37): [True: 143k, False: 96.3k]
  ------------------
  334|   240k|                                        ? j
  335|   240k|                                        : (((u1_sub_mb_y >> 1) << 1) + (u1_sub_mb_x >> 1));
  336|       |
  337|   240k|                        ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x;
  338|   240k|                        ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num;
  339|       |
  340|   240k|                        u1_wd = ps_part->u1_partwidth;
  341|   240k|                        u1_ht = ps_part->u1_partheight;
  342|       |
  343|       |                        /* Populate the colpic info and reference frames */
  344|   240k|                        i1_ref_idx = pi1_ref_idx[u1_blk_no];
  345|       |                        /********************************************************************/
  346|       |                        /* If reference index is inferred from the base layer and it is     */
  347|       |                        /* exceeding the number of active reference in the current layer.   */
  348|       |                        /* Then reference index is clipped to the max in the current layer  */
  349|       |                        /********************************************************************/
  350|   240k|                        if(ps_svc_cur_mb_info->u1_base_mode_flag == 1)
  ------------------
  |  Branch (350:28): [True: 170k, False: 69.6k]
  ------------------
  351|   170k|                        {
  352|   170k|                            if(i1_ref_idx > (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] - 1))
  ------------------
  |  Branch (352:32): [True: 5.59k, False: 164k]
  ------------------
  353|  5.59k|                            {
  354|  5.59k|                                i1_ref_idx = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] - 1;
  355|  5.59k|                            }
  356|   170k|                        }
  357|   240k|                        s_mvPred.i1_ref_frame[0] = i1_ref_idx;
  358|       |
  359|   240k|                        if((1 != ps_svc_cur_mb_info->u1_base_mode_flag) &&
  ------------------
  |  Branch (359:28): [True: 69.6k, False: 170k]
  ------------------
  360|  69.6k|                           (0 == (u1_motion_pred_flag_l0 & (1 << u1_blk_no))))
  ------------------
  |  Branch (360:28): [True: 45.9k, False: 23.6k]
  ------------------
  361|  45.9k|                        {
  362|       |                            /********************************************************/
  363|       |                            /* Predict Mv                                           */
  364|       |                            /* Add Mv Residuals and store back                      */
  365|       |                            /********************************************************/
  366|  45.9k|                            ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop,
  367|  45.9k|                                              &s_mvPred, u1_sub_mb_num, u1_wd, 0, 1,
  368|  45.9k|                                              ps_cur_mb_info->u1_mb_mc_mode);
  369|       |
  370|  45.9k|                            i2_mv_x = ps_mv_nmb->i2_mv[0];
  371|  45.9k|                            i2_mv_y = ps_mv_nmb->i2_mv[1];
  372|  45.9k|                            i2_mv_x += s_mvPred.i2_mv[0];
  373|  45.9k|                            i2_mv_y += s_mvPred.i2_mv[1];
  374|  45.9k|                            s_mvPred.i2_mv[0] = i2_mv_x;
  375|  45.9k|                            s_mvPred.i2_mv[1] = i2_mv_y;
  376|  45.9k|                        }
  377|   194k|                        else
  378|   194k|                        {
  379|   194k|                            isvcd_retrive_infer_mode_mv(ps_svc_lyr_dec, &s_mvPred, 0,
  380|   194k|                                                        u1_sub_mb_num);
  381|       |
  382|   194k|                            if(0 != (u1_motion_pred_flag_l0 & (1 << u1_blk_no)))
  ------------------
  |  Branch (382:32): [True: 23.6k, False: 170k]
  ------------------
  383|  23.6k|                            {
  384|  23.6k|                                i2_mv_x = ps_mv_nmb->i2_mv[0];
  385|  23.6k|                                i2_mv_y = ps_mv_nmb->i2_mv[1];
  386|  23.6k|                                i2_mv_x += s_mvPred.i2_mv[0];
  387|  23.6k|                                i2_mv_y += s_mvPred.i2_mv[1];
  388|  23.6k|                                s_mvPred.i2_mv[0] = i2_mv_x;
  389|  23.6k|                                s_mvPred.i2_mv[1] = i2_mv_y;
  390|  23.6k|                            }
  391|   194k|                            i2_mv_x = s_mvPred.i2_mv[0];
  392|   194k|                            i2_mv_y = s_mvPred.i2_mv[1];
  393|   194k|                        }
  394|       |                        /********************************************************/
  395|       |                        /* Transfer setup call                                  */
  396|       |                        /* convert RefIdx if it is MbAff                        */
  397|       |                        /* Pass Weight Offset and refFrame                      */
  398|       |                        /********************************************************/
  399|   240k|                        i1_ref_idx1 = i1_ref_idx >> u1_scale_ref;
  400|   240k|                        if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb))
  ------------------
  |  Branch (400:28): [True: 0, False: 240k]
  |  Branch (400:44): [True: 0, False: 0]
  ------------------
  401|      0|                            i1_ref_idx1 += MAX_REF_BUFS;
  ------------------
  |  |   75|      0|#define MAX_REF_BUFS    32
  ------------------
  402|   240k|                        if(-1 == i1_ref_idx1) return NOT_OK;
  ------------------
  |  |  116|    143|#define NOT_OK    -1
  ------------------
  |  Branch (402:28): [True: 143, False: 239k]
  ------------------
  403|   239k|                        ps_ref_frame = pps_ref_frame[i1_ref_idx1];
  404|   239k|                        pu4_wt_offst = ppu4_wt_ofst[u1_blk_no];
  405|       |
  406|   239k|                        {
  407|   239k|                            pred_info_pkd_t *ps_pred_pkd;
  408|   239k|                            ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
  409|   239k|                            ih264d_fill_pred_info(s_mvPred.i2_mv, u1_wd, u1_ht, u1_sub_mb_num,
  410|   239k|                                                  PRED_L0, ps_pred_pkd, ps_ref_frame->u1_pic_buf_id,
  ------------------
  |  |  483|   239k|#define PRED_L0   1
  ------------------
  411|   239k|                                                  (i1_ref_idx >> u1_scale_ref), pu4_wt_offst,
  412|   239k|                                                  ps_ref_frame->u1_pic_type);
  413|       |
  414|   239k|                            ps_dec->u4_pred_info_pkd_idx++;
  415|   239k|                            ps_cur_mb_info->u1_num_pred_parts++;
  416|   239k|                        }
  417|       |
  418|       |                        /* Fill colocated info in MvPred structure */
  419|   239k|                        s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
  420|   239k|                        s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
  421|       |
  422|       |                        /* Calculating colocated zero information */
  423|   239k|                        u1_colz = (u1_field << 1) |
  424|   239k|                                  ((i1_ref_idx == 0) && (ABS(i2_mv_x) <= 1) && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|   229k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 25.3k, False: 204k]
  |  |  ------------------
  ------------------
                                                ((i1_ref_idx == 0) && (ABS(i2_mv_x) <= 1) && (ABS(i2_mv_y) <= 1));
  ------------------
  |  |  100|   156k|#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
  |  |  ------------------
  |  |  |  Branch (100:26): [True: 16.5k, False: 140k]
  |  |  ------------------
  ------------------
  |  Branch (424:36): [True: 229k, False: 9.95k]
  |  Branch (424:57): [True: 156k, False: 73.0k]
  |  Branch (424:80): [True: 138k, False: 18.3k]
  ------------------
  425|   239k|                        u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no];
  426|       |
  427|       |                        /* Replicate the motion vectors and colzero u4_flag  */
  428|       |                        /* for all sub-partitions                         */
  429|       |
  430|   239k|                        if(ps_mv_nmb)
  ------------------
  |  Branch (430:28): [True: 239k, False: 0]
  ------------------
  431|   239k|                        {
  432|   239k|                            ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb, u1_sub_mb_num, u1_colz,
  433|   239k|                                               u1_ht, u1_wd);
  434|   239k|                        }
  435|      0|                        else
  436|      0|                        {
  437|      0|                            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  438|      0|                        }
  439|   239k|                    }
  440|  6.59M|                }
  441|       |
  442|       |                /* to take care of 16 parttitions increment for base mode flag case*/
  443|  6.50M|                if(1 == ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (443:20): [True: 112k, False: 6.38M]
  ------------------
  444|   112k|                {
  445|   112k|                    ps_part += (MAX_NUM_MB_PART - u1_num_part);
  ------------------
  |  |   62|   112k|#define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   59|   112k|#define NUM_MB_PARTS 4
  |  |  ------------------
  |  |               #define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   60|   112k|#define NUM_SUB_MB_PARTS 4
  |  |  ------------------
  ------------------
  446|   112k|                }
  447|  6.50M|            }
  448|  20.1k|            else
  449|  20.1k|            {
  450|  20.1k|                ps_cur_deblk_mb->u1_mb_type |= D_INTRA_IBL;
  ------------------
  |  |   72|  20.1k|#define D_INTRA_IBL 16
  ------------------
  451|  20.1k|                if((ps_svc_lyr_dec->u1_layer_identifier != TARGET_LAYER) &&
  ------------------
  |  |  110|  20.1k|#define TARGET_LAYER 2
  ------------------
  |  Branch (451:20): [True: 0, False: 20.1k]
  ------------------
  452|      0|                   (DBLK_ENABLED == ps_dec->ps_cur_slice->u1_disable_dblk_filter_idc))
  ------------------
  |  |  549|      0|#define DBLK_ENABLED                  0
  ------------------
  |  Branch (452:20): [True: 0, False: 0]
  ------------------
  453|      0|                {
  454|      0|                    ps_cur_deblk_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
  ------------------
  |  |   69|      0|#define MB_ENABLE_FILTERING           0x00
  ------------------
  455|      0|                }
  456|       |                /* to take care of 16 parttitions increment for base mode flag case*/
  457|  20.1k|                if(1 != ps_svc_cur_mb_info->u1_base_mode_flag)
  ------------------
  |  Branch (457:20): [True: 0, False: 20.1k]
  ------------------
  458|      0|                {
  459|      0|                    return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  460|      0|                }
  461|  20.1k|                {
  462|  20.1k|                    ps_part += (MAX_NUM_MB_PART);
  ------------------
  |  |   62|  20.1k|#define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   59|  20.1k|#define NUM_MB_PARTS 4
  |  |  ------------------
  |  |               #define MAX_NUM_MB_PART NUM_MB_PARTS *NUM_SUB_MB_PARTS
  |  |  ------------------
  |  |  |  |   60|  20.1k|#define NUM_SUB_MB_PARTS 4
  |  |  ------------------
  ------------------
  463|  20.1k|                }
  464|       |                /* Storing colocated zero information */
  465|  20.1k|                if(ps_mv_nmb_start)
  ------------------
  |  Branch (465:20): [True: 20.1k, False: 0]
  ------------------
  466|  20.1k|                {
  467|  20.1k|                    ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
  468|  20.1k|                                       (UWORD8) (u1_field << 1), 4, 4);
  469|  20.1k|                }
  470|      0|                else
  471|      0|                {
  472|      0|                    return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  473|      0|                }
  474|  20.1k|            }
  475|  6.52M|        }
  476|  3.03k|        else
  477|  3.03k|        {
  478|       |            /* Storing colocated zero information */
  479|  3.03k|            if(ps_mv_nmb_start)
  ------------------
  |  Branch (479:16): [True: 3.03k, False: 0]
  ------------------
  480|  3.03k|            {
  481|  3.03k|                ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0, (UWORD8) (u1_field << 1),
  482|  3.03k|                                   4, 4);
  483|  3.03k|            }
  484|      0|            else
  485|      0|            {
  486|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  487|      0|            }
  488|  3.03k|        }
  489|  6.52M|    }
  490|       |
  491|  1.21M|    return OK;
  ------------------
  |  |  114|  1.21M|#define OK        0
  ------------------
  492|  1.21M|}
isvcd_update_intra_mb_inter_layer_info:
  507|  44.1k|{
  508|  44.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
  509|  44.1k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
  510|  44.1k|        (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
  511|       |
  512|  44.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode = SVC_INTRA_MB;
  ------------------
  |  |  115|  44.1k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  513|  44.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size = ps_cur_mb_info->u1_tran_form8x8;
  514|  44.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = 0;
  515|  44.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz = 0;
  516|  44.1k|}
isvcd_update_ipcm_mb_inter_layer_info:
  532|    998|{
  533|    998|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode = SVC_IPCM_MB;
  ------------------
  |  |  116|    998|#define SVC_IPCM_MB (1 << 2)        /*!< IPCM_MB  decoder or inferred*/
  ------------------
  534|    998|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size = ps_cur_mb_info->u1_tran_form8x8;
  535|    998|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = 0;
  536|    998|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz = 0;
  537|    998|}
isvcd_update_ibl_mb_inter_layer_info:
  553|  76.1k|{
  554|  76.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode = SVC_IBL_MB;
  ------------------
  |  |  117|  76.1k|#define SVC_IBL_MB (1 << 3)         /*!< I_BL MB always inferred */
  ------------------
  555|  76.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size = ps_cur_mb_info->u1_tran_form8x8;
  556|  76.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = 0;
  557|  76.1k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz = 0;
  558|  76.1k|}
isvcd_update_inter_mb_inter_layer_info:
  573|   135k|{
  574|   135k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
  575|   135k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
  576|   135k|        (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
  577|   135k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode =
  578|   135k|        u1_inference_mode ? SVC_IBL_MB : SVC_INTER_MB;
  ------------------
  |  |  117|  76.1k|#define SVC_IBL_MB (1 << 3)         /*!< I_BL MB always inferred */
  ------------------
                      u1_inference_mode ? SVC_IBL_MB : SVC_INTER_MB;
  ------------------
  |  |  114|   194k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (578:9): [True: 76.1k, False: 59.2k]
  ------------------
  579|   135k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size = ps_cur_mb_info->u1_tran_form8x8;
  580|   135k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = ps_cur_mb_info->u2_luma_csbp;
  581|   135k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz =
  582|   135k|        (UWORD8) ps_cur_mb_info->u2_chroma_csbp;
  583|   135k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 1))
  ------------------
  |  |   54|   135k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 8.45k, False: 126k]
  |  |  ------------------
  ------------------
  584|  8.45k|    {
  585|       |        /* Four bits for Cb in DC only cbp */
  586|  8.45k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz |= 0x0F;
  587|  8.45k|    }
  588|   135k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 2))
  ------------------
  |  |   54|   135k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 8.52k, False: 126k]
  |  |  ------------------
  ------------------
  589|  8.52k|    {
  590|       |        /* Four bits for Cr in DC only cbp */
  591|  8.52k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz |= 0xF0;
  592|  8.52k|    }
  593|   135k|}
isvcd_process_inter_mb_no_rsd_pred_non_target:
  609|  79.7k|{
  610|  79.7k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  611|  79.7k|    UWORD16 u2_luma_stride, u2_chroma_stride;
  612|  79.7k|    WORD16 *pi2_y_coeff, *pi2_luma_res_ptr, *pi2_chroma_res_ptr;
  613|  79.7k|    UWORD32 u4_luma_dc_only_csbp = 0;
  614|  79.7k|    UWORD32 u4_luma_dc_only_cbp = 0;
  615|       |
  616|  79.7k|    if(0 != ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (616:8): [True: 0, False: 79.7k]
  ------------------
  617|      0|    {
  618|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  619|      0|    }
  620|  79.7k|    u2_luma_stride = ps_svc_lyr_dec->u2_residual_resample_luma_stride;
  621|  79.7k|    pi2_luma_res_ptr = ps_svc_lyr_dec->pi2_il_residual_resample_mb_luma_frm_start +
  622|  79.7k|                       (ps_cur_mb_info->u2_mbx << 4) +
  623|  79.7k|                       ((ps_cur_mb_info->u2_mby << 4) * u2_luma_stride);
  624|       |
  625|  79.7k|    u2_chroma_stride = ps_svc_lyr_dec->u2_residual_resample_chroma_stride;
  626|  79.7k|    pi2_chroma_res_ptr = ps_svc_lyr_dec->pi2_il_residual_resample_mb_chroma_frm_start +
  627|  79.7k|                         (ps_cur_mb_info->u2_mbx << 4) +
  628|  79.7k|                         ((ps_cur_mb_info->u2_mby << 3) * u2_chroma_stride);
  629|       |
  630|  79.7k|    if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (630:8): [True: 72.8k, False: 6.93k]
  ------------------
  631|  72.8k|    {
  632|  72.8k|        u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, ps_cur_mb_info, 0);
  633|  72.8k|    }
  634|  6.93k|    else
  635|  6.93k|    {
  636|  6.93k|        if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (636:12): [True: 778, False: 6.16k]
  ------------------
  637|    778|        {
  638|    778|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, ps_cur_mb_info, 0);
  639|    778|        }
  640|  6.16k|        else
  641|  6.16k|        {
  642|  6.16k|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec, ps_cur_mb_info);
  643|  6.16k|        }
  644|  6.93k|    }
  645|       |
  646|  79.7k|    pi2_y_coeff = ps_dec->pi2_coeff_data;
  647|       |    /* Inverse Transform and Reconstruction */
  648|  79.7k|    if(ps_cur_mb_info->u1_cbp & 0x0f)
  ------------------
  |  Branch (648:8): [True: 17.6k, False: 62.1k]
  ------------------
  649|  17.6k|    {
  650|  17.6k|        if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (650:12): [True: 10.6k, False: 6.93k]
  ------------------
  651|  10.6k|        {
  652|  10.6k|            UWORD32 i;
  653|  10.6k|            WORD16 ai2_tmp[16] = {0};
  654|   181k|            for(i = 0; i < 16; i++)
  ------------------
  |  Branch (654:24): [True: 170k, False: 10.6k]
  ------------------
  655|   170k|            {
  656|   170k|                if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i))
  ------------------
  |  |   54|   170k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 64.5k, False: 106k]
  |  |  ------------------
  ------------------
  657|  64.5k|                {
  658|  64.5k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
  659|  64.5k|                    WORD16 *pi2_out = pi2_luma_res_ptr + ((i & 0x3) * BLK_SIZE) +
  ------------------
  |  |  556|  64.5k|#define BLK_SIZE             4
  ------------------
  660|  64.5k|                                      (i >> 2) * (u2_luma_stride << 2);
  661|  64.5k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  64.5k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  662|  64.5k|                    {
  663|  64.5k|                        if(CHECKBIT(u4_luma_dc_only_csbp, i))
  ------------------
  |  |   54|  64.5k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 7.11k, False: 57.3k]
  |  |  ------------------
  ------------------
  664|  7.11k|                        {
  665|  7.11k|                            ps_svc_lyr_dec->pf_iquant_itrans_luma_4x4_dc(
  666|  7.11k|                                pi2_level, pi2_out, u2_luma_stride,
  667|  7.11k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
  668|  7.11k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[3],
  669|  7.11k|                                ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, NULL);
  670|  7.11k|                        }
  671|  57.3k|                        else
  672|  57.3k|                        {
  673|  57.3k|                            ps_svc_lyr_dec->pf_iquant_itrans_luma_4x4(
  674|  57.3k|                                pi2_level, pi2_out, u2_luma_stride,
  675|  57.3k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
  676|  57.3k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[3],
  677|  57.3k|                                ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, NULL);
  678|  57.3k|                        }
  679|  64.5k|                    }
  680|  64.5k|                }
  681|   170k|            }
  682|  10.6k|        }
  683|  6.93k|        else
  684|  6.93k|        {
  685|  6.93k|            WORD16 *pi2_scale_matrix_ptr;
  686|  6.93k|            WORD32 i;
  687|       |
  688|  6.93k|            pi2_scale_matrix_ptr = ps_dec->s_high_profile.i2_scalinglist8x8[1];
  689|       |
  690|  34.6k|            for(i = 0; i < 4; i++)
  ------------------
  |  Branch (690:24): [True: 27.7k, False: 6.93k]
  ------------------
  691|  27.7k|            {
  692|  27.7k|                WORD16 ai2_tmp[64] = {0};
  693|  27.7k|                WORD16 *pi16_levelBlock =
  694|  27.7k|                    pi2_y_coeff + (i << 6); /* move to the next 8x8 adding 64 */
  695|       |
  696|  27.7k|                WORD16 *pi2_out =
  697|  27.7k|                    pi2_luma_res_ptr + ((i & 0x1) * BLK8x8SIZE) + (i >> 1) * (u2_luma_stride << 3);
  ------------------
  |  |  555|  27.7k|#define BLK8x8SIZE          8
  ------------------
  698|  27.7k|                if(CHECKBIT(ps_cur_mb_info->u1_cbp, i))
  ------------------
  |  |   54|  27.7k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 15.8k, False: 11.8k]
  |  |  ------------------
  ------------------
  699|  15.8k|                {
  700|  15.8k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  15.8k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  701|  15.8k|                    {
  702|  15.8k|                        if(CHECKBIT(u4_luma_dc_only_cbp, i))
  ------------------
  |  |   54|  15.8k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 4.20k, False: 11.6k]
  |  |  ------------------
  ------------------
  703|  4.20k|                        {
  704|  4.20k|                            ps_svc_lyr_dec->pf_iquant_itrans_luma_8x8_dc(
  705|  4.20k|                                pi16_levelBlock, pi2_out, u2_luma_stride,
  706|  4.20k|                                gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
  707|  4.20k|                                (UWORD16 *) pi2_scale_matrix_ptr, ps_cur_mb_info->u1_qp_div6,
  708|  4.20k|                                ai2_tmp, 0, NULL);
  709|  4.20k|                        }
  710|  11.6k|                        else
  711|  11.6k|                        {
  712|  11.6k|                            ps_svc_lyr_dec->pf_iquant_itrans_luma_8x8(
  713|  11.6k|                                pi16_levelBlock, pi2_out, u2_luma_stride,
  714|  11.6k|                                gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
  715|  11.6k|                                (UWORD16 *) pi2_scale_matrix_ptr, ps_cur_mb_info->u1_qp_div6,
  716|  11.6k|                                ai2_tmp, 0, NULL);
  717|  11.6k|                        }
  718|  15.8k|                    }
  719|  15.8k|                }
  720|  27.7k|            }
  721|  6.93k|        }
  722|  17.6k|    }
  723|       |
  724|       |    /* Decode Chroma Block */
  725|  79.7k|    ih264d_unpack_chroma_coeff4x4_mb(ps_dec, ps_cur_mb_info);
  726|       |    /*--------------------------------------------------------------------*/
  727|       |    /* Chroma Blocks decoding                                             */
  728|       |    /*--------------------------------------------------------------------*/
  729|  79.7k|    {
  730|  79.7k|        UWORD8 u1_chroma_cbp = (UWORD8) (ps_cur_mb_info->u1_cbp >> 4);
  731|       |
  732|  79.7k|        if(u1_chroma_cbp != CBPC_ALLZERO)
  ------------------
  |  |  507|  79.7k|#define CBPC_ALLZERO    0
  ------------------
  |  Branch (732:12): [True: 8.60k, False: 71.1k]
  ------------------
  733|  8.60k|        {
  734|  8.60k|            UWORD32 u4_scale_u = ps_cur_mb_info->u1_qpc_div6;
  735|  8.60k|            UWORD32 u4_scale_v = ps_cur_mb_info->u1_qpcr_div6;
  736|  8.60k|            UWORD16 u2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp;
  737|       |
  738|  8.60k|            pi2_y_coeff = ps_dec->pi2_coeff_data;
  739|       |
  740|  8.60k|            {
  741|  8.60k|                UWORD32 i;
  742|  8.60k|                WORD16 ai2_tmp[16] = {0};
  743|  43.0k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (743:28): [True: 34.4k, False: 8.60k]
  ------------------
  744|  34.4k|                {
  745|  34.4k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
  746|  34.4k|                    WORD16 *pi2_out = pi2_chroma_res_ptr +
  747|  34.4k|                                      ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  556|  34.4k|#define BLK_SIZE             4
  ------------------
                                                    ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  119|  34.4k|#define YUV420SP_FACTOR 2
  ------------------
  748|  34.4k|                                      (i >> 1) * (u2_chroma_stride << 2);
  749|  34.4k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  34.4k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  750|  34.4k|                    {
  751|  34.4k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|  34.4k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 4.33k, False: 30.0k]
  |  |  ------------------
  ------------------
  752|  4.33k|                        {
  753|  4.33k|                            ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4(
  754|  4.33k|                                pi2_level, pi2_out, u2_chroma_stride,
  755|  4.33k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
  756|  4.33k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[4], u4_scale_u,
  757|  4.33k|                                ai2_tmp, pi2_level);
  758|  4.33k|                        }
  759|  30.0k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (759:33): [True: 13.2k, False: 16.8k]
  ------------------
  760|  13.2k|                        {
  761|  13.2k|                            ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4_dc(
  762|  13.2k|                                pi2_level, pi2_out, u2_chroma_stride,
  763|  13.2k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
  764|  13.2k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[4], u4_scale_u,
  765|  13.2k|                                ai2_tmp, pi2_level);
  766|  13.2k|                        }
  767|  34.4k|                    }
  768|  34.4k|                }
  769|  8.60k|            }
  770|       |
  771|  8.60k|            pi2_y_coeff += MB_CHROM_SIZE;
  ------------------
  |  |  564|  8.60k|#define MB_CHROM_SIZE                 64
  ------------------
  772|  8.60k|            u2_chroma_csbp >>= 4;
  773|       |
  774|  8.60k|            {
  775|  8.60k|                UWORD32 i;
  776|  8.60k|                WORD16 ai2_tmp[16] = {0};
  777|  43.0k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (777:28): [True: 34.4k, False: 8.60k]
  ------------------
  778|  34.4k|                {
  779|  34.4k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
  780|  34.4k|                    WORD16 *pi2_out = pi2_chroma_res_ptr + 1 +
  781|  34.4k|                                      ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  556|  34.4k|#define BLK_SIZE             4
  ------------------
                                                    ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  119|  34.4k|#define YUV420SP_FACTOR 2
  ------------------
  782|  34.4k|                                      (i >> 1) * (u2_chroma_stride << 2);
  783|  34.4k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  34.4k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
  784|  34.4k|                    {
  785|  34.4k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|  34.4k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 5.69k, False: 28.7k]
  |  |  ------------------
  ------------------
  786|  5.69k|                        {
  787|  5.69k|                            ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4(
  788|  5.69k|                                pi2_level, pi2_out, u2_chroma_stride,
  789|  5.69k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
  790|  5.69k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[5], u4_scale_v,
  791|  5.69k|                                ai2_tmp, pi2_level);
  792|  5.69k|                        }
  793|  28.7k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (793:33): [True: 11.6k, False: 17.0k]
  ------------------
  794|  11.6k|                        {
  795|  11.6k|                            ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4_dc(
  796|  11.6k|                                pi2_level, pi2_out, u2_chroma_stride,
  797|  11.6k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
  798|  11.6k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[5], u4_scale_v,
  799|  11.6k|                                ai2_tmp, pi2_level);
  800|  11.6k|                        }
  801|  34.4k|                    }
  802|  34.4k|                }
  803|  8.60k|            }
  804|  8.60k|        }
  805|  79.7k|    }
  806|       |
  807|  79.7k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
  808|  79.7k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
  809|  79.7k|        (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
  810|  79.7k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode =
  811|  79.7k|        u1_inference_mode ? SVC_IBL_MB : SVC_INTER_MB;
  ------------------
  |  |  117|      0|#define SVC_IBL_MB (1 << 3)         /*!< I_BL MB always inferred */
  ------------------
                      u1_inference_mode ? SVC_IBL_MB : SVC_INTER_MB;
  ------------------
  |  |  114|   159k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (811:9): [True: 0, False: 79.7k]
  ------------------
  812|  79.7k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size = ps_cur_mb_info->u1_tran_form8x8;
  813|  79.7k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = ps_cur_mb_info->u2_luma_csbp;
  814|  79.7k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz =
  815|  79.7k|        (UWORD8) ps_cur_mb_info->u2_chroma_csbp;
  816|  79.7k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 1))
  ------------------
  |  |   54|  79.7k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 4.43k, False: 75.3k]
  |  |  ------------------
  ------------------
  817|  4.43k|    {
  818|       |        /* Four bits for Cb in DC only cbp */
  819|  4.43k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz |= 0x0F;
  820|  4.43k|    }
  821|  79.7k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 2))
  ------------------
  |  |   54|  79.7k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 4.55k, False: 75.1k]
  |  |  ------------------
  ------------------
  822|  4.55k|    {
  823|       |        /* Four bits for Cr in DC only cbp */
  824|  4.55k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz |= 0xF0;
  825|  4.55k|    }
  826|  79.7k|    return OK;
  ------------------
  |  |  114|  79.7k|#define OK        0
  ------------------
  827|  79.7k|}
isvcd_process_ii_mb:
 1120|   131k|{
 1121|   131k|    res_prms_t *ps_res_prms;
 1122|   131k|    WORD32 i4_status;
 1123|   131k|    UWORD8 u1_ii_mb_mode = 0;
 1124|   131k|    mb_coord_t s_mb_coord = {0};
 1125|   131k|    mem_element_t s_ref_mb_mode = {0};
 1126|   131k|    svc_dec_lyr_struct_t *ps_svc_dec_ref_layer;
 1127|       |
 1128|   131k|    ps_svc_dec_ref_layer = ps_svc_lyr_dec->ps_dec_svc_ref_layer;
 1129|   131k|    ps_res_prms = &ps_svc_lyr_dec->s_res_prms;
 1130|   131k|    s_mb_coord.u2_mb_x = ps_cur_mb_info->u2_mbx;
 1131|   131k|    s_mb_coord.u2_mb_y = ps_cur_mb_info->u2_mby;
 1132|       |
 1133|       |    /* Restricted resolution change has significance only */
 1134|       |    /* at resolution change layer                         */
 1135|   131k|    if(SVCD_FALSE == ps_res_prms->u1_rstrct_res_change_flag)
  ------------------
  |  |   45|   131k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1135:8): [True: 131k, False: 0]
  ------------------
 1136|   131k|    {
 1137|   131k|        s_ref_mb_mode.pv_buffer = ps_svc_dec_ref_layer->ps_inter_lyr_mb_prms_frm_start;
 1138|   131k|        s_ref_mb_mode.i4_element_size = sizeof(inter_lyr_mb_prms_t);
 1139|   131k|        s_ref_mb_mode.i4_num_element_stride = ps_svc_dec_ref_layer->u2_inter_lyr_mb_prms_stride;
 1140|       |
 1141|   131k|        i4_status = isvcd_ii_pred_compute_flags_mb(ps_svc_lyr_dec->pv_ii_pred_ctxt, &s_ref_mb_mode,
 1142|   131k|                                                   &s_mb_coord, ps_cur_mb_info, ps_svc_cur_mb_info,
 1143|   131k|                                                   &u1_ii_mb_mode);
 1144|       |
 1145|   131k|        if(OK != i4_status)
  ------------------
  |  |  114|   131k|#define OK        0
  ------------------
  |  Branch (1145:12): [True: 0, False: 131k]
  ------------------
 1146|      0|        {
 1147|      0|            return i4_status;
 1148|      0|        }
 1149|   131k|    }
 1150|       |
 1151|   131k|    if(SVC_INTRA_INTER_MB == u1_ii_mb_mode)
  ------------------
  |  |  118|   131k|#define SVC_INTRA_INTER_MB (1 << 4) /*!< Intra Inter MB */
  ------------------
  |  Branch (1151:8): [True: 14.6k, False: 117k]
  ------------------
 1152|  14.6k|    {
 1153|  14.6k|        i4_status = isvcd_process_ibl_mb(ps_svc_lyr_dec, ps_cur_mb_info, u1_mb_num, 1);
 1154|  14.6k|        if(OK != i4_status)
  ------------------
  |  |  114|  14.6k|#define OK        0
  ------------------
  |  Branch (1154:12): [True: 0, False: 14.6k]
  ------------------
 1155|      0|        {
 1156|      0|            return i4_status;
 1157|      0|        }
 1158|  14.6k|        isvcd_ii_pred_mb(ps_svc_lyr_dec, ps_cur_mb_info);
 1159|  14.6k|    }
 1160|   131k|    return OK;
  ------------------
  |  |  114|   131k|#define OK        0
  ------------------
 1161|   131k|}
isvcd_decode_recon_tfr_nmb_non_base_lyr:
 1177|  40.8k|{
 1178|  40.8k|    WORD32 i, j;
 1179|  40.8k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1180|  40.8k|    UWORD32 u1_end_of_row_next;
 1181|  40.8k|    dec_mb_info_t *ps_cur_mb_info;
 1182|  40.8k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
 1183|  40.8k|    UWORD16 *pu2_res_luma_csbp;
 1184|  40.8k|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 1185|  40.8k|    const UWORD32 u1_slice_type = ps_dec->ps_cur_slice->u1_slice_type;
 1186|  40.8k|    const WORD32 u1_skip_th =
 1187|  40.8k|        ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  370|  40.8k|#define I_SLICE  2
  ------------------
                      ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  480|  12.7k|#define B_8x8    22
  ------------------
                      ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  454|  20.8k|#define PRED_8x8R0  4
  ------------------
  |  Branch (1187:10): [True: 33.5k, False: 7.29k]
  |  Branch (1187:40): [True: 12.7k, False: 20.8k]
  ------------------
 1188|  40.8k|    const UWORD32 u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? 23 : 5) : 0);
  ------------------
  |  |  370|  40.8k|#define I_SLICE  2
  ------------------
  |  Branch (1188:33): [True: 33.5k, False: 7.29k]
  |  Branch (1188:63): [True: 12.7k, False: 20.8k]
  ------------------
 1189|  40.8k|    WORD32 ret = OK;
  ------------------
  |  |  114|  40.8k|#define OK        0
  ------------------
 1190|       |
 1191|  40.8k|    if(!((0 == ps_svc_lyr_dec->u1_base_res_flag) ||
  ------------------
  |  Branch (1191:10): [True: 40.8k, False: 0]
  ------------------
 1192|      0|         ((1 == ps_svc_lyr_dec->u1_base_res_flag) &&
  ------------------
  |  Branch (1192:11): [True: 0, False: 0]
  ------------------
 1193|      0|          (1 == ps_svc_lyr_dec->ps_nal_svc_ext->u1_no_inter_layer_pred_flag))))
  ------------------
  |  Branch (1193:11): [True: 0, False: 0]
  ------------------
 1194|      0|    {
 1195|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1196|      0|    }
 1197|       |    /* N Mb MC Loop */
 1198|   181k|    for(i = u4_mb_idx; i < u4_num_mbs; i++)
  ------------------
  |  Branch (1198:24): [True: 140k, False: 40.8k]
  ------------------
 1199|   140k|    {
 1200|   140k|        ps_cur_mb_info = ps_dec->ps_nmb_info + i;
 1201|   140k|        ps_dec->u4_dma_buf_idx = 0;
 1202|   140k|        ps_dec->u4_pred_info_idx = 0;
 1203|       |
 1204|       |        /*Pointer assignment for Residual NNZ */
 1205|   140k|        pu2_res_luma_csbp = ps_svc_lyr_dec->pu2_frm_res_luma_csbp + ps_cur_mb_info->u2_mbx;
 1206|   140k|        pu2_res_luma_csbp += ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride;
 1207|       |
 1208|   140k|        if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (1208:12): [True: 64.9k, False: 76.0k]
  ------------------
 1209|  64.9k|        {
 1210|  64.9k|            {
 1211|  64.9k|                WORD32 pred_cnt = 0;
 1212|  64.9k|                pred_info_pkd_t *ps_pred_pkd;
 1213|  64.9k|                UWORD32 u4_pred_info_pkd_idx;
 1214|       |
 1215|  64.9k|                u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
 1216|       |
 1217|   178k|                while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (1217:23): [True: 113k, False: 64.9k]
  ------------------
 1218|   113k|                {
 1219|   113k|                    ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
 1220|       |
 1221|   113k|                    ps_dec->p_form_mb_part_info(ps_pred_pkd, ps_dec, ps_cur_mb_info->u2_mbx,
 1222|   113k|                                                ps_cur_mb_info->u2_mby, (i >> u1_mbaff),
 1223|   113k|                                                ps_cur_mb_info);
 1224|   113k|                    u4_pred_info_pkd_idx++;
 1225|   113k|                    pred_cnt++;
 1226|   113k|                }
 1227|  64.9k|            }
 1228|  64.9k|            if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|  64.9k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1228:16): [True: 64.9k, False: 0]
  ------------------
 1229|  64.9k|            {
 1230|  64.9k|                ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info);
 1231|  64.9k|            }
 1232|  64.9k|        }
 1233|  76.0k|        else if(ps_cur_mb_info->u1_mb_type == MB_SKIP)
  ------------------
  |  |   59|  76.0k|#define MB_SKIP 255
  ------------------
  |  Branch (1233:17): [True: 41.3k, False: 34.6k]
  ------------------
 1234|  41.3k|        {
 1235|  41.3k|            {
 1236|  41.3k|                WORD32 pred_cnt = 0;
 1237|  41.3k|                pred_info_pkd_t *ps_pred_pkd;
 1238|  41.3k|                UWORD32 u4_pred_info_pkd_idx;
 1239|       |
 1240|  41.3k|                u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
 1241|       |
 1242|   103k|                while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (1242:23): [True: 62.0k, False: 41.3k]
  ------------------
 1243|  62.0k|                {
 1244|  62.0k|                    ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
 1245|       |
 1246|  62.0k|                    ps_dec->p_form_mb_part_info(ps_pred_pkd, ps_dec, ps_cur_mb_info->u2_mbx,
 1247|  62.0k|                                                ps_cur_mb_info->u2_mby, (i >> u1_mbaff),
 1248|  62.0k|                                                ps_cur_mb_info);
 1249|       |
 1250|  62.0k|                    u4_pred_info_pkd_idx++;
 1251|  62.0k|                    pred_cnt++;
 1252|  62.0k|                }
 1253|  41.3k|            }
 1254|  41.3k|            if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|  41.3k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1254:16): [True: 41.3k, False: 0]
  ------------------
 1255|  41.3k|            {
 1256|       |                /* Decode MB skip */
 1257|  41.3k|                ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info);
 1258|  41.3k|            }
 1259|       |
 1260|  41.3k|            *pu2_res_luma_csbp = 0;
 1261|  41.3k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 1262|  41.3k|                ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
 1263|  41.3k|                (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
 1264|  41.3k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode = SVC_INTER_MB;
  ------------------
  |  |  114|  41.3k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 1265|  41.3k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size =
 1266|  41.3k|                ps_cur_mb_info->u1_tran_form8x8;
 1267|  41.3k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = 0;
 1268|  41.3k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz = 0;
 1269|  41.3k|        }
 1270|   140k|    }
 1271|       |
 1272|       |    /* N Mb IQ IT RECON  Loop */
 1273|   181k|    for(j = u4_mb_idx; j < i; j++)
  ------------------
  |  Branch (1273:24): [True: 140k, False: 40.8k]
  ------------------
 1274|   140k|    {
 1275|   140k|        ps_cur_mb_info = ps_dec->ps_nmb_info + j;
 1276|   140k|        ps_svc_cur_mb_info = ps_svc_lyr_dec->ps_svc_nmb_info + j;
 1277|       |
 1278|   140k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 1279|   140k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
 1280|   140k|            (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
 1281|       |
 1282|   140k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_slice_id = (WORD8) ps_dec->u2_cur_slice_num;
 1283|       |
 1284|       |        /*Pointer assignment for Residual NNZ */
 1285|   140k|        pu2_res_luma_csbp = ps_svc_lyr_dec->pu2_frm_res_luma_csbp + ps_cur_mb_info->u2_mbx;
 1286|   140k|        pu2_res_luma_csbp += ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride;
 1287|       |
 1288|   140k|        if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (1288:12): [True: 64.9k, False: 76.0k]
  ------------------
 1289|  64.9k|        {
 1290|  64.9k|            if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|  64.9k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1290:16): [True: 64.9k, False: 0]
  ------------------
 1291|  64.9k|            {
 1292|       |                /* inter intra pred generation */
 1293|  64.9k|                if(SVCD_FALSE == ps_svc_lyr_dec->u1_dyadic_flag)
  ------------------
  |  |   45|  64.9k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1293:20): [True: 30.0k, False: 34.9k]
  ------------------
 1294|  30.0k|                {
 1295|  30.0k|                    ret =
 1296|  30.0k|                        isvcd_process_ii_mb(ps_svc_lyr_dec, ps_cur_mb_info, ps_svc_cur_mb_info, j);
 1297|  30.0k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  30.0k|#define OK        0
  ------------------
  |  Branch (1297:24): [True: 0, False: 30.0k]
  ------------------
 1298|  30.0k|                }
 1299|  64.9k|                if(0 == ps_svc_cur_mb_info->u1_residual_prediction_flag)
  ------------------
  |  Branch (1299:20): [True: 23.8k, False: 41.0k]
  ------------------
 1300|  23.8k|                {
 1301|       |                    // IT + Recon
 1302|  23.8k|                    ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j);
 1303|  23.8k|                    isvcd_update_inter_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info, 0);
 1304|  23.8k|                    *pu2_res_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
 1305|  23.8k|                }
 1306|  41.0k|                else
 1307|  41.0k|                {
 1308|       |                    // IT + Residual + Recon
 1309|  41.0k|                    ret = isvcd_process_inter_mb_rsd_pred_target_lyr(ps_svc_lyr_dec, ps_cur_mb_info,
 1310|  41.0k|                                                                     j, 0, pu2_res_luma_csbp);
 1311|  41.0k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  41.0k|#define OK        0
  ------------------
  |  Branch (1311:24): [True: 0, False: 41.0k]
  ------------------
 1312|  41.0k|                }
 1313|  64.9k|            }
 1314|      0|            else if(ps_svc_lyr_dec->u1_layer_identifier == MEDIAL_ENHANCEMENT_LAYER)
  ------------------
  |  |  109|      0|#define MEDIAL_ENHANCEMENT_LAYER 1
  ------------------
  |  Branch (1314:21): [True: 0, False: 0]
  ------------------
 1315|      0|            {
 1316|      0|                if(0 == ps_svc_cur_mb_info->u1_residual_prediction_flag)
  ------------------
  |  Branch (1316:20): [True: 0, False: 0]
  ------------------
 1317|      0|                {
 1318|       |                    // IT : to be consumed by Target
 1319|      0|                    ret = isvcd_process_inter_mb_no_rsd_pred_non_target(ps_svc_lyr_dec,
 1320|      0|                                                                        ps_cur_mb_info, 0);
 1321|      0|                    *pu2_res_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
 1322|      0|                    if(ret != OK) return ret;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1322:24): [True: 0, False: 0]
  ------------------
 1323|      0|                }
 1324|      0|                else
 1325|      0|                {
 1326|       |                    // IT + Residual : to be consumed by target
 1327|      0|                    ret = isvcd_process_inter_mb_rsd_pred_non_target(ps_svc_lyr_dec, ps_cur_mb_info,
 1328|      0|                                                                     0, pu2_res_luma_csbp);
 1329|      0|                    if(ret != OK) return ret;
  ------------------
  |  |  114|      0|#define OK        0
  ------------------
  |  Branch (1329:24): [True: 0, False: 0]
  ------------------
 1330|      0|                }
 1331|      0|            }
 1332|      0|            else
 1333|      0|            {
 1334|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1335|      0|            }
 1336|  64.9k|        }
 1337|  76.0k|        else if((ps_cur_mb_info->u1_mb_type != MB_SKIP) && (ps_cur_mb_info->u1_mb_type != MB_INFER))
  ------------------
  |  |   59|  76.0k|#define MB_SKIP 255
  ------------------
                      else if((ps_cur_mb_info->u1_mb_type != MB_SKIP) && (ps_cur_mb_info->u1_mb_type != MB_INFER))
  ------------------
  |  |  112|  34.6k|#define MB_INFER 250
  ------------------
  |  Branch (1337:17): [True: 34.6k, False: 41.3k]
  |  Branch (1337:60): [True: 8.16k, False: 26.4k]
  ------------------
 1338|  8.16k|        {
 1339|  8.16k|            if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type)
  ------------------
  |  Branch (1339:16): [True: 8.00k, False: 152]
  ------------------
 1340|  8.00k|            {
 1341|  8.00k|                ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1);
 1342|  8.00k|                ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j);
 1343|  8.00k|                isvcd_update_intra_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
 1344|  8.00k|            }
 1345|    152|            else
 1346|    152|            {
 1347|    152|                isvcd_update_ipcm_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
 1348|    152|            }
 1349|  8.16k|            *pu2_res_luma_csbp = 0;
 1350|  8.16k|        }
 1351|  67.8k|        else if(ps_cur_mb_info->u1_mb_type == MB_INFER)
  ------------------
  |  |  112|  67.8k|#define MB_INFER 250
  ------------------
  |  Branch (1351:17): [True: 26.4k, False: 41.3k]
  ------------------
 1352|  26.4k|        {
 1353|       |            /* inter layer intra prediction : intra upsample, IQ, IT ,deblock */
 1354|       |            /* Intra resample for IBL mode */
 1355|  26.4k|            ret = isvcd_process_ibl_mb(ps_svc_lyr_dec, ps_cur_mb_info, j, 0);
 1356|  26.4k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  26.4k|#define OK        0
  ------------------
  |  Branch (1356:16): [True: 0, False: 26.4k]
  ------------------
 1357|  26.4k|            ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j);
 1358|  26.4k|            isvcd_update_inter_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info, 1);
 1359|  26.4k|            *pu2_res_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
 1360|       |
 1361|  26.4k|            ps_dec->pi1_left_pred_mode[0] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1362|  26.4k|            ps_dec->pi1_left_pred_mode[1] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1363|  26.4k|            ps_dec->pi1_left_pred_mode[2] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1364|  26.4k|            ps_dec->pi1_left_pred_mode[3] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1365|       |
 1366|  26.4k|            ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[0] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1367|  26.4k|            ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[1] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1368|  26.4k|            ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[2] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1369|  26.4k|            ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[3] = DC;
  ------------------
  |  |  431|  26.4k|#define DC      2
  ------------------
 1370|       |
 1371|  26.4k|            isvcd_update_ibl_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
 1372|  26.4k|        }
 1373|       |
 1374|   140k|        if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|   140k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1374:12): [True: 140k, False: 0]
  ------------------
 1375|   140k|        {
 1376|   140k|            if(ps_dec->u4_num_cores < 3)
  ------------------
  |  Branch (1376:16): [True: 140k, False: 0]
  ------------------
 1377|   140k|            {
 1378|   140k|                if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1378:20): [True: 140k, False: 0]
  ------------------
 1379|   140k|                    ps_svc_lyr_dec->pf_svc_compute_bs(ps_svc_lyr_dec, ps_cur_mb_info,
 1380|   140k|                                                      (UWORD16) (j >> u1_mbaff));
 1381|   140k|            }
 1382|   140k|        }
 1383|      0|        else if(ps_svc_lyr_dec->u1_layer_identifier == MEDIAL_ENHANCEMENT_LAYER)
  ------------------
  |  |  109|      0|#define MEDIAL_ENHANCEMENT_LAYER 1
  ------------------
  |  Branch (1383:17): [True: 0, False: 0]
  ------------------
 1384|      0|        {
 1385|      0|            if(ps_dec->u4_num_cores < 3)
  ------------------
  |  Branch (1385:16): [True: 0, False: 0]
  ------------------
 1386|      0|            {
 1387|      0|                if(ps_dec->u4_app_disable_deblk_frm == 0)
  ------------------
  |  Branch (1387:20): [True: 0, False: 0]
  ------------------
 1388|      0|                    ps_svc_lyr_dec->pf_svc_compute_bs(ps_svc_lyr_dec, ps_cur_mb_info,
 1389|      0|                                                      (UWORD16) (j >> u1_mbaff));
 1390|      0|            }
 1391|      0|        }
 1392|       |
 1393|   140k|        if(ps_dec->u4_use_intrapred_line_copy)
  ------------------
  |  Branch (1393:12): [True: 140k, False: 0]
  ------------------
 1394|   140k|        {
 1395|   140k|            ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j);
 1396|   140k|        }
 1397|   140k|    }
 1398|       |
 1399|       |    /*MB deblocking*/
 1400|  40.8k|    if(ps_dec->u4_nmb_deblk == 1)
  ------------------
  |  Branch (1400:8): [True: 40.8k, False: 0]
  ------------------
 1401|  40.8k|    {
 1402|  40.8k|        UWORD32 u4_wd_y, u4_wd_uv;
 1403|  40.8k|        tfr_ctxt_t *ps_tfr_cxt = &(ps_dec->s_tran_addrecon);
 1404|  40.8k|        UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
 1405|  40.8k|        const WORD32 i4_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
 1406|  40.8k|        const WORD32 i4_cr_qp_idx_ofst = ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
 1407|       |
 1408|  40.8k|        u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
 1409|  40.8k|        u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
 1410|       |
 1411|  40.8k|        ps_cur_mb_info = ps_dec->ps_nmb_info + u4_mb_idx;
 1412|       |
 1413|  40.8k|        ps_dec->u4_deblk_mb_x = ps_cur_mb_info->u2_mbx;
 1414|  40.8k|        ps_dec->u4_deblk_mb_y = ps_cur_mb_info->u2_mby;
 1415|       |
 1416|   181k|        for(j = u4_mb_idx; j < i; j++)
  ------------------
  |  Branch (1416:28): [True: 140k, False: 40.8k]
  ------------------
 1417|   140k|        {
 1418|   140k|            if(ps_dec->u4_cur_deblk_mb_num > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (1418:16): [True: 0, False: 140k]
  ------------------
 1419|      0|            {
 1420|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1421|      0|            }
 1422|   140k|            ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
 1423|   140k|                                       u4_wd_y, u4_wd_uv);
 1424|   140k|        }
 1425|  40.8k|    }
 1426|       |
 1427|  40.8k|    if(u4_tfr_n_mb)
  ------------------
  |  Branch (1427:8): [True: 40.8k, False: 0]
  ------------------
 1428|  40.8k|    {
 1429|       |        /****************************************************************/
 1430|       |        /* Check for End Of Row in Next iteration                       */
 1431|       |        /****************************************************************/
 1432|  40.8k|        u1_end_of_row_next =
 1433|  40.8k|            u4_num_mbs_next && (u4_num_mbs_next <= (ps_dec->u4_recon_mb_grp >> u1_mbaff));
  ------------------
  |  Branch (1433:13): [True: 1.66k, False: 39.1k]
  |  Branch (1433:32): [True: 1.66k, False: 0]
  ------------------
 1434|       |
 1435|       |        /****************************************************************/
 1436|       |        /* Transfer the Following things                                */
 1437|       |        /* N-Mb DeblkParams Data    ( To Ext DeblkParams Buffer )       */
 1438|       |        /* N-Mb Recon Data          ( To Ext Frame Buffer )             */
 1439|       |        /* N-Mb Intrapredline Data  ( Updated Internally)               */
 1440|       |        /* N-Mb MV Data             ( To Ext MV Buffer )                */
 1441|       |        /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers)    */
 1442|       |        /****************************************************************/
 1443|  40.8k|        ih264d_transfer_mb_group_data(ps_dec, u4_num_mbs, u4_end_of_row, u1_end_of_row_next);
 1444|  40.8k|        ps_dec->u4_num_mbs_prev_nmb = u4_num_mbs;
 1445|  40.8k|        ps_dec->u4_pred_info_idx = 0;
 1446|  40.8k|        ps_dec->u4_dma_buf_idx = 0;
 1447|  40.8k|    }
 1448|  40.8k|    return OK;
  ------------------
  |  |  114|  40.8k|#define OK        0
  ------------------
 1449|  40.8k|}
isvcd_decode_recon_tfr_nmb_base_lyr:
 1464|   257k|{
 1465|   257k|    WORD32 j;
 1466|   257k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1467|   257k|    UWORD32 u1_end_of_row_next;
 1468|   257k|    dec_mb_info_t *ps_cur_mb_info;
 1469|   257k|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 1470|   257k|    const UWORD32 u1_slice_type = ps_dec->ps_cur_slice->u1_slice_type;
 1471|   257k|    const WORD32 u1_skip_th =
 1472|   257k|        ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  370|   257k|#define I_SLICE  2
  ------------------
                      ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  480|  38.5k|#define B_8x8    22
  ------------------
                      ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  454|   216k|#define PRED_8x8R0  4
  ------------------
  |  Branch (1472:10): [True: 255k, False: 2.23k]
  |  Branch (1472:40): [True: 38.5k, False: 216k]
  ------------------
 1473|   257k|    const UWORD32 u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? 23 : 5) : 0);
  ------------------
  |  |  370|   257k|#define I_SLICE  2
  ------------------
  |  Branch (1473:33): [True: 255k, False: 2.23k]
  |  Branch (1473:63): [True: 38.5k, False: 216k]
  ------------------
 1474|   257k|    WORD32 ret = OK;
  ------------------
  |  |  114|   257k|#define OK        0
  ------------------
 1475|       |
 1476|   257k|    if(1 != ps_svc_lyr_dec->u1_base_res_flag)
  ------------------
  |  Branch (1476:8): [True: 0, False: 257k]
  ------------------
 1477|      0|    {
 1478|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1479|      0|    }
 1480|   257k|    if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|   257k|#define TARGET_LAYER 2
  ------------------
  |  Branch (1480:8): [True: 0, False: 257k]
  ------------------
 1481|      0|    {
 1482|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1483|      0|    }
 1484|       |
 1485|       |    /* N Mb IQ IT + Residual Store for Inter / + Recon for Intra Loop */
 1486|  1.35M|    for(j = u1_mb_idx; j < u4_num_mbs; j++)
  ------------------
  |  Branch (1486:24): [True: 1.10M, False: 257k]
  ------------------
 1487|  1.10M|    {
 1488|  1.10M|        ps_dec->u4_dma_buf_idx = 0;
 1489|  1.10M|        ps_dec->u4_pred_info_idx = 0;
 1490|  1.10M|        ps_cur_mb_info = ps_dec->ps_nmb_info + j;
 1491|       |
 1492|  1.10M|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 1493|  1.10M|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
 1494|  1.10M|            (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
 1495|       |
 1496|  1.10M|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_slice_id = (WORD8) ps_dec->u2_cur_slice_num;
 1497|       |
 1498|  1.10M|        if(ps_cur_mb_info->u1_mb_type == MB_SKIP)
  ------------------
  |  |   59|  1.10M|#define MB_SKIP 255
  ------------------
  |  Branch (1498:12): [True: 996k, False: 103k]
  ------------------
 1499|   996k|        {
 1500|   996k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode = SVC_INTER_MB;
  ------------------
  |  |  114|   996k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
 1501|   996k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size =
 1502|   996k|                ps_cur_mb_info->u1_tran_form8x8;
 1503|   996k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = 0;
 1504|   996k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz = 0;
 1505|   996k|        }
 1506|   103k|        else if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (1506:17): [True: 79.7k, False: 24.2k]
  ------------------
 1507|  79.7k|        {
 1508|       |            /* Only IT : Store Residual (WORD16) for Higher Layers : Base layer*/
 1509|  79.7k|            ret = isvcd_process_inter_mb_no_rsd_pred_non_target(ps_svc_lyr_dec, ps_cur_mb_info, 0);
 1510|  79.7k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  79.7k|#define OK        0
  ------------------
  |  Branch (1510:16): [True: 0, False: 79.7k]
  ------------------
 1511|  79.7k|        }
 1512|  24.2k|        else if(ps_cur_mb_info->u1_mb_type != MB_SKIP)
  ------------------
  |  |   59|  24.2k|#define MB_SKIP 255
  ------------------
  |  Branch (1512:17): [True: 24.2k, False: 0]
  ------------------
 1513|  24.2k|        {
 1514|  24.2k|            if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type)
  ------------------
  |  Branch (1514:16): [True: 23.5k, False: 701]
  ------------------
 1515|  23.5k|            {
 1516|  23.5k|                ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1);
 1517|  23.5k|                ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j);
 1518|  23.5k|                isvcd_update_intra_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
 1519|  23.5k|            }
 1520|    701|            else
 1521|    701|            {
 1522|    701|                isvcd_update_ipcm_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
 1523|    701|            }
 1524|  24.2k|        }
 1525|       |
 1526|  1.10M|        if(ps_dec->u4_use_intrapred_line_copy)
  ------------------
  |  Branch (1526:12): [True: 1.10M, False: 0]
  ------------------
 1527|  1.10M|        {
 1528|  1.10M|            ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j);
 1529|  1.10M|        }
 1530|  1.10M|    }
 1531|       |
 1532|       |    /*MB deblocking*/
 1533|   257k|    if(ps_dec->u4_nmb_deblk == 1)
  ------------------
  |  Branch (1533:8): [True: 257k, False: 0]
  ------------------
 1534|   257k|    {
 1535|   257k|        UWORD32 u4_wd_y, u4_wd_uv;
 1536|   257k|        tfr_ctxt_t *ps_tfr_cxt = &(ps_dec->s_tran_addrecon);
 1537|   257k|        UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
 1538|   257k|        const WORD32 i4_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
 1539|   257k|        const WORD32 i4_cr_qp_idx_ofst = ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
 1540|       |
 1541|   257k|        u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
 1542|   257k|        u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
 1543|       |
 1544|   257k|        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_mb_idx;
 1545|       |
 1546|   257k|        ps_dec->u4_deblk_mb_x = ps_cur_mb_info->u2_mbx;
 1547|   257k|        ps_dec->u4_deblk_mb_y = ps_cur_mb_info->u2_mby;
 1548|       |
 1549|  1.35M|        for(j = u1_mb_idx; j < u4_num_mbs; j++)
  ------------------
  |  Branch (1549:28): [True: 1.10M, False: 257k]
  ------------------
 1550|  1.10M|        {
 1551|       |            /* IN SVC base layers only intra MB's Need to be deblocked*/
 1552|  1.10M|            deblk_mb_t *ps_top_mb, *ps_left_mb, *ps_cur_mb;
 1553|  1.10M|            ps_cur_mb = ps_dec->ps_cur_deblk_mb;
 1554|  1.10M|            if(!(ps_cur_mb->u1_deblocking_mode & MB_DISABLE_FILTERING))
  ------------------
  |  |   70|  1.10M|#define MB_DISABLE_FILTERING          0x01
  ------------------
  |  Branch (1554:16): [True: 24.2k, False: 1.07M]
  ------------------
 1555|  24.2k|            {
 1556|  24.2k|                if(ps_dec->u4_deblk_mb_x)
  ------------------
  |  Branch (1556:20): [True: 13.2k, False: 10.9k]
  ------------------
 1557|  13.2k|                {
 1558|  13.2k|                    ps_left_mb = ps_cur_mb - 1;
 1559|  13.2k|                }
 1560|  10.9k|                else
 1561|  10.9k|                {
 1562|  10.9k|                    ps_left_mb = NULL;
 1563|  10.9k|                }
 1564|  24.2k|                if(ps_dec->u4_deblk_mb_y != 0)
  ------------------
  |  Branch (1564:20): [True: 17.7k, False: 6.49k]
  ------------------
 1565|  17.7k|                {
 1566|  17.7k|                    ps_top_mb = ps_cur_mb - (ps_dec->u2_frm_wd_in_mbs);
 1567|  17.7k|                }
 1568|  6.49k|                else
 1569|  6.49k|                {
 1570|  6.49k|                    ps_top_mb = NULL;
 1571|  6.49k|                }
 1572|       |
 1573|  24.2k|                if(ps_cur_mb->u1_deblocking_mode & MB_DISABLE_LEFT_EDGE) ps_left_mb = NULL;
  ------------------
  |  |   72|  24.2k|#define MB_DISABLE_LEFT_EDGE          0x04
  ------------------
  |  Branch (1573:20): [True: 1.08k, False: 23.1k]
  ------------------
 1574|  24.2k|                if(ps_cur_mb->u1_deblocking_mode & MB_DISABLE_TOP_EDGE) ps_top_mb = NULL;
  ------------------
  |  |   71|  24.2k|#define MB_DISABLE_TOP_EDGE           0x02
  ------------------
  |  Branch (1574:20): [True: 902, False: 23.3k]
  ------------------
 1575|       |
 1576|       |                /* Top Horizontal Edge*/
 1577|  24.2k|                if(NULL != ps_top_mb)
  ------------------
  |  Branch (1577:20): [True: 17.7k, False: 6.49k]
  ------------------
 1578|  17.7k|                {
 1579|  17.7k|                    if(!(ps_top_mb->u1_mb_type & D_INTRA_MB))
  ------------------
  |  |  382|  17.7k|#define D_INTRA_MB        1
  ------------------
  |  Branch (1579:24): [True: 11.9k, False: 5.75k]
  ------------------
 1580|  11.9k|                    {
 1581|  11.9k|                        ps_cur_mb->u4_bs_table[0] = 0;
 1582|  11.9k|                    }
 1583|  17.7k|                }
 1584|  6.49k|                else
 1585|  6.49k|                {
 1586|  6.49k|                    ps_cur_mb->u4_bs_table[0] = 0;
 1587|  6.49k|                }
 1588|       |
 1589|       |                /* Left Vertical Edge*/
 1590|  24.2k|                if(NULL != ps_left_mb)
  ------------------
  |  Branch (1590:20): [True: 13.2k, False: 10.9k]
  ------------------
 1591|  13.2k|                {
 1592|  13.2k|                    if(!(ps_left_mb->u1_mb_type & D_INTRA_MB))
  ------------------
  |  |  382|  13.2k|#define D_INTRA_MB        1
  ------------------
  |  Branch (1592:24): [True: 7.86k, False: 5.34k]
  ------------------
 1593|  7.86k|                    {
 1594|  7.86k|                        ps_cur_mb->u4_bs_table[4] = 0;
 1595|  7.86k|                    }
 1596|  13.2k|                }
 1597|  10.9k|                else
 1598|  10.9k|                {
 1599|  10.9k|                    ps_cur_mb->u4_bs_table[4] = 0;
 1600|  10.9k|                }
 1601|  24.2k|            }
 1602|       |
 1603|  1.10M|            if(ps_dec->u4_cur_deblk_mb_num > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (1603:16): [True: 0, False: 1.10M]
  ------------------
 1604|      0|            {
 1605|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1606|      0|            }
 1607|       |
 1608|  1.10M|            ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
 1609|  1.10M|                                       u4_wd_y, u4_wd_uv);
 1610|  1.10M|        }
 1611|   257k|    }
 1612|       |
 1613|   257k|    if(u4_tfr_n_mb)
  ------------------
  |  Branch (1613:8): [True: 257k, False: 0]
  ------------------
 1614|   257k|    {
 1615|       |        /****************************************************************/
 1616|       |        /* Check for End Of Row in Next iteration                       */
 1617|       |        /****************************************************************/
 1618|   257k|        u1_end_of_row_next =
 1619|   257k|            u4_num_mbs_next && (u4_num_mbs_next <= (ps_dec->u4_recon_mb_grp >> u1_mbaff));
  ------------------
  |  Branch (1619:13): [True: 15.5k, False: 242k]
  |  Branch (1619:32): [True: 15.5k, False: 0]
  ------------------
 1620|       |
 1621|       |        /****************************************************************/
 1622|       |        /* Transfer the Following things                                */
 1623|       |        /* N-Mb DeblkParams Data    ( To Ext DeblkParams Buffer )       */
 1624|       |        /* N-Mb Recon Data          ( To Ext Frame Buffer )             */
 1625|       |        /* N-Mb Intrapredline Data  ( Updated Internally)               */
 1626|       |        /* N-Mb MV Data             ( To Ext MV Buffer )                */
 1627|       |        /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers)    */
 1628|       |        /****************************************************************/
 1629|   257k|        ih264d_transfer_mb_group_data(ps_dec, u4_num_mbs, u4_end_of_row, u1_end_of_row_next);
 1630|   257k|        ps_dec->u4_num_mbs_prev_nmb = u4_num_mbs;
 1631|   257k|        ps_dec->u4_pred_info_idx = 0;
 1632|   257k|        ps_dec->u4_dma_buf_idx = 0;
 1633|   257k|    }
 1634|   257k|    return OK;
  ------------------
  |  |  114|   257k|#define OK        0
  ------------------
 1635|   257k|}
isvcd_process_ibl_mb:
 1650|  90.8k|{
 1651|  90.8k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1652|  90.8k|    intra_sampling_ctxt_t *ps_ctxt;
 1653|  90.8k|    svc_dec_lyr_struct_t *ps_svc_dec_ref_layer;
 1654|  90.8k|    pic_buffer_t *ps_frame_buf = ps_dec->ps_cur_pic;
 1655|  90.8k|    pic_buffer_t *ps_frame_buf_ref_layer;
 1656|  90.8k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
 1657|  90.8k|    mem_element_t s_ref_mb_mode = {0};
 1658|  90.8k|    mem_element_t s_inp_luma = {0};
 1659|  90.8k|    mem_element_t s_inp_chroma = {0};
 1660|  90.8k|    mem_element_t s_out_luma = {0};
 1661|  90.8k|    mem_element_t s_out_chroma = {0};
 1662|  90.8k|    WORD32 i4_ref_x_luma, i4_ref_y_luma, i4_luma_incr = 0;
 1663|  90.8k|    WORD32 i4_ref_x_chroma, i4_ref_y_chroma, i4_chroma_incr = 0;
 1664|  90.8k|    UWORD32 u4_cur_y_stride, u4_cur_uv_stride;
 1665|  90.8k|    UWORD32 u4_ref_y_stride, u4_ref_uv_stride;
 1666|  90.8k|    WORD32 i4_ref_luma_instra_sample_correction_offset = 0;
 1667|  90.8k|    WORD32 i4_ref_chroma_instra_sample_correction_offset = 0;
 1668|  90.8k|    ref_mb_map_t *ps_x_off_len_luma;
 1669|  90.8k|    ref_mb_map_t *ps_y_off_len_luma;
 1670|  90.8k|    ref_mb_map_t *ps_x_off_len_chroma;
 1671|  90.8k|    ref_mb_map_t *ps_y_off_len_chroma;
 1672|  90.8k|    mb_coord_t s_mb_coord = {0};
 1673|  90.8k|    WORD32 ret = OK;
  ------------------
  |  |  114|  90.8k|#define OK        0
  ------------------
 1674|  90.8k|    UNUSED(u4_mb_num);
  ------------------
  |  |   45|  90.8k|#define UNUSED(x) ((void)(x))
  ------------------
 1675|       |
 1676|  90.8k|    ps_ctxt = (intra_sampling_ctxt_t *) ps_svc_lyr_dec->pv_intra_sample_ctxt;
 1677|  90.8k|    ps_svc_dec_ref_layer = ps_svc_lyr_dec->ps_dec_svc_ref_layer;
 1678|       |
 1679|  90.8k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 1680|  90.8k|    u4_cur_y_stride = ps_dec->u2_frm_wd_y;
 1681|  90.8k|    u4_cur_uv_stride = ps_dec->u2_frm_wd_uv;
 1682|  90.8k|    u4_ref_y_stride = ps_svc_dec_ref_layer->s_dec.u2_frm_wd_y;
 1683|  90.8k|    u4_ref_uv_stride = ps_svc_dec_ref_layer->s_dec.u2_frm_wd_uv;
 1684|       |
 1685|  90.8k|    ps_frame_buf_ref_layer = ps_svc_dec_ref_layer->s_dec.ps_cur_pic;
 1686|  90.8k|    if(0 == u1_inter_intra_mode)
  ------------------
  |  Branch (1686:8): [True: 76.1k, False: 14.6k]
  ------------------
 1687|  76.1k|    {
 1688|  76.1k|        s_out_luma.pv_buffer = ps_frame_buf->pu1_buf1 + (ps_cur_mb_info->u2_mbx << 4) +
 1689|  76.1k|                               (u4_cur_y_stride * (ps_cur_mb_info->u2_mby << 4));
 1690|  76.1k|        s_out_luma.i4_element_size = 1;
 1691|  76.1k|        s_out_luma.i4_num_element_stride = u4_cur_y_stride;
 1692|       |
 1693|  76.1k|        s_out_chroma.pv_buffer = ps_frame_buf->pu1_buf2 +
 1694|  76.1k|                                 (ps_cur_mb_info->u2_mbx << 3) * YUV420SP_FACTOR +
  ------------------
  |  |  119|  76.1k|#define YUV420SP_FACTOR 2
  ------------------
 1695|  76.1k|                                 (u4_cur_uv_stride * (ps_cur_mb_info->u2_mby << 3));
 1696|  76.1k|        s_out_chroma.i4_element_size = 1;
 1697|  76.1k|        s_out_chroma.i4_num_element_stride = u4_cur_uv_stride;
 1698|  76.1k|    }
 1699|  14.6k|    else
 1700|  14.6k|    {
 1701|  14.6k|        if(SVCD_TRUE == ps_svc_lyr_dec->s_res_prms.u1_dyadic_flag)
  ------------------
  |  |   46|  14.6k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1701:12): [True: 0, False: 14.6k]
  ------------------
 1702|      0|        {
 1703|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1704|      0|        }
 1705|       |
 1706|  14.6k|        s_out_luma.pv_buffer = ps_svc_lyr_dec->pu1_ii_resamp_buffer_luma;
 1707|  14.6k|        s_out_luma.i4_element_size = 1;
 1708|  14.6k|        s_out_luma.i4_num_element_stride = MB_SIZE;
  ------------------
  |  |  554|  14.6k|#define MB_SIZE             16
  ------------------
 1709|       |
 1710|  14.6k|        s_out_chroma.pv_buffer = ps_svc_lyr_dec->pu1_ii_resamp_buffer_chroma;
 1711|  14.6k|        s_out_chroma.i4_element_size = 1;
 1712|  14.6k|        s_out_chroma.i4_num_element_stride = MB_SIZE;
  ------------------
  |  |  554|  14.6k|#define MB_SIZE             16
  ------------------
 1713|  14.6k|    }
 1714|       |
 1715|       |    /* get the projected locations buffer pointers */
 1716|  90.8k|    {
 1717|  90.8k|        intra_samp_map_ctxt_t *ps_luma_map, *ps_chroma_map;
 1718|       |
 1719|  90.8k|        ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1720|  90.8k|        ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1721|       |
 1722|  90.8k|        ps_x_off_len_luma = ps_luma_map->ps_x_offset_length;
 1723|  90.8k|        ps_y_off_len_luma = ps_luma_map->ps_y_offset_length;
 1724|  90.8k|        ps_x_off_len_chroma = ps_chroma_map->ps_x_offset_length;
 1725|  90.8k|        ps_y_off_len_chroma = ps_chroma_map->ps_y_offset_length;
 1726|  90.8k|    }
 1727|  90.8k|    i4_ref_x_luma = ps_svc_lyr_dec->ps_intsam_luma_map_horz[ps_cur_mb_info->u2_mbx].i2_offset;
 1728|  90.8k|    i4_ref_y_luma = ps_svc_lyr_dec->ps_intsam_luma_map_vert[ps_cur_mb_info->u2_mby].i2_offset;
 1729|       |
 1730|  90.8k|    i4_luma_incr = ps_x_off_len_luma[ps_cur_mb_info->u2_mbx].i2_offset - i4_ref_x_luma;
 1731|  90.8k|    i4_luma_incr +=
 1732|  90.8k|        (ps_y_off_len_luma[ps_cur_mb_info->u2_mby].i2_offset - i4_ref_y_luma) * u4_ref_y_stride;
 1733|       |
 1734|  90.8k|    i4_ref_x_chroma = ps_svc_lyr_dec->ps_intsam_chroma_map_horz[ps_cur_mb_info->u2_mbx].i2_offset;
 1735|  90.8k|    i4_ref_y_chroma = ps_svc_lyr_dec->ps_intsam_chroma_map_vert[ps_cur_mb_info->u2_mby].i2_offset;
 1736|       |
 1737|  90.8k|    i4_chroma_incr = ps_x_off_len_chroma[ps_cur_mb_info->u2_mbx].i2_offset - i4_ref_x_chroma;
 1738|  90.8k|    i4_chroma_incr <<= 1;
 1739|  90.8k|    i4_chroma_incr += (ps_y_off_len_chroma[ps_cur_mb_info->u2_mby].i2_offset - i4_ref_y_chroma) *
 1740|  90.8k|                      u4_ref_uv_stride;
 1741|  90.8k|    if(SVCD_FALSE == ps_svc_lyr_dec->s_res_prms.u1_dyadic_flag)
  ------------------
  |  |   45|  90.8k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1741:8): [True: 37.8k, False: 52.9k]
  ------------------
 1742|  37.8k|    {
 1743|  37.8k|        i4_ref_x_luma = CLIP3(0, (ps_svc_dec_ref_layer->s_dec.u2_frm_wd_y - 1), i4_ref_x_luma);
  ------------------
  |  |   77|  37.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 11.0k, False: 26.8k]
  |  |  |  Branch (77:54): [True: 0, False: 26.8k]
  |  |  ------------------
  ------------------
 1744|  37.8k|        i4_ref_y_luma = CLIP3(0, (ps_svc_dec_ref_layer->s_dec.u2_frm_ht_y - 1), i4_ref_y_luma);
  ------------------
  |  |   77|  37.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 4.67k, False: 33.1k]
  |  |  |  Branch (77:54): [True: 0, False: 33.1k]
  |  |  ------------------
  ------------------
 1745|  37.8k|    }
 1746|       |
 1747|  90.8k|    i4_ref_luma_instra_sample_correction_offset =
 1748|  90.8k|        i4_ref_x_luma + (i4_ref_y_luma) * (WORD32) u4_ref_y_stride;
 1749|       |
 1750|  90.8k|    s_inp_luma.pv_buffer = ps_frame_buf_ref_layer->pu1_buf1 + i4_luma_incr +
 1751|  90.8k|                           i4_ref_luma_instra_sample_correction_offset;
 1752|  90.8k|    s_inp_luma.i4_element_size = 1;
 1753|  90.8k|    s_inp_luma.i4_num_element_stride = u4_ref_y_stride;
 1754|       |
 1755|  90.8k|    if(SVCD_FALSE == ps_svc_lyr_dec->s_res_prms.u1_dyadic_flag)
  ------------------
  |  |   45|  90.8k|#define SVCD_FALSE 0
  ------------------
  |  Branch (1755:8): [True: 37.8k, False: 52.9k]
  ------------------
 1756|  37.8k|    {
 1757|  37.8k|        i4_ref_x_chroma = CLIP3(0, (ps_svc_dec_ref_layer->s_dec.u2_frm_wd_uv - 1), i4_ref_x_chroma);
  ------------------
  |  |   77|  37.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 26.0k, False: 11.7k]
  |  |  |  Branch (77:54): [True: 0, False: 11.7k]
  |  |  ------------------
  ------------------
 1758|  37.8k|        i4_ref_y_chroma = CLIP3(0, (ps_svc_dec_ref_layer->s_dec.u2_frm_ht_uv - 1), i4_ref_y_chroma);
  ------------------
  |  |   77|  37.8k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 10.4k, False: 27.3k]
  |  |  |  Branch (77:54): [True: 0, False: 27.3k]
  |  |  ------------------
  ------------------
 1759|  37.8k|    }
 1760|  90.8k|    i4_ref_chroma_instra_sample_correction_offset =
 1761|  90.8k|        (i4_ref_x_chroma << 1) + (i4_ref_y_chroma) * (WORD32) u4_ref_uv_stride;
 1762|       |
 1763|  90.8k|    s_inp_chroma.pv_buffer = ps_frame_buf_ref_layer->pu1_buf2 + i4_chroma_incr +
 1764|  90.8k|                             i4_ref_chroma_instra_sample_correction_offset;
 1765|  90.8k|    s_inp_chroma.i4_element_size = 1;
 1766|  90.8k|    s_inp_chroma.i4_num_element_stride = u4_ref_uv_stride;
 1767|       |
 1768|  90.8k|    s_ref_mb_mode.pv_buffer = ps_svc_dec_ref_layer->ps_inter_lyr_mb_prms_frm_start;
 1769|  90.8k|    s_ref_mb_mode.i4_element_size = sizeof(inter_lyr_mb_prms_t);
 1770|  90.8k|    s_ref_mb_mode.i4_num_element_stride = ps_svc_dec_ref_layer->u2_inter_lyr_mb_prms_stride;
 1771|       |
 1772|  90.8k|    s_mb_coord.u2_mb_x = ps_cur_mb_info->u2_mbx;
 1773|  90.8k|    s_mb_coord.u2_mb_y = ps_cur_mb_info->u2_mby;
 1774|       |
 1775|  90.8k|    if(SVCD_TRUE == ps_svc_lyr_dec->s_res_prms.u1_dyadic_flag)
  ------------------
  |  |   46|  90.8k|#define SVCD_TRUE 1
  ------------------
  |  Branch (1775:8): [True: 52.9k, False: 37.8k]
  ------------------
 1776|  52.9k|    {
 1777|  52.9k|        ret = isvcd_intra_resamp_mb_dyadic(ps_ctxt, &s_inp_luma, &s_inp_chroma, &s_ref_mb_mode,
 1778|  52.9k|                                           &s_out_luma, &s_out_chroma, &s_mb_coord, ps_svc_lyr_dec);
 1779|  52.9k|    }
 1780|  37.8k|    else
 1781|  37.8k|    {
 1782|  37.8k|        ret = isvcd_intra_resamp_mb(ps_ctxt, &s_inp_luma, &s_inp_chroma, &s_ref_mb_mode,
 1783|  37.8k|                                    &s_out_luma, &s_out_chroma, &s_mb_coord);
 1784|  37.8k|    }
 1785|  90.8k|    if(OK != ret) return ret;
  ------------------
  |  |  114|  90.8k|#define OK        0
  ------------------
  |  Branch (1785:8): [True: 0, False: 90.8k]
  ------------------
 1786|  90.8k|    return OK;
  ------------------
  |  |  114|  90.8k|#define OK        0
  ------------------
 1787|  90.8k|}
isvcd_process_residual_resample_mb:
 1802|   157k|{
 1803|   157k|    residual_sampling_ctxt_t *ps_ctxt;
 1804|   157k|    svc_dec_lyr_struct_t *ps_svc_dec_ref_layer;
 1805|   157k|    res_lyr_ctxt *ps_lyr_ctxt;
 1806|   157k|    mem_element_t s_ref_mb_mode = {0};
 1807|   157k|    mem_element_t s_inp_luma = {0};
 1808|   157k|    mem_element_t s_inp_chroma = {0};
 1809|   157k|    mem_element_t s_out_luma = {0};
 1810|   157k|    mem_element_t s_out_chroma = {0};
 1811|       |
 1812|       |    /* projected locations pointer */
 1813|   157k|    ref_mb_map_t *ps_x_off_len_luma;
 1814|   157k|    ref_mb_map_t *ps_y_off_len_luma;
 1815|   157k|    ref_mb_map_t *ps_x_off_len_chroma;
 1816|   157k|    ref_mb_map_t *ps_y_off_len_chroma;
 1817|   157k|    WORD32 i4_ref_x_luma, i4_ref_y_luma;
 1818|   157k|    WORD32 i4_ref_x_chroma, i4_ref_y_chroma;
 1819|   157k|    WORD32 i4_ref_luma_ressam_correction_offset = 0;
 1820|   157k|    WORD32 i4_ref_chroma_ressam_correction_offset = 0;
 1821|   157k|    WORD32 i4_inp_luma_stride, i4_inp_chroma_stride;
 1822|   157k|    WORD32 i4_out_luma_stride, i4_out_chroma_stride;
 1823|   157k|    WORD32 i4_inp_luma_offset = 0, i4_inp_chroma_offset = 0;
 1824|   157k|    WORD32 ret;
 1825|       |
 1826|   157k|    ps_svc_dec_ref_layer = ps_svc_lyr_dec->ps_dec_svc_ref_layer;
 1827|   157k|    ps_ctxt = (residual_sampling_ctxt_t *) ps_svc_lyr_dec->pv_residual_sample_ctxt;
 1828|   157k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 1829|       |
 1830|   157k|    i4_inp_luma_stride = ps_svc_dec_ref_layer->u2_residual_resample_luma_stride;
 1831|   157k|    i4_inp_chroma_stride = ps_svc_dec_ref_layer->u2_residual_resample_chroma_stride;
 1832|   157k|    i4_out_luma_stride = ps_svc_lyr_dec->u2_residual_resample_luma_stride;
 1833|   157k|    i4_out_chroma_stride = ps_svc_lyr_dec->u2_residual_resample_chroma_stride;
 1834|       |
 1835|   157k|    {
 1836|   157k|        residual_samp_map_ctxt_t *ps_luma_map, *ps_chroma_map;
 1837|       |
 1838|   157k|        ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1839|   157k|        ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1840|   157k|        ps_x_off_len_luma = ps_luma_map->ps_x_offset_length;
 1841|   157k|        ps_y_off_len_luma = ps_luma_map->ps_y_offset_length;
 1842|   157k|        ps_x_off_len_chroma = ps_chroma_map->ps_x_offset_length;
 1843|   157k|        ps_y_off_len_chroma = ps_chroma_map->ps_y_offset_length;
 1844|   157k|    }
 1845|   157k|    i4_ref_x_luma = ps_svc_lyr_dec->ps_ressam_luma_map_horz[ps_cur_mb_info->u2_mbx].i2_offset;
 1846|   157k|    i4_ref_y_luma = ps_svc_lyr_dec->ps_ressam_luma_map_vert[ps_cur_mb_info->u2_mby].i2_offset;
 1847|       |
 1848|   157k|    i4_ref_x_chroma = ps_svc_lyr_dec->ps_ressam_chroma_map_horz[ps_cur_mb_info->u2_mbx].i2_offset;
 1849|   157k|    i4_ref_y_chroma = ps_svc_lyr_dec->ps_ressam_chroma_map_vert[ps_cur_mb_info->u2_mby].i2_offset;
 1850|       |
 1851|   157k|    i4_ref_x_luma = CLIP3(0, (ps_lyr_ctxt->i4_ref_width - 1), i4_ref_x_luma);
  ------------------
  |  |   77|   157k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 48.5k, False: 108k]
  |  |  |  Branch (77:54): [True: 0, False: 108k]
  |  |  ------------------
  ------------------
 1852|   157k|    i4_ref_y_luma = CLIP3(0, (ps_lyr_ctxt->i4_ref_height - 1), i4_ref_y_luma);
  ------------------
  |  |   77|   157k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 22.9k, False: 134k]
  |  |  |  Branch (77:54): [True: 0, False: 134k]
  |  |  ------------------
  ------------------
 1853|   157k|    i4_ref_x_chroma = CLIP3(0, ((ps_lyr_ctxt->i4_ref_width >> 1) - 1), i4_ref_x_chroma);
  ------------------
  |  |   77|   157k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 48.5k, False: 108k]
  |  |  |  Branch (77:54): [True: 0, False: 108k]
  |  |  ------------------
  ------------------
 1854|   157k|    i4_ref_y_chroma = CLIP3(0, ((ps_lyr_ctxt->i4_ref_height >> 1) - 1), i4_ref_y_chroma);
  ------------------
  |  |   77|   157k|#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
  |  |  ------------------
  |  |  |  Branch (77:31): [True: 22.9k, False: 134k]
  |  |  |  Branch (77:54): [True: 0, False: 134k]
  |  |  ------------------
  ------------------
 1855|       |
 1856|   157k|    {
 1857|   157k|        WORD32 i4_offset_x, i4_offset_y;
 1858|       |
 1859|   157k|        i4_offset_x = ps_x_off_len_luma[ps_cur_mb_info->u2_mbx].i2_offset;
 1860|   157k|        i4_offset_y = ps_y_off_len_luma[ps_cur_mb_info->u2_mby].i2_offset;
 1861|       |
 1862|       |        /* check for offsets inside frame dimensions */
 1863|   157k|        if(0 <= i4_offset_x)
  ------------------
  |  Branch (1863:12): [True: 108k, False: 48.5k]
  ------------------
 1864|   108k|        {
 1865|       |            /* validity of pointer passed */
 1866|   108k|            if(!(i4_offset_x >= i4_ref_x_luma))
  ------------------
  |  Branch (1866:16): [True: 0, False: 108k]
  ------------------
 1867|      0|            {
 1868|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1869|      0|            }
 1870|   108k|            i4_inp_luma_offset += (i4_offset_x - i4_ref_x_luma);
 1871|   108k|        }
 1872|       |
 1873|   157k|        if(0 <= i4_offset_y)
  ------------------
  |  Branch (1873:12): [True: 134k, False: 22.9k]
  ------------------
 1874|   134k|        {
 1875|       |            /* validity of pointer passed */
 1876|   134k|            if(!(i4_offset_y >= i4_ref_y_luma))
  ------------------
  |  Branch (1876:16): [True: 0, False: 134k]
  ------------------
 1877|      0|            {
 1878|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1879|      0|            }
 1880|   134k|            i4_inp_luma_offset += (i4_offset_y - i4_ref_y_luma) * i4_inp_luma_stride;
 1881|   134k|        }
 1882|       |
 1883|   157k|        i4_offset_x = ps_x_off_len_chroma[ps_cur_mb_info->u2_mbx].i2_offset;
 1884|   157k|        i4_offset_y = ps_y_off_len_chroma[ps_cur_mb_info->u2_mby].i2_offset;
 1885|       |
 1886|       |        /* check for offsets inside frame dimensions */
 1887|   157k|        if(0 <= i4_offset_x)
  ------------------
  |  Branch (1887:12): [True: 108k, False: 48.5k]
  ------------------
 1888|   108k|        {
 1889|       |            /* validity of pointer passed */
 1890|   108k|            if(!(i4_offset_x >= i4_ref_x_chroma))
  ------------------
  |  Branch (1890:16): [True: 0, False: 108k]
  ------------------
 1891|      0|            {
 1892|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1893|      0|            }
 1894|   108k|            i4_inp_chroma_offset += (i4_offset_x - i4_ref_x_chroma) << 1;
 1895|   108k|        }
 1896|       |
 1897|   157k|        if(0 <= i4_offset_y)
  ------------------
  |  Branch (1897:12): [True: 134k, False: 22.9k]
  ------------------
 1898|   134k|        {
 1899|       |            /* validity of pointer passed */
 1900|   134k|            if(!(i4_offset_y >= i4_ref_y_chroma))
  ------------------
  |  Branch (1900:16): [True: 0, False: 134k]
  ------------------
 1901|      0|            {
 1902|      0|                return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1903|      0|            }
 1904|   134k|            i4_inp_chroma_offset += (i4_offset_y - i4_ref_y_chroma) * (i4_inp_chroma_stride << 1);
 1905|   134k|        }
 1906|   157k|    }
 1907|       |
 1908|   157k|    i4_ref_luma_ressam_correction_offset = i4_ref_x_luma + (i4_ref_y_luma) *i4_inp_luma_stride;
 1909|       |
 1910|   157k|    s_inp_luma.pv_buffer = ps_svc_dec_ref_layer->pi2_il_residual_resample_mb_luma_frm_start +
 1911|   157k|                           i4_inp_luma_offset + i4_ref_luma_ressam_correction_offset;
 1912|   157k|    s_inp_luma.i4_element_size = 1;
 1913|   157k|    s_inp_luma.i4_num_element_stride = i4_inp_luma_stride;
 1914|       |
 1915|   157k|    i4_ref_chroma_ressam_correction_offset =
 1916|   157k|        (i4_ref_x_chroma << 1) + (i4_ref_y_chroma) *i4_inp_chroma_stride;
 1917|       |
 1918|   157k|    s_inp_chroma.pv_buffer = ps_svc_dec_ref_layer->pi2_il_residual_resample_mb_chroma_frm_start +
 1919|   157k|                             i4_inp_chroma_offset + i4_ref_chroma_ressam_correction_offset;
 1920|   157k|    s_inp_chroma.i4_element_size = 1;
 1921|   157k|    s_inp_chroma.i4_num_element_stride = i4_inp_luma_stride;
 1922|       |
 1923|   157k|    s_ref_mb_mode.pv_buffer = ps_svc_dec_ref_layer->ps_inter_lyr_mb_prms_frm_start;
 1924|   157k|    s_ref_mb_mode.i4_element_size = sizeof(inter_lyr_mb_prms_t);
 1925|   157k|    s_ref_mb_mode.i4_num_element_stride = ps_svc_dec_ref_layer->u2_inter_lyr_mb_prms_stride;
 1926|       |
 1927|   157k|    s_out_luma.i4_element_size = 1;
 1928|   157k|    s_out_luma.pv_buffer =
 1929|   157k|        ps_svc_lyr_dec->pi2_il_residual_resample_mb_luma_frm_start +
 1930|   157k|        ((ps_cur_mb_info->u2_mbx << 4) +
 1931|   157k|         (i4_out_luma_stride * (ps_cur_mb_info->u2_mby << 4)) * s_out_luma.i4_element_size);
 1932|       |
 1933|   157k|    s_out_luma.i4_num_element_stride = i4_out_luma_stride;
 1934|       |
 1935|   157k|    s_out_chroma.i4_element_size = 1;
 1936|   157k|    s_out_chroma.pv_buffer =
 1937|   157k|        ps_svc_lyr_dec->pi2_il_residual_resample_mb_chroma_frm_start +
 1938|   157k|        ((ps_cur_mb_info->u2_mbx << 4) +
 1939|   157k|         (i4_out_chroma_stride * (ps_cur_mb_info->u2_mby << 3)) * s_out_chroma.i4_element_size);
 1940|   157k|    s_out_chroma.i4_num_element_stride = i4_out_chroma_stride;
 1941|       |
 1942|   157k|    ret = ps_lyr_ctxt->pf_residual_samp_mb(ps_ctxt, &s_inp_luma, &s_inp_chroma, &s_ref_mb_mode,
 1943|   157k|                                           &s_out_luma, &s_out_chroma, ps_cur_mb_info->u2_mbx,
 1944|   157k|                                           ps_cur_mb_info->u2_mby);
 1945|   157k|    if(ret != OK)
  ------------------
  |  |  114|   157k|#define OK        0
  ------------------
  |  Branch (1945:8): [True: 0, False: 157k]
  ------------------
 1946|      0|    {
 1947|      0|        return ret;
 1948|      0|    }
 1949|   157k|    return OK;
  ------------------
  |  |  114|   157k|#define OK        0
  ------------------
 1950|   157k|}
isvcd_process_inter_mb_rsd_pred_target_lyr:
 1968|   157k|{
 1969|   157k|    UWORD8 *pu1_rec_y, *pu1_rec_u;
 1970|   157k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 1971|   157k|    UWORD32 ui_rec_width, u4_recwidth_cr;
 1972|   157k|    UWORD16 u2_luma_stride, u2_chroma_stride;
 1973|   157k|    WORD16 *pi2_y_coeff, *pi2_luma_res_ptr, *pi2_chroma_res_ptr;
 1974|   157k|    UWORD32 u1_mb_field_decoding_flag;
 1975|   157k|    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
 1976|   157k|    UWORD32 uc_botMb;
 1977|   157k|    UWORD32 u4_num_pmbair;
 1978|   157k|    tfr_ctxt_t *ps_frame_buf = ps_dec->ps_frame_buf_ip_recon;
 1979|   157k|    UWORD32 u4_luma_dc_only_csbp = 0;
 1980|   157k|    UWORD32 u4_luma_dc_only_cbp = 0;
 1981|   157k|    UWORD16 u2_res_luma_csbp = 0;
 1982|   157k|    WORD32 ret;
 1983|       |
 1984|   157k|    if(0 != ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
  ------------------
  |  Branch (1984:8): [True: 0, False: 157k]
  ------------------
 1985|      0|    {
 1986|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1987|      0|    }
 1988|   157k|    uc_botMb = 1 - ps_cur_mb_info->u1_topmb;
 1989|   157k|    u4_num_pmbair = (u4_mb_num >> u1_mbaff);
 1990|   157k|    u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
 1991|       |
 1992|   157k|    pu1_rec_y = ps_frame_buf->pu1_dest_y + (u4_num_pmbair << 4);
 1993|   157k|    pu1_rec_u = ps_frame_buf->pu1_dest_u + (u4_num_pmbair << 3) * YUV420SP_FACTOR;
  ------------------
  |  |  119|   157k|#define YUV420SP_FACTOR 2
  ------------------
 1994|   157k|    ui_rec_width = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
 1995|   157k|    u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
 1996|       |
 1997|   157k|    u2_luma_stride = ps_svc_lyr_dec->u2_residual_resample_luma_stride;
 1998|   157k|    pi2_luma_res_ptr = ps_svc_lyr_dec->pi2_il_residual_resample_mb_luma_frm_start +
 1999|   157k|                       (ps_cur_mb_info->u2_mbx << 4) +
 2000|   157k|                       ((ps_cur_mb_info->u2_mby << 4) * u2_luma_stride);
 2001|       |
 2002|   157k|    u2_chroma_stride = ps_svc_lyr_dec->u2_residual_resample_chroma_stride;
 2003|   157k|    pi2_chroma_res_ptr = ps_svc_lyr_dec->pi2_il_residual_resample_mb_chroma_frm_start +
 2004|   157k|                         (ps_cur_mb_info->u2_mbx << 4) +
 2005|   157k|                         ((ps_cur_mb_info->u2_mby << 3) * u2_chroma_stride);
 2006|       |
 2007|   157k|    ret = isvcd_process_residual_resample_mb(ps_svc_lyr_dec, ps_cur_mb_info);
 2008|   157k|    if(ret != OK)
  ------------------
  |  |  114|   157k|#define OK        0
  ------------------
  |  Branch (2008:8): [True: 0, False: 157k]
  ------------------
 2009|      0|    {
 2010|      0|        return ret;
 2011|      0|    }
 2012|   157k|    if(u1_mbaff)
  ------------------
  |  Branch (2012:8): [True: 0, False: 157k]
  ------------------
 2013|      0|    {
 2014|      0|        if(uc_botMb)
  ------------------
  |  Branch (2014:12): [True: 0, False: 0]
  ------------------
 2015|      0|        {
 2016|      0|            pu1_rec_y += (u1_mb_field_decoding_flag ? (ui_rec_width >> 1) : (ui_rec_width << 4));
  ------------------
  |  Branch (2016:27): [True: 0, False: 0]
  ------------------
 2017|      0|            pu1_rec_u +=
 2018|      0|                (u1_mb_field_decoding_flag ? (u4_recwidth_cr >> 1) : (u4_recwidth_cr << 3));
  ------------------
  |  Branch (2018:18): [True: 0, False: 0]
  ------------------
 2019|      0|        }
 2020|      0|    }
 2021|       |
 2022|   157k|    if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (2022:8): [True: 141k, False: 16.0k]
  ------------------
 2023|   141k|    {
 2024|   141k|        u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, ps_cur_mb_info, 0);
 2025|   141k|    }
 2026|  16.0k|    else
 2027|  16.0k|    {
 2028|  16.0k|        if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode)
  ------------------
  |  Branch (2028:12): [True: 2.50k, False: 13.5k]
  ------------------
 2029|  2.50k|        {
 2030|  2.50k|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, ps_cur_mb_info, 0);
 2031|  2.50k|        }
 2032|  13.5k|        else
 2033|  13.5k|        {
 2034|  13.5k|            u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec, ps_cur_mb_info);
 2035|  13.5k|        }
 2036|  16.0k|    }
 2037|       |
 2038|   157k|    *pu2_res_luma_csbp = 0;
 2039|   157k|    pi2_y_coeff = ps_dec->pi2_coeff_data;
 2040|       |
 2041|       |    /* Inverse Transform and Reconstruction */
 2042|   157k|    if(ps_cur_mb_info->u1_cbp & 0x0f)
  ------------------
  |  Branch (2042:8): [True: 30.6k, False: 126k]
  ------------------
 2043|  30.6k|    {
 2044|  30.6k|        if(!ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (2044:12): [True: 14.5k, False: 16.0k]
  ------------------
 2045|  14.5k|        {
 2046|  14.5k|            UWORD32 i;
 2047|  14.5k|            WORD16 ai2_tmp[16] = {0};
 2048|   247k|            for(i = 0; i < 16; i++)
  ------------------
  |  Branch (2048:24): [True: 233k, False: 14.5k]
  ------------------
 2049|   233k|            {
 2050|   233k|                if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i))
  ------------------
  |  |   54|   233k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 57.5k, False: 175k]
  |  |  ------------------
  ------------------
 2051|  57.5k|                {
 2052|  57.5k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
 2053|  57.5k|                    UWORD8 *pu1_pred_sblk =
 2054|  57.5k|                        pu1_rec_y + ((i & 0x3) * BLK_SIZE) + (i >> 2) * (ui_rec_width << 2);
  ------------------
  |  |  556|  57.5k|#define BLK_SIZE             4
  ------------------
 2055|  57.5k|                    WORD16 *pi2_out = pi2_luma_res_ptr + ((i & 0x3) * BLK_SIZE) +
  ------------------
  |  |  556|  57.5k|#define BLK_SIZE             4
  ------------------
 2056|  57.5k|                                      (i >> 2) * (u2_luma_stride << 2);
 2057|  57.5k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  57.5k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 2058|  57.5k|                    {
 2059|  57.5k|                        if(CHECKBIT(u4_luma_dc_only_csbp, i))
  ------------------
  |  |   54|  57.5k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 5.39k, False: 52.1k]
  |  |  ------------------
  ------------------
 2060|  5.39k|                        {
 2061|  5.39k|                            u2_res_luma_csbp =
 2062|  5.39k|                                ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_4x4_dc(
 2063|  5.39k|                                    pi2_level, pu1_pred_sblk, pi2_out, pu1_pred_sblk, ui_rec_width,
 2064|  5.39k|                                    u2_luma_stride, ui_rec_width,
 2065|  5.39k|                                    gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
 2066|  5.39k|                                    (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[3],
 2067|  5.39k|                                    ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, NULL);
 2068|  5.39k|                        }
 2069|  52.1k|                        else
 2070|  52.1k|                        {
 2071|  52.1k|                            u2_res_luma_csbp =
 2072|  52.1k|                                ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_4x4(
 2073|  52.1k|                                    pi2_level, pu1_pred_sblk, pi2_out, pu1_pred_sblk, ui_rec_width,
 2074|  52.1k|                                    u2_luma_stride, ui_rec_width,
 2075|  52.1k|                                    gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
 2076|  52.1k|                                    (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[3],
 2077|  52.1k|                                    ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, NULL);
 2078|  52.1k|                        }
 2079|  57.5k|                    }
 2080|  57.5k|                }
 2081|   175k|                else
 2082|   175k|                {
 2083|   175k|                    UWORD8 *pu1_pred_sblk =
 2084|   175k|                        pu1_rec_y + ((i & 0x3) * BLK_SIZE) + (i >> 2) * (ui_rec_width << 2);
  ------------------
  |  |  556|   175k|#define BLK_SIZE             4
  ------------------
 2085|   175k|                    WORD16 *pi2_out = pi2_luma_res_ptr + ((i & 0x3) * BLK_SIZE) +
  ------------------
  |  |  556|   175k|#define BLK_SIZE             4
  ------------------
 2086|   175k|                                      (i >> 2) * (u2_luma_stride << 2);
 2087|       |
 2088|   175k|                    u2_res_luma_csbp = ps_svc_lyr_dec->pf_pred_residual_recon_luma_4x4(
 2089|   175k|                        pu1_pred_sblk, pi2_out, pu1_pred_sblk, ui_rec_width, u2_luma_stride,
 2090|   175k|                        ui_rec_width);
 2091|   175k|                }
 2092|   233k|                *pu2_res_luma_csbp |= (u2_res_luma_csbp << i);
 2093|   233k|            }
 2094|  14.5k|        }
 2095|  16.0k|        else
 2096|  16.0k|        {
 2097|  16.0k|            WORD16 *pi2_scale_matrix_ptr;
 2098|  16.0k|            WORD32 i;
 2099|       |
 2100|  16.0k|            pi2_scale_matrix_ptr = ps_dec->s_high_profile.i2_scalinglist8x8[1];
 2101|       |
 2102|  80.3k|            for(i = 0; i < 4; i++)
  ------------------
  |  Branch (2102:24): [True: 64.2k, False: 16.0k]
  ------------------
 2103|  64.2k|            {
 2104|  64.2k|                WORD16 ai2_tmp[64] = {0};
 2105|  64.2k|                WORD16 *pi16_levelBlock =
 2106|  64.2k|                    pi2_y_coeff + (i << 6); /* move to the next 8x8 adding 64 */
 2107|       |
 2108|  64.2k|                UWORD8 *pu1_pred_sblk =
 2109|  64.2k|                    pu1_rec_y + ((i & 0x1) * BLK8x8SIZE) + (i >> 1) * (ui_rec_width << 3);
  ------------------
  |  |  555|  64.2k|#define BLK8x8SIZE          8
  ------------------
 2110|  64.2k|                WORD16 *pi2_out =
 2111|  64.2k|                    pi2_luma_res_ptr + ((i & 0x1) * BLK8x8SIZE) + (i >> 1) * (u2_luma_stride << 3);
  ------------------
  |  |  555|  64.2k|#define BLK8x8SIZE          8
  ------------------
 2112|  64.2k|                if(CHECKBIT(ps_cur_mb_info->u1_cbp, i))
  ------------------
  |  |   54|  64.2k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 48.1k, False: 16.0k]
  |  |  ------------------
  ------------------
 2113|  48.1k|                {
 2114|  48.1k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  48.1k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 2115|  48.1k|                    {
 2116|  48.1k|                        if(CHECKBIT(u4_luma_dc_only_cbp, i))
  ------------------
  |  |   54|  48.1k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 3.78k, False: 44.4k]
  |  |  ------------------
  ------------------
 2117|  3.78k|                        {
 2118|  3.78k|                            u2_res_luma_csbp =
 2119|  3.78k|                                ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_8x8_dc(
 2120|  3.78k|                                    pi16_levelBlock, pu1_pred_sblk, pi2_out, pu1_pred_sblk,
 2121|  3.78k|                                    ui_rec_width, u2_luma_stride, ui_rec_width,
 2122|  3.78k|                                    gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
 2123|  3.78k|                                    (UWORD16 *) pi2_scale_matrix_ptr, ps_cur_mb_info->u1_qp_div6,
 2124|  3.78k|                                    ai2_tmp, 0, NULL);
 2125|  3.78k|                        }
 2126|  44.4k|                        else
 2127|  44.4k|                        {
 2128|  44.4k|                            u2_res_luma_csbp =
 2129|  44.4k|                                ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_8x8(
 2130|  44.4k|                                    pi16_levelBlock, pu1_pred_sblk, pi2_out, pu1_pred_sblk,
 2131|  44.4k|                                    ui_rec_width, u2_luma_stride, ui_rec_width,
 2132|  44.4k|                                    gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
 2133|  44.4k|                                    (UWORD16 *) pi2_scale_matrix_ptr, ps_cur_mb_info->u1_qp_div6,
 2134|  44.4k|                                    ai2_tmp, 0, NULL);
 2135|  44.4k|                        }
 2136|  48.1k|                    }
 2137|  48.1k|                }
 2138|  16.0k|                else
 2139|  16.0k|                {
 2140|  16.0k|                    UWORD8 *pu1_pred_sblk =
 2141|  16.0k|                        pu1_rec_y + ((i & 0x1) * BLK8x8SIZE) + (i >> 1) * (ui_rec_width << 3);
  ------------------
  |  |  555|  16.0k|#define BLK8x8SIZE          8
  ------------------
 2142|  16.0k|                    WORD16 *pi2_out = pi2_luma_res_ptr + ((i & 0x1) * BLK8x8SIZE) +
  ------------------
  |  |  555|  16.0k|#define BLK8x8SIZE          8
  ------------------
 2143|  16.0k|                                      (i >> 1) * (u2_luma_stride << 3);
 2144|       |
 2145|  16.0k|                    u2_res_luma_csbp = ps_svc_lyr_dec->pf_pred_residual_recon_luma_8x8(
 2146|  16.0k|                        pu1_pred_sblk, pi2_out, pu1_pred_sblk, ui_rec_width, u2_luma_stride,
 2147|  16.0k|                        ui_rec_width);
 2148|  16.0k|                }
 2149|  64.2k|                *pu2_res_luma_csbp |= (u2_res_luma_csbp << (((i >> 1) << 3) + ((i & 0x01) << 1)));
 2150|  64.2k|            }
 2151|  16.0k|        }
 2152|  30.6k|    }
 2153|   126k|    else
 2154|   126k|    {
 2155|   126k|        UWORD8 *pu1_pred_sblk = pu1_rec_y;
 2156|   126k|        WORD16 *pi2_out = pi2_luma_res_ptr;
 2157|       |
 2158|   126k|        *pu2_res_luma_csbp = ps_svc_lyr_dec->pf_pred_residual_recon_luma_16x16(
 2159|   126k|            pu1_pred_sblk, pi2_out, pu1_pred_sblk, ui_rec_width, u2_luma_stride, ui_rec_width);
 2160|   126k|    }
 2161|       |
 2162|       |    /* Decode Chroma Block */
 2163|   157k|    ih264d_unpack_chroma_coeff4x4_mb(ps_dec, ps_cur_mb_info);
 2164|       |    /*--------------------------------------------------------------------*/
 2165|       |    /* Chroma Blocks decoding                                             */
 2166|       |    /*--------------------------------------------------------------------*/
 2167|   157k|    {
 2168|   157k|        UWORD8 u1_chroma_cbp = (UWORD8) (ps_cur_mb_info->u1_cbp >> 4);
 2169|       |
 2170|   157k|        if(u1_chroma_cbp != CBPC_ALLZERO)
  ------------------
  |  |  507|   157k|#define CBPC_ALLZERO    0
  ------------------
  |  Branch (2170:12): [True: 13.9k, False: 143k]
  ------------------
 2171|  13.9k|        {
 2172|  13.9k|            UWORD32 u4_scale_u = ps_cur_mb_info->u1_qpc_div6;
 2173|  13.9k|            UWORD32 u4_scale_v = ps_cur_mb_info->u1_qpcr_div6;
 2174|  13.9k|            UWORD16 u2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp;
 2175|       |
 2176|  13.9k|            pi2_y_coeff = ps_dec->pi2_coeff_data;
 2177|       |
 2178|  13.9k|            {
 2179|  13.9k|                UWORD32 i;
 2180|  13.9k|                WORD16 ai2_tmp[16] = {0};
 2181|  69.7k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (2181:28): [True: 55.7k, False: 13.9k]
  ------------------
 2182|  55.7k|                {
 2183|  55.7k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
 2184|  55.7k|                    UWORD8 *pu1_pred_sblk = pu1_rec_u + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  556|  55.7k|#define BLK_SIZE             4
  ------------------
                                  UWORD8 *pu1_pred_sblk = pu1_rec_u + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  119|  55.7k|#define YUV420SP_FACTOR 2
  ------------------
 2185|  55.7k|                                            (i >> 1) * (u4_recwidth_cr << 2);
 2186|  55.7k|                    WORD16 *pi2_out = pi2_chroma_res_ptr +
 2187|  55.7k|                                      ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  556|  55.7k|#define BLK_SIZE             4
  ------------------
                                                    ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  119|  55.7k|#define YUV420SP_FACTOR 2
  ------------------
 2188|  55.7k|                                      (i >> 1) * (u2_chroma_stride << 2);
 2189|  55.7k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  55.7k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 2190|  55.7k|                    {
 2191|  55.7k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|  55.7k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 5.98k, False: 49.7k]
  |  |  ------------------
  ------------------
 2192|  5.98k|                        {
 2193|  5.98k|                            ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4(
 2194|  5.98k|                                pi2_level, pu1_pred_sblk, pi2_out, pu1_pred_sblk, u4_recwidth_cr,
 2195|  5.98k|                                u2_chroma_stride, u4_recwidth_cr,
 2196|  5.98k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
 2197|  5.98k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[4], u4_scale_u,
 2198|  5.98k|                                ai2_tmp, pi2_level);
 2199|  5.98k|                        }
 2200|  49.7k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (2200:33): [True: 35.0k, False: 14.7k]
  ------------------
 2201|  35.0k|                        {
 2202|  35.0k|                            ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4_dc(
 2203|  35.0k|                                pi2_level, pu1_pred_sblk, pi2_out, pu1_pred_sblk, u4_recwidth_cr,
 2204|  35.0k|                                u2_chroma_stride, u4_recwidth_cr,
 2205|  35.0k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
 2206|  35.0k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[4], u4_scale_u,
 2207|  35.0k|                                ai2_tmp, pi2_level);
 2208|  35.0k|                        }
 2209|  14.7k|                        else
 2210|  14.7k|                        {
 2211|  14.7k|                            ps_svc_lyr_dec->pf_pred_residual_recon_chroma_4x4(
 2212|  14.7k|                                pu1_pred_sblk, pi2_out, pu1_pred_sblk, u4_recwidth_cr,
 2213|  14.7k|                                u2_chroma_stride, u4_recwidth_cr);
 2214|  14.7k|                        }
 2215|  55.7k|                    }
 2216|  55.7k|                }
 2217|  13.9k|            }
 2218|       |
 2219|  13.9k|            pi2_y_coeff += MB_CHROM_SIZE;
  ------------------
  |  |  564|  13.9k|#define MB_CHROM_SIZE                 64
  ------------------
 2220|  13.9k|            u2_chroma_csbp >>= 4;
 2221|       |
 2222|  13.9k|            {
 2223|  13.9k|                UWORD32 i;
 2224|  13.9k|                WORD16 ai2_tmp[16] = {0};
 2225|  69.7k|                for(i = 0; i < 4; i++)
  ------------------
  |  Branch (2225:28): [True: 55.7k, False: 13.9k]
  ------------------
 2226|  55.7k|                {
 2227|  55.7k|                    WORD16 *pi2_level = pi2_y_coeff + (i << 4);
 2228|  55.7k|                    UWORD8 *pu1_pred_sblk = pu1_rec_u + 1 +
 2229|  55.7k|                                            ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  556|  55.7k|#define BLK_SIZE             4
  ------------------
                                                          ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  119|  55.7k|#define YUV420SP_FACTOR 2
  ------------------
 2230|  55.7k|                                            (i >> 1) * (u4_recwidth_cr << 2);
 2231|  55.7k|                    WORD16 *pi2_out = pi2_chroma_res_ptr + 1 +
 2232|  55.7k|                                      ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  556|  55.7k|#define BLK_SIZE             4
  ------------------
                                                    ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) +
  ------------------
  |  |  119|  55.7k|#define YUV420SP_FACTOR 2
  ------------------
 2233|  55.7k|                                      (i >> 1) * (u2_chroma_stride << 2);
 2234|  55.7k|                    PROFILE_DISABLE_IQ_IT_RECON()
  ------------------
  |  |   98|  55.7k|#define PROFILE_DISABLE_IQ_IT_RECON() ;
  ------------------
 2235|  55.7k|                    {
 2236|  55.7k|                        if(CHECKBIT(u2_chroma_csbp, i))
  ------------------
  |  |   54|  55.7k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 6.07k, False: 49.6k]
  |  |  ------------------
  ------------------
 2237|  6.07k|                        {
 2238|  6.07k|                            ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4(
 2239|  6.07k|                                pi2_level, pu1_pred_sblk, pi2_out, pu1_pred_sblk, u4_recwidth_cr,
 2240|  6.07k|                                u2_chroma_stride, u4_recwidth_cr,
 2241|  6.07k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
 2242|  6.07k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[5], u4_scale_v,
 2243|  6.07k|                                ai2_tmp, pi2_level);
 2244|  6.07k|                        }
 2245|  49.6k|                        else if(pi2_level[0] != 0)
  ------------------
  |  Branch (2245:33): [True: 33.1k, False: 16.4k]
  ------------------
 2246|  33.1k|                        {
 2247|  33.1k|                            ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4_dc(
 2248|  33.1k|                                pi2_level, pu1_pred_sblk, pi2_out, pu1_pred_sblk, u4_recwidth_cr,
 2249|  33.1k|                                u2_chroma_stride, u4_recwidth_cr,
 2250|  33.1k|                                gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
 2251|  33.1k|                                (UWORD16 *) ps_dec->s_high_profile.i2_scalinglist4x4[5], u4_scale_v,
 2252|  33.1k|                                ai2_tmp, pi2_level);
 2253|  33.1k|                        }
 2254|  16.4k|                        else
 2255|  16.4k|                        {
 2256|  16.4k|                            ps_svc_lyr_dec->pf_pred_residual_recon_chroma_4x4(
 2257|  16.4k|                                pu1_pred_sblk, pi2_out, pu1_pred_sblk, u4_recwidth_cr,
 2258|  16.4k|                                u2_chroma_stride, u4_recwidth_cr);
 2259|  16.4k|                        }
 2260|  55.7k|                    }
 2261|  55.7k|                }
 2262|  13.9k|            }
 2263|  13.9k|        }
 2264|   143k|        else
 2265|   143k|        {
 2266|       |            /* Cr*/
 2267|   143k|            {
 2268|   143k|                UWORD8 *pu1_pred_sblk = pu1_rec_u;
 2269|   143k|                WORD16 *pi2_out = pi2_chroma_res_ptr;
 2270|       |
 2271|   143k|                ps_svc_lyr_dec->pf_pred_residual_recon_chroma_8x8(pu1_pred_sblk, pi2_out,
 2272|   143k|                                                                  pu1_pred_sblk, u4_recwidth_cr,
 2273|   143k|                                                                  u2_chroma_stride, u4_recwidth_cr);
 2274|   143k|            }
 2275|       |
 2276|       |            /* Cb*/
 2277|   143k|            {
 2278|   143k|                UWORD8 *pu1_pred_sblk = pu1_rec_u + 1;
 2279|   143k|                WORD16 *pi2_out = pi2_chroma_res_ptr + 1;
 2280|   143k|                ps_svc_lyr_dec->pf_pred_residual_recon_chroma_8x8(pu1_pred_sblk, pi2_out,
 2281|   143k|                                                                  pu1_pred_sblk, u4_recwidth_cr,
 2282|   143k|                                                                  u2_chroma_stride, u4_recwidth_cr);
 2283|   143k|            }
 2284|   143k|        }
 2285|   157k|    }
 2286|   157k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
 2287|   157k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
 2288|   157k|        (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
 2289|   157k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode =
 2290|   157k|        u1_inference_mode ? SVC_IBL_MB : SVC_INTER_MB;
  ------------------
  |  |  117|      0|#define SVC_IBL_MB (1 << 3)         /*!< I_BL MB always inferred */
  ------------------
                      u1_inference_mode ? SVC_IBL_MB : SVC_INTER_MB;
  ------------------
  |  |  114|   314k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (2290:9): [True: 0, False: 157k]
  ------------------
 2291|   157k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size = ps_cur_mb_info->u1_tran_form8x8;
 2292|   157k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = ps_cur_mb_info->u2_luma_csbp;
 2293|   157k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz =
 2294|   157k|        (UWORD8) ps_cur_mb_info->u2_chroma_csbp;
 2295|   157k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 1))
  ------------------
  |  |   54|   157k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 10.2k, False: 146k]
  |  |  ------------------
  ------------------
 2296|  10.2k|    {
 2297|       |        /* Four bits for Cb in DC only cbp */
 2298|  10.2k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz |= 0x0F;
 2299|  10.2k|    }
 2300|   157k|    if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag, 2))
  ------------------
  |  |   54|   157k|#define CHECKBIT(a,i) ((a) &  (1 << i))
  |  |  ------------------
  |  |  |  Branch (54:23): [True: 10.0k, False: 147k]
  |  |  ------------------
  ------------------
 2301|  10.0k|    {
 2302|       |        /* Four bits for Cr in DC only cbp */
 2303|  10.0k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz |= 0xF0;
 2304|  10.0k|    }
 2305|   157k|    return (0);
 2306|   157k|}

isvcd_residual_chroma_dyadic:
  407|  1.83k|{
  408|  1.83k|    residual_sampling_ctxt_t *ps_ctxt;
  409|  1.83k|    res_lyr_ctxt *ps_lyr_ctxt;
  410|  1.83k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
  411|  1.83k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
  412|       |    /* ----------------- Processing ------------------------------- */
  413|  1.83k|    {
  414|  1.83k|        WORD32 i4_i;
  415|  1.83k|        WORD16 *pi2_ref_data_byte;
  416|  1.83k|        WORD32 *pi4_ref_array;
  417|  1.83k|        ref_pixel_map_t *ps_pos_phase;
  418|  1.83k|        WORD32 i4_phase1, i4_phase2;
  419|       |
  420|  1.83k|        pi2_ref_data_byte = pi2_inp_data;
  421|  1.83k|        ps_pos_phase = ps_lyr_ctxt->s_chroma_map_ctxt.ps_x_pos_phase;
  422|       |
  423|       |        /* ----------- Horizontal Interpolation ---------------- */
  424|  1.83k|        pi4_ref_array = (WORD32 *) ps_ctxt->pi2_refarray_buffer;
  425|  1.83k|        i4_phase1 = ps_pos_phase[0].i2_phase;
  426|  1.83k|        i4_phase2 = (i4_phase1 + 8) & 0x0F;
  427|       |
  428|  9.17k|        for(i4_i = 0; i4_i < SUB_BLOCK_HEIGHT; i4_i++)
  ------------------
  |  |   62|  9.17k|#define SUB_BLOCK_HEIGHT 4
  ------------------
  |  Branch (428:23): [True: 7.33k, False: 1.83k]
  ------------------
  429|  7.33k|        {
  430|  7.33k|            WORD16 i2_coeff1, i2_coeff2;
  431|  7.33k|            i2_coeff1 = (WORD16) (pi2_ref_data_byte[0]);
  432|       |
  433|       |            /* populate the first inter sample */
  434|  7.33k|            *pi4_ref_array++ = i2_coeff1 << 4;
  435|       |
  436|       |            /* unroll count 1 */
  437|  7.33k|            i2_coeff2 = (WORD16) (pi2_ref_data_byte[2]);
  438|       |
  439|       |            /* populate 2 samples based on current coeffs */
  440|  7.33k|            *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff1 + i4_phase2 * i2_coeff2);
  441|       |
  442|       |            /* unroll count 2 */
  443|  7.33k|            *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff1 + i4_phase1 * i2_coeff2);
  444|       |
  445|       |            /* unroll count 3 */
  446|  7.33k|            i2_coeff1 = (WORD16) (pi2_ref_data_byte[4]);
  447|       |
  448|       |            /* populate 2 samples based on current coeffs */
  449|  7.33k|            *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff2 + i4_phase2 * i2_coeff1);
  450|       |
  451|       |            /* unroll count 4 */
  452|  7.33k|            *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff2 + i4_phase1 * i2_coeff1);
  453|       |
  454|       |            /* unroll count 5 */
  455|  7.33k|            i2_coeff2 = (WORD16) (pi2_ref_data_byte[6]);
  456|       |
  457|       |            /* populate 2 samples based on current coeffs */
  458|  7.33k|            *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff1 + i4_phase2 * i2_coeff2);
  459|       |
  460|       |            /* unroll count 6 */
  461|  7.33k|            *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff1 + i4_phase1 * i2_coeff2);
  462|       |
  463|       |            /* populate the last inter sample */
  464|  7.33k|            *pi4_ref_array++ = i2_coeff2 << 4;
  465|       |
  466|       |            /* vertical loop uopdates */
  467|  7.33k|            pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride);
  468|  7.33k|        }
  469|       |
  470|       |        /* ----------- Vertical Interpolation ---------------- */
  471|  1.83k|        pi4_ref_array = (WORD32 *) ps_ctxt->pi2_refarray_buffer;
  472|  1.83k|        ps_pos_phase = ps_lyr_ctxt->s_chroma_map_ctxt.ps_y_pos_phase;
  473|  1.83k|        i4_phase1 = ps_pos_phase[0].i2_phase;
  474|  1.83k|        i4_phase2 = (i4_phase1 + 8) & 0x0F;
  475|       |
  476|  16.5k|        for(i4_i = 0; i4_i < BLOCK_WIDTH; i4_i++)
  ------------------
  |  |   64|  16.5k|#define BLOCK_WIDTH 8
  ------------------
  |  Branch (476:23): [True: 14.6k, False: 1.83k]
  ------------------
  477|  14.6k|        {
  478|  14.6k|            WORD16 *pi2_out;
  479|  14.6k|            WORD32 *pi4_ref_array_temp;
  480|  14.6k|            WORD32 i4_horz_samp_1, i4_horz_samp_2;
  481|  14.6k|            pi2_out = pi2_out_res;
  482|  14.6k|            pi4_ref_array_temp = pi4_ref_array;
  483|       |
  484|       |            /* populate the first inter sample */
  485|  14.6k|            i4_horz_samp_1 = *pi4_ref_array_temp;
  486|  14.6k|            pi4_ref_array_temp += BLOCK_WIDTH;
  ------------------
  |  |   64|  14.6k|#define BLOCK_WIDTH 8
  ------------------
  487|  14.6k|            *pi2_out = (i4_horz_samp_1 + 8) >> 4;
  488|  14.6k|            pi2_out += i4_out_res_stride;
  489|       |
  490|       |            /* unroll count 1 */
  491|  14.6k|            i4_horz_samp_2 = *pi4_ref_array_temp;
  492|  14.6k|            pi4_ref_array_temp += BLOCK_WIDTH;
  ------------------
  |  |   64|  14.6k|#define BLOCK_WIDTH 8
  ------------------
  493|       |
  494|       |            /* populate 2 samples based on current coeffs */
  495|  14.6k|            *pi2_out = ((16 - i4_phase2) * i4_horz_samp_1 + i4_phase2 * i4_horz_samp_2 + 128) >> 8;
  496|  14.6k|            pi2_out += i4_out_res_stride;
  497|       |
  498|       |            /* unroll count 2 */
  499|  14.6k|            *pi2_out = ((16 - i4_phase1) * i4_horz_samp_1 + i4_phase1 * i4_horz_samp_2 + 128) >> 8;
  500|  14.6k|            pi2_out += i4_out_res_stride;
  501|       |
  502|       |            /* unroll count 3 */
  503|  14.6k|            i4_horz_samp_1 = *pi4_ref_array_temp;
  504|  14.6k|            pi4_ref_array_temp += BLOCK_WIDTH;
  ------------------
  |  |   64|  14.6k|#define BLOCK_WIDTH 8
  ------------------
  505|       |
  506|       |            /* populate 2 samples based on current coeffs */
  507|  14.6k|            *pi2_out = ((16 - i4_phase2) * i4_horz_samp_2 + i4_phase2 * i4_horz_samp_1 + 128) >> 8;
  508|  14.6k|            pi2_out += i4_out_res_stride;
  509|       |
  510|       |            /* unroll count 4 */
  511|  14.6k|            *pi2_out = ((16 - i4_phase1) * i4_horz_samp_2 + i4_phase1 * i4_horz_samp_1 + 128) >> 8;
  512|  14.6k|            pi2_out += i4_out_res_stride;
  513|       |
  514|       |            /* unroll count 5 */
  515|  14.6k|            i4_horz_samp_2 = *pi4_ref_array_temp;
  516|       |
  517|       |            /* populate 2 samples based on current coeffs */
  518|  14.6k|            *pi2_out = ((16 - i4_phase2) * i4_horz_samp_1 + i4_phase2 * i4_horz_samp_2 + 128) >> 8;
  519|  14.6k|            pi2_out += i4_out_res_stride;
  520|       |
  521|       |            /* unroll count 6 */
  522|  14.6k|            *pi2_out = ((16 - i4_phase1) * i4_horz_samp_1 + i4_phase1 * i4_horz_samp_2 + 128) >> 8;
  523|  14.6k|            pi2_out += i4_out_res_stride;
  524|       |
  525|       |            /* populate the last inter sample */
  526|  14.6k|            *pi2_out = (i4_horz_samp_2 + 8) >> 4;
  527|       |
  528|       |            /* horizontal loop updates */
  529|  14.6k|            pi4_ref_array++;
  530|  14.6k|            pi2_out_res += 2;
  531|  14.6k|        }
  532|  1.83k|    }
  533|  1.83k|    return;
  534|  1.83k|}
isvcd_residual_luma_dyadic:
  572|  22.8k|{
  573|  22.8k|    WORD16 *pi2_refarray_buffer;
  574|  22.8k|    WORD32 i4_blk_ctr;
  575|  22.8k|    residual_sampling_ctxt_t *ps_ctxt;
  576|       |
  577|  22.8k|    UNUSED(ps_ref_mb_mode);
  ------------------
  |  |   45|  22.8k|#define UNUSED(x) ((void)(x))
  ------------------
  578|  22.8k|    UNUSED(u2_mb_x);
  ------------------
  |  |   45|  22.8k|#define UNUSED(x) ((void)(x))
  ------------------
  579|  22.8k|    UNUSED(u2_mb_y);
  ------------------
  |  |   45|  22.8k|#define UNUSED(x) ((void)(x))
  ------------------
  580|       |
  581|  22.8k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
  582|  22.8k|    pi2_refarray_buffer = ps_ctxt->pi2_refarray_buffer;
  583|       |
  584|       |    /* based on transform size the counter and interpolation width and */
  585|       |    /* height are intialised as follows                                */
  586|  22.8k|    if((i4_ref_tx_size) && (0 != i4_ref_nnz))
  ------------------
  |  Branch (586:8): [True: 1.68k, False: 21.1k]
  |  Branch (586:28): [True: 1.39k, False: 290]
  ------------------
  587|  1.39k|    {
  588|  1.39k|        WORD16 *pi2_ref_data_byte;
  589|  1.39k|        WORD32 *pi4_ref_array;
  590|  1.39k|        WORD32 i4_i, i4_j;
  591|       |
  592|  1.39k|        pi2_ref_data_byte = pi2_inp_data;
  593|       |
  594|       |        /* ----------- Horizontal Interpolation ---------------- */
  595|  1.39k|        pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
  596|  12.5k|        for(i4_i = 0; i4_i < BLOCK_HEIGHT; i4_i++)
  ------------------
  |  |   65|  12.5k|#define BLOCK_HEIGHT 8
  ------------------
  |  Branch (596:23): [True: 11.1k, False: 1.39k]
  ------------------
  597|  11.1k|        {
  598|  11.1k|            WORD16 i2_coeff1, i2_coeff2;
  599|  11.1k|            i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
  600|       |
  601|       |            /* populate the first inter sample */
  602|  11.1k|            *pi4_ref_array++ = i2_coeff1 << 2;
  603|       |
  604|  89.4k|            for(i4_j = 0; i4_j < 14; i4_j += 2)
  ------------------
  |  Branch (604:27): [True: 78.2k, False: 11.1k]
  ------------------
  605|  78.2k|            {
  606|  78.2k|                i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
  607|       |
  608|       |                /* populate 2 samples based on current coeffs */
  609|  78.2k|                *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
  610|  78.2k|                *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
  611|       |
  612|       |                /* store the coeff 2 to coeff 1 */
  613|       |                /* (used in next iteration)     */
  614|  78.2k|                i2_coeff1 = i2_coeff2;
  615|  78.2k|            }
  616|       |
  617|       |            /* populate the last inter sample */
  618|  11.1k|            *pi4_ref_array++ = i2_coeff1 << 2;
  619|       |
  620|       |            /* vertical loop uopdates */
  621|  11.1k|            pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride);
  622|  11.1k|        }
  623|       |
  624|       |        /* ----------- Vertical Interpolation ---------------- */
  625|  1.39k|        pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
  626|       |
  627|  23.7k|        for(i4_i = 0; i4_i < MB_WIDTH; i4_i++)
  ------------------
  |  |   67|  23.7k|#define MB_WIDTH 16
  ------------------
  |  Branch (627:23): [True: 22.3k, False: 1.39k]
  ------------------
  628|  22.3k|        {
  629|  22.3k|            WORD32 *pi4_ref_array_temp;
  630|  22.3k|            WORD16 *pi2_out;
  631|  22.3k|            WORD32 i4_horz_samp_1, i4_horz_samp_2;
  632|       |
  633|  22.3k|            pi4_ref_array_temp = pi4_ref_array;
  634|  22.3k|            pi2_out = pi2_out_res;
  635|  22.3k|            i4_horz_samp_1 = *pi4_ref_array_temp;
  636|       |
  637|       |            /* populate the first inter sample */
  638|  22.3k|            *pi2_out = (i4_horz_samp_1 + 2) >> 2;
  639|  22.3k|            pi2_out += i4_out_res_stride;
  640|       |
  641|   178k|            for(i4_j = 0; i4_j < 14; i4_j += 2)
  ------------------
  |  Branch (641:27): [True: 156k, False: 22.3k]
  ------------------
  642|   156k|            {
  643|   156k|                pi4_ref_array_temp += MB_WIDTH;
  ------------------
  |  |   67|   156k|#define MB_WIDTH 16
  ------------------
  644|   156k|                i4_horz_samp_2 = *pi4_ref_array_temp;
  645|       |
  646|       |                /* populate 2 samples based on current coeffs */
  647|   156k|                *pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
  648|   156k|                pi2_out += i4_out_res_stride;
  649|   156k|                *pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
  650|   156k|                pi2_out += i4_out_res_stride;
  651|       |
  652|       |                /* store the coeff 2 to coeff 1 */
  653|       |                /* (used in next iteration)     */
  654|   156k|                i4_horz_samp_1 = i4_horz_samp_2;
  655|   156k|            }
  656|       |
  657|       |            /* populate the last inter sample */
  658|  22.3k|            *pi2_out = (i4_horz_samp_1 + 2) >> 2;
  659|       |
  660|       |            /* horizontal loop updates */
  661|  22.3k|            pi4_ref_array++;
  662|  22.3k|            pi2_out_res++;
  663|  22.3k|        }
  664|  1.39k|    }
  665|  21.4k|    else
  666|  21.4k|    {
  667|       |        /* ----------------------------------------------------------------- */
  668|       |        /* LOOP over number of blocks                                        */
  669|       |        /* ----------------------------------------------------------------- */
  670|   107k|        for(i4_blk_ctr = 0; i4_blk_ctr < 4; i4_blk_ctr++)
  ------------------
  |  Branch (670:29): [True: 85.6k, False: 21.4k]
  ------------------
  671|  85.6k|        {
  672|  85.6k|            WORD16 *pi2_ref_data_byte;
  673|  85.6k|            WORD32 *pi4_ref_array;
  674|  85.6k|            WORD32 i4_i;
  675|       |
  676|       |            /* if reference layer is not coded then no processing */
  677|  85.6k|            if(0 != (i4_ref_nnz & 0x1))
  ------------------
  |  Branch (677:16): [True: 12.9k, False: 72.6k]
  ------------------
  678|  12.9k|            {
  679|  12.9k|                pi2_ref_data_byte = pi2_inp_data;
  680|       |
  681|       |                /* ----------- Horizontal Interpolation ---------------- */
  682|  12.9k|                pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
  683|       |
  684|  64.9k|                for(i4_i = 0; i4_i < SUB_BLOCK_HEIGHT; i4_i++)
  ------------------
  |  |   62|  64.9k|#define SUB_BLOCK_HEIGHT 4
  ------------------
  |  Branch (684:31): [True: 51.9k, False: 12.9k]
  ------------------
  685|  51.9k|                {
  686|  51.9k|                    WORD16 i2_coeff1, i2_coeff2;
  687|  51.9k|                    i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
  688|       |
  689|       |                    /* populate the first inter sample */
  690|  51.9k|                    *pi4_ref_array++ = i2_coeff1 << 2;
  691|       |
  692|  51.9k|                    i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
  693|       |
  694|       |                    /* populate 2 samples based on current coeffs */
  695|  51.9k|                    *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
  696|  51.9k|                    *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
  697|       |
  698|  51.9k|                    i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
  699|       |
  700|       |                    /* populate 2 samples based on current coeffs */
  701|  51.9k|                    *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
  702|  51.9k|                    *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
  703|       |
  704|  51.9k|                    i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
  705|       |
  706|       |                    /* populate 2 samples based on current coeffs */
  707|  51.9k|                    *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
  708|  51.9k|                    *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
  709|       |
  710|       |                    /* populate the last inter sample */
  711|  51.9k|                    *pi4_ref_array++ = i2_coeff2 << 2;
  712|       |
  713|       |                    /* vertical loop updates */
  714|  51.9k|                    pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride);
  715|  51.9k|                }
  716|       |
  717|       |                /* ----------- Vertical Interpolation ---------------- */
  718|  12.9k|                pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
  719|       |
  720|   116k|                for(i4_i = 0; i4_i < BLOCK_WIDTH; i4_i++)
  ------------------
  |  |   64|   116k|#define BLOCK_WIDTH 8
  ------------------
  |  Branch (720:31): [True: 103k, False: 12.9k]
  ------------------
  721|   103k|                {
  722|   103k|                    WORD32 *pi4_ref_array_temp;
  723|   103k|                    WORD16 *pi2_out;
  724|   103k|                    WORD32 i4_horz_samp_1, i4_horz_samp_2;
  725|       |
  726|   103k|                    pi4_ref_array_temp = pi4_ref_array;
  727|   103k|                    pi2_out = pi2_out_res;
  728|   103k|                    i4_horz_samp_1 = *pi4_ref_array_temp;
  729|       |
  730|       |                    /* populate the first inter sample */
  731|   103k|                    *pi2_out = (i4_horz_samp_1 + 2) >> 2;
  732|   103k|                    pi2_out += i4_out_res_stride;
  733|       |
  734|       |                    /* unroll loop count 1 */
  735|   103k|                    pi4_ref_array_temp += BLOCK_WIDTH;
  ------------------
  |  |   64|   103k|#define BLOCK_WIDTH 8
  ------------------
  736|   103k|                    i4_horz_samp_2 = *pi4_ref_array_temp;
  737|       |
  738|       |                    /* populate 2 samples based on current coeffs */
  739|   103k|                    *pi2_out =
  740|   103k|                        ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
  741|   103k|                    pi2_out += i4_out_res_stride;
  742|   103k|                    *pi2_out =
  743|   103k|                        ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
  744|   103k|                    pi2_out += i4_out_res_stride;
  745|       |
  746|       |                    /* unroll loop count 2 */
  747|   103k|                    pi4_ref_array_temp += BLOCK_WIDTH;
  ------------------
  |  |   64|   103k|#define BLOCK_WIDTH 8
  ------------------
  748|   103k|                    i4_horz_samp_1 = *pi4_ref_array_temp;
  749|       |
  750|       |                    /* populate 2 samples based on current coeffs */
  751|   103k|                    *pi2_out =
  752|   103k|                        ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
  753|   103k|                    pi2_out += i4_out_res_stride;
  754|   103k|                    *pi2_out =
  755|   103k|                        ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
  756|   103k|                    pi2_out += i4_out_res_stride;
  757|       |
  758|       |                    /* unroll loop count 3 */
  759|   103k|                    pi4_ref_array_temp += BLOCK_WIDTH;
  ------------------
  |  |   64|   103k|#define BLOCK_WIDTH 8
  ------------------
  760|   103k|                    i4_horz_samp_2 = *pi4_ref_array_temp;
  761|       |
  762|       |                    /* populate 2 samples based on current coeffs */
  763|   103k|                    *pi2_out =
  764|   103k|                        ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
  765|   103k|                    pi2_out += i4_out_res_stride;
  766|   103k|                    *pi2_out =
  767|   103k|                        ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
  768|   103k|                    pi2_out += i4_out_res_stride;
  769|       |
  770|       |                    /* populate the last inter sample */
  771|   103k|                    *pi2_out = (i4_horz_samp_2 + 2) >> 2;
  772|       |
  773|       |                    /* horizontal loop updates */
  774|   103k|                    pi4_ref_array++;
  775|   103k|                    pi2_out_res++;
  776|   103k|                }
  777|  12.9k|            }
  778|  72.6k|            else
  779|  72.6k|            {
  780|  72.6k|                pi2_out_res += BLOCK_WIDTH;
  ------------------
  |  |   64|  72.6k|#define BLOCK_WIDTH 8
  ------------------
  781|  72.6k|            }
  782|       |
  783|       |            /* Block level loop updates */
  784|  85.6k|            if(1 == i4_blk_ctr)
  ------------------
  |  Branch (784:16): [True: 21.4k, False: 64.2k]
  ------------------
  785|  21.4k|            {
  786|  21.4k|                pi2_inp_data -= SUB_BLOCK_WIDTH;
  ------------------
  |  |   61|  21.4k|#define SUB_BLOCK_WIDTH 4
  ------------------
  787|  21.4k|                pi2_inp_data += (i4_inp_data_stride * SUB_BLOCK_HEIGHT);
  ------------------
  |  |   62|  21.4k|#define SUB_BLOCK_HEIGHT 4
  ------------------
  788|  21.4k|                pi2_out_res -= MB_WIDTH;
  ------------------
  |  |   67|  21.4k|#define MB_WIDTH 16
  ------------------
  789|  21.4k|                pi2_out_res += (i4_out_res_stride * BLOCK_HEIGHT);
  ------------------
  |  |   65|  21.4k|#define BLOCK_HEIGHT 8
  ------------------
  790|  21.4k|                i4_ref_nnz >>= 2;
  791|  21.4k|            }
  792|  64.2k|            else
  793|  64.2k|            {
  794|  64.2k|                pi2_inp_data += SUB_BLOCK_WIDTH;
  ------------------
  |  |   61|  64.2k|#define SUB_BLOCK_WIDTH 4
  ------------------
  795|  64.2k|            }
  796|       |
  797|  85.6k|            i4_ref_nnz >>= 1;
  798|       |
  799|  85.6k|        } /* end of loop over all the blocks */
  800|  21.4k|    }
  801|  22.8k|    return;
  802|  22.8k|}
isvcd_ref_layer_ptr_incr:
  839|   337k|{
  840|   337k|    WORD32 i4_x, i4_y;
  841|   337k|    WORD32 i4_x_idx, i4_y_idx;
  842|   337k|    WORD32 i4_prev_x, i4_prev_y;
  843|   337k|    WORD32 i4_const_val;
  844|   337k|    WORD32 i4_pos_x, i4_pos_y;
  845|   337k|    WORD32 i4_trans_size;
  846|   337k|    WORD32 i4_act_ary_wd, i4_act_ary_ht;
  847|   337k|    WORD32 i4_and_const;
  848|   337k|    UWORD8 *pu1_incr_x, *pu1_incr_y;
  849|   337k|    WORD32 i4_mb_sft;
  850|   337k|    WORD32 i4_mb_x, i4_mb_y;
  851|   337k|    WORD8 *pi1_ref_mb_modes_incr;
  852|   337k|    WORD8 *pi1_ref_mb_modes_incr_temp;
  853|   337k|    inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
  854|   337k|    WORD32 i4_mb_x_strt, i4_mb_y_strt;
  855|   337k|    WORD32 i4_mb_quard1_part_x, i4_mb_quard1_part_y;
  856|   337k|    WORD32 i4_x_ref, i4_y_ref;
  857|   337k|    WORD32 i4_tx_size, i4_tx_size_q0, i4_tx_size_q1, i4_tx_size_q2, i4_tx_size_q3;
  858|   337k|    WORD8 i1_mb_mode_q0, i1_mb_mode_q1, i1_mb_mode_q2, i1_mb_mode_q3;
  859|   337k|    WORD32 i4_mb_wd;
  860|   337k|    WORD32 i4_mb_ht;
  861|       |
  862|   337k|    i4_mb_wd = MB_WIDTH >> i4_chroma_flag;
  ------------------
  |  |   67|   337k|#define MB_WIDTH 16
  ------------------
  863|   337k|    i4_mb_ht = MB_HEIGHT >> i4_chroma_flag;
  ------------------
  |  |   68|   337k|#define MB_HEIGHT 16
  ------------------
  864|       |
  865|       |    /* Memset to 1 the increment buffers */
  866|   337k|    memset(pu1_ref_x_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));
  867|   337k|    memset(pu1_ref_y_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));
  868|       |
  869|       |    /* Initialise actual width and height */
  870|   337k|    i4_act_ary_wd = i4_refary_wd;
  871|   337k|    i4_act_ary_ht = i4_refary_ht;
  872|       |
  873|       |    /* Initialize x and y */
  874|   337k|    i4_x = 0;
  875|   337k|    i4_y = 0;
  876|   337k|    i4_prev_y = 0;
  877|   337k|    i4_mb_sft = (MB_WIDTH_SHIFT - i4_chroma_flag);
  ------------------
  |  |   70|   337k|#define MB_WIDTH_SHIFT 4
  ------------------
  878|       |
  879|       |    /* Loop over all MBs in the reference array */
  880|   337k|    if(0 == i4_chroma_flag)
  ------------------
  |  Branch (880:8): [True: 112k, False: 224k]
  ------------------
  881|   112k|    {
  882|   112k|        i4_x_ref = i4_x_offset + 0;
  883|   112k|        i4_y_ref = i4_y_offset + 0;
  884|   112k|        i4_mb_x_strt = i4_x_ref % i4_mb_wd;
  885|   112k|        i4_mb_y_strt = i4_y_ref % i4_mb_ht;
  886|   112k|        i4_mb_quard1_part_x = i4_mb_wd - i4_mb_x_strt;
  887|   112k|        i4_mb_quard1_part_y = i4_mb_ht - i4_mb_y_strt;
  888|       |
  889|   112k|        if(!(i4_mb_quard1_part_x >= 0))
  ------------------
  |  Branch (889:12): [True: 0, False: 112k]
  ------------------
  890|      0|        {
  891|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  892|      0|        }
  893|   112k|        if(!(i4_mb_quard1_part_y >= 0))
  ------------------
  |  Branch (893:12): [True: 0, False: 112k]
  ------------------
  894|      0|        {
  895|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  896|      0|        }
  897|       |
  898|       |        /* Take care of negative offsets */
  899|   112k|        if(i4_x_ref > 0)
  ------------------
  |  Branch (899:12): [True: 73.8k, False: 38.6k]
  ------------------
  900|  73.8k|        {
  901|  73.8k|            i4_mb_x = (i4_x_ref >> i4_mb_sft);
  902|  73.8k|        }
  903|  38.6k|        else
  904|  38.6k|        {
  905|  38.6k|            i4_mb_x = 0;
  906|  38.6k|        }
  907|   112k|        if(i4_y_ref > 0)
  ------------------
  |  Branch (907:12): [True: 94.9k, False: 17.4k]
  ------------------
  908|  94.9k|        {
  909|  94.9k|            i4_mb_y = (i4_y_ref >> i4_mb_sft);
  910|  94.9k|        }
  911|  17.4k|        else
  912|  17.4k|        {
  913|  17.4k|            i4_mb_y = 0;
  914|  17.4k|        }
  915|       |
  916|       |        /* get the location of the byte which has the current mb mode */
  917|   112k|        pi1_ref_mb_modes_incr = pi1_ref_mb_modes + (i4_mb_y * i4_ref_mode_stride * i4_element_size);
  918|   112k|        pi1_ref_mb_modes_incr += (i4_mb_x * i4_element_size);
  919|   112k|        ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr;
  920|   112k|        i1_mb_mode_q0 = ps_inter_lyr_mb_prms->i1_mb_mode;
  921|   112k|        i4_tx_size_q0 =
  922|   112k|            (i1_mb_mode_q0 <= SVC_INTER_MB)
  ------------------
  |  |  114|   112k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (922:13): [True: 106k, False: 5.95k]
  ------------------
  923|   112k|                ? ((ps_inter_lyr_mb_prms->i1_tx_size < 0) ? 1 : ps_inter_lyr_mb_prms->i1_tx_size)
  ------------------
  |  Branch (923:20): [True: 0, False: 106k]
  ------------------
  924|   112k|                : 1;
  925|       |
  926|   112k|        pi1_ref_mb_modes_incr_temp = pi1_ref_mb_modes_incr;
  927|   112k|        if(i4_mb_quard1_part_x > 0)
  ------------------
  |  Branch (927:12): [True: 112k, False: 0]
  ------------------
  928|   112k|        {
  929|   112k|            pi1_ref_mb_modes_incr_temp = pi1_ref_mb_modes_incr + i4_element_size;
  930|   112k|            ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr_temp;
  931|   112k|            i1_mb_mode_q1 = ps_inter_lyr_mb_prms->i1_mb_mode;
  932|   112k|            i4_tx_size_q1 =
  933|   112k|                (i1_mb_mode_q1 <= SVC_INTER_MB)
  ------------------
  |  |  114|   112k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (933:17): [True: 105k, False: 7.06k]
  ------------------
  934|   112k|                    ? ((ps_inter_lyr_mb_prms->i1_tx_size < 0) ? 1
  ------------------
  |  Branch (934:24): [True: 34.9k, False: 70.4k]
  ------------------
  935|   105k|                                                              : ps_inter_lyr_mb_prms->i1_tx_size)
  936|   112k|                    : 1;
  937|   112k|        }
  938|       |
  939|   112k|        if(i4_mb_quard1_part_y > 0)
  ------------------
  |  Branch (939:12): [True: 112k, False: 0]
  ------------------
  940|   112k|        {
  941|   112k|            pi1_ref_mb_modes_incr_temp =
  942|   112k|                pi1_ref_mb_modes_incr + (i4_ref_mode_stride * i4_element_size);
  943|   112k|            ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr_temp;
  944|   112k|            i1_mb_mode_q2 = ps_inter_lyr_mb_prms->i1_mb_mode;
  945|   112k|            i4_tx_size_q2 =
  946|   112k|                (i1_mb_mode_q2 <= SVC_INTER_MB)
  ------------------
  |  |  114|   112k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (946:17): [True: 105k, False: 6.67k]
  ------------------
  947|   112k|                    ? ((ps_inter_lyr_mb_prms->i1_tx_size < 0) ? 1
  ------------------
  |  Branch (947:24): [True: 6.07k, False: 99.7k]
  ------------------
  948|   105k|                                                              : ps_inter_lyr_mb_prms->i1_tx_size)
  949|   112k|                    : 1;
  950|   112k|        }
  951|       |
  952|   112k|        if((i4_mb_quard1_part_x > 0) && (i4_mb_quard1_part_y > 0))
  ------------------
  |  Branch (952:12): [True: 112k, False: 0]
  |  Branch (952:41): [True: 112k, False: 0]
  ------------------
  953|   112k|        {
  954|   112k|            pi1_ref_mb_modes_incr_temp =
  955|   112k|                pi1_ref_mb_modes_incr + (i4_ref_mode_stride * i4_element_size) + i4_element_size;
  956|   112k|            ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr_temp;
  957|   112k|            i1_mb_mode_q3 = ps_inter_lyr_mb_prms->i1_mb_mode;
  958|   112k|            i4_tx_size_q3 =
  959|   112k|                (i1_mb_mode_q3 <= SVC_INTER_MB)
  ------------------
  |  |  114|   112k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (959:17): [True: 106k, False: 5.90k]
  ------------------
  960|   112k|                    ? ((ps_inter_lyr_mb_prms->i1_tx_size < 0) ? 1
  ------------------
  |  Branch (960:24): [True: 39.7k, False: 66.8k]
  ------------------
  961|   106k|                                                              : ps_inter_lyr_mb_prms->i1_tx_size)
  962|   112k|                    : 1;
  963|   112k|        }
  964|       |
  965|   112k|        do
  966|   449k|        {
  967|   449k|            WORD32 i4_idx;
  968|   449k|            WORD32 i4_wd, i4_ht;
  969|   449k|            WORD32 i4_max_pos_x, i4_max_pos_y;
  970|       |
  971|   449k|            i4_prev_x = i4_x;
  972|   449k|            i4_x_ref = i4_x_offset + i4_x;
  973|   449k|            i4_y_ref = i4_y_offset + i4_y;
  974|   449k|            i4_tx_size = i4_tx_size_q0;
  975|   449k|            if(i4_x >= i4_mb_quard1_part_x)
  ------------------
  |  Branch (975:16): [True: 147k, False: 302k]
  ------------------
  976|   147k|            {
  977|   147k|                if(i4_y < i4_mb_quard1_part_y)
  ------------------
  |  Branch (977:20): [True: 85.6k, False: 62.0k]
  ------------------
  978|  85.6k|                {
  979|  85.6k|                    i4_tx_size = i4_tx_size_q1;
  980|  85.6k|                }
  981|  62.0k|                else if(i4_y >= i4_mb_quard1_part_y)
  ------------------
  |  Branch (981:25): [True: 62.0k, False: 0]
  ------------------
  982|  62.0k|                {
  983|  62.0k|                    i4_tx_size = i4_tx_size_q3;
  984|  62.0k|                }
  985|   147k|            }
  986|   302k|            else if(i4_x < i4_mb_quard1_part_x)
  ------------------
  |  Branch (986:21): [True: 302k, False: 0]
  ------------------
  987|   302k|            {
  988|   302k|                if(i4_y >= i4_mb_quard1_part_y)
  ------------------
  |  Branch (988:20): [True: 127k, False: 174k]
  ------------------
  989|   127k|                {
  990|   127k|                    i4_tx_size = i4_tx_size_q2;
  991|   127k|                }
  992|   302k|            }
  993|       |
  994|       |            /* Get the transform size as 4 or 8 */
  995|   449k|            i4_trans_size = ((i4_tx_size + 1) << 2);
  996|   449k|            i4_const_val = i4_trans_size - 1;
  997|   449k|            i4_and_const = i4_const_val;
  998|       |
  999|       |            /* Fill horizontal tx block edges of current reference mb with 0 */
 1000|   449k|            pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
 1001|   449k|            pu1_incr_x += (i4_y * i4_refary_wd);
 1002|   449k|            i4_ht = (16 - (i4_y_ref & 0xF));
 1003|   449k|            i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);
  ------------------
  |  |   61|   449k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 224k, False: 224k]
  |  |  ------------------
  ------------------
 1004|       |
 1005|   449k|            i4_x_idx = i4_x;
 1006|   449k|            i4_pos_x = i4_x_ref & 0xF;
 1007|   449k|            i4_max_pos_x = 16;
 1008|   449k|            i4_x += (16 - i4_pos_x);
 1009|       |
 1010|       |            /* Get the transform block edge pos */
 1011|   449k|            i4_idx = (i4_const_val - (i4_pos_x & i4_and_const));
 1012|   449k|            i4_x_idx += i4_idx;
 1013|       |
 1014|  1.09M|            while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
  ------------------
  |  Branch (1014:19): [True: 872k, False: 224k]
  |  Branch (1014:48): [True: 647k, False: 224k]
  ------------------
 1015|   647k|            {
 1016|   647k|                WORD32 i4_i;
 1017|   647k|                UWORD8 *pu1_incr;
 1018|       |
 1019|   647k|                pu1_incr = pu1_incr_x + i4_idx;
 1020|  4.59M|                for(i4_i = 0; i4_i < i4_ht; i4_i++)
  ------------------
  |  Branch (1020:31): [True: 3.94M, False: 647k]
  ------------------
 1021|  3.94M|                {
 1022|       |                    /* Fill the block edge with 0s */
 1023|  3.94M|                    *pu1_incr = 0;
 1024|  3.94M|                    pu1_incr += i4_refary_wd;
 1025|  3.94M|                }
 1026|       |
 1027|       |                /* Updates */
 1028|   647k|                i4_pos_x += i4_trans_size;
 1029|   647k|                pu1_incr_x += i4_trans_size;
 1030|   647k|                i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
  ------------------
  |  |   61|   647k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 424k, False: 223k]
  |  |  ------------------
  ------------------
 1031|   647k|            }
 1032|       |
 1033|       |            /* Fill vertical tx block edges of current reference mb with 0 */
 1034|   449k|            pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
 1035|   449k|            pu1_incr_y += (i4_y * i4_refary_wd);
 1036|   449k|            i4_wd = (16 - (i4_x_ref & 0xF));
 1037|   449k|            i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);
  ------------------
  |  |   61|   449k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 224k, False: 224k]
  |  |  ------------------
  ------------------
 1038|   449k|            i4_y_idx = i4_y;
 1039|   449k|            i4_pos_y = i4_y_ref & 0xF;
 1040|   449k|            i4_max_pos_y = 16;
 1041|   449k|            i4_y += (16 - i4_pos_y);
 1042|       |
 1043|       |            /* Get the transform block edge pos */
 1044|   449k|            i4_idx = (i4_const_val - (i4_pos_y & i4_and_const));
 1045|   449k|            i4_y_idx += i4_idx;
 1046|       |
 1047|  1.06M|            while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
  ------------------
  |  Branch (1047:19): [True: 838k, False: 224k]
  |  Branch (1047:48): [True: 613k, False: 224k]
  ------------------
 1048|   613k|            {
 1049|   613k|                WORD32 i4_i;
 1050|   613k|                UWORD8 *pu1_incr;
 1051|       |
 1052|   613k|                pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;
 1053|  4.52M|                for(i4_i = 0; i4_i < i4_wd; i4_i++)
  ------------------
  |  Branch (1053:31): [True: 3.90M, False: 613k]
  ------------------
 1054|  3.90M|                {
 1055|       |                    /* Fill the block edge with 0s */
 1056|  3.90M|                    *pu1_incr = 0;
 1057|  3.90M|                    pu1_incr++;
 1058|  3.90M|                }
 1059|       |
 1060|       |                /* Updates */
 1061|   613k|                i4_pos_y += i4_trans_size;
 1062|   613k|                pu1_incr_y += i4_trans_size * i4_refary_wd;
 1063|   613k|                i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
  ------------------
  |  |   61|   613k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 389k, False: 224k]
  |  |  ------------------
  ------------------
 1064|   613k|            }
 1065|       |
 1066|       |            /* Loop updates */
 1067|   449k|            if(i4_x < i4_act_ary_wd)
  ------------------
  |  Branch (1067:16): [True: 224k, False: 224k]
  ------------------
 1068|   224k|            {
 1069|   224k|                i4_y = i4_prev_y;
 1070|   224k|            }
 1071|   224k|            else if(i4_y < i4_act_ary_ht)
  ------------------
  |  Branch (1071:21): [True: 112k, False: 112k]
  ------------------
 1072|   112k|            {
 1073|   112k|                i4_prev_y = i4_y;
 1074|   112k|                i4_x = 0;
 1075|   112k|            }
 1076|       |
 1077|   449k|        } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
  ------------------
  |  Branch (1077:17): [True: 337k, False: 112k]
  |  Branch (1077:43): [True: 0, False: 112k]
  ------------------
 1078|       |
 1079|   112k|    } /* End of if 0 == i4_chroma_flag */
 1080|   224k|    else
 1081|   224k|    {
 1082|       |        /* Set the transform size as 4 */
 1083|   224k|        i4_trans_size = 4;
 1084|   224k|        i4_const_val = 3;
 1085|       |
 1086|   224k|        do
 1087|   899k|        {
 1088|   899k|            WORD32 i4_x_ref, i4_y_ref;
 1089|   899k|            WORD32 i4_idx;
 1090|   899k|            WORD32 i4_wd, i4_ht;
 1091|   899k|            WORD32 i4_max_pos_x, i4_max_pos_y;
 1092|       |
 1093|   899k|            i4_prev_x = i4_x;
 1094|   899k|            i4_x_ref = i4_x_offset + i4_x;
 1095|   899k|            i4_y_ref = i4_y_offset + i4_y;
 1096|       |
 1097|       |            /* Fill horizontal tx block edges of current reference mb with 0 */
 1098|   899k|            pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
 1099|   899k|            pu1_incr_x += (i4_y * i4_refary_wd);
 1100|   899k|            i4_ht = (8 - (i4_y_ref & 0x7));
 1101|   899k|            i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);
  ------------------
  |  |   61|   899k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 449k, False: 449k]
  |  |  ------------------
  ------------------
 1102|   899k|            i4_x_idx = i4_x;
 1103|   899k|            i4_pos_x = i4_x_ref & 0x7;
 1104|   899k|            i4_max_pos_x = 8;
 1105|   899k|            i4_x += (8 - i4_pos_x);
 1106|       |
 1107|       |            /* Get the transform block edge pos */
 1108|   899k|            i4_idx = (i4_const_val - (i4_pos_x & 0x3));
 1109|   899k|            i4_x_idx += i4_idx;
 1110|       |
 1111|  1.64M|            while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
  ------------------
  |  Branch (1111:19): [True: 1.19M, False: 449k]
  |  Branch (1111:48): [True: 744k, False: 449k]
  ------------------
 1112|   744k|            {
 1113|   744k|                WORD32 i4_i;
 1114|   744k|                UWORD8 *pu1_incr;
 1115|       |
 1116|   744k|                pu1_incr = pu1_incr_x + i4_idx;
 1117|  3.22M|                for(i4_i = 0; i4_i < i4_ht; i4_i++)
  ------------------
  |  Branch (1117:31): [True: 2.47M, False: 744k]
  ------------------
 1118|  2.47M|                {
 1119|       |                    /* Fill the block edge with 0s */
 1120|  2.47M|                    *pu1_incr = 0;
 1121|  2.47M|                    pu1_incr += i4_refary_wd;
 1122|  2.47M|                }
 1123|       |
 1124|       |                /* Updates */
 1125|   744k|                i4_pos_x += i4_trans_size;
 1126|   744k|                pu1_incr_x += i4_trans_size;
 1127|   744k|                i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
  ------------------
  |  |   61|   744k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 294k, False: 449k]
  |  |  ------------------
  ------------------
 1128|   744k|            }
 1129|       |
 1130|       |            /* Fill vertical tx block edges of current reference mb with 0 */
 1131|   899k|            pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
 1132|   899k|            pu1_incr_y += (i4_y * i4_refary_wd);
 1133|   899k|            i4_wd = (8 - (i4_x_ref & 0x7));
 1134|   899k|            i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);
  ------------------
  |  |   61|   899k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 449k, False: 449k]
  |  |  ------------------
  ------------------
 1135|   899k|            i4_y_idx = i4_y;
 1136|   899k|            i4_pos_y = i4_y_ref & 0x7;
 1137|   899k|            i4_max_pos_y = 8;
 1138|   899k|            i4_y += (8 - i4_pos_y);
 1139|       |
 1140|       |            /* Get the transform block edge pos */
 1141|   899k|            i4_idx = (i4_const_val - (i4_pos_y & 0x3));
 1142|   899k|            i4_y_idx += i4_idx;
 1143|       |
 1144|  1.64M|            while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
  ------------------
  |  Branch (1144:19): [True: 1.19M, False: 449k]
  |  Branch (1144:48): [True: 747k, False: 449k]
  ------------------
 1145|   747k|            {
 1146|   747k|                WORD32 i4_i;
 1147|   747k|                UWORD8 *pu1_incr;
 1148|       |
 1149|   747k|                pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;
 1150|  3.23M|                for(i4_i = 0; i4_i < i4_wd; i4_i++)
  ------------------
  |  Branch (1150:31): [True: 2.48M, False: 747k]
  ------------------
 1151|  2.48M|                {
 1152|       |                    /* Fill the block edge with 0s */
 1153|  2.48M|                    *pu1_incr = 0;
 1154|  2.48M|                    pu1_incr++;
 1155|  2.48M|                }
 1156|       |
 1157|       |                /* Updates */
 1158|   747k|                i4_pos_y += i4_trans_size;
 1159|   747k|                pu1_incr_y += i4_trans_size * i4_refary_wd;
 1160|   747k|                i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
  ------------------
  |  |   61|   747k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 297k, False: 449k]
  |  |  ------------------
  ------------------
 1161|   747k|            }
 1162|       |
 1163|       |            /* Loop updates */
 1164|   899k|            if(i4_x < i4_act_ary_wd)
  ------------------
  |  Branch (1164:16): [True: 449k, False: 449k]
  ------------------
 1165|   449k|            {
 1166|   449k|                i4_y = i4_prev_y;
 1167|   449k|            }
 1168|   449k|            else if(i4_y < i4_act_ary_ht)
  ------------------
  |  Branch (1168:21): [True: 224k, False: 224k]
  ------------------
 1169|   224k|            {
 1170|   224k|                i4_prev_y = i4_y;
 1171|   224k|                i4_x = 0;
 1172|   224k|            }
 1173|       |
 1174|   899k|        } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
  ------------------
  |  Branch (1174:17): [True: 674k, False: 224k]
  |  Branch (1174:43): [True: 0, False: 224k]
  ------------------
 1175|       |
 1176|   224k|    } /* End of chroma */
 1177|   337k|    return OK;
  ------------------
  |  |  114|   337k|#define OK        0
  ------------------
 1178|       |
 1179|   337k|} /* End of "isvcd_ref_layer_ptr_incr" */
isvcd_residual_reflayer_const_non_boundary_mb:
 1186|  52.3k|{
 1187|  52.3k|    WORD32 i4_x_ref, i4_y_ref;
 1188|  52.3k|    WORD32 i4_x, i4_y;
 1189|  52.3k|    WORD32 i4_ref_mb_type;
 1190|  52.3k|    WORD16 *pi2_ref_data_byte;
 1191|  52.3k|    WORD16 *pi2_ref_array_temp;
 1192|       |
 1193|   491k|    for(i4_y = 0; i4_y < i4_refarray_ht; i4_y++)
  ------------------
  |  Branch (1193:19): [True: 438k, False: 52.3k]
  ------------------
 1194|   438k|    {
 1195|  4.32M|        for(i4_x = 0; i4_x < i4_refarray_wd; i4_x++)
  ------------------
  |  Branch (1195:23): [True: 3.89M, False: 438k]
  ------------------
 1196|  3.89M|        {
 1197|  3.89M|            i4_y_ref = i4_y;
 1198|  3.89M|            i4_x_ref = i4_x;
 1199|       |
 1200|  3.89M|            i4_ref_mb_type = i4_ref_mb_type_q0;
 1201|  3.89M|            if(i4_x >= i4_mb_quard1_part_x)
  ------------------
  |  Branch (1201:16): [True: 1.94M, False: 1.94M]
  ------------------
 1202|  1.94M|            {
 1203|  1.94M|                if(i4_y < i4_mb_quard1_part_y)
  ------------------
  |  Branch (1203:20): [True: 981k, False: 964k]
  ------------------
 1204|   981k|                {
 1205|   981k|                    i4_ref_mb_type = i4_ref_mb_type_q1;
 1206|   981k|                }
 1207|   964k|                else if(i4_y >= i4_mb_quard1_part_y)
  ------------------
  |  Branch (1207:25): [True: 964k, False: 0]
  ------------------
 1208|   964k|                {
 1209|   964k|                    i4_ref_mb_type = i4_ref_mb_type_q3;
 1210|   964k|                }
 1211|  1.94M|            }
 1212|  1.94M|            else if(i4_x < i4_mb_quard1_part_x)
  ------------------
  |  Branch (1212:21): [True: 1.94M, False: 0]
  ------------------
 1213|  1.94M|            {
 1214|  1.94M|                if(i4_y >= i4_mb_quard1_part_y)
  ------------------
  |  Branch (1214:20): [True: 964k, False: 981k]
  ------------------
 1215|   964k|                {
 1216|   964k|                    i4_ref_mb_type = i4_ref_mb_type_q2;
 1217|   964k|                }
 1218|  1.94M|            }
 1219|       |
 1220|       |            /****************************************************************/
 1221|       |            /* Reference layer Residual Buffer is maintained as 8-bit data  */
 1222|       |            /* Buffer and 1-bit sign bit packed buffer. Sign Byte is read   */
 1223|       |            /* and sign of the data sample is extracted depending upon bit  */
 1224|       |            /* postition.                                                   */
 1225|       |            /****************************************************************/
 1226|       |
 1227|       |            /* update the buffer pointers to appropriate locations */
 1228|  3.89M|            pi2_ref_array_temp = pi2_ref_array + i4_x;
 1229|  3.89M|            pi2_ref_array_temp += i4_y * i4_refarray_wd;
 1230|       |
 1231|       |            /* extract the residual value and fill the buffer */
 1232|  3.89M|            if(SVC_INTER_MB == i4_ref_mb_type)
  ------------------
  |  |  114|  3.89M|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (1232:16): [True: 3.49M, False: 393k]
  ------------------
 1233|  3.49M|            {
 1234|       |                /* derive the reference data pointers */
 1235|  3.49M|                pi2_ref_data_byte = pi2_inp_data + (i4_x_ref << i4_chroma_flag);
 1236|  3.49M|                pi2_ref_data_byte += i4_y_ref * i4_inp_data_stride;
 1237|       |
 1238|       |                /* store the residual value */
 1239|  3.49M|                *pi2_ref_array_temp = (WORD16) (*pi2_ref_data_byte);
 1240|  3.49M|            }
 1241|   393k|            else
 1242|   393k|            {
 1243|       |                /* if non inter MB then store the 0 */
 1244|   393k|                *pi2_ref_array_temp = 0;
 1245|   393k|            }
 1246|  3.89M|        }
 1247|   438k|    }
 1248|  52.3k|}
isvcd_residual_reflayer_const_boundary_mb:
 1258|   245k|{
 1259|   245k|    WORD32 i4_x_ref, i4_y_ref;
 1260|   245k|    WORD32 i4_x, i4_y;
 1261|   245k|    WORD16 *pi2_ref_data_byte;
 1262|   245k|    WORD16 *pi2_ref_array_temp;
 1263|       |
 1264|       |    /*Quard 0*/
 1265|  1.68M|    for(i4_y = 0; i4_y < i4_mb_quard1_part_y; i4_y++)
  ------------------
  |  Branch (1265:19): [True: 1.43M, False: 245k]
  ------------------
 1266|  1.43M|    {
 1267|  14.7M|        for(i4_x = 0; i4_x < i4_mb_quard1_part_x; i4_x++)
  ------------------
  |  Branch (1267:23): [True: 13.2M, False: 1.43M]
  ------------------
 1268|  13.2M|        {
 1269|  13.2M|            i4_y_ref = MAX(0, MIN(i4_ref_ht - 1, i4_y + i4_y_offset));
  ------------------
  |  |   60|  26.5M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 387k, False: 12.8M]
  |  |  |  Branch (60:24): [True: 0, False: 13.2M]
  |  |  |  Branch (60:32): [True: 0, False: 12.8M]
  |  |  ------------------
  ------------------
 1270|  13.2M|            i4_x_ref = MAX(0, MIN(i4_ref_wd - 1, i4_x + i4_x_offset));
  ------------------
  |  |   60|  26.5M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 625k, False: 12.6M]
  |  |  |  Branch (60:24): [True: 0, False: 13.2M]
  |  |  |  Branch (60:32): [True: 0, False: 12.6M]
  |  |  ------------------
  ------------------
 1271|       |
 1272|       |            /****************************************************************/
 1273|       |            /* Reference layer Residual Buffer is maintained as 8-bit data  */
 1274|       |            /* Buffer and 1-bit sign bit packed buffer. Sign Byte is read   */
 1275|       |            /* and sign of the data sample is extracted depending upon bit  */
 1276|       |            /* postition.                                                   */
 1277|       |            /****************************************************************/
 1278|       |
 1279|       |            /* update the buffer pointers to appropriate locations */
 1280|  13.2M|            pi2_ref_array_temp = pi2_ref_array + i4_x;
 1281|  13.2M|            pi2_ref_array_temp += i4_y * i4_refarray_wd;
 1282|       |
 1283|       |            /* extract the residual value and fill the buffer */
 1284|  13.2M|            if(SVC_INTER_MB == i4_ref_mb_type_q0)
  ------------------
  |  |  114|  13.2M|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (1284:16): [True: 12.9M, False: 370k]
  ------------------
 1285|  12.9M|            {
 1286|       |                /* input pointer will be pointing to (xoffset,yoffset) */
 1287|       |                /* So subtract the correction to reference location    */
 1288|  12.9M|                if(0 <= i4_x_offset)
  ------------------
  |  Branch (1288:20): [True: 5.66M, False: 7.24M]
  ------------------
 1289|  5.66M|                {
 1290|       |                    /* if only inside frame dimension */
 1291|  5.66M|                    i4_x_ref = i4_x_ref - i4_x_offset;
 1292|  5.66M|                }
 1293|       |
 1294|  12.9M|                if(0 <= i4_y_offset)
  ------------------
  |  Branch (1294:20): [True: 8.36M, False: 4.54M]
  ------------------
 1295|  8.36M|                {
 1296|       |                    /* if only inside frame dimension */
 1297|  8.36M|                    i4_y_ref = i4_y_ref - i4_y_offset;
 1298|  8.36M|                }
 1299|       |                /* derive the reference data pointers */
 1300|       |
 1301|  12.9M|                pi2_ref_data_byte = pi2_inp_data + (i4_x_ref << i4_chroma_flag);
 1302|  12.9M|                pi2_ref_data_byte += i4_y_ref * i4_inp_data_stride;
 1303|       |
 1304|       |                /* store the residual value */
 1305|  12.9M|                *pi2_ref_array_temp = (WORD16) (*pi2_ref_data_byte);
 1306|  12.9M|            }
 1307|   370k|            else
 1308|   370k|            {
 1309|       |                /* if non inter MB then store the 0 */
 1310|   370k|                *pi2_ref_array_temp = 0;
 1311|   370k|            }
 1312|  13.2M|        }
 1313|  1.43M|    }
 1314|       |
 1315|       |    /*Quard 1*/
 1316|  1.68M|    for(i4_y = 0; i4_y < i4_mb_quard1_part_y; i4_y++)
  ------------------
  |  Branch (1316:19): [True: 1.43M, False: 245k]
  ------------------
 1317|  1.43M|    {
 1318|  3.10M|        for(i4_x = i4_mb_quard1_part_x; i4_x < i4_refarray_wd; i4_x++)
  ------------------
  |  Branch (1318:41): [True: 1.66M, False: 1.43M]
  ------------------
 1319|  1.66M|        {
 1320|  1.66M|            i4_y_ref = MAX(0, MIN(i4_ref_ht - 1, i4_y + i4_y_offset));
  ------------------
  |  |   60|  3.33M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 89.5k, False: 1.57M]
  |  |  |  Branch (60:24): [True: 0, False: 1.66M]
  |  |  |  Branch (60:32): [True: 0, False: 1.57M]
  |  |  ------------------
  ------------------
 1321|  1.66M|            i4_x_ref = MAX(0, MIN(i4_ref_wd - 1, i4_x + i4_x_offset));
  ------------------
  |  |   60|  3.33M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 1.66M]
  |  |  |  Branch (60:24): [True: 563k, False: 1.10M]
  |  |  |  Branch (60:32): [True: 563k, False: 1.10M]
  |  |  ------------------
  ------------------
 1322|       |
 1323|       |            /****************************************************************/
 1324|       |            /* Reference layer Residual Buffer is maintained as 8-bit data  */
 1325|       |            /* Buffer and 1-bit sign bit packed buffer. Sign Byte is read   */
 1326|       |            /* and sign of the data sample is extracted depending upon bit  */
 1327|       |            /* postition.                                                   */
 1328|       |            /****************************************************************/
 1329|       |
 1330|       |            /* update the buffer pointers to appropriate locations */
 1331|  1.66M|            pi2_ref_array_temp = pi2_ref_array + i4_x;
 1332|  1.66M|            pi2_ref_array_temp += i4_y * i4_refarray_wd;
 1333|       |
 1334|       |            /* extract the residual value and fill the buffer */
 1335|  1.66M|            if(SVC_INTER_MB == i4_ref_mb_type_q1)
  ------------------
  |  |  114|  1.66M|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (1335:16): [True: 1.56M, False: 96.7k]
  ------------------
 1336|  1.56M|            {
 1337|       |                /* input pointer will be pointing to (xoffset,yoffset) */
 1338|       |                /* So subtract the correction to reference location    */
 1339|  1.56M|                if(0 <= i4_x_offset)
  ------------------
  |  Branch (1339:20): [True: 1.56M, False: 0]
  ------------------
 1340|  1.56M|                {
 1341|       |                    /* if only inside frame dimension */
 1342|  1.56M|                    i4_x_ref = i4_x_ref - i4_x_offset;
 1343|  1.56M|                }
 1344|  1.56M|                if(0 <= i4_y_offset)
  ------------------
  |  Branch (1344:20): [True: 583k, False: 985k]
  ------------------
 1345|   583k|                {
 1346|       |                    /* if only inside frame dimension */
 1347|   583k|                    i4_y_ref = i4_y_ref - i4_y_offset;
 1348|   583k|                }
 1349|       |                /* derive the reference data pointers */
 1350|       |
 1351|  1.56M|                pi2_ref_data_byte = pi2_inp_data + (i4_x_ref << i4_chroma_flag);
 1352|  1.56M|                pi2_ref_data_byte += i4_y_ref * i4_inp_data_stride;
 1353|       |
 1354|       |                /* store the residual value */
 1355|  1.56M|                *pi2_ref_array_temp = (WORD16) (*pi2_ref_data_byte);
 1356|  1.56M|            }
 1357|  96.7k|            else
 1358|  96.7k|            {
 1359|       |                /* if non inter MB then store the 0 */
 1360|  96.7k|                *pi2_ref_array_temp = 0;
 1361|  96.7k|            }
 1362|  1.66M|        }
 1363|  1.43M|    }
 1364|       |
 1365|       |    /*Quard 2*/
 1366|   995k|    for(i4_y = i4_mb_quard1_part_y; i4_y < i4_refarray_ht; i4_y++)
  ------------------
  |  Branch (1366:37): [True: 749k, False: 245k]
  ------------------
 1367|   749k|    {
 1368|  8.32M|        for(i4_x = 0; i4_x < i4_mb_quard1_part_x; i4_x++)
  ------------------
  |  Branch (1368:23): [True: 7.57M, False: 749k]
  ------------------
 1369|  7.57M|        {
 1370|  7.57M|            i4_y_ref = MAX(0, MIN(i4_ref_ht - 1, i4_y + i4_y_offset));
  ------------------
  |  |   60|  15.1M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 7.57M]
  |  |  |  Branch (60:24): [True: 136k, False: 7.43M]
  |  |  |  Branch (60:32): [True: 136k, False: 7.43M]
  |  |  ------------------
  ------------------
 1371|  7.57M|            i4_x_ref = MAX(0, MIN(i4_ref_wd - 1, i4_x + i4_x_offset));
  ------------------
  |  |   60|  15.1M|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 385k, False: 7.18M]
  |  |  |  Branch (60:24): [True: 0, False: 7.57M]
  |  |  |  Branch (60:32): [True: 0, False: 7.18M]
  |  |  ------------------
  ------------------
 1372|       |
 1373|       |            /****************************************************************/
 1374|       |            /* Reference layer Residual Buffer is maintained as 8-bit data  */
 1375|       |            /* Buffer and 1-bit sign bit packed buffer. Sign Byte is read   */
 1376|       |            /* and sign of the data sample is extracted depending upon bit  */
 1377|       |            /* postition.                                                   */
 1378|       |            /****************************************************************/
 1379|       |
 1380|       |            /* update the buffer pointers to appropriate locations */
 1381|  7.57M|            pi2_ref_array_temp = pi2_ref_array + i4_x;
 1382|  7.57M|            pi2_ref_array_temp += i4_y * i4_refarray_wd;
 1383|       |
 1384|       |            /* extract the residual value and fill the buffer */
 1385|  7.57M|            if(SVC_INTER_MB == i4_ref_mb_type_q2)
  ------------------
  |  |  114|  7.57M|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (1385:16): [True: 7.28M, False: 283k]
  ------------------
 1386|  7.28M|            {
 1387|       |                /* input pointer will be pointing to (xoffset,yoffset) */
 1388|       |                /* So subtract the correction to reference location    */
 1389|  7.28M|                if(0 <= i4_x_offset)
  ------------------
  |  Branch (1389:20): [True: 2.86M, False: 4.42M]
  ------------------
 1390|  2.86M|                {
 1391|       |                    /* if only inside frame dimension */
 1392|  2.86M|                    i4_x_ref = i4_x_ref - i4_x_offset;
 1393|  2.86M|                }
 1394|  7.28M|                if(0 <= i4_y_offset)
  ------------------
  |  Branch (1394:20): [True: 7.28M, False: 0]
  ------------------
 1395|  7.28M|                {
 1396|       |                    /* if only inside frame dimension */
 1397|  7.28M|                    i4_y_ref = i4_y_ref - i4_y_offset;
 1398|  7.28M|                }
 1399|       |                /* derive the reference data pointers */
 1400|  7.28M|                pi2_ref_data_byte = pi2_inp_data + (i4_x_ref << i4_chroma_flag);
 1401|  7.28M|                pi2_ref_data_byte += i4_y_ref * i4_inp_data_stride;
 1402|       |
 1403|       |                /* store the residual value */
 1404|  7.28M|                *pi2_ref_array_temp = (WORD16) (*pi2_ref_data_byte);
 1405|  7.28M|            }
 1406|   283k|            else
 1407|   283k|            {
 1408|       |                /* if non inter MB then store the 0 */
 1409|   283k|                *pi2_ref_array_temp = 0;
 1410|   283k|            }
 1411|  7.57M|        }
 1412|   749k|    }
 1413|       |
 1414|       |    /*Quard 3*/
 1415|   995k|    for(i4_y = i4_mb_quard1_part_y; i4_y < i4_refarray_ht; i4_y++)
  ------------------
  |  Branch (1415:37): [True: 749k, False: 245k]
  ------------------
 1416|   749k|    {
 1417|  1.13M|        for(i4_x = i4_mb_quard1_part_x; i4_x < i4_refarray_wd; i4_x++)
  ------------------
  |  Branch (1417:41): [True: 384k, False: 749k]
  ------------------
 1418|   384k|        {
 1419|   384k|            i4_y_ref = MAX(0, MIN(i4_ref_ht - 1, i4_y + i4_y_offset));
  ------------------
  |  |   60|   769k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 384k]
  |  |  |  Branch (60:24): [True: 31.7k, False: 353k]
  |  |  |  Branch (60:32): [True: 31.7k, False: 353k]
  |  |  ------------------
  ------------------
 1420|   384k|            i4_x_ref = MAX(0, MIN(i4_ref_wd - 1, i4_x + i4_x_offset));
  ------------------
  |  |   60|   769k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 0, False: 384k]
  |  |  |  Branch (60:24): [True: 357k, False: 27.8k]
  |  |  |  Branch (60:32): [True: 357k, False: 27.8k]
  |  |  ------------------
  ------------------
 1421|       |
 1422|       |            /****************************************************************/
 1423|       |            /* Reference layer Residual Buffer is maintained as 8-bit data  */
 1424|       |            /* Buffer and 1-bit sign bit packed buffer. Sign Byte is read   */
 1425|       |            /* and sign of the data sample is extracted depending upon bit  */
 1426|       |            /* postition.                                                   */
 1427|       |            /****************************************************************/
 1428|       |
 1429|       |            /* update the buffer pointers to appropriate locations */
 1430|   384k|            pi2_ref_array_temp = pi2_ref_array + i4_x;
 1431|   384k|            pi2_ref_array_temp += i4_y * i4_refarray_wd;
 1432|       |
 1433|       |            /* extract the residual value and fill the buffer */
 1434|   384k|            if(SVC_INTER_MB == i4_ref_mb_type_q3)
  ------------------
  |  |  114|   384k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  |  Branch (1434:16): [True: 384k, False: 0]
  ------------------
 1435|   384k|            {
 1436|       |                /* input pointer will be pointing to (xoffset,yoffset) */
 1437|       |                /* So subtract the correction to reference location    */
 1438|   384k|                if(0 <= i4_x_offset)
  ------------------
  |  Branch (1438:20): [True: 384k, False: 0]
  ------------------
 1439|   384k|                {
 1440|       |                    /* if only inside frame dimension */
 1441|   384k|                    i4_x_ref = i4_x_ref - i4_x_offset;
 1442|   384k|                }
 1443|   384k|                if(0 <= i4_y_offset)
  ------------------
  |  Branch (1443:20): [True: 384k, False: 0]
  ------------------
 1444|   384k|                {
 1445|       |                    /* if only inside frame dimension */
 1446|   384k|                    i4_y_ref = i4_y_ref - i4_y_offset;
 1447|   384k|                }
 1448|       |                /* derive the reference data pointers */
 1449|   384k|                pi2_ref_data_byte = pi2_inp_data + (i4_x_ref << i4_chroma_flag);
 1450|   384k|                pi2_ref_data_byte += i4_y_ref * i4_inp_data_stride;
 1451|       |
 1452|       |                /* store the residual value */
 1453|   384k|                *pi2_ref_array_temp = (WORD16) (*pi2_ref_data_byte);
 1454|   384k|            }
 1455|      0|            else
 1456|      0|            {
 1457|       |                /* if non inter MB then store the 0 */
 1458|      0|                *pi2_ref_array_temp = 0;
 1459|      0|            }
 1460|   384k|        }
 1461|   749k|    }
 1462|   245k|}
isvcd_residual_reflayer_const:
 1502|   337k|{
 1503|   337k|    residual_sampling_ctxt_t *ps_ctxt;
 1504|   337k|    res_lyr_ctxt *ps_lyr_ctxt;
 1505|   337k|    WORD8 *pi1_ref_mb_modes;
 1506|   337k|    WORD32 i4_ref_mode_stride;
 1507|   337k|    WORD32 i4_element_size;
 1508|   337k|    WORD32 i4_ref_wd;
 1509|   337k|    WORD32 i4_ref_ht;
 1510|   337k|    WORD32 i4_x_offset;
 1511|   337k|    WORD32 i4_y_offset;
 1512|   337k|    WORD32 i4_refarray_wd;
 1513|   337k|    WORD32 i4_refarray_ht;
 1514|   337k|    WORD8 i1_edge_mb;
 1515|   337k|    WORD16 *pi2_ref_array;
 1516|   337k|    WORD32 i4_mb_sft;
 1517|   337k|    WORD32 i4_mb_x, i4_mb_y;
 1518|   337k|    WORD32 i4_mb_x_strt, i4_mb_y_strt;
 1519|   337k|    WORD32 i4_mb_quard1_part_x, i4_mb_quard1_part_y;
 1520|   337k|    WORD8 *pi1_ref_mb_modes_incr;
 1521|   337k|    WORD8 *pi1_ref_mb_modes_incr_temp;
 1522|   337k|    inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
 1523|       |
 1524|   337k|    WORD32 i4_mb_wd;
 1525|   337k|    WORD32 i4_mb_ht;
 1526|   337k|    WORD32 i4_x_ref, i4_y_ref;
 1527|   337k|    WORD32 i4_ref_mb_type_q0, i4_ref_mb_type_q1, i4_ref_mb_type_q2, i4_ref_mb_type_q3;
 1528|   337k|    WORD8 i1_mb_mode_q0, i1_mb_mode_q1, i1_mb_mode_q2, i1_mb_mode_q3;
 1529|   337k|    WORD32 ret;
 1530|       |
 1531|   337k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
 1532|   337k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 1533|   337k|    pi2_ref_array = ps_ctxt->pi2_refarray_buffer;
 1534|       |
 1535|   337k|    pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode->pv_buffer;
 1536|   337k|    i4_ref_mode_stride = ps_ref_mb_mode->i4_num_element_stride;
 1537|   337k|    i4_element_size = ps_ref_mb_mode->i4_element_size;
 1538|       |
 1539|   337k|    if(NULL == pi1_ref_mb_modes)
  ------------------
  |  Branch (1539:8): [True: 0, False: 337k]
  ------------------
 1540|      0|    {
 1541|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1542|      0|    }
 1543|       |
 1544|   337k|    i4_mb_wd = MB_WIDTH >> i4_chroma_flag;
  ------------------
  |  |   67|   337k|#define MB_WIDTH 16
  ------------------
 1545|   337k|    i4_mb_ht = MB_HEIGHT >> i4_chroma_flag;
  ------------------
  |  |   68|   337k|#define MB_HEIGHT 16
  ------------------
 1546|       |
 1547|       |    /* ----------------------------------------------------------------- */
 1548|       |    /* Deriving the parameters required for further processing           */
 1549|       |    /* ----------------------------------------------------------------- */
 1550|   337k|    {
 1551|   337k|        ref_mb_map_t *ps_x_off_len;
 1552|   337k|        ref_mb_map_t *ps_y_off_len;
 1553|   337k|        WORD32 i4_mbaddr_x;
 1554|   337k|        WORD32 i4_mbaddr_y;
 1555|   337k|        WORD32 i4_base_width;
 1556|   337k|        WORD32 i4_base_height;
 1557|   337k|        residual_samp_map_ctxt_t *ps_map_ctxt;
 1558|       |
 1559|   337k|        if(1 == i4_chroma_flag)
  ------------------
  |  Branch (1559:12): [True: 224k, False: 112k]
  ------------------
 1560|   224k|            ps_map_ctxt = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1561|   112k|        else
 1562|   112k|            ps_map_ctxt = &ps_lyr_ctxt->s_luma_map_ctxt;
 1563|       |
 1564|   337k|        i4_mbaddr_y = ps_coord->u2_mb_y;
 1565|   337k|        i4_mbaddr_x = ps_coord->u2_mb_x;
 1566|   337k|        i4_base_width = ps_lyr_ctxt->i4_ref_width;
 1567|   337k|        i4_base_height = ps_lyr_ctxt->i4_ref_height;
 1568|   337k|        i4_ref_wd = i4_base_width >> i4_chroma_flag;
 1569|   337k|        i4_ref_ht = i4_base_height >> i4_chroma_flag;
 1570|       |
 1571|       |        /* --------------------------------------------------------------------- */
 1572|       |        /* Extracting information from the mapping context                       */
 1573|       |        /* --------------------------------------------------------------------- */
 1574|   337k|        ps_x_off_len = ps_map_ctxt->ps_x_offset_length;
 1575|   337k|        ps_y_off_len = ps_map_ctxt->ps_y_offset_length;
 1576|   337k|        i4_x_offset = ps_x_off_len[i4_mbaddr_x].i2_offset;
 1577|   337k|        i4_y_offset = ps_y_off_len[i4_mbaddr_y].i2_offset;
 1578|   337k|        i4_refarray_wd = ps_x_off_len[i4_mbaddr_x].i2_length;
 1579|   337k|        i4_refarray_ht = ps_y_off_len[i4_mbaddr_y].i2_length;
 1580|   337k|    }
 1581|       |
 1582|       |    /* Call the module to fill the increments based on transform blocks */
 1583|   337k|    ret = isvcd_ref_layer_ptr_incr(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size,
 1584|   337k|                                   i4_x_offset, i4_y_offset, i4_refarray_wd, i4_refarray_ht,
 1585|   337k|                                   ps_ctxt->pu1_ref_x_ptr_incr, ps_ctxt->pu1_ref_y_ptr_incr,
 1586|   337k|                                   i4_chroma_flag);
 1587|       |
 1588|   337k|    if(ret != OK)
  ------------------
  |  |  114|   337k|#define OK        0
  ------------------
  |  Branch (1588:8): [True: 0, False: 337k]
  ------------------
 1589|      0|    {
 1590|      0|        return ret;
 1591|      0|    }
 1592|   337k|    i4_mb_sft = (MB_WIDTH_SHIFT - i4_chroma_flag);
  ------------------
  |  |   70|   337k|#define MB_WIDTH_SHIFT 4
  ------------------
 1593|       |
 1594|       |    /* --------------------------------------------------------------------- */
 1595|       |    /* MB Level Resampling for the MB - Pointers sent for MB in both layers  */
 1596|       |    /* This has been written according to the dyadic case                    */
 1597|       |    /* --------------------------------------------------------------------- */
 1598|   337k|    i4_y_ref = MAX(0, MIN(i4_ref_ht - 1, 0 + i4_y_offset));
  ------------------
  |  |   60|   674k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 52.4k, False: 284k]
  |  |  |  Branch (60:24): [True: 0, False: 337k]
  |  |  |  Branch (60:32): [True: 0, False: 284k]
  |  |  ------------------
  ------------------
 1599|   337k|    i4_x_ref = MAX(0, MIN(i4_ref_wd - 1, 0 + i4_x_offset));
  ------------------
  |  |   60|   674k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 115k, False: 221k]
  |  |  |  Branch (60:24): [True: 0, False: 337k]
  |  |  |  Branch (60:32): [True: 0, False: 221k]
  |  |  ------------------
  ------------------
 1600|   337k|    i4_mb_x_strt = i4_x_ref % i4_mb_wd;
 1601|   337k|    i4_mb_y_strt = i4_y_ref % i4_mb_ht;
 1602|       |
 1603|   337k|    i4_mb_quard1_part_x = i4_mb_wd - i4_mb_x_strt;
 1604|   337k|    i4_mb_quard1_part_y = i4_mb_ht - i4_mb_y_strt;
 1605|   337k|    if(!(i4_mb_quard1_part_x >= 0))
  ------------------
  |  Branch (1605:8): [True: 0, False: 337k]
  ------------------
 1606|      0|    {
 1607|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1608|      0|    }
 1609|   337k|    if(!(i4_mb_quard1_part_y >= 0))
  ------------------
  |  Branch (1609:8): [True: 0, False: 337k]
  ------------------
 1610|      0|    {
 1611|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 1612|      0|    }
 1613|       |
 1614|   337k|    i4_mb_x = (i4_x_ref >> i4_mb_sft);
 1615|   337k|    i4_mb_y = (i4_y_ref >> i4_mb_sft);
 1616|       |
 1617|       |    /* get the location of the byte which has the current mb mode */
 1618|   337k|    pi1_ref_mb_modes_incr = pi1_ref_mb_modes + (i4_mb_y * i4_ref_mode_stride * i4_element_size);
 1619|   337k|    pi1_ref_mb_modes_incr += (i4_mb_x * i4_element_size);
 1620|   337k|    ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr;
 1621|   337k|    i1_mb_mode_q0 = ps_inter_lyr_mb_prms->i1_mb_mode;
 1622|   337k|    i1_mb_mode_q1 = i1_mb_mode_q0;
 1623|   337k|    i1_mb_mode_q2 = i1_mb_mode_q0;
 1624|   337k|    i1_mb_mode_q3 = i1_mb_mode_q0;
 1625|       |
 1626|   337k|    pi1_ref_mb_modes_incr_temp = pi1_ref_mb_modes_incr;
 1627|   337k|    if(i4_mb_quard1_part_x > 0)
  ------------------
  |  Branch (1627:8): [True: 337k, False: 0]
  ------------------
 1628|   337k|    {
 1629|   337k|        pi1_ref_mb_modes_incr_temp = pi1_ref_mb_modes_incr + i4_element_size;
 1630|   337k|        ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr_temp;
 1631|   337k|        i1_mb_mode_q1 = ps_inter_lyr_mb_prms->i1_mb_mode;
 1632|   337k|    }
 1633|       |
 1634|   337k|    if(i4_mb_quard1_part_y > 0)
  ------------------
  |  Branch (1634:8): [True: 337k, False: 0]
  ------------------
 1635|   337k|    {
 1636|   337k|        pi1_ref_mb_modes_incr_temp = pi1_ref_mb_modes_incr + (i4_ref_mode_stride * i4_element_size);
 1637|   337k|        ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr_temp;
 1638|   337k|        i1_mb_mode_q2 = ps_inter_lyr_mb_prms->i1_mb_mode;
 1639|   337k|    }
 1640|       |
 1641|   337k|    if((i4_mb_quard1_part_x > 0) && (i4_mb_quard1_part_y > 0))
  ------------------
  |  Branch (1641:8): [True: 337k, False: 0]
  |  Branch (1641:37): [True: 337k, False: 0]
  ------------------
 1642|   337k|    {
 1643|   337k|        pi1_ref_mb_modes_incr_temp =
 1644|   337k|            pi1_ref_mb_modes_incr + (i4_ref_mode_stride * i4_element_size) + i4_element_size;
 1645|   337k|        ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) pi1_ref_mb_modes_incr_temp;
 1646|   337k|        i1_mb_mode_q3 = ps_inter_lyr_mb_prms->i1_mb_mode;
 1647|   337k|    }
 1648|       |
 1649|   337k|    i4_ref_mb_type_q0 = (i1_mb_mode_q0 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   337k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q0 = (i1_mb_mode_q0 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   319k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q0 = (i1_mb_mode_q0 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  115|   355k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  |  Branch (1649:25): [True: 319k, False: 17.8k]
  ------------------
 1650|   337k|    i4_ref_mb_type_q1 = (i1_mb_mode_q1 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   337k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q1 = (i1_mb_mode_q1 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   316k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q1 = (i1_mb_mode_q1 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  115|   358k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  |  Branch (1650:25): [True: 316k, False: 21.1k]
  ------------------
 1651|   337k|    i4_ref_mb_type_q2 = (i1_mb_mode_q2 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   337k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q2 = (i1_mb_mode_q2 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   317k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q2 = (i1_mb_mode_q2 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  115|   357k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  |  Branch (1651:25): [True: 317k, False: 20.0k]
  ------------------
 1652|   337k|    i4_ref_mb_type_q3 = (i1_mb_mode_q3 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   337k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q3 = (i1_mb_mode_q3 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  114|   319k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
                  i4_ref_mb_type_q3 = (i1_mb_mode_q3 <= SVC_INTER_MB) ? SVC_INTER_MB : SVC_INTRA_MB;
  ------------------
  |  |  115|   355k|#define SVC_INTRA_MB (1 << 1)       /*!< P or B MBs decoded or inferred*/
  ------------------
  |  Branch (1652:25): [True: 319k, False: 17.7k]
  ------------------
 1653|       |
 1654|   337k|    i1_edge_mb = (ps_coord->u2_mb_x == 0 || ps_coord->u2_mb_y == 0 ||
  ------------------
  |  Branch (1654:19): [True: 115k, False: 221k]
  |  Branch (1654:45): [True: 35.4k, False: 186k]
  ------------------
 1655|   186k|                  (ps_coord->u2_mb_x == ((ps_lyr_ctxt->i4_curr_width >> MB_WIDTH_SHIFT) - 1)) ||
  ------------------
  |  |   70|   186k|#define MB_WIDTH_SHIFT 4
  ------------------
  |  Branch (1655:19): [True: 87.5k, False: 98.5k]
  ------------------
 1656|  98.5k|                  (ps_coord->u2_mb_y == ((ps_lyr_ctxt->i4_curr_height >> MB_HEIGHT_SHIFT) - 1)));
  ------------------
  |  |   71|  98.5k|#define MB_HEIGHT_SHIFT 4
  ------------------
  |  Branch (1656:19): [True: 6.96k, False: 91.6k]
  ------------------
 1657|   337k|    if(i1_edge_mb)
  ------------------
  |  Branch (1657:8): [True: 245k, False: 91.6k]
  ------------------
 1658|   245k|    {
 1659|   245k|        ps_ctxt->pf_residual_reflayer_const_boundary_mb(
 1660|   245k|            pi2_inp_data, i4_inp_data_stride, pi2_ref_array, i4_refarray_wd, i4_refarray_ht,
 1661|   245k|            i4_ref_wd, i4_ref_ht, i4_x_offset, i4_y_offset, i4_ref_mb_type_q0, i4_ref_mb_type_q1,
 1662|   245k|            i4_ref_mb_type_q2, i4_ref_mb_type_q3, i4_mb_quard1_part_x, i4_mb_quard1_part_y,
 1663|   245k|            i4_chroma_flag);
 1664|   245k|    }
 1665|  91.6k|    else
 1666|  91.6k|    {
 1667|  91.6k|        ps_ctxt->pf_residual_reflayer_const_non_boundary_mb(
 1668|  91.6k|            pi2_inp_data, i4_inp_data_stride, pi2_ref_array, i4_refarray_wd, i4_refarray_ht,
 1669|  91.6k|            i4_ref_mb_type_q0, i4_ref_mb_type_q1, i4_ref_mb_type_q2, i4_ref_mb_type_q3,
 1670|  91.6k|            i4_mb_quard1_part_x, i4_mb_quard1_part_y, i4_chroma_flag);
 1671|  91.6k|    }
 1672|       |    /* store the values into the place holders */
 1673|   337k|    *pi4_refarr_wd = i4_refarray_wd;
 1674|       |
 1675|   337k|    return OK;
  ------------------
  |  |  114|   337k|#define OK        0
  ------------------
 1676|   337k|}
isvcd_interpolate_residual:
 1709|   192k|{
 1710|   192k|    residual_sampling_ctxt_t *ps_ctxt;
 1711|   192k|    residual_samp_map_ctxt_t *ps_map_ctxt;
 1712|   192k|    res_lyr_ctxt *ps_lyr_ctxt;
 1713|   192k|    ref_pixel_map_t *ps_x_pos_phase;
 1714|   192k|    ref_pixel_map_t *ps_y_pos_phase;
 1715|       |
 1716|   192k|    WORD32 i4_x, i4_y;
 1717|   192k|    WORD32 i4_frm_mb_x, i4_frm_mb_y;
 1718|   192k|    WORD32 i4_temp_array_ht;
 1719|   192k|    WORD32 i4_mb_wd;
 1720|   192k|    WORD32 i4_mb_ht;
 1721|   192k|    WORD16 *pi2_ref_array;
 1722|   192k|    UWORD8 *pu1_ref_x_ptr_incr, *pu1_ref_y_ptr_incr;
 1723|       |
 1724|   192k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
 1725|   192k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 1726|   192k|    pi2_ref_array = ps_ctxt->pi2_refarray_buffer;
 1727|   192k|    pu1_ref_x_ptr_incr = ps_ctxt->pu1_ref_x_ptr_incr;
 1728|   192k|    pu1_ref_y_ptr_incr = ps_ctxt->pu1_ref_y_ptr_incr;
 1729|       |
 1730|       |    /* --------------------------------------------------------------------- */
 1731|       |    /* Extracting information from the mapping context                       */
 1732|       |    /* --------------------------------------------------------------------- */
 1733|   192k|    if(1 == i4_chroma_flag)
  ------------------
  |  Branch (1733:8): [True: 128k, False: 64.0k]
  ------------------
 1734|   128k|        ps_map_ctxt = &ps_lyr_ctxt->s_chroma_map_ctxt;
 1735|  64.0k|    else
 1736|  64.0k|        ps_map_ctxt = &ps_lyr_ctxt->s_luma_map_ctxt;
 1737|       |
 1738|   192k|    i4_mb_wd = MB_WIDTH >> i4_chroma_flag;
  ------------------
  |  |   67|   192k|#define MB_WIDTH 16
  ------------------
 1739|   192k|    i4_mb_ht = MB_HEIGHT >> i4_chroma_flag;
  ------------------
  |  |   68|   192k|#define MB_HEIGHT 16
  ------------------
 1740|       |
 1741|   192k|    ps_x_pos_phase = ps_map_ctxt->ps_x_pos_phase;
 1742|   192k|    ps_y_pos_phase = ps_map_ctxt->ps_y_pos_phase;
 1743|   192k|    i4_temp_array_ht = i4_mb_ht;
 1744|   192k|    i4_frm_mb_y = u2_mb_y * i4_mb_ht;
 1745|   192k|    i4_frm_mb_x = u2_mb_x * i4_mb_wd;
 1746|       |
 1747|       |    /* --------------------------------------------------------------------- */
 1748|       |    /* Loop for interpolation                                                */
 1749|       |    /* --------------------------------------------------------------------- */
 1750|  2.24M|    for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (1750:19): [True: 2.04M, False: 192k]
  ------------------
 1751|  2.04M|    {
 1752|  26.6M|        for(i4_x = 0; i4_x < (i4_mb_wd); i4_x++)
  ------------------
  |  Branch (1752:23): [True: 24.5M, False: 2.04M]
  ------------------
 1753|  24.5M|        {
 1754|  24.5M|            WORD32 i4_i;
 1755|  24.5M|            WORD32 i4_y_ref;
 1756|  24.5M|            WORD32 i4_y_phase;
 1757|  24.5M|            WORD32 i4_x_ref;
 1758|  24.5M|            WORD32 i4_x_phase;
 1759|  24.5M|            WORD32 i4_x_ref_round;
 1760|  24.5M|            WORD16 *pi2_out_curr;
 1761|  24.5M|            WORD32 ai4_temp_pred[2] = {0};
 1762|  24.5M|            UWORD8 *pu1_ref_y_ptr_incr_temp;
 1763|  24.5M|            WORD32 *pi4_temp_pred;
 1764|  24.5M|            UWORD8 u1_incr_y;
 1765|  24.5M|            WORD16 i2_res;
 1766|       |
 1767|       |            /* derive the current output pointer */
 1768|  24.5M|            pi2_out_curr = pi2_out + (i4_x << i4_chroma_flag) + (i4_y * i4_out_stride);
 1769|       |
 1770|       |            /* -------------------------------------------------------------- */
 1771|       |            /* Finding the offset                                             */
 1772|       |            /* -------------------------------------------------------------- */
 1773|  24.5M|            i4_y_ref = ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_ref_pos;
 1774|  24.5M|            i4_y_phase = ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_phase;
 1775|  24.5M|            i4_x_ref = ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_ref_pos;
 1776|  24.5M|            i4_x_phase = ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_phase;
 1777|       |
 1778|       |            /* horizontal processing*/
 1779|  73.7M|            for(i4_i = 0; i4_i < 2; i4_i++)
  ------------------
  |  Branch (1779:27): [True: 49.1M, False: 24.5M]
  ------------------
 1780|  49.1M|            {
 1781|  49.1M|                UWORD8 *pu1_ref_x_ptr_incr_temp;
 1782|  49.1M|                UWORD8 u1_incr;
 1783|  49.1M|                WORD16 *pi2_ref_array_1, *pi2_ref_array_2;
 1784|       |
 1785|       |                /* derive appropriate pointers */
 1786|  49.1M|                pu1_ref_x_ptr_incr_temp = pu1_ref_x_ptr_incr + i4_x_ref;
 1787|  49.1M|                pu1_ref_x_ptr_incr_temp += ((i4_y_ref + i4_i) * i4_refarray_wd);
 1788|  49.1M|                u1_incr = *pu1_ref_x_ptr_incr_temp;
 1789|  49.1M|                pi2_ref_array_1 = pi2_ref_array + i4_x_ref;
 1790|  49.1M|                pi2_ref_array_1 += ((i4_y_ref + i4_i) * i4_refarray_wd);
 1791|       |
 1792|  49.1M|                if(!u1_incr)
  ------------------
  |  Branch (1792:20): [True: 15.9M, False: 33.1M]
  ------------------
 1793|  15.9M|                {
 1794|  15.9M|                    pi2_ref_array_1 += (i4_x_phase >> 3);
 1795|  15.9M|                }
 1796|       |
 1797|  49.1M|                pi2_ref_array_2 = pi2_ref_array_1 + u1_incr;
 1798|  49.1M|                ai4_temp_pred[i4_i] =
 1799|  49.1M|                    (16 - i4_x_phase) * (*pi2_ref_array_1) + i4_x_phase * (*pi2_ref_array_2);
 1800|  49.1M|            }
 1801|       |
 1802|       |            /* vertical processing */
 1803|  24.5M|            i4_x_ref_round = (i4_x_ref + (i4_x_phase >> 3));
 1804|  24.5M|            pu1_ref_y_ptr_incr_temp =
 1805|  24.5M|                pu1_ref_y_ptr_incr + i4_x_ref_round + (i4_y_ref * i4_refarray_wd);
 1806|  24.5M|            u1_incr_y = *pu1_ref_y_ptr_incr_temp;
 1807|       |
 1808|  24.5M|            pi4_temp_pred = &ai4_temp_pred[0];
 1809|  24.5M|            if(!u1_incr_y)
  ------------------
  |  Branch (1809:16): [True: 7.99M, False: 16.5M]
  ------------------
 1810|  7.99M|            {
 1811|  7.99M|                pi4_temp_pred += (i4_y_phase >> 3);
 1812|  7.99M|            }
 1813|       |
 1814|  24.5M|            i2_res = (((16 - i4_y_phase) * pi4_temp_pred[0] +
 1815|  24.5M|                       i4_y_phase * pi4_temp_pred[u1_incr_y] + 128) >>
 1816|  24.5M|                      8);
 1817|       |
 1818|       |            /* store back the final residual */
 1819|  24.5M|            *pi2_out_curr = i2_res;
 1820|  24.5M|        } /* end of loop over width */
 1821|  2.04M|    }     /* end of loop over height */
 1822|       |
 1823|   192k|    return;
 1824|   192k|} /* End of Interpolation Function */
isvcd_residual_samp_mb:
 1859|   112k|{
 1860|       |    /* --------------------------------------------------------------------- */
 1861|       |    /* I/O buffer params                                                     */
 1862|       |    /* --------------------------------------------------------------------- */
 1863|   112k|    residual_sampling_ctxt_t *ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
 1864|   112k|    WORD16 *pi2_inp;
 1865|   112k|    WORD16 *pi2_out;
 1866|   112k|    WORD32 i4_inp_stride;
 1867|   112k|    WORD32 i4_out_stride;
 1868|   112k|    WORD32 i4_refarray_wd;
 1869|   112k|    mb_coord_t s_mb_coord = {0};
 1870|   112k|    WORD32 ret;
 1871|   112k|    s_mb_coord.u2_mb_x = u2_mb_x;
 1872|   112k|    s_mb_coord.u2_mb_y = u2_mb_y;
 1873|       |
 1874|       |    /* --------------------------------------------------------------------- */
 1875|       |    /* LUMA PROCESSING                                                        */
 1876|       |    /* --------------------------------------------------------------------- */
 1877|   112k|    pi2_inp = (WORD16 *) ps_ref_luma->pv_buffer;
 1878|   112k|    pi2_out = (WORD16 *) ps_out_luma->pv_buffer;
 1879|   112k|    i4_inp_stride = ps_ref_luma->i4_num_element_stride;
 1880|   112k|    i4_out_stride = ps_out_luma->i4_num_element_stride;
 1881|       |
 1882|       |    /* ------- Constructing refSampleArray ----------------------- */
 1883|   112k|    ret = isvcd_residual_reflayer_const(pv_residual_samp_ctxt, pi2_inp, i4_inp_stride,
 1884|   112k|                                        ps_ref_mb_mode, &i4_refarray_wd, &s_mb_coord, 0);
 1885|       |
 1886|   112k|    if(ret != OK) return ret;
  ------------------
  |  |  114|   112k|#define OK        0
  ------------------
  |  Branch (1886:8): [True: 0, False: 112k]
  ------------------
 1887|       |    /* ---- Interpolation process for Residual prediction     ------ */
 1888|   112k|    ps_ctxt->pf_interpolate_residual(pv_residual_samp_ctxt, pi2_out, i4_out_stride, i4_refarray_wd,
 1889|   112k|                                     s_mb_coord.u2_mb_x, s_mb_coord.u2_mb_y, 0);
 1890|       |
 1891|       |    /* --------------------------------------------------------------------- */
 1892|       |    /* CHROMA PROCESSING                                                       */
 1893|       |    /* --------------------------------------------------------------------- */
 1894|       |    /* CB */
 1895|   112k|    pi2_inp = (WORD16 *) ps_ref_chroma->pv_buffer;
 1896|   112k|    pi2_out = (WORD16 *) ps_out_chroma->pv_buffer;
 1897|   112k|    i4_inp_stride = ps_ref_chroma->i4_num_element_stride;
 1898|   112k|    i4_out_stride = ps_out_chroma->i4_num_element_stride;
 1899|       |
 1900|       |    /* ------- Constructing refSampleArray ----------------------- */
 1901|   112k|    ret = isvcd_residual_reflayer_const(pv_residual_samp_ctxt, pi2_inp, i4_inp_stride,
 1902|   112k|                                        ps_ref_mb_mode, &i4_refarray_wd, &s_mb_coord, 1);
 1903|       |
 1904|   112k|    if(ret != OK) return ret;
  ------------------
  |  |  114|   112k|#define OK        0
  ------------------
  |  Branch (1904:8): [True: 0, False: 112k]
  ------------------
 1905|       |    /* ---- Interpolation process for Residual prediction     ------ */
 1906|   112k|    ps_ctxt->pf_interpolate_residual(pv_residual_samp_ctxt, pi2_out, i4_out_stride, i4_refarray_wd,
 1907|   112k|                                     s_mb_coord.u2_mb_x, s_mb_coord.u2_mb_y, 1);
 1908|       |
 1909|       |    /* CR */
 1910|   112k|    pi2_inp += 1;
 1911|   112k|    pi2_out += 1;
 1912|       |
 1913|       |    /* ------- Constructing refSampleArray ----------------------- */
 1914|   112k|    ret = isvcd_residual_reflayer_const(pv_residual_samp_ctxt, pi2_inp, i4_inp_stride,
 1915|   112k|                                        ps_ref_mb_mode, &i4_refarray_wd, &s_mb_coord, 1);
 1916|       |
 1917|   112k|    if(ret != OK) return ret;
  ------------------
  |  |  114|   112k|#define OK        0
  ------------------
  |  Branch (1917:8): [True: 0, False: 112k]
  ------------------
 1918|       |    /* ---- Interpolation process for Residual prediction --------- */
 1919|   112k|    ps_ctxt->pf_interpolate_residual(pv_residual_samp_ctxt, pi2_out, i4_out_stride, i4_refarray_wd,
 1920|   112k|                                     s_mb_coord.u2_mb_x, s_mb_coord.u2_mb_y, 1);
 1921|   112k|    return OK;
  ------------------
  |  |  114|   112k|#define OK        0
  ------------------
 1922|   112k|}
isvcd_residual_samp_mb_dyadic:
 1957|  44.6k|{
 1958|  44.6k|    residual_sampling_ctxt_t *ps_ctxt;
 1959|  44.6k|    res_lyr_ctxt *ps_lyr_ctxt;
 1960|       |    /* --------------------------------------------------------------------- */
 1961|       |    /* I/O buffer params                                                     */
 1962|       |    /* --------------------------------------------------------------------- */
 1963|  44.6k|    WORD16 *pi2_inp;
 1964|  44.6k|    WORD16 *pi2_out;
 1965|  44.6k|    WORD32 i4_inp_stride;
 1966|  44.6k|    WORD32 i4_out_stride;
 1967|  44.6k|    WORD32 i4_luma_nnz;
 1968|  44.6k|    WORD32 i4_chroma_nnz;
 1969|  44.6k|    WORD32 i4_tx_size;
 1970|       |
 1971|  44.6k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
 1972|  44.6k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
 1973|       |
 1974|       |    /* --------------------------------------------------------------------- */
 1975|       |    /* LUMA PROCESSING                                                        */
 1976|       |    /* --------------------------------------------------------------------- */
 1977|  44.6k|    pi2_inp = (WORD16 *) ps_ref_luma->pv_buffer;
 1978|  44.6k|    pi2_out = (WORD16 *) ps_out_luma->pv_buffer;
 1979|  44.6k|    i4_inp_stride = ps_ref_luma->i4_num_element_stride;
 1980|  44.6k|    i4_out_stride = ps_out_luma->i4_num_element_stride;
 1981|       |
 1982|  44.6k|    {
 1983|  44.6k|        WORD32 i4_offset_x, i4_offset_y;
 1984|  44.6k|        residual_samp_map_ctxt_t *ps_luma_map;
 1985|  44.6k|        ref_mb_map_t *ps_x_off_len_luma;
 1986|  44.6k|        ref_mb_map_t *ps_y_off_len_luma;
 1987|       |
 1988|  44.6k|        ps_luma_map = &ps_lyr_ctxt->s_luma_map_ctxt;
 1989|  44.6k|        ps_x_off_len_luma = ps_luma_map->ps_x_offset_length;
 1990|  44.6k|        ps_y_off_len_luma = ps_luma_map->ps_y_offset_length;
 1991|       |
 1992|       |        /* get the actual offset for the buffers */
 1993|  44.6k|        i4_offset_x = ps_x_off_len_luma[u2_mb_x].i2_offset;
 1994|  44.6k|        i4_offset_y = ps_y_off_len_luma[u2_mb_y].i2_offset;
 1995|       |
 1996|  44.6k|        {
 1997|  44.6k|            inter_lyr_mb_prms_t *ps_inter_lyr_mb_prms;
 1998|  44.6k|            WORD32 i4_mb_x, i4_mb_y;
 1999|  44.6k|            UWORD16 u2_luma_mask = 0x0033;
 2000|  44.6k|            UWORD8 u1_chrm_mask = 0x11;
 2001|  44.6k|            WORD32 i4_luma_rt_sft_amt = 0;
 2002|  44.6k|            WORD32 i4_chrm_rt_sft_amt = 0;
 2003|       |
 2004|  44.6k|            i4_mb_x = ((i4_offset_x + 1) >> MB_WIDTH_SHIFT);
  ------------------
  |  |   70|  44.6k|#define MB_WIDTH_SHIFT 4
  ------------------
 2005|  44.6k|            i4_mb_y = ((i4_offset_y + 1) >> MB_HEIGHT_SHIFT);
  ------------------
  |  |   71|  44.6k|#define MB_HEIGHT_SHIFT 4
  ------------------
 2006|       |
 2007|       |            /* get the location of the byte which has the current mb mode */
 2008|  44.6k|            ps_inter_lyr_mb_prms = (inter_lyr_mb_prms_t *) ps_ref_mb_mode->pv_buffer;
 2009|  44.6k|            ps_inter_lyr_mb_prms += i4_mb_x;
 2010|  44.6k|            ps_inter_lyr_mb_prms += i4_mb_y * ps_ref_mb_mode->i4_num_element_stride;
 2011|       |
 2012|       |            /* get the approp block in base layer in horz direction */
 2013|  44.6k|            if(0 != ((i4_offset_x + 1) & 15))
  ------------------
  |  Branch (2013:16): [True: 22.9k, False: 21.6k]
  ------------------
 2014|  22.9k|            {
 2015|  22.9k|                u2_luma_mask <<= 2;
 2016|  22.9k|                i4_luma_rt_sft_amt += 2;
 2017|  22.9k|                u1_chrm_mask <<= 1;
 2018|  22.9k|                i4_chrm_rt_sft_amt += 1;
 2019|  22.9k|            }
 2020|       |            /* get the approp block in base layer in vert direction */
 2021|  44.6k|            if(0 != ((i4_offset_y + 1) & 15))
  ------------------
  |  Branch (2021:16): [True: 21.4k, False: 23.1k]
  ------------------
 2022|  21.4k|            {
 2023|  21.4k|                u2_luma_mask <<= 8;
 2024|  21.4k|                i4_luma_rt_sft_amt += 8;
 2025|       |
 2026|  21.4k|                u1_chrm_mask <<= 2;
 2027|  21.4k|                i4_chrm_rt_sft_amt += 2;
 2028|  21.4k|            }
 2029|       |
 2030|       |            /* extract the nnz and store it */
 2031|  44.6k|            i4_luma_nnz = (ps_inter_lyr_mb_prms->u2_luma_nnz & u2_luma_mask) >> i4_luma_rt_sft_amt;
 2032|  44.6k|            i4_chroma_nnz =
 2033|  44.6k|                (ps_inter_lyr_mb_prms->u1_chroma_nnz & u1_chrm_mask) >> i4_chrm_rt_sft_amt;
 2034|  44.6k|            i4_tx_size =
 2035|  44.6k|                (ps_inter_lyr_mb_prms->i1_tx_size < 0) ? 1 : ps_inter_lyr_mb_prms->i1_tx_size;
  ------------------
  |  Branch (2035:17): [True: 0, False: 44.6k]
  ------------------
 2036|  44.6k|        }
 2037|       |
 2038|       |        /* since in dyadic case the window width and height will be 10x10   */
 2039|       |        /* and the window start offsets will be always 1 column left and    */
 2040|       |        /* 1 row above the block boundary. so the pointer and the required  */
 2041|       |        /* positions are appropriately modified                             */
 2042|  44.6k|        if(i4_offset_x >= 0)
  ------------------
  |  Branch (2042:12): [True: 34.7k, False: 9.87k]
  ------------------
 2043|  34.7k|        {
 2044|  34.7k|            pi2_inp++;
 2045|  34.7k|        }
 2046|       |
 2047|  44.6k|        if(i4_offset_y >= 0)
  ------------------
  |  Branch (2047:12): [True: 39.1k, False: 5.43k]
  ------------------
 2048|  39.1k|        {
 2049|  39.1k|            pi2_inp += i4_inp_stride;
 2050|  39.1k|        }
 2051|       |
 2052|  44.6k|        ps_ctxt->pf_residual_luma_dyadic(pv_residual_samp_ctxt, pi2_inp, i4_inp_stride, pi2_out,
 2053|  44.6k|                                         i4_out_stride, ps_ref_mb_mode, u2_mb_x, u2_mb_y,
 2054|  44.6k|                                         i4_luma_nnz, i4_tx_size);
 2055|  44.6k|    }
 2056|       |
 2057|       |    /* --------------------------------------------------------------------- */
 2058|       |    /* CHROMA PROCESSING                                                       */
 2059|       |    /* --------------------------------------------------------------------- */
 2060|       |    /* CB */
 2061|  44.6k|    pi2_inp = (WORD16 *) ps_ref_chroma->pv_buffer;
 2062|  44.6k|    pi2_out = (WORD16 *) ps_out_chroma->pv_buffer;
 2063|  44.6k|    i4_inp_stride = ps_ref_chroma->i4_num_element_stride;
 2064|  44.6k|    i4_out_stride = ps_out_chroma->i4_num_element_stride;
 2065|       |
 2066|       |    /* choose the appropriate chroma processing routine */
 2067|  44.6k|    if(SVCD_FALSE == ps_lyr_ctxt->i4_chrm_alt_proc)
  ------------------
  |  |   45|  44.6k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2067:8): [True: 44.6k, False: 0]
  ------------------
 2068|  44.6k|    {
 2069|  44.6k|        WORD32 i4_offset_x, i4_offset_y;
 2070|  44.6k|        residual_samp_map_ctxt_t *ps_chroma_map;
 2071|  44.6k|        ref_mb_map_t *ps_x_off_len_chroma;
 2072|  44.6k|        ref_mb_map_t *ps_y_off_len_chroma;
 2073|       |
 2074|  44.6k|        ps_chroma_map = &ps_lyr_ctxt->s_chroma_map_ctxt;
 2075|  44.6k|        ps_x_off_len_chroma = ps_chroma_map->ps_x_offset_length;
 2076|  44.6k|        ps_y_off_len_chroma = ps_chroma_map->ps_y_offset_length;
 2077|       |
 2078|       |        /* get the actual offset for the buffers */
 2079|  44.6k|        i4_offset_x = ps_x_off_len_chroma[u2_mb_x].i2_offset;
 2080|  44.6k|        i4_offset_y = ps_y_off_len_chroma[u2_mb_y].i2_offset;
 2081|       |
 2082|       |        /* since in dyadic case the window width and height will be 6x6     */
 2083|       |        /* and the window start offsets will be always 1 column left and    */
 2084|       |        /* 1 row above the block boundary. so the pointer and the required  */
 2085|       |        /* positions are appropriately modified                             */
 2086|  44.6k|        if(i4_offset_x >= 0)
  ------------------
  |  Branch (2086:12): [True: 34.7k, False: 9.87k]
  ------------------
 2087|  34.7k|        {
 2088|  34.7k|            pi2_inp += 2;
 2089|  34.7k|        }
 2090|       |
 2091|  44.6k|        if(i4_offset_y >= 0)
  ------------------
  |  Branch (2091:12): [True: 39.1k, False: 5.43k]
  ------------------
 2092|  39.1k|        {
 2093|  39.1k|            pi2_inp += i4_inp_stride;
 2094|  39.1k|        }
 2095|       |
 2096|  44.6k|        if(0 != (i4_chroma_nnz & 0x01))
  ------------------
  |  Branch (2096:12): [True: 835, False: 43.7k]
  ------------------
 2097|    835|        {
 2098|    835|            ps_ctxt->pf_residual_chroma_dyadic(pv_residual_samp_ctxt, pi2_inp, i4_inp_stride,
 2099|    835|                                               pi2_out, i4_out_stride);
 2100|    835|        }
 2101|  44.6k|    }
 2102|      0|    else
 2103|      0|    {
 2104|      0|        ps_ctxt->pf_residual_chroma_dyadic_alt(pv_residual_samp_ctxt, u2_mb_x, u2_mb_y,
 2105|      0|                                               ps_ref_mb_mode, pi2_inp, i4_inp_stride, pi2_out,
 2106|      0|                                               i4_out_stride, SVCD_FALSE);
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
 2107|      0|    }
 2108|       |
 2109|       |    /* CR */
 2110|  44.6k|    pi2_inp += 1;
 2111|  44.6k|    pi2_out += 1;
 2112|       |
 2113|  44.6k|    if(SVCD_FALSE == ps_lyr_ctxt->i4_chrm_alt_proc)
  ------------------
  |  |   45|  44.6k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2113:8): [True: 44.6k, False: 0]
  ------------------
 2114|  44.6k|    {
 2115|  44.6k|        if(0 != (i4_chroma_nnz & 0x10))
  ------------------
  |  Branch (2115:12): [True: 999, False: 43.6k]
  ------------------
 2116|    999|        {
 2117|    999|            ps_ctxt->pf_residual_chroma_dyadic(pv_residual_samp_ctxt, pi2_inp, i4_inp_stride,
 2118|    999|                                               pi2_out, i4_out_stride);
 2119|    999|        }
 2120|  44.6k|    }
 2121|      0|    else
 2122|      0|    {
 2123|      0|        ps_ctxt->pf_residual_chroma_dyadic_alt(pv_residual_samp_ctxt, u2_mb_x, u2_mb_y,
 2124|      0|                                               ps_ref_mb_mode, pi2_inp, i4_inp_stride, pi2_out,
 2125|      0|                                               i4_out_stride, SVCD_TRUE);
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2126|      0|    }
 2127|  44.6k|    return OK;
  ------------------
  |  |  114|  44.6k|#define OK        0
  ------------------
 2128|  44.6k|}
isvcd_residual_samp_populate_list:
 2157|  68.0k|{
 2158|       |    /* --------------------------------------------------------------------- */
 2159|       |    /* Local variables required for finding the mapping between the layers     */
 2160|       |    /* --------------------------------------------------------------------- */
 2161|  68.0k|    UWORD32 u4_shift_x;
 2162|  68.0k|    UWORD32 u4_shift_y;
 2163|  68.0k|    UWORD32 u4_scale_x;
 2164|  68.0k|    UWORD32 u4_scale_y;
 2165|  68.0k|    WORD32 i4_offset_x;
 2166|  68.0k|    WORD32 i4_offset_y;
 2167|  68.0k|    WORD32 i4_add_x;
 2168|  68.0k|    WORD32 i4_add_y;
 2169|  68.0k|    WORD32 i4_delta_x;
 2170|  68.0k|    WORD32 i4_delta_y;
 2171|  68.0k|    WORD32 i4_refphase_x;
 2172|  68.0k|    WORD32 i4_refphase_y;
 2173|  68.0k|    WORD32 i4_phase_x;
 2174|  68.0k|    WORD32 i4_phase_y;
 2175|  68.0k|    WORD32 i4_sub_wd;
 2176|  68.0k|    WORD32 i4_sub_ht;
 2177|  68.0k|    WORD32 i4_mb_wd;
 2178|  68.0k|    WORD32 i4_mb_ht;
 2179|       |    /* --------------------------------------------------------------------- */
 2180|       |    /* Local Pointer Declaration for arrays in Mapping context                 */
 2181|       |    /* --------------------------------------------------------------------- */
 2182|  68.0k|    ref_mb_map_t *ps_x_off_len;
 2183|  68.0k|    ref_mb_map_t *ps_y_off_len;
 2184|  68.0k|    UWORD32 i4_ref_wd;
 2185|  68.0k|    UWORD32 i4_ref_ht;
 2186|  68.0k|    UWORD32 i4_scaled_wd;
 2187|  68.0k|    UWORD32 i4_scaled_ht;
 2188|  68.0k|    WORD32 i4_curr_lyr_width;
 2189|  68.0k|    WORD32 i4_curr_lyr_height;
 2190|       |
 2191|       |    /* --------------------------------------------------------------------- */
 2192|       |    /* Local Flag Declaration                                                 */
 2193|       |    /* --------------------------------------------------------------------- */
 2194|  68.0k|    WORD32 i4_ref_layer_field_pic_flag;
 2195|  68.0k|    WORD32 i4_field_pic_flag;
 2196|  68.0k|    WORD32 i4_frame_mbs_only_flag;
 2197|  68.0k|    WORD32 i4_ref_layer_frame_Mbs_only_flag;
 2198|  68.0k|    WORD32 i4_field_Mb_flag;
 2199|  68.0k|    WORD32 i4_bot_field_flag;
 2200|       |
 2201|       |    /* --------------------------------------------------------------------- */
 2202|       |    /* Cropping Parameters Declaration                                         */
 2203|       |    /* --------------------------------------------------------------------- */
 2204|  68.0k|    WORD32 i4_scaled_ref_layer_left_offset;
 2205|  68.0k|    WORD32 i4_scaled_ref_layer_top_offset;
 2206|       |
 2207|       |    /* --------------------------------------------------------------------- */
 2208|       |    /* Hardcoding flag information    (assuming no field support) */
 2209|       |    /* --------------------------------------------------------------------- */
 2210|  68.0k|    i4_ref_layer_field_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 2211|  68.0k|    i4_field_pic_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 2212|  68.0k|    i4_frame_mbs_only_flag = SVCD_TRUE;
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
 2213|  68.0k|    i4_field_Mb_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 2214|  68.0k|    i4_bot_field_flag = SVCD_FALSE;
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
 2215|  68.0k|    i4_ref_layer_frame_Mbs_only_flag = SVCD_TRUE;
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
 2216|       |
 2217|       |    /* --------------------------------------------------------------------- */
 2218|       |    /* Pointer and Paramater are intialized    - Chroma and Luma */
 2219|       |    /* --------------------------------------------------------------------- */
 2220|  68.0k|    {
 2221|  68.0k|        WORD32 i4_base_width;
 2222|  68.0k|        WORD32 i4_base_height;
 2223|  68.0k|        WORD32 i4_ref_layer_chroma_phase_x_plus1_flag;
 2224|  68.0k|        WORD32 i4_ref_layer_chroma_phase_y_plus1;
 2225|  68.0k|        WORD32 i4_chroma_phase_x_plus1_flag;
 2226|  68.0k|        WORD32 i4_chroma_phase_y_plus1;
 2227|       |
 2228|       |        /* ------------------------------------------------------------- */
 2229|       |        /* HARD CODED FOR 420                                             */
 2230|       |        /* ------------------------------------------------------------- */
 2231|  68.0k|        WORD32 i4_sub_wd_chroma = 2;
 2232|  68.0k|        WORD32 i4_sub_ht_chroma = 2;
 2233|       |
 2234|  68.0k|        i4_base_width = ps_ref_res_prms->i4_res_width;
 2235|  68.0k|        i4_base_height = ps_ref_res_prms->i4_res_height;
 2236|       |
 2237|  68.0k|        i4_ref_layer_chroma_phase_x_plus1_flag =
 2238|  68.0k|            ps_curr_res_prms->i1_ref_lyr_chroma_phase_x_plus1_flag;
 2239|  68.0k|        i4_ref_layer_chroma_phase_y_plus1 = ps_curr_res_prms->i1_ref_lyr_chroma_phase_y_plus1;
 2240|  68.0k|        i4_chroma_phase_x_plus1_flag =
 2241|  68.0k|            ps_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_x_plus1_flag;
 2242|  68.0k|        i4_chroma_phase_y_plus1 =
 2243|  68.0k|            ps_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_y_plus1;
 2244|  68.0k|        i4_scaled_ref_layer_left_offset = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_left;
 2245|  68.0k|        i4_scaled_ref_layer_top_offset = ps_curr_res_prms->s_ref_lyr_scaled_offset.i2_top;
 2246|       |
 2247|       |        /* ----------------------------------------------------------------- */
 2248|       |        /* Computing Effective Frame Dimensions                                 */
 2249|       |        /* ------------------------------------------------------------------*/
 2250|  68.0k|        i4_ref_wd = (i4_base_width >> i4_chroma_flag);
 2251|  68.0k|        i4_ref_ht = (i4_base_height >> i4_chroma_flag) * (1 + i4_ref_layer_field_pic_flag);
 2252|  68.0k|        i4_scaled_wd = ps_curr_res_prms->u2_scaled_ref_width;
 2253|  68.0k|        i4_scaled_ht = ps_curr_res_prms->u2_scaled_ref_height;
 2254|  68.0k|        i4_scaled_wd = (i4_scaled_wd >> i4_chroma_flag);
 2255|  68.0k|        i4_scaled_ht = (i4_scaled_ht >> i4_chroma_flag) * (1 + i4_field_pic_flag);
 2256|       |
 2257|  68.0k|        if(1 == i4_chroma_flag)
  ------------------
  |  Branch (2257:12): [True: 34.0k, False: 34.0k]
  ------------------
 2258|  34.0k|        {
 2259|  34.0k|            i4_refphase_x = i4_ref_layer_chroma_phase_x_plus1_flag - 1;
 2260|  34.0k|            i4_refphase_y = i4_ref_layer_chroma_phase_y_plus1 - 1;
 2261|  34.0k|            i4_phase_x = i4_chroma_phase_x_plus1_flag - 1;
 2262|  34.0k|            i4_phase_y = i4_chroma_phase_y_plus1 - 1;
 2263|  34.0k|            i4_sub_wd = i4_sub_wd_chroma;
 2264|  34.0k|            i4_sub_ht = i4_sub_ht_chroma;
 2265|  34.0k|            i4_mb_wd = MB_WIDTH >> 1;
  ------------------
  |  |   67|  34.0k|#define MB_WIDTH 16
  ------------------
 2266|  34.0k|            i4_mb_ht = MB_HEIGHT >> 1;
  ------------------
  |  |   68|  34.0k|#define MB_HEIGHT 16
  ------------------
 2267|  34.0k|        }
 2268|  34.0k|        else
 2269|  34.0k|        {
 2270|  34.0k|            i4_refphase_x = 0;
 2271|  34.0k|            i4_refphase_y = 0;
 2272|  34.0k|            i4_phase_x = 0;
 2273|  34.0k|            i4_phase_y = 0;
 2274|  34.0k|            i4_sub_wd = 1;
 2275|  34.0k|            i4_sub_ht = 1;
 2276|  34.0k|            i4_mb_wd = MB_WIDTH;
  ------------------
  |  |   67|  34.0k|#define MB_WIDTH 16
  ------------------
 2277|  34.0k|            i4_mb_ht = MB_HEIGHT;
  ------------------
  |  |   68|  34.0k|#define MB_HEIGHT 16
  ------------------
 2278|  34.0k|        }
 2279|  68.0k|    }
 2280|       |
 2281|       |    /* --------------------------------------------------------------------- */
 2282|       |    /* Derive shift x and y based on level idd                               */
 2283|       |    /* --------------------------------------------------------------------- */
 2284|  68.0k|    if(ps_sps->u1_level_idc <= 30)
  ------------------
  |  Branch (2284:8): [True: 62.8k, False: 5.14k]
  ------------------
 2285|  62.8k|    {
 2286|  62.8k|        u4_shift_x = 16;
 2287|  62.8k|        u4_shift_y = 16;
 2288|  62.8k|    }
 2289|  5.14k|    else
 2290|  5.14k|    {
 2291|  5.14k|        u4_shift_x = 31 - isvcd_get_ceil_log2(i4_ref_wd);
 2292|  5.14k|        u4_shift_y = 31 - isvcd_get_ceil_log2(i4_ref_ht);
 2293|  5.14k|    }
 2294|       |
 2295|       |    /* --------------------------------------------------------------------- */
 2296|       |    /* The following condition is not true in our case for time being         */
 2297|       |    /* --------------------------------------------------------------------- */
 2298|  68.0k|    if((SVCD_FALSE == i4_frame_mbs_only_flag) || (SVCD_FALSE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
                  if((SVCD_FALSE == i4_frame_mbs_only_flag) || (SVCD_FALSE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   45|  68.0k|#define SVCD_FALSE 0
  ------------------
  |  Branch (2298:8): [True: 0, False: 68.0k]
  |  Branch (2298:50): [True: 0, False: 68.0k]
  ------------------
 2299|      0|    {
 2300|      0|        i4_phase_y = i4_phase_y + 4 * i4_bot_field_flag;
 2301|       |
 2302|      0|        if(1 == i4_ref_layer_frame_Mbs_only_flag)
  ------------------
  |  Branch (2302:12): [True: 0, False: 0]
  ------------------
 2303|      0|            i4_refphase_y = (2 * i4_refphase_y) + 2;
 2304|      0|        else
 2305|      0|            i4_refphase_y = i4_refphase_y + (4 * i4_bot_field_flag);
 2306|      0|    }
 2307|       |
 2308|       |    /* --------------------------------------------------------------------- */
 2309|       |    /* Dx and Dy Computation - Ratio of the base and enhance layer width     */
 2310|       |    /* --------------------------------------------------------------------- */
 2311|  68.0k|    u4_scale_x = ((i4_ref_wd << u4_shift_x) + (i4_scaled_wd >> 1)) / (i4_scaled_wd);
 2312|       |
 2313|  68.0k|    u4_scale_y = ((i4_ref_ht << u4_shift_y) + (i4_scaled_ht >> 1)) / (i4_scaled_ht);
 2314|       |
 2315|  68.0k|    i4_offset_x = i4_scaled_ref_layer_left_offset / i4_sub_wd;
 2316|  68.0k|    i4_add_x = (((i4_ref_wd * (2 + i4_phase_x)) << (u4_shift_x - 2)) + (i4_scaled_wd >> 1)) /
 2317|  68.0k|                   i4_scaled_wd +
 2318|  68.0k|               (1 << (u4_shift_x - 5));
 2319|  68.0k|    i4_delta_x = 4 * (2 + i4_refphase_x);
 2320|       |
 2321|  68.0k|    if((SVCD_TRUE == i4_frame_mbs_only_flag) && (SVCD_TRUE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
                  if((SVCD_TRUE == i4_frame_mbs_only_flag) && (SVCD_TRUE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   46|  68.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2321:8): [True: 68.0k, False: 0]
  |  Branch (2321:49): [True: 68.0k, False: 0]
  ------------------
 2322|  68.0k|    {
 2323|  68.0k|        i4_offset_y = i4_scaled_ref_layer_top_offset / i4_sub_ht;
 2324|  68.0k|        i4_add_y = (((i4_ref_ht * (2 + i4_phase_y)) << (u4_shift_y - 2)) + (i4_scaled_ht >> 1)) /
 2325|  68.0k|                       i4_scaled_ht +
 2326|  68.0k|                   (1 << (u4_shift_y - 5));
 2327|  68.0k|        i4_delta_y = 4 * (2 + i4_refphase_y);
 2328|  68.0k|    }
 2329|      0|    else
 2330|      0|    {
 2331|      0|        i4_offset_y = i4_scaled_ref_layer_top_offset / (2 * i4_sub_ht);
 2332|      0|        i4_add_y = (((i4_ref_ht * (2 + i4_phase_y)) << (u4_shift_y - 3)) + (i4_scaled_ht >> 1)) /
 2333|      0|                       i4_scaled_ht +
 2334|      0|                   (1 << (u4_shift_y - 5));
 2335|      0|        i4_delta_y = 2 * (2 + i4_refphase_y);
 2336|      0|    }
 2337|       |
 2338|       |    /* --------------------------------------------------------------------- */
 2339|       |    /* Intializing Local Pointers    - Chroma and Luma                       */
 2340|       |    /* --------------------------------------------------------------------- */
 2341|  68.0k|    ps_x_off_len = ps_map_ctxt->ps_x_offset_length;
 2342|  68.0k|    ps_y_off_len = ps_map_ctxt->ps_y_offset_length;
 2343|  68.0k|    i4_curr_lyr_width = ps_curr_res_prms->i4_res_width >> i4_chroma_flag;
 2344|  68.0k|    i4_curr_lyr_height = ps_curr_res_prms->i4_res_height >> i4_chroma_flag;
 2345|       |
 2346|  68.0k|    {
 2347|  68.0k|        WORD32 i4_i, i4_j;
 2348|       |
 2349|       |        /* ----------------------------------------------------------------- */
 2350|       |        /* Computation of offsetX refArrayW Xmin and Xmax Lists               */
 2351|       |        /* ----------------------------------------------------------------- */
 2352|   305k|        for(i4_i = 0; i4_i < i4_curr_lyr_width; i4_i = i4_i + i4_mb_wd)
  ------------------
  |  Branch (2352:23): [True: 237k, False: 68.0k]
  ------------------
 2353|   237k|        {
 2354|   237k|            WORD32 i4_x_refmin16;
 2355|   237k|            WORD32 i4_x_refmax16;
 2356|   237k|            WORD32 i4_x_offset;
 2357|       |
 2358|   237k|            i4_x_refmin16 = (WORD64) (((WORD64) ((i4_i - i4_offset_x) * u4_scale_x) + i4_add_x) >>
 2359|   237k|                                      ((WORD32) (u4_shift_x - 4))) -
 2360|   237k|                            i4_delta_x;
 2361|       |
 2362|   237k|            i4_x_refmax16 =
 2363|   237k|                (WORD64) (((WORD64) (i4_i + i4_mb_wd - 1 - i4_offset_x) * u4_scale_x + i4_add_x) >>
 2364|   237k|                          ((WORD32) (u4_shift_x - 4))) -
 2365|   237k|                i4_delta_x;
 2366|       |
 2367|       |            /* AC205 */
 2368|   237k|            i4_x_offset = i4_x_refmin16 >> 4;
 2369|   237k|            ps_x_off_len->i2_offset = i4_x_offset;
 2370|   237k|            ps_x_off_len->i2_length = (i4_x_refmax16 >> 4) - i4_x_offset + 2;
 2371|       |
 2372|       |            /* increment the pointer */
 2373|   237k|            ps_x_off_len++;
 2374|       |
 2375|   237k|        } /* end of loop over current layer width */
 2376|       |
 2377|       |        /* ----------------------------------------------------------------- */
 2378|       |        /* Computation of offsetY refArrayH Ymin and Ymax Lists              */
 2379|       |        /* ----------------------------------------------------------------- */
 2380|   694k|        for(i4_j = 0; i4_j < i4_curr_lyr_height; i4_j = i4_j + i4_mb_ht)
  ------------------
  |  Branch (2380:23): [True: 626k, False: 68.0k]
  ------------------
 2381|   626k|        {
 2382|   626k|            WORD32 i4_y_refmin16;
 2383|   626k|            WORD32 i4_y_refmax16;
 2384|   626k|            WORD32 i4_y_offset;
 2385|       |
 2386|   626k|            i4_y_refmin16 = (WORD64) (((WORD64) (i4_j - i4_offset_y) * u4_scale_y + i4_add_y) >>
 2387|   626k|                                      ((WORD32) (u4_shift_y - 4))) -
 2388|   626k|                            i4_delta_y;
 2389|       |
 2390|   626k|            i4_y_refmax16 =
 2391|   626k|                (WORD64) (((WORD64) (i4_j + i4_mb_ht - 1 - i4_offset_y) * u4_scale_y + i4_add_y) >>
 2392|   626k|                          ((WORD32) (u4_shift_y - 4))) -
 2393|   626k|                i4_delta_y;
 2394|       |
 2395|       |            /* AC205 */
 2396|   626k|            i4_y_offset = i4_y_refmin16 >> 4;
 2397|   626k|            ps_y_off_len->i2_offset = i4_y_offset;
 2398|   626k|            ps_y_off_len->i2_length = (i4_y_refmax16 >> 4) - i4_y_offset + 2;
 2399|       |
 2400|       |            /* increment the pointer */
 2401|   626k|            ps_y_off_len++;
 2402|       |
 2403|   626k|        } /* end of loop over current layer height */
 2404|  68.0k|    }
 2405|       |
 2406|       |    /* --------------------------------------------------------------------- */
 2407|       |    /* Computation of Xref and Xphase List as per standard                     */
 2408|       |    /* --------------------------------------------------------------------- */
 2409|  68.0k|    ps_x_off_len = ps_map_ctxt->ps_x_offset_length;
 2410|  68.0k|    ps_y_off_len = ps_map_ctxt->ps_y_offset_length;
 2411|       |
 2412|  68.0k|    {
 2413|  68.0k|        WORD32 i4_xc;
 2414|  68.0k|        WORD32 i4_offset_x_index;
 2415|  68.0k|        ref_pixel_map_t *ps_x_pos_phase;
 2416|       |
 2417|  68.0k|        ps_x_pos_phase = ps_map_ctxt->ps_x_pos_phase;
 2418|       |
 2419|  2.92M|        for(i4_xc = 0; i4_xc < i4_curr_lyr_width; i4_xc++)
  ------------------
  |  Branch (2419:24): [True: 2.85M, False: 68.0k]
  ------------------
 2420|  2.85M|        {
 2421|  2.85M|            WORD32 i4_x_offset;
 2422|  2.85M|            WORD32 i4_x_ref16;
 2423|       |
 2424|  2.85M|            i4_offset_x_index = i4_xc / i4_mb_wd;
 2425|  2.85M|            i4_x_offset = ps_x_off_len[i4_offset_x_index].i2_offset;
 2426|  2.85M|            i4_x_ref16 = (WORD64) (((WORD64) (i4_xc - i4_offset_x) * u4_scale_x + i4_add_x) >>
 2427|  2.85M|                                   ((WORD32) (u4_shift_x - 4))) -
 2428|  2.85M|                         i4_delta_x;
 2429|       |
 2430|       |            /* store the values */
 2431|  2.85M|            ps_x_pos_phase->i2_ref_pos = (i4_x_ref16 >> 4) - i4_x_offset;
 2432|  2.85M|            ps_x_pos_phase->i2_phase = (i4_x_ref16 - (16 * i4_x_offset)) & 15;
 2433|       |
 2434|       |            /* increment the pointer */
 2435|  2.85M|            ps_x_pos_phase++;
 2436|  2.85M|        } /* end of loop over scaled width */
 2437|  68.0k|    }
 2438|       |
 2439|       |    /* --------------------------------------------------------------------- */
 2440|       |    /* Computation of Yref and Yphase List as per standard                     */
 2441|       |    /* --------------------------------------------------------------------- */
 2442|  68.0k|    {
 2443|  68.0k|        WORD32 i4_yc;
 2444|  68.0k|        WORD32 i4_offset_y_index;
 2445|  68.0k|        ref_pixel_map_t *ps_y_pos_phase;
 2446|       |
 2447|  68.0k|        ps_y_pos_phase = ps_map_ctxt->ps_y_pos_phase;
 2448|       |
 2449|  7.58M|        for(i4_yc = 0; i4_yc < i4_curr_lyr_height; i4_yc++)
  ------------------
  |  Branch (2449:24): [True: 7.51M, False: 68.0k]
  ------------------
 2450|  7.51M|        {
 2451|  7.51M|            WORD32 i4_y_offset;
 2452|  7.51M|            WORD32 i4_y_ref16;
 2453|       |
 2454|  7.51M|            i4_offset_y_index = i4_yc / i4_mb_ht;
 2455|  7.51M|            i4_y_offset = ps_y_off_len[i4_offset_y_index].i2_offset;
 2456|       |
 2457|  7.51M|            if((SVCD_FALSE == i4_frame_mbs_only_flag) ||
  ------------------
  |  |   45|  7.51M|#define SVCD_FALSE 0
  ------------------
  |  Branch (2457:16): [True: 0, False: 7.51M]
  ------------------
 2458|  7.51M|               (SVCD_FALSE == i4_ref_layer_frame_Mbs_only_flag))
  ------------------
  |  |   45|  7.51M|#define SVCD_FALSE 0
  ------------------
  |  Branch (2458:16): [True: 0, False: 7.51M]
  ------------------
 2459|      0|            {
 2460|      0|                i4_yc = i4_yc >> (1 - i4_field_Mb_flag);
 2461|      0|            }
 2462|       |
 2463|  7.51M|            i4_y_ref16 = (WORD64) ((((WORD64) (i4_yc - i4_offset_y) * u4_scale_y + i4_add_y) >>
 2464|  7.51M|                                    ((WORD32) (u4_shift_y - 4))) -
 2465|  7.51M|                                   i4_delta_y);
 2466|  7.51M|            ps_y_pos_phase->i2_ref_pos = (i4_y_ref16 >> 4) - i4_y_offset;
 2467|  7.51M|            ps_y_pos_phase->i2_phase = (i4_y_ref16 - (16 * i4_y_offset)) & 15;
 2468|       |
 2469|       |            /* increment the pointer */
 2470|  7.51M|            ps_y_pos_phase++;
 2471|  7.51M|        } /* end of loop over scaled height */
 2472|  68.0k|    }
 2473|  68.0k|    return;
 2474|  68.0k|}
isvcd_residual_samp_res_init:
 2501|   132k|{
 2502|   132k|    residual_sampling_ctxt_t *ps_ctxt;
 2503|   132k|    res_lyr_ctxt *ps_lyr_ctxt;
 2504|   132k|    dec_seq_params_t *ps_sps;
 2505|   132k|    dec_svc_seq_params_t *ps_subset_sps;
 2506|   132k|    svc_dec_lyr_struct_t *ps_svc_lyr_dec = (svc_dec_lyr_struct_t *) pv_svc_dec;
 2507|   132k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
 2508|       |
 2509|   132k|    void *pv_residual_samp_ctxt = ps_svc_lyr_dec->pv_residual_sample_ctxt;
 2510|   132k|    res_prms_t *ps_curr_lyr_res_prms = &ps_svc_lyr_dec->s_res_prms;
 2511|   132k|    ref_mb_map_t **pps_luma_map_horz = &ps_svc_lyr_dec->ps_ressam_luma_map_horz;
 2512|   132k|    ref_mb_map_t **pps_chroma_map_horz = &ps_svc_lyr_dec->ps_ressam_chroma_map_horz;
 2513|   132k|    ref_mb_map_t **pps_luma_map_vert = &ps_svc_lyr_dec->ps_ressam_luma_map_vert;
 2514|   132k|    ref_mb_map_t **pps_chroma_map_vert = &ps_svc_lyr_dec->ps_ressam_chroma_map_vert;
 2515|       |
 2516|   132k|    if((NULL == pv_residual_samp_ctxt) || (NULL == ps_curr_lyr_res_prms) ||
  ------------------
  |  Branch (2516:8): [True: 0, False: 132k]
  |  Branch (2516:43): [True: 0, False: 132k]
  ------------------
 2517|   132k|       (NULL == pps_luma_map_horz) || (NULL == pps_chroma_map_horz) ||
  ------------------
  |  Branch (2517:8): [True: 0, False: 132k]
  |  Branch (2517:39): [True: 0, False: 132k]
  ------------------
 2518|   132k|       (NULL == pps_luma_map_vert) || (NULL == pps_chroma_map_vert))
  ------------------
  |  Branch (2518:8): [True: 0, False: 132k]
  |  Branch (2518:39): [True: 0, False: 132k]
  ------------------
 2519|      0|    {
 2520|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 2521|      0|    }
 2522|       |
 2523|   132k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
 2524|       |
 2525|       |    /* if called for base resolution store deafult values */
 2526|   132k|    if(SVCD_TRUE == ps_svc_lyr_dec->u1_base_res_flag)
  ------------------
  |  |   46|   132k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2526:8): [True: 98.8k, False: 34.0k]
  ------------------
 2527|  98.8k|    {
 2528|  98.8k|        *pps_luma_map_horz = NULL;
 2529|  98.8k|        *pps_chroma_map_horz = NULL;
 2530|  98.8k|        *pps_luma_map_vert = NULL;
 2531|  98.8k|        *pps_chroma_map_vert = NULL;
 2532|  98.8k|        ps_ctxt->i4_res_lyr_id = -1;
 2533|  98.8k|        ps_ctxt->i4_ref_width = ps_curr_lyr_res_prms->i4_res_width;
 2534|  98.8k|        ps_ctxt->i4_ref_height = ps_curr_lyr_res_prms->i4_res_height;
 2535|  98.8k|        return OK;
  ------------------
  |  |  114|  98.8k|#define OK        0
  ------------------
 2536|  98.8k|    }
 2537|       |
 2538|       |    /* derive the current sps */
 2539|  34.0k|    ps_sps = ps_dec->ps_cur_sps;
 2540|  34.0k|    ps_subset_sps = ps_svc_lyr_dec->ps_cur_subset_sps;
 2541|       |
 2542|       |    /* store the res id appropriately */
 2543|  34.0k|    ps_ctxt->i4_res_lyr_id = ps_svc_lyr_dec->u1_layer_id - 1;
 2544|       |
 2545|       |    /* get the current layer ctxt */
 2546|  34.0k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_svc_lyr_dec->u1_layer_id - 1];
 2547|       |
 2548|       |    /* get the width and heights */
 2549|  34.0k|    ps_lyr_ctxt->i4_curr_width = ps_curr_lyr_res_prms->i4_res_width;
 2550|  34.0k|    ps_lyr_ctxt->i4_curr_height = ps_curr_lyr_res_prms->i4_res_height;
 2551|  34.0k|    ps_lyr_ctxt->i4_ref_width = ps_ctxt->i4_ref_width;
 2552|  34.0k|    ps_lyr_ctxt->i4_ref_height = ps_ctxt->i4_ref_height;
 2553|       |
 2554|       |    /* store the strcuture pointer containing projected locations */
 2555|  34.0k|    *pps_luma_map_horz = ps_lyr_ctxt->s_luma_map_ctxt.ps_x_offset_length;
 2556|  34.0k|    *pps_chroma_map_horz = ps_lyr_ctxt->s_chroma_map_ctxt.ps_x_offset_length;
 2557|  34.0k|    *pps_luma_map_vert = ps_lyr_ctxt->s_luma_map_ctxt.ps_y_offset_length;
 2558|  34.0k|    *pps_chroma_map_vert = ps_lyr_ctxt->s_chroma_map_ctxt.ps_y_offset_length;
 2559|       |
 2560|       |    /* check for recomputation of mapping required */
 2561|  34.0k|    if(SVCD_TRUE == ps_curr_lyr_res_prms->u1_remap_req_flag)
  ------------------
  |  |   46|  34.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2561:8): [True: 34.0k, False: 0]
  ------------------
 2562|  34.0k|    {
 2563|  34.0k|        res_prms_t s_ref_res_prms = {0};
 2564|       |
 2565|       |        /* store the reference layer resolution width and height */
 2566|  34.0k|        s_ref_res_prms.i4_res_width = ps_ctxt->i4_ref_width;
 2567|  34.0k|        s_ref_res_prms.i4_res_height = ps_ctxt->i4_ref_height;
 2568|       |
 2569|       |        /* call the frame level projections calculation function */
 2570|  34.0k|        isvcd_residual_samp_populate_list(&ps_lyr_ctxt->s_luma_map_ctxt, ps_sps, ps_subset_sps,
 2571|  34.0k|                                          ps_curr_lyr_res_prms, &s_ref_res_prms, 0);
 2572|  34.0k|        isvcd_residual_samp_populate_list(&ps_lyr_ctxt->s_chroma_map_ctxt, ps_sps, ps_subset_sps,
 2573|  34.0k|                                          ps_curr_lyr_res_prms, &s_ref_res_prms, 1);
 2574|       |
 2575|       |        /* default values for flags */
 2576|  34.0k|        ps_lyr_ctxt->pf_residual_samp_mb = &isvcd_residual_samp_mb;
 2577|  34.0k|        ps_lyr_ctxt->i4_chrm_horz_int_mode = 0;
 2578|  34.0k|        ps_lyr_ctxt->i4_chrm_vert_int_mode = 0;
 2579|  34.0k|        ps_lyr_ctxt->i4_chrm_alt_proc = SVCD_FALSE;
  ------------------
  |  |   45|  34.0k|#define SVCD_FALSE 0
  ------------------
 2580|       |
 2581|       |        /* Store the Dyadic flag */
 2582|  34.0k|        ps_lyr_ctxt->i4_dyadic_flag = ps_curr_lyr_res_prms->u1_dyadic_flag;
 2583|       |
 2584|       |        /* set the appropriate chroma processing routine based on */
 2585|       |        /* phase values */
 2586|  34.0k|        if(SVCD_TRUE == ps_curr_lyr_res_prms->u1_dyadic_flag)
  ------------------
  |  |   46|  34.0k|#define SVCD_TRUE 1
  ------------------
  |  Branch (2586:12): [True: 16.6k, False: 17.3k]
  ------------------
 2587|  16.6k|        {
 2588|  16.6k|            WORD32 i4_ref_layer_chroma_phase_x_plus1_flag;
 2589|  16.6k|            WORD32 i4_ref_layer_chroma_phase_y_plus1;
 2590|  16.6k|            WORD32 i4_chroma_phase_x_plus1_flag;
 2591|  16.6k|            WORD32 i4_chroma_phase_y_plus1;
 2592|       |
 2593|  16.6k|            ps_lyr_ctxt->pf_residual_samp_mb = &isvcd_residual_samp_mb_dyadic;
 2594|  16.6k|            i4_ref_layer_chroma_phase_x_plus1_flag =
 2595|  16.6k|                ps_curr_lyr_res_prms->i1_ref_lyr_chroma_phase_x_plus1_flag;
 2596|  16.6k|            i4_ref_layer_chroma_phase_y_plus1 =
 2597|  16.6k|                ps_curr_lyr_res_prms->i1_ref_lyr_chroma_phase_y_plus1;
 2598|  16.6k|            i4_chroma_phase_x_plus1_flag =
 2599|  16.6k|                ps_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_x_plus1_flag;
 2600|  16.6k|            i4_chroma_phase_y_plus1 =
 2601|  16.6k|                ps_subset_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_y_plus1;
 2602|  16.6k|            if((0 == i4_ref_layer_chroma_phase_x_plus1_flag) && (1 == i4_chroma_phase_x_plus1_flag))
  ------------------
  |  Branch (2602:16): [True: 4.95k, False: 11.6k]
  |  Branch (2602:65): [True: 0, False: 4.95k]
  ------------------
 2603|      0|            {
 2604|      0|                ps_lyr_ctxt->i4_chrm_horz_int_mode = 1;
 2605|      0|                ps_lyr_ctxt->i4_chrm_alt_proc = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2606|      0|            }
 2607|       |
 2608|  16.6k|            if((0 == i4_ref_layer_chroma_phase_y_plus1) && (1 == i4_chroma_phase_y_plus1))
  ------------------
  |  Branch (2608:16): [True: 2.28k, False: 14.3k]
  |  Branch (2608:60): [True: 0, False: 2.28k]
  ------------------
 2609|      0|            {
 2610|      0|                ps_lyr_ctxt->i4_chrm_vert_int_mode = 1;
 2611|      0|                ps_lyr_ctxt->i4_chrm_alt_proc = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2612|      0|            }
 2613|       |
 2614|  16.6k|            if((0 == i4_ref_layer_chroma_phase_y_plus1) && (2 == i4_chroma_phase_y_plus1))
  ------------------
  |  Branch (2614:16): [True: 2.28k, False: 14.3k]
  |  Branch (2614:60): [True: 0, False: 2.28k]
  ------------------
 2615|      0|            {
 2616|      0|                ps_lyr_ctxt->i4_chrm_vert_int_mode = 1;
 2617|      0|                ps_lyr_ctxt->i4_chrm_alt_proc = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2618|      0|            }
 2619|       |
 2620|  16.6k|            if((2 == i4_ref_layer_chroma_phase_y_plus1) && (0 == i4_chroma_phase_y_plus1))
  ------------------
  |  Branch (2620:16): [True: 2.68k, False: 13.9k]
  |  Branch (2620:60): [True: 0, False: 2.68k]
  ------------------
 2621|      0|            {
 2622|      0|                ps_lyr_ctxt->i4_chrm_vert_int_mode = 2;
 2623|      0|                ps_lyr_ctxt->i4_chrm_alt_proc = SVCD_TRUE;
  ------------------
  |  |   46|      0|#define SVCD_TRUE 1
  ------------------
 2624|      0|            }
 2625|  16.6k|        }
 2626|  34.0k|    }
 2627|      0|    else
 2628|      0|    {
 2629|       |        /* should take false value */
 2630|      0|        if(SVCD_FALSE != ps_curr_lyr_res_prms->u1_remap_req_flag)
  ------------------
  |  |   45|      0|#define SVCD_FALSE 0
  ------------------
  |  Branch (2630:12): [True: 0, False: 0]
  ------------------
 2631|      0|        {
 2632|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 2633|      0|        }
 2634|      0|    }
 2635|       |
 2636|       |    /* store the current layer width and height to context */
 2637|  34.0k|    ps_ctxt->i4_ref_width = ps_curr_lyr_res_prms->i4_res_width;
 2638|  34.0k|    ps_ctxt->i4_ref_height = ps_curr_lyr_res_prms->i4_res_height;
 2639|       |
 2640|       |    /* assert on max ranges of width and shift values */
 2641|  34.0k|    if((ps_lyr_ctxt->i4_curr_width > H264_MAX_FRAME_WIDTH) ||
  ------------------
  |  |   39|  34.0k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (2641:8): [True: 0, False: 34.0k]
  ------------------
 2642|  34.0k|       (ps_lyr_ctxt->i4_ref_width > H264_MAX_FRAME_WIDTH) ||
  ------------------
  |  |   39|  34.0k|#define H264_MAX_FRAME_WIDTH                4096
  ------------------
  |  Branch (2642:8): [True: 0, False: 34.0k]
  ------------------
 2643|  34.0k|       (ps_lyr_ctxt->i4_curr_height > H264_MAX_FRAME_HEIGHT) ||
  ------------------
  |  |   40|  34.0k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (2643:8): [True: 0, False: 34.0k]
  ------------------
 2644|  34.0k|       (ps_lyr_ctxt->i4_ref_height > H264_MAX_FRAME_HEIGHT))
  ------------------
  |  |   40|  34.0k|#define H264_MAX_FRAME_HEIGHT               4096
  ------------------
  |  Branch (2644:8): [True: 0, False: 34.0k]
  ------------------
 2645|      0|    {
 2646|      0|        return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
 2647|      0|    }
 2648|  34.0k|    return OK;
  ------------------
  |  |  114|  34.0k|#define OK        0
  ------------------
 2649|  34.0k|}

isvcd_compute_bs_non_mbaff_thread:
   91|  1.07M|{
   92|  1.07M|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
   93|       |    /* Mvpred and Nnz for top and Courrent */
   94|  1.07M|    mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred;
   95|       |    /* deblk_mb_t Params */
   96|  1.07M|    deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
   97|  1.07M|    deblkmb_neighbour_t *ps_deblk_top_mb;
   98|       |
   99|       |    /* Reference Index to POC mapping*/
  100|  1.07M|    void **apv_map_ref_idx_to_poc;
  101|  1.07M|    UWORD32 u4_leftmbtype;
  102|       |
  103|  1.07M|    UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp;
  104|       |
  105|       |    /* Set of flags */
  106|  1.07M|    UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld;
  107|  1.07M|    UWORD32 u4_cur_mb_ibl;
  108|  1.07M|    UWORD32 u1_cur_mb_type;
  109|  1.07M|    UWORD32 *pu4_bs_table;
  110|       |
  111|  1.07M|    UWORD16 *pu2_curr_res_luma_csbp;
  112|  1.07M|    UWORD16 *pu2_left_res_luma_csbp;
  113|  1.07M|    UWORD16 *pu2_top_res_luma_csbp;
  114|       |
  115|       |    /* Neighbour availability */
  116|       |    /* Initialization */
  117|  1.07M|    const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
  118|  1.07M|    const UWORD32 u2_mby = ps_cur_mb_info->u2_mby;
  119|  1.07M|    const UWORD32 u1_pingpong = u2_mbx & 0x01;
  120|       |
  121|  1.07M|    PROFILE_DISABLE_BOUNDARY_STRENGTH()
  ------------------
  |  |  125|  1.07M|#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
  ------------------
  122|  1.07M|    ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
  123|       |
  124|       |    /* Pointer assignment for Current DeblkMB, Current Mv Pred  */
  125|  1.07M|    ps_cur_mb_params = ps_dec->ps_deblk_pic + u4_mb_num;
  126|  1.07M|    ps_cur_mv_pred = ps_dec->s_cur_pic.ps_mv + (u4_mb_num << 4);
  127|       |
  128|       |    /*Pointer assignment for Residual NNZ */
  129|  1.07M|    pu2_curr_res_luma_csbp = ps_svc_lyr_dec->pu2_frm_res_luma_csbp + ps_cur_mb_info->u2_mbx;
  130|  1.07M|    pu2_curr_res_luma_csbp += ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride;
  131|       |
  132|  1.07M|    pu2_left_res_luma_csbp = pu2_curr_res_luma_csbp - (ps_cur_mb_info->u2_mbx != 0);
  133|  1.07M|    pu2_top_res_luma_csbp = pu2_curr_res_luma_csbp - ((ps_cur_mb_info->u2_mby != 0) *
  134|  1.07M|                                                      ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride);
  135|       |
  136|  1.07M|    apv_map_ref_idx_to_poc = (void **) ps_dec->ps_computebs_cur_slice->ppv_map_ref_idx_to_poc + 1;
  137|  1.07M|    u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
  138|  1.07M|    u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type;
  139|  1.07M|    ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type;
  140|       |
  141|  1.07M|    ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
  142|  1.07M|    ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
  143|  1.07M|    ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
  144|  1.07M|    ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
  145|       |
  146|       |    /* if no deblocking required for current Mb then continue */
  147|       |    /* Check next Mbs   in Mb group                           */
  148|  1.07M|    if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
  ------------------
  |  |   70|  1.07M|#define MB_DISABLE_FILTERING          0x01
  ------------------
  |  Branch (148:8): [True: 4.91k, False: 1.06M]
  ------------------
  149|  4.91k|    {
  150|  4.91k|        void **pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  4.91k|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  4.91k|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  151|  4.91k|        {
  152|       |            /* Store Parameter for Top MvPred refernce frame Address */
  153|  4.91k|            void **ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
  154|  4.91k|            WORD8 *p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
  155|  4.91k|            WORD8 *p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
  156|       |
  157|       |            /* Store Left addresses for Next Mb   */
  158|  4.91k|            void **ppv_left_mv_pred_addr = ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
  159|  4.91k|            WORD8 *p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
  160|       |
  161|  4.91k|            ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
  162|  4.91k|            ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
  163|       |
  164|  4.91k|            ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  165|  4.91k|            ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  166|  4.91k|            ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  167|  4.91k|            ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  168|       |
  169|  4.91k|            ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
  170|  4.91k|            ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
  171|       |
  172|       |            /* Storing the leftMbtype for next Mb */
  173|  4.91k|            ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
  174|  4.91k|        }
  175|       |
  176|  4.91k|        return;
  177|  4.91k|    }
  178|       |
  179|       |    /* Flag for extra left Edge */
  180|  1.06M|    ps_cur_mb_params->u1_single_call = 1;
  181|       |
  182|       |    /* Update the Left deblk_mb_t and Left MvPred Parameters */
  183|  1.06M|    if(!u2_mbx)
  ------------------
  |  Branch (183:8): [True: 92.3k, False: 976k]
  ------------------
  184|  92.3k|    {
  185|  92.3k|        u4_leftmbtype = 0;
  186|       |
  187|       |        /* Initialize the ps_left_mv_pred with Junk but Valid Location */
  188|       |        /* to avoid invalid memory access                           */
  189|       |        /* this is read only pointer                                */
  190|  92.3k|        ps_left_mv_pred = ps_cur_mv_pred + 3;
  191|  92.3k|    }
  192|   976k|    else
  193|   976k|    {
  194|   976k|        u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type;
  195|       |
  196|       |        /* Come to Left Most Edge of the MB */
  197|   976k|        ps_left_mv_pred = ps_cur_mv_pred - (1 << 4) + 3;
  198|   976k|    }
  199|       |
  200|  1.06M|    if(!u2_mby) u1_top_mb_typ = 0;
  ------------------
  |  Branch (200:8): [True: 60.3k, False: 1.00M]
  ------------------
  201|       |
  202|       |    /* MvPred Pointer Calculation */
  203|  1.06M|    ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12;
  204|  1.06M|    u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB;
  ------------------
  |  |  382|  1.06M|#define D_INTRA_MB        1
  ------------------
  205|  1.06M|    u4_cur_mb_ibl = u1_cur_mb_type & D_INTRA_IBL;
  ------------------
  |  |   72|  1.06M|#define D_INTRA_IBL 16
  ------------------
  206|  1.06M|    u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB);
  ------------------
  |  |  386|  1.06M|#define D_FLD_MB          0x80
  ------------------
  207|       |    /* Compute BS function */
  208|  1.06M|    pu4_bs_table = ps_cur_mb_params->u4_bs_table;
  209|       |
  210|  1.06M|    u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
  211|  1.06M|    u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
  212|  1.06M|    u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
  213|       |
  214|       |    /* Compute BS function */
  215|  1.06M|    if((ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC) ||
  ------------------
  |  |  278|  1.06M|#define HIGH_PROFILE_IDC   100
  ------------------
  |  Branch (215:8): [True: 374k, False: 693k]
  ------------------
  216|   693k|       (ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC) ||
  ------------------
  |  |  278|   693k|#define HIGH_PROFILE_IDC   100
  ------------------
  |  Branch (216:8): [True: 0, False: 693k]
  ------------------
  217|   693k|       (ps_dec->ps_cur_sps->u1_profile_idc == SCALABLE_HIGH_PROFILE_IDC) ||
  ------------------
  |  |   60|   693k|#define SCALABLE_HIGH_PROFILE_IDC 86
  ------------------
  |  Branch (217:8): [True: 11.1k, False: 682k]
  ------------------
  218|   682k|       (ps_dec->ps_cur_sps->u1_profile_idc == SCALABLE_BASELINE_PROFILE_IDC))
  ------------------
  |  |   59|   682k|#define SCALABLE_BASELINE_PROFILE_IDC 83
  ------------------
  |  Branch (218:8): [True: 19.7k, False: 662k]
  ------------------
  219|   405k|    {
  220|   405k|        if(ps_cur_mb_info->u1_tran_form8x8 == 1)
  ------------------
  |  Branch (220:12): [True: 5.21k, False: 400k]
  ------------------
  221|  5.21k|        {
  222|  5.21k|            u2_cur_csbp = ih264d_update_csbp_8x8(ps_cur_mb_info->ps_curmb->u2_luma_csbp);
  223|  5.21k|            ps_cur_mb_info->ps_curmb->u2_luma_csbp = u2_cur_csbp;
  224|  5.21k|        }
  225|   405k|    }
  226|  1.06M|    u2_cur_csbp |= *pu2_curr_res_luma_csbp;
  227|  1.06M|    u2_left_csbp |= *pu2_left_res_luma_csbp;
  228|  1.06M|    u2_top_csbp |= *pu2_top_res_luma_csbp;
  229|       |
  230|  1.06M|    if(u4_cur_mb_intra)
  ------------------
  |  Branch (230:8): [True: 13.0k, False: 1.05M]
  ------------------
  231|  13.0k|    {
  232|  13.0k|        pu4_bs_table[4] = 0x04040404;
  233|  13.0k|        pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  Branch (233:27): [True: 0, False: 13.0k]
  ------------------
  234|  13.0k|        pu4_bs_table[1] = 0x03030303;
  235|  13.0k|        pu4_bs_table[2] = 0x03030303;
  236|  13.0k|        pu4_bs_table[3] = 0x03030303;
  237|  13.0k|        pu4_bs_table[5] = 0x03030303;
  238|  13.0k|        pu4_bs_table[6] = 0x03030303;
  239|  13.0k|        pu4_bs_table[7] = 0x03030303;
  240|  13.0k|    }
  241|  1.05M|    else
  242|  1.05M|    {
  243|  1.05M|        isvcd_fill_bs_ibl(ps_cur_mb_params, u1_top_mb_typ, u4_leftmbtype, ps_cur_mb_info,
  244|  1.05M|                          pu2_curr_res_luma_csbp, pu2_left_res_luma_csbp, pu2_top_res_luma_csbp);
  245|       |
  246|  1.05M|        if(!u4_cur_mb_ibl)
  ------------------
  |  Branch (246:12): [True: 1.03M, False: 20.4k]
  ------------------
  247|  1.03M|        {
  248|  1.03M|            UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16);
  ------------------
  |  |  383|  1.03M|#define D_PRED_NON_16x16  2
  ------------------
  249|  1.03M|            UWORD32 u4_is_b = (ps_dec->ps_computebs_cur_slice->slice_type == B_SLICE);
  ------------------
  |  |  369|  1.03M|#define B_SLICE  1
  ------------------
  250|  1.03M|            UWORD32 u4_bs_0, u4_bs_4;
  251|       |
  252|  1.03M|            u4_bs_0 = pu4_bs_table[0];
  253|  1.03M|            u4_bs_4 = pu4_bs_table[4];
  254|       |
  255|  1.03M|            ih264d_fill_bs2_horz_vert(pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp,
  256|  1.03M|                                      gau4_ih264d_packed_bs2, gau2_ih264d_4x4_v2h_reorder);
  257|       |
  258|  1.03M|            if(u4_leftmbtype & D_INTRA_MB)
  ------------------
  |  |  382|  1.03M|#define D_INTRA_MB        1
  ------------------
  |  Branch (258:16): [True: 1.18k, False: 1.03M]
  ------------------
  259|  1.18k|            {
  260|  1.18k|                pu4_bs_table[4] = 0x04040404;
  261|  1.18k|            }
  262|  1.03M|            else if(u4_leftmbtype & D_INTRA_IBL)
  ------------------
  |  |   72|  1.03M|#define D_INTRA_IBL 16
  ------------------
  |  Branch (262:21): [True: 2.45k, False: 1.03M]
  ------------------
  263|  2.45k|            {
  264|  2.45k|                pu4_bs_table[4] = u4_bs_4;
  265|  2.45k|            }
  266|       |
  267|  1.03M|            if(u1_top_mb_typ & D_INTRA_MB)
  ------------------
  |  |  382|  1.03M|#define D_INTRA_MB        1
  ------------------
  |  Branch (267:16): [True: 1.24k, False: 1.03M]
  ------------------
  268|  1.24k|            {
  269|  1.24k|                pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
  ------------------
  |  Branch (269:35): [True: 0, False: 1.24k]
  ------------------
  270|  1.24k|            }
  271|  1.03M|            else if(u1_top_mb_typ & D_INTRA_IBL)
  ------------------
  |  |   72|  1.03M|#define D_INTRA_IBL 16
  ------------------
  |  Branch (271:21): [True: 5.03k, False: 1.02M]
  ------------------
  272|  5.03k|            {
  273|  5.03k|                pu4_bs_table[0] = u4_bs_0;
  274|  5.03k|            }
  275|       |
  276|  1.03M|            ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16](
  277|  1.03M|                ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc, pu4_bs_table,
  278|  1.03M|                ps_left_mv_pred, &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]),
  279|  1.03M|                ps_cur_mb_info->ps_top_mb->u4_pic_addrress, (4 >> u4_cur_mb_fld));
  280|  1.03M|        }
  281|  1.05M|    }
  282|       |
  283|  1.06M|    {
  284|  1.06M|        void **pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
  ------------------
  |  |   86|  1.06M|#define POC_LIST_L0_TO_L1_DIFF  (( 2*MAX_FRAMES) + 1)
  |  |  ------------------
  |  |  |  |  600|  1.06M|#define MAX_FRAMES              16
  |  |  ------------------
  ------------------
  285|  1.06M|        {
  286|       |            /* Store Parameter for Top MvPred refernce frame Address */
  287|  1.06M|            void **ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
  288|  1.06M|            WORD8 *p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
  289|  1.06M|            WORD8 *p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
  290|       |
  291|       |            /* Store Left addresses for Next Mb   */
  292|  1.06M|            void **ppv_left_mv_pred_addr = ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
  293|  1.06M|            WORD8 *p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
  294|       |
  295|  1.06M|            ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
  296|  1.06M|            ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
  297|       |
  298|  1.06M|            ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  299|  1.06M|            ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
  300|  1.06M|            ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  301|  1.06M|            ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
  302|       |
  303|  1.06M|            ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
  304|  1.06M|            ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
  305|       |
  306|       |            /* Storing the leftMbtype for next Mb */
  307|  1.06M|            ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
  308|  1.06M|        }
  309|  1.06M|    }
  310|       |
  311|       |    /* For transform 8x8 disable deblocking of the intrernal edges of a 8x8 block */
  312|  1.06M|    if(ps_cur_mb_info->u1_tran_form8x8)
  ------------------
  |  Branch (312:8): [True: 7.99k, False: 1.06M]
  ------------------
  313|  7.99k|    {
  314|  7.99k|        pu4_bs_table[1] = 0;
  315|  7.99k|        pu4_bs_table[3] = 0;
  316|  7.99k|        pu4_bs_table[5] = 0;
  317|  7.99k|        pu4_bs_table[7] = 0;
  318|  7.99k|    }
  319|  1.06M|}

isvcd_decode_recon_tfr_nmb_thread:
   76|   191k|{
   77|   191k|    WORD32 i, j;
   78|   191k|    dec_mb_info_t *ps_cur_mb_info;
   79|   191k|    dec_svc_mb_info_t *ps_svc_cur_mb_info;
   80|   191k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
   81|   191k|    const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
   82|   191k|    UWORD32 u1_slice_type, u1_B;
   83|   191k|    WORD32 u1_skip_th;
   84|   191k|    UWORD32 u1_ipcm_th;
   85|   191k|    UWORD32 u4_cond;
   86|   191k|    UWORD16 u2_slice_num, u2_cur_dec_mb_num;
   87|   191k|    UWORD32 u4_mb_num;
   88|   191k|    WORD32 nop_cnt = 8 * 128;
   89|   191k|    UWORD16 *pu2_res_luma_csbp;
   90|   191k|    WORD32 ret;
   91|   191k|    u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type;
   92|       |
   93|   191k|    u1_B = (u1_slice_type == B_SLICE);
  ------------------
  |  |  369|   191k|#define B_SLICE  1
  ------------------
   94|   191k|    u1_skip_th = ((u1_slice_type != I_SLICE) ? (u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  370|   191k|#define I_SLICE  2
  ------------------
                  u1_skip_th = ((u1_slice_type != I_SLICE) ? (u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  480|  29.1k|#define B_8x8    22
  ------------------
                  u1_skip_th = ((u1_slice_type != I_SLICE) ? (u1_B ? B_8x8 : PRED_8x8R0) : -1);
  ------------------
  |  |  454|   149k|#define PRED_8x8R0  4
  ------------------
  |  Branch (94:19): [True: 178k, False: 13.1k]
  |  Branch (94:49): [True: 29.1k, False: 149k]
  ------------------
   95|   191k|    u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (u1_B ? 23 : 5) : 0);
  ------------------
  |  |  370|   191k|#define I_SLICE  2
  ------------------
  |  Branch (95:19): [True: 178k, False: 13.1k]
  |  Branch (95:49): [True: 29.1k, False: 149k]
  ------------------
   96|   191k|    u2_cur_dec_mb_num = ps_dec->cur_dec_mb_num;
   97|       |
   98|   329k|    while(1)
  ------------------
  |  Branch (98:11): [True: 329k, Folded]
  ------------------
   99|   329k|    {
  100|   329k|        UWORD32 u4_max_mb =
  101|   329k|            (UWORD32) (ps_dec->i2_dec_thread_mb_y + (1 << u1_mbaff)) * ps_dec->u2_frm_wd_in_mbs - 1;
  102|   329k|        u4_mb_num = u2_cur_dec_mb_num;
  103|       |        /*introducing 1 MB delay*/
  104|   329k|        u4_mb_num = MIN(u4_mb_num + u4_num_mbs + 1, u4_max_mb);
  ------------------
  |  |   61|   329k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 329k]
  |  |  ------------------
  ------------------
  105|       |
  106|   329k|        CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond);
  ------------------
  |  |   80|   329k|#define CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond)                                               \
  |  |   81|   329k|{                                                                                                   \
  |  |   82|   329k|        volatile UWORD8 *pu1_mb_flag;                                                               \
  |  |   83|   329k|                                                                                                    \
  |  |   84|   329k|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_num );                                           \
  |  |   85|   329k|                                                                                                    \
  |  |   86|   329k|        u4_cond = (*pu1_mb_flag);                                                                   \
  |  |   87|   329k|}
  ------------------
  107|   329k|        if(u4_cond)
  ------------------
  |  Branch (107:12): [True: 190k, False: 138k]
  ------------------
  108|   190k|        {
  109|   190k|            break;
  110|   190k|        }
  111|   138k|        else
  112|   138k|        {
  113|   138k|            if(nop_cnt > 0)
  ------------------
  |  Branch (113:16): [True: 120k, False: 18.3k]
  ------------------
  114|   120k|            {
  115|   120k|                nop_cnt -= 128;
  116|   120k|                NOP(128);
  ------------------
  |  |   87|  15.5M|#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");}
  |  |  ------------------
  |  |  |  Branch (87:54): [True: 15.4M, False: 120k]
  |  |  ------------------
  ------------------
  117|   120k|            }
  118|  18.3k|            else
  119|  18.3k|            {
  120|  18.3k|                if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
  ------------------
  |  Branch (120:20): [True: 16.6k, False: 1.72k]
  |  Branch (120:49): [True: 16.6k, False: 0]
  ------------------
  121|  16.6k|                   (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
  ------------------
  |  Branch (121:20): [True: 6.23k, False: 10.4k]
  ------------------
  122|  6.23k|                {
  123|  6.23k|                    ps_dec->u4_fmt_conv_num_rows =
  124|  6.23k|                        MIN(FMT_CONV_NUM_ROWS,
  ------------------
  |  |   61|  6.23k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 5.72k, False: 511]
  |  |  ------------------
  ------------------
  125|  6.23k|                            (ps_dec->s_disp_frame_info.u4_y_ht - ps_dec->u4_fmt_conv_cur_row));
  126|  6.23k|                    ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), ps_dec->u4_fmt_conv_cur_row,
  127|  6.23k|                                          ps_dec->u4_fmt_conv_num_rows);
  128|  6.23k|                    ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
  129|  6.23k|                }
  130|  12.1k|                else
  131|  12.1k|                {
  132|  12.1k|                    nop_cnt = 8 * 128;
  133|  12.1k|                    ithread_yield();
  134|  12.1k|                }
  135|  18.3k|                if(1 == ps_svc_lyr_dec->u1_error_in_cur_frame)
  ------------------
  |  Branch (135:20): [True: 1.57k, False: 16.7k]
  ------------------
  136|  1.57k|                {
  137|  1.57k|                    return NOT_OK;
  ------------------
  |  |  116|  1.57k|#define NOT_OK    -1
  ------------------
  138|  1.57k|                }
  139|  18.3k|            }
  140|   138k|        }
  141|   329k|    }
  142|       |
  143|       |    /* N Mb MC Loop */
  144|   809k|    for(i = 0; i < u4_num_mbs; i++)
  ------------------
  |  Branch (144:16): [True: 626k, False: 182k]
  ------------------
  145|   626k|    {
  146|   626k|        u4_mb_num = u2_cur_dec_mb_num;
  147|   626k|        GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u2_cur_dec_mb_num, u2_slice_num);
  ------------------
  |  |  156|   626k|#define GET_SLICE_NUM_MAP(slice_map, mb_number,u2_slice_num)                                                  \
  |  |  157|   626k|{                                                                                                   \
  |  |  158|   626k|        volatile UWORD16 *pu2_slice_map;                                                               \
  |  |  159|   626k|                                                                                                    \
  |  |  160|   626k|        pu2_slice_map    = (UWORD16 *)slice_map + (mb_number);                                         \
  |  |  161|   626k|        u2_slice_num = (*pu2_slice_map) ;                                                               \
  |  |  162|   626k|}
  ------------------
  148|       |
  149|   626k|        if(u2_slice_num != ps_dec->u2_cur_slice_num_dec_thread)
  ------------------
  |  Branch (149:12): [True: 7.54k, False: 618k]
  ------------------
  150|  7.54k|        {
  151|  7.54k|            ps_dec->u4_cur_slice_decode_done = 1;
  152|  7.54k|            break;
  153|  7.54k|        }
  154|       |
  155|   618k|        ps_cur_mb_info = &ps_dec->ps_frm_mb_info[u2_cur_dec_mb_num];
  156|       |
  157|   618k|        ps_dec->u4_dma_buf_idx = 0;
  158|   618k|        ps_dec->u4_pred_info_idx = 0;
  159|       |
  160|       |        /*Pointer assignment for Residual NNZ */
  161|   618k|        pu2_res_luma_csbp = ps_svc_lyr_dec->pu2_frm_res_luma_csbp + ps_cur_mb_info->u2_mbx;
  162|   618k|        pu2_res_luma_csbp += ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride;
  163|       |
  164|   618k|        if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (164:12): [True: 151k, False: 467k]
  ------------------
  165|   151k|        {
  166|   151k|            WORD32 pred_cnt = 0;
  167|   151k|            pred_info_pkd_t *ps_pred_pkd;
  168|   151k|            UWORD32 u4_pred_info_pkd_idx;
  169|       |
  170|   151k|            u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
  171|       |
  172|   437k|            while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (172:19): [True: 286k, False: 151k]
  ------------------
  173|   286k|            {
  174|   286k|                ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
  175|       |
  176|   286k|                ps_dec->p_form_mb_part_info_thread(ps_pred_pkd, ps_dec, ps_cur_mb_info->u2_mbx,
  177|   286k|                                                   ps_cur_mb_info->u2_mby, (i >> u1_mbaff),
  178|   286k|                                                   ps_cur_mb_info);
  179|       |
  180|   286k|                u4_pred_info_pkd_idx++;
  181|   286k|                pred_cnt++;
  182|   286k|            }
  183|   151k|            ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info);
  184|   151k|        }
  185|   467k|        else if(ps_cur_mb_info->u1_mb_type == MB_SKIP)
  ------------------
  |  |  456|   467k|#define MB_SKIP     255
  ------------------
  |  Branch (185:17): [True: 405k, False: 62.4k]
  ------------------
  186|   405k|        {
  187|   405k|            WORD32 pred_cnt = 0;
  188|   405k|            pred_info_pkd_t *ps_pred_pkd;
  189|   405k|            UWORD32 u4_pred_info_pkd_idx;
  190|       |
  191|   405k|            u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
  192|       |
  193|   849k|            while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
  ------------------
  |  Branch (193:19): [True: 444k, False: 405k]
  ------------------
  194|   444k|            {
  195|   444k|                ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
  196|       |
  197|   444k|                ps_dec->p_form_mb_part_info_thread(ps_pred_pkd, ps_dec, ps_cur_mb_info->u2_mbx,
  198|   444k|                                                   ps_cur_mb_info->u2_mby, (i >> u1_mbaff),
  199|   444k|                                                   ps_cur_mb_info);
  200|       |
  201|   444k|                u4_pred_info_pkd_idx++;
  202|   444k|                pred_cnt++;
  203|   444k|            }
  204|       |            /* Decode MB skip */
  205|   405k|            ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info);
  206|       |
  207|   405k|            *pu2_res_luma_csbp = 0;
  208|   405k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
  209|   405k|                ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
  210|   405k|                (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
  211|   405k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_mb_mode = SVC_INTER_MB;
  ------------------
  |  |  114|   405k|#define SVC_INTER_MB (1 << 0)       /*!< Intra MBs other than IPCM and I_BL */
  ------------------
  212|   405k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_tx_size =
  213|   405k|                ps_cur_mb_info->u1_tran_form8x8;
  214|   405k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u2_luma_nnz = 0;
  215|   405k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->u1_chroma_nnz = 0;
  216|   405k|        }
  217|       |
  218|   618k|        u2_cur_dec_mb_num++;
  219|   618k|    }
  220|       |
  221|       |    /* N Mb IQ IT RECON  Loop */
  222|   809k|    for(j = 0; j < i; j++)
  ------------------
  |  Branch (222:16): [True: 618k, False: 190k]
  ------------------
  223|   618k|    {
  224|   618k|        ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->cur_dec_mb_num];
  225|   618k|        ps_svc_cur_mb_info = &ps_svc_lyr_dec->ps_svc_frm_mb_info[ps_dec->cur_dec_mb_num];
  226|       |
  227|   618k|        if(NULL == ps_cur_mb_info->ps_curmb)
  ------------------
  |  Branch (227:12): [True: 0, False: 618k]
  ------------------
  228|      0|        {
  229|      0|            return NOT_OK;
  ------------------
  |  |  116|      0|#define NOT_OK    -1
  ------------------
  230|      0|        }
  231|       |
  232|       |        /*Pointer assignment for Residual NNZ */
  233|   618k|        pu2_res_luma_csbp = ps_svc_lyr_dec->pu2_frm_res_luma_csbp + ps_cur_mb_info->u2_mbx;
  234|   618k|        pu2_res_luma_csbp += ps_cur_mb_info->u2_mby * ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride;
  235|       |
  236|   618k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb =
  237|   618k|            ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start + ps_cur_mb_info->u2_mbx +
  238|   618k|            (ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride * (ps_cur_mb_info->u2_mby));
  239|       |
  240|   618k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_cur_mb->i1_slice_id = (WORD8) ps_dec->u2_cur_slice_num;
  241|       |
  242|   618k|        if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag)
  ------------------
  |  Branch (242:12): [True: 618k, False: 0]
  |  Branch (242:43): [True: 0, False: 0]
  ------------------
  243|   618k|        {
  244|   618k|            if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
  ------------------
  |  Branch (244:16): [True: 151k, False: 467k]
  ------------------
  245|   151k|            {
  246|   151k|                {
  247|       |                    /* inter intra pred generation */
  248|   151k|                    if(SVCD_FALSE == ps_svc_lyr_dec->u1_dyadic_flag)
  ------------------
  |  |   45|   151k|#define SVCD_FALSE 0
  ------------------
  |  Branch (248:24): [True: 101k, False: 49.6k]
  ------------------
  249|   101k|                    {
  250|   101k|                        ret = isvcd_process_ii_mb(ps_svc_lyr_dec, ps_cur_mb_info,
  251|   101k|                                                  ps_svc_cur_mb_info, j);
  252|   101k|                        if(ret != OK) return ret;
  ------------------
  |  |  114|   101k|#define OK        0
  ------------------
  |  Branch (252:28): [True: 0, False: 101k]
  ------------------
  253|   101k|                    }
  254|   151k|                    if(0 == ps_svc_cur_mb_info->u1_residual_prediction_flag)
  ------------------
  |  Branch (254:24): [True: 35.3k, False: 116k]
  ------------------
  255|  35.3k|                    {
  256|       |                        /* IT + Recon */
  257|  35.3k|                        ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j);
  258|  35.3k|                        isvcd_update_inter_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info, 0);
  259|  35.3k|                        *pu2_res_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
  260|  35.3k|                    }
  261|   116k|                    else
  262|   116k|                    {
  263|       |                        /* IT + Residual + Recon */
  264|   116k|                        ret = isvcd_process_inter_mb_rsd_pred_target_lyr(
  265|   116k|                            ps_svc_lyr_dec, ps_cur_mb_info, j, 0, pu2_res_luma_csbp);
  266|   116k|                        if(ret != OK) return ret;
  ------------------
  |  |  114|   116k|#define OK        0
  ------------------
  |  Branch (266:28): [True: 0, False: 116k]
  ------------------
  267|   116k|                    }
  268|   151k|                }
  269|   151k|            }
  270|       |
  271|   467k|            else if((ps_cur_mb_info->u1_mb_type != MB_SKIP) &&
  ------------------
  |  |  456|   467k|#define MB_SKIP     255
  ------------------
  |  Branch (271:21): [True: 62.4k, False: 405k]
  ------------------
  272|  62.4k|                    (ps_cur_mb_info->u1_mb_type != MB_INFER))
  ------------------
  |  |  112|  62.4k|#define MB_INFER 250
  ------------------
  |  Branch (272:21): [True: 12.7k, False: 49.6k]
  ------------------
  273|  12.7k|            {
  274|  12.7k|                if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type)
  ------------------
  |  Branch (274:20): [True: 12.6k, False: 145]
  ------------------
  275|  12.6k|                {
  276|  12.6k|                    ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1);
  277|  12.6k|                    ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j);
  278|  12.6k|                    isvcd_update_intra_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
  279|  12.6k|                }
  280|    145|                else
  281|    145|                {
  282|    145|                    isvcd_update_ipcm_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
  283|    145|                }
  284|  12.7k|                *pu2_res_luma_csbp = 0;
  285|  12.7k|            }
  286|   454k|            else if(ps_cur_mb_info->u1_mb_type == MB_INFER)
  ------------------
  |  |  112|   454k|#define MB_INFER 250
  ------------------
  |  Branch (286:21): [True: 49.6k, False: 405k]
  ------------------
  287|  49.6k|            {
  288|       |                /* inter layer intra prediction : intra upsample, IQ, IT ,deblock */
  289|  49.6k|                {
  290|       |                    /* Intra resample for IBL mode */
  291|  49.6k|                    ret = isvcd_process_ibl_mb(ps_svc_lyr_dec, ps_cur_mb_info, j, 0);
  292|  49.6k|                    if(ret != OK) return ret;
  ------------------
  |  |  114|  49.6k|#define OK        0
  ------------------
  |  Branch (292:24): [True: 0, False: 49.6k]
  ------------------
  293|       |                    /* Pass intra resample as pred to Recon generation */
  294|  49.6k|                    ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j);
  295|  49.6k|                    isvcd_update_inter_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info, 1);
  296|  49.6k|                    *pu2_res_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
  297|  49.6k|                }
  298|  49.6k|                ps_dec->pi1_left_pred_mode[0] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  299|  49.6k|                ps_dec->pi1_left_pred_mode[1] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  300|  49.6k|                ps_dec->pi1_left_pred_mode[2] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  301|  49.6k|                ps_dec->pi1_left_pred_mode[3] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  302|       |
  303|  49.6k|                ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[0] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  304|  49.6k|                ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[1] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  305|  49.6k|                ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[2] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  306|  49.6k|                ps_cur_mb_info->ps_curmb->pi1_intrapredmodes[3] = DC;
  ------------------
  |  |  431|  49.6k|#define DC      2
  ------------------
  307|       |
  308|  49.6k|                isvcd_update_ibl_mb_inter_layer_info(ps_svc_lyr_dec, ps_cur_mb_info);
  309|  49.6k|            }
  310|       |
  311|   618k|            if(ps_dec->u4_use_intrapred_line_copy == 1)
  ------------------
  |  Branch (311:16): [True: 618k, False: 0]
  ------------------
  312|   618k|                ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j);
  313|   618k|        }
  314|       |
  315|   618k|        DATA_SYNC();
  ------------------
  |  |  116|   618k|#define DATA_SYNC()  __sync_synchronize()
  ------------------
  316|       |
  317|   618k|        u4_mb_num = ps_cur_mb_info->u2_mbx + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby;
  318|   618k|        UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num);
  ------------------
  |  |  136|   618k|#define UPDATE_MB_MAP_MBNUM_BYTE(mb_map, u4_mb_number)                                                  \
  |  |  137|   618k|{                                                                                                   \
  |  |  138|   618k|        volatile UWORD8 *pu1_mb_flag;                                                                       \
  |  |  139|   618k|                                                                                                    \
  |  |  140|   618k|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_number);                                                     \
  |  |  141|   618k|        /*                                                                                          \
  |  |  142|   618k|         * In case of MbAff, update the mb_map only if the entire MB is done. We can check that     \
  |  |  143|   618k|         * by checking if Y is odd, implying that this is the second row in the MbAff MB            \
  |  |  144|   618k|         */                                                                                         \
  |  |  145|   618k|        (*pu1_mb_flag) = 1;                                                             \
  |  |  146|   618k|}
  ------------------
  319|   618k|        ps_dec->cur_dec_mb_num++;
  320|   618k|    }
  321|       |
  322|       |    /*N MB deblocking*/
  323|   190k|    if(ps_dec->u4_nmb_deblk == 1)
  ------------------
  |  Branch (323:8): [True: 0, False: 190k]
  ------------------
  324|      0|    {
  325|      0|        UWORD32 u4_wd_y, u4_wd_uv;
  326|      0|        tfr_ctxt_t *ps_tfr_cxt = &(ps_dec->s_tran_addrecon);
  327|      0|        UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
  328|      0|        const WORD32 i4_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
  329|      0|        const WORD32 i4_cr_qp_idx_ofst = ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
  330|       |
  331|      0|        u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
  332|      0|        u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
  333|       |
  334|      0|        ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->u4_cur_deblk_mb_num];
  335|       |
  336|      0|        ps_dec->u4_deblk_mb_x = ps_cur_mb_info->u2_mbx;
  337|      0|        ps_dec->u4_deblk_mb_y = ps_cur_mb_info->u2_mby;
  338|       |
  339|      0|        for(j = 0; j < i; j++)
  ------------------
  |  Branch (339:20): [True: 0, False: 0]
  ------------------
  340|      0|        {
  341|      0|            ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
  342|      0|                                       u4_wd_y, u4_wd_uv);
  343|      0|        }
  344|      0|    }
  345|       |
  346|       |    /*handle the last mb in picture case*/
  347|   190k|    if(ps_dec->cur_dec_mb_num > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (347:8): [True: 19.0k, False: 171k]
  ------------------
  348|  19.0k|        ps_dec->u4_cur_slice_decode_done = 1;
  349|       |
  350|   190k|    if(i != u4_num_mbs)
  ------------------
  |  Branch (350:8): [True: 7.54k, False: 182k]
  ------------------
  351|  7.54k|    {
  352|  7.54k|        u4_end_of_row = 0;
  353|       |        /*Number of MB's left in row*/
  354|  7.54k|        u4_num_mbs_next = u4_num_mbs_next + ((u4_num_mbs - i) >> u1_mbaff);
  355|  7.54k|    }
  356|       |
  357|   190k|    ih264d_decode_tfr_nmb(ps_dec, (i), u4_num_mbs_next, u4_end_of_row);
  358|       |
  359|   190k|    return OK;
  ------------------
  |  |  114|   190k|#define OK        0
  ------------------
  360|   190k|}
isvcd_decode_slice_thread:
  381|  28.9k|{
  382|  28.9k|    UWORD32 u4_num_mbs_next, u4_num_mbsleft, u4_end_of_row = 0;
  383|  28.9k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  384|  28.9k|    const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
  385|  28.9k|    UWORD32 u4_mbaff, u4_num_mbs;
  386|       |
  387|  28.9k|    UWORD16 u2_first_mb_in_slice;
  388|  28.9k|    UWORD16 i16_mb_x, i16_mb_y;
  389|  28.9k|    UWORD8 u1_field_pic;
  390|  28.9k|    UWORD32 u4_frame_stride, x_offset, y_offset;
  391|  28.9k|    WORD32 ret;
  392|  28.9k|    tfr_ctxt_t *ps_trns_addr;
  393|       |
  394|       |    /*check for mb map of first mb in slice to ensure slice header is parsed*/
  395|   173k|    while(1)
  ------------------
  |  Branch (395:11): [True: 173k, Folded]
  ------------------
  396|   173k|    {
  397|   173k|        UWORD32 u4_mb_num = ps_dec->cur_dec_mb_num;
  398|   173k|        UWORD32 u4_cond = 0;
  399|   173k|        WORD32 nop_cnt = 8 * 128;
  400|   173k|        CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond);
  ------------------
  |  |   80|   173k|#define CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond)                                               \
  |  |   81|   173k|{                                                                                                   \
  |  |   82|   173k|        volatile UWORD8 *pu1_mb_flag;                                                               \
  |  |   83|   173k|                                                                                                    \
  |  |   84|   173k|        pu1_mb_flag    = (UWORD8 *)mb_map + (u4_mb_num );                                           \
  |  |   85|   173k|                                                                                                    \
  |  |   86|   173k|        u4_cond = (*pu1_mb_flag);                                                                   \
  |  |   87|   173k|}
  ------------------
  401|   173k|        if(u4_cond)
  ------------------
  |  Branch (401:12): [True: 28.1k, False: 145k]
  ------------------
  402|  28.1k|        {
  403|  28.1k|            break;
  404|  28.1k|        }
  405|   145k|        else
  406|   145k|        {
  407|   145k|            if(nop_cnt > 0)
  ------------------
  |  Branch (407:16): [True: 145k, False: 0]
  ------------------
  408|   145k|            {
  409|   145k|                nop_cnt -= 128;
  410|   145k|                NOP(128);
  ------------------
  |  |   87|  18.7M|#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");}
  |  |  ------------------
  |  |  |  Branch (87:54): [True: 18.6M, False: 145k]
  |  |  ------------------
  ------------------
  411|   145k|            }
  412|      0|            else if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
  ------------------
  |  Branch (412:21): [True: 0, False: 0]
  |  Branch (412:50): [True: 0, False: 0]
  ------------------
  413|      0|                    (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
  ------------------
  |  Branch (413:21): [True: 0, False: 0]
  ------------------
  414|      0|            {
  415|      0|                ps_dec->u4_fmt_conv_num_rows =
  416|      0|                    MIN(FMT_CONV_NUM_ROWS,
  ------------------
  |  |   61|      0|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  417|      0|                        (ps_dec->s_disp_frame_info.u4_y_ht - ps_dec->u4_fmt_conv_cur_row));
  418|      0|                ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), ps_dec->u4_fmt_conv_cur_row,
  419|      0|                                      ps_dec->u4_fmt_conv_num_rows);
  420|      0|                ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
  421|      0|            }
  422|      0|            else
  423|      0|            {
  424|      0|                nop_cnt = 8 * 128;
  425|      0|                ithread_yield();
  426|      0|            }
  427|   145k|            if(1 == ps_svc_lyr_dec->u1_error_in_cur_frame)
  ------------------
  |  Branch (427:16): [True: 765, False: 144k]
  ------------------
  428|    765|            {
  429|    765|                return NOT_OK;
  ------------------
  |  |  116|    765|#define NOT_OK    -1
  ------------------
  430|    765|            }
  431|   144k|            DEBUG_THREADS_PRINTF(
  432|   144k|                "waiting for mb mapcur_dec_mb_num = %d,ps_dec->u2_cur_mb_addr  = "
  433|   144k|                "%d\n",
  434|   144k|                u2_cur_dec_mb_num, ps_dec->u2_cur_mb_addr);
  435|   144k|        }
  436|   173k|    }
  437|       |
  438|  28.1k|    u4_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
  439|  28.1k|    u2_first_mb_in_slice = ps_dec->ps_decode_cur_slice->u4_first_mb_in_slice;
  440|  28.1k|    i16_mb_x = MOD(u2_first_mb_in_slice, i2_pic_wdin_mbs);
  ------------------
  |  |   64|  28.1k|#define MOD(x,y) ((x)%(y))
  ------------------
  441|  28.1k|    i16_mb_y = DIV(u2_first_mb_in_slice, i2_pic_wdin_mbs);
  ------------------
  |  |   65|  28.1k|#define DIV(x,y) ((x)/(y))
  ------------------
  442|  28.1k|    i16_mb_y <<= u4_mbaff;
  443|  28.1k|    ps_dec->i2_dec_thread_mb_y = i16_mb_y;
  444|  28.1k|    ps_dec->cur_dec_mb_num = u2_first_mb_in_slice << u4_mbaff;
  445|       |
  446|  28.1k|    if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag)
  ------------------
  |  Branch (446:8): [True: 28.1k, False: 0]
  |  Branch (446:39): [True: 0, False: 0]
  ------------------
  447|  28.1k|    {
  448|  28.1k|        ps_dec->pv_proc_tu_coeff_data =
  449|  28.1k|            (void *) ps_dec->ps_decode_cur_slice->pv_tu_coeff_data_start;
  450|  28.1k|    }
  451|       |
  452|       |    // recalculate recon pointers
  453|  28.1k|    u1_field_pic = ps_dec->ps_cur_slice->u1_field_pic_flag;
  454|  28.1k|    u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic;
  455|  28.1k|    x_offset = i16_mb_x << 4;
  456|  28.1k|    y_offset = (i16_mb_y * u4_frame_stride) << 4;
  457|       |
  458|  28.1k|    ps_trns_addr = &(ps_dec->s_tran_addrecon);
  459|       |
  460|  28.1k|    ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset;
  461|       |
  462|  28.1k|    u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic;
  463|  28.1k|    x_offset >>= 1;
  464|  28.1k|    y_offset = (i16_mb_y * u4_frame_stride) << 3;
  465|  28.1k|    x_offset *= YUV420SP_FACTOR;
  ------------------
  |  |  119|  28.1k|#define YUV420SP_FACTOR 2
  ------------------
  466|       |
  467|  28.1k|    ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset;
  468|  28.1k|    ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset;
  469|       |
  470|  28.1k|    ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
  471|  28.1k|    ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
  472|  28.1k|    ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
  473|       |
  474|       |    /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */
  475|  28.1k|    {
  476|  28.1k|        ps_dec->p_mc_dec_thread = ih264d_motion_compensate_bp;
  477|  28.1k|        ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_bp;
  478|  28.1k|    }
  479|  28.1k|    {
  480|  28.1k|        UWORD8 uc_nofield_nombaff;
  481|  28.1k|        uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0) &&
  ------------------
  |  Branch (481:31): [True: 28.1k, False: 0]
  ------------------
  482|  28.1k|                              (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0) &&
  ------------------
  |  Branch (482:31): [True: 28.1k, False: 0]
  ------------------
  483|  28.1k|                              (ps_dec->ps_decode_cur_slice->slice_type != B_SLICE) &&
  ------------------
  |  |  369|  28.1k|#define B_SLICE  1
  ------------------
  |  Branch (483:31): [True: 23.8k, False: 4.25k]
  ------------------
  484|  23.8k|                              (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0));
  ------------------
  |  Branch (484:31): [True: 20.2k, False: 3.66k]
  ------------------
  485|       |
  486|  28.1k|        if(uc_nofield_nombaff == 0)
  ------------------
  |  Branch (486:12): [True: 7.92k, False: 20.2k]
  ------------------
  487|  7.92k|        {
  488|  7.92k|            ps_dec->p_mc_dec_thread = ih264d_motion_compensate_mp;
  489|  7.92k|            ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_mp;
  490|  7.92k|        }
  491|  28.1k|    }
  492|       |
  493|  28.1k|    ps_dec->u4_cur_slice_decode_done = 0;
  494|       |
  495|   218k|    while(ps_dec->u4_cur_slice_decode_done != 1)
  ------------------
  |  Branch (495:11): [True: 191k, False: 26.5k]
  ------------------
  496|   191k|    {
  497|   191k|        u4_num_mbsleft = ((i2_pic_wdin_mbs - i16_mb_x) << u4_mbaff);
  498|       |
  499|   191k|        if(u4_num_mbsleft <= ps_dec->u4_recon_mb_grp)
  ------------------
  |  Branch (499:12): [True: 191k, False: 0]
  ------------------
  500|   191k|        {
  501|   191k|            u4_num_mbs = u4_num_mbsleft;
  502|       |
  503|       |            /*Indicate number of mb's left in a row*/
  504|   191k|            u4_num_mbs_next = 0;
  505|   191k|            u4_end_of_row = 1;
  506|   191k|            i16_mb_x = 0;
  507|   191k|        }
  508|      0|        else
  509|      0|        {
  510|      0|            u4_num_mbs = ps_dec->u4_recon_mb_grp;
  511|       |
  512|       |            /*Indicate number of mb's left in a row*/
  513|      0|            u4_num_mbs_next = i2_pic_wdin_mbs - i16_mb_x - (ps_dec->u4_recon_mb_grp >> u4_mbaff);
  514|      0|            i16_mb_x += (u4_num_mbs >> u4_mbaff);
  515|      0|            u4_end_of_row = 0;
  516|      0|        }
  517|   191k|        if(ps_svc_lyr_dec->u1_layer_identifier == TARGET_LAYER)
  ------------------
  |  |  110|   191k|#define TARGET_LAYER 2
  ------------------
  |  Branch (517:12): [True: 191k, False: 0]
  ------------------
  518|   191k|        {
  519|   191k|            ret = isvcd_decode_recon_tfr_nmb_thread(ps_svc_lyr_dec, u4_num_mbs, u4_num_mbs_next,
  520|   191k|                                                    u4_end_of_row);
  521|   191k|        }
  522|   191k|        if(ret != OK) return ret;
  ------------------
  |  |  114|   191k|#define OK        0
  ------------------
  |  Branch (522:12): [True: 1.57k, False: 190k]
  ------------------
  523|   191k|    }
  524|  26.5k|    return OK;
  ------------------
  |  |  114|  26.5k|#define OK        0
  ------------------
  525|  28.1k|}
isvcd_decode_picture_thread:
  546|  21.3k|{
  547|  21.3k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  548|  21.3k|    WORD32 ret;
  549|  21.3k|    ithread_set_name("isvcd_decode_picture_thread");
  550|  21.3k|    while(1)
  ------------------
  |  Branch (550:11): [True: 21.3k, Folded]
  ------------------
  551|  21.3k|    {
  552|       |#ifdef KEEP_THREADS_ACTIVE
  553|       |        ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
  554|       |        if(OK != ret) break;
  555|       |
  556|       |        while(ps_dec->ai4_process_start[0] != PROC_START)
  557|       |        {
  558|       |            ithread_cond_wait(ps_dec->apv_proc_start_condition[0], ps_dec->apv_proc_start_mutex[0]);
  559|       |        }
  560|       |        ps_dec->ai4_process_start[0] = PROC_IN_PROGRESS;
  561|       |
  562|       |        ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
  563|       |        if(OK != ret || ps_dec->i4_break_threads == 1) break;
  564|       |#endif
  565|  28.9k|        while(1)
  ------------------
  |  Branch (565:15): [True: 28.9k, Folded]
  ------------------
  566|  28.9k|        {
  567|       |            /*Complete all writes before processing next slice*/
  568|       |
  569|  28.9k|            DEBUG_THREADS_PRINTF(" Entering decode slice svc ext\n");
  570|       |
  571|  28.9k|            ret = isvcd_decode_slice_thread(ps_svc_lyr_dec);
  572|  28.9k|            if(OK != ret) break;
  ------------------
  |  |  114|  28.9k|#define OK        0
  ------------------
  |  Branch (572:16): [True: 2.34k, False: 26.5k]
  ------------------
  573|  26.5k|            DEBUG_THREADS_PRINTF(" Exit  isvcd_decode_slice_thread\n");
  574|       |
  575|  26.5k|            if(ps_dec->cur_dec_mb_num > ps_dec->ps_cur_sps->u4_max_mb_addr)
  ------------------
  |  Branch (575:16): [True: 19.0k, False: 7.54k]
  ------------------
  576|  19.0k|            {
  577|       |                /*Last slice in frame*/
  578|  19.0k|                break;
  579|  19.0k|            }
  580|  7.54k|            else
  581|  7.54k|            {
  582|  7.54k|                ps_dec->ps_decode_cur_slice++;
  583|  7.54k|                ps_dec->u2_cur_slice_num_dec_thread++;
  584|  7.54k|            }
  585|  26.5k|        }
  586|  21.3k|        if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
  ------------------
  |  Branch (586:12): [True: 8.96k, False: 12.3k]
  |  Branch (586:41): [True: 8.96k, False: 0]
  ------------------
  587|  8.96k|           (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
  ------------------
  |  Branch (587:12): [True: 8.45k, False: 511]
  ------------------
  588|  8.45k|        {
  589|  8.45k|            ps_dec->u4_fmt_conv_num_rows =
  590|  8.45k|                (ps_dec->s_disp_frame_info.u4_y_ht - ps_dec->u4_fmt_conv_cur_row);
  591|  8.45k|            ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), ps_dec->u4_fmt_conv_cur_row,
  592|  8.45k|                                  ps_dec->u4_fmt_conv_num_rows);
  593|  8.45k|            ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
  594|  8.45k|        }
  595|       |#ifdef KEEP_THREADS_ACTIVE
  596|       |        ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]);
  597|       |        if(OK != ret) break;
  598|       |
  599|       |        ps_dec->ai4_process_done[0] = PROC_DONE;
  600|       |        ithread_cond_signal(ps_dec->apv_proc_done_condition[0]);
  601|       |
  602|       |        ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]);
  603|       |        if(OK != ret) break;
  604|       |#else
  605|  21.3k|        break;
  606|  21.3k|#endif
  607|  21.3k|    }
  608|  21.3k|}

isvcd_free_dynamic_bufs:
   84|   151k|{
   85|   151k|    WORD32 i;
   86|   151k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
   87|       |    /* Free any avc dynamic buffers that are allocated */
   88|   151k|    ih264d_free_dynamic_bufs(ps_dec);
   89|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->pu1_crop_wnd_flag);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   90|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->ps_inter_lyr_mb_prms_base);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   91|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_base);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   92|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->pi2_il_residual_resample_luma_base);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   93|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->pi2_il_residual_resample_chroma_base);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   94|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->ps_svc_frm_mb_info);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   95|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->pu2_frm_res_luma_csbp);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   96|   151k|    PS_DEC_ALIGNED_FREE(ps_dec, ps_svc_lyr_dec->pu1_svc_base_mode_flag);
  ------------------
  |  |   43|   151k|#define PS_DEC_ALIGNED_FREE(ps_dec, y) \
  |  |   44|   151k|if(y) {ps_dec->pf_aligned_free(ps_dec->pv_mem_ctxt, ((void *)y)); (y) = NULL;}
  |  |  ------------------
  |  |  |  Branch (44:4): [True: 24.4k, False: 126k]
  |  |  ------------------
  ------------------
   97|       |
   98|   151k|    memset(ps_dec->ps_pic_buf_base, 0, sizeof(struct pic_buffer_t) * (H264_MAX_REF_PICS * 2));
  ------------------
  |  |  534|   151k|#define H264_MAX_REF_PICS         16
  ------------------
   99|  9.83M|    for(i = 0; i < MAX_DISP_BUFS_NEW; i++)
  ------------------
  |  |   76|  9.83M|#define MAX_DISP_BUFS_NEW 64
  ------------------
  |  Branch (99:16): [True: 9.68M, False: 151k]
  ------------------
  100|  9.68M|    {
  101|       |        ps_dec->apv_buf_id_pic_buf_map[i] = NULL;
  102|  9.68M|    }
  103|   151k|    return 0;
  104|   151k|}
isvcd_allocate_dynamic_bufs:
  122|  24.4k|{
  123|  24.4k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  124|  24.4k|    WORD16 i16_status = 0;
  125|  24.4k|    UWORD8 uc_frmOrFld = (1 - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag);
  126|  24.4k|    dec_seq_params_t *ps_sps = ps_dec->ps_cur_sps;
  127|  24.4k|    UWORD32 u4_total_mbs = ps_sps->u4_total_num_of_mbs << uc_frmOrFld;
  128|  24.4k|    WORD32 size;
  129|  24.4k|    void *pv_buf;
  130|  24.4k|    void *pv_mem_ctxt = ps_dec->pv_mem_ctxt;
  131|  24.4k|    size = u4_total_mbs;
  132|       |
  133|  24.4k|    i16_status = ih264d_allocate_dynamic_bufs(ps_dec);
  134|       |
  135|  24.4k|    if(i16_status != OK)
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  |  Branch (135:8): [True: 0, False: 24.4k]
  ------------------
  136|      0|    {
  137|       |        /* Free any dynamic buffers that are allocated */
  138|      0|        ih264d_free_dynamic_bufs(ps_dec);
  139|      0|        ps_dec->i4_error_code = IVD_MEM_ALLOC_FAILED;
  140|      0|        return IVD_MEM_ALLOC_FAILED;
  141|      0|    }
  142|  24.4k|    if(u4_total_mbs == 0)
  ------------------
  |  Branch (142:8): [True: 0, False: 24.4k]
  ------------------
  143|      0|    {
  144|      0|        return IVD_MEM_ALLOC_FAILED;
  145|      0|    }
  146|       |
  147|       |    /* Allocate frame level mb info */
  148|  24.4k|    size = sizeof(dec_svc_mb_info_t) * u4_total_mbs;
  149|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  150|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  151|  24.4k|    ps_svc_lyr_dec->ps_svc_frm_mb_info = pv_buf;
  152|  24.4k|    memset(ps_svc_lyr_dec->ps_svc_frm_mb_info, 0, size);
  153|       |
  154|       |    /* Allocate frame level residual luma csbp info */
  155|  24.4k|    size = sizeof(UWORD16) * u4_total_mbs;
  156|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  157|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  158|  24.4k|    ps_svc_lyr_dec->pu2_frm_res_luma_csbp = pv_buf;
  159|  24.4k|    memset(ps_svc_lyr_dec->pu2_frm_res_luma_csbp, 0, size);
  160|  24.4k|    ps_svc_lyr_dec->i4_frm_res_luma_csbp_stride = ps_dec->u2_frm_wd_in_mbs;
  161|       |
  162|       |    /* Allocate frame level residual luma csbp info */
  163|  24.4k|    size = sizeof(UWORD8) * u4_total_mbs;
  164|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  165|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  166|  24.4k|    ps_svc_lyr_dec->pu1_svc_base_mode_flag = pv_buf;
  167|  24.4k|    memset(ps_svc_lyr_dec->pu1_svc_base_mode_flag, 0, size);
  168|  24.4k|    ps_svc_lyr_dec->i4_frm_svc_base_mode_cabac_stride = ps_dec->u2_frm_wd_in_mbs;
  169|  24.4k|    ps_svc_lyr_dec->i4_frm_svc_base_mode_cabac_size = u4_total_mbs;
  170|       |
  171|       |    /* Allocate frame level crop windows flags */
  172|  24.4k|    size = sizeof(UWORD8) * u4_total_mbs;
  173|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  174|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  175|  24.4k|    ps_svc_lyr_dec->pu1_crop_wnd_flag = pv_buf;
  176|  24.4k|    memset(ps_svc_lyr_dec->pu1_crop_wnd_flag, 0, size);
  177|       |
  178|       |    /**********************************/
  179|       |    /*Creation of Inter layer buffers */
  180|       |    /**********************************/
  181|       |
  182|       |    /* MB type buffer : one element per MB */
  183|  24.4k|    size = (ps_dec->u2_frm_wd_in_mbs + 2) * (ps_dec->u2_frm_ht_in_mbs + 2) *
  184|  24.4k|           sizeof(inter_lyr_mb_prms_t);
  185|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  186|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  187|  24.4k|    memset(pv_buf, -1, size);
  188|  24.4k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_base = pv_buf;
  189|  24.4k|    ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride = ps_dec->u2_frm_wd_in_mbs + 2;
  190|  24.4k|    ps_svc_lyr_dec->ps_inter_lyr_mb_prms_frm_start =
  191|  24.4k|        ps_svc_lyr_dec->ps_inter_lyr_mb_prms_base + 1 + ps_svc_lyr_dec->u2_inter_lyr_mb_prms_stride;
  192|       |
  193|  24.4k|    ps_svc_lyr_dec->u4_inter_lyr_mb_prms_size = (ps_dec->u2_frm_wd_in_mbs + 2) *
  194|  24.4k|                                                (ps_dec->u2_frm_ht_in_mbs + 2) *
  195|  24.4k|                                                sizeof(inter_lyr_mb_prms_t);
  196|       |
  197|       |    /* Luma Residual data at each layer : dafault 0*/
  198|  24.4k|    size = ((ps_dec->u2_pic_wd + 4) * (ps_dec->u2_pic_ht + 4)) * sizeof(WORD16);
  199|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  200|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  201|  24.4k|    memset(pv_buf, 0, size);
  202|  24.4k|    ps_svc_lyr_dec->pi2_il_residual_resample_luma_base = pv_buf;
  203|  24.4k|    ps_svc_lyr_dec->u2_residual_resample_luma_stride = (ps_dec->u2_pic_wd + 4);
  204|  24.4k|    ps_svc_lyr_dec->pi2_il_residual_resample_mb_luma_frm_start =
  205|  24.4k|        ps_svc_lyr_dec->pi2_il_residual_resample_luma_base + 2 +
  206|  24.4k|        (2 * ps_svc_lyr_dec->u2_residual_resample_luma_stride);
  207|  24.4k|    ps_svc_lyr_dec->u4_residual_resample_luma_size =
  208|  24.4k|        ((ps_dec->u2_pic_wd + 4) * (ps_dec->u2_pic_ht + 4)) * sizeof(WORD16);
  209|       |
  210|       |    /* Chroma Residual data at each layer : dafault 0*/
  211|  24.4k|    size = (((4 + ps_dec->u2_pic_wd) * ((4 + ps_dec->u2_pic_ht) >> 1)) * sizeof(WORD16));
  212|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  213|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  214|  24.4k|    memset(pv_buf, 0, size);
  215|  24.4k|    ps_svc_lyr_dec->pi2_il_residual_resample_chroma_base = pv_buf;
  216|  24.4k|    ps_svc_lyr_dec->u2_residual_resample_chroma_stride = (ps_dec->u2_pic_wd + 4);
  217|  24.4k|    ps_svc_lyr_dec->pi2_il_residual_resample_mb_chroma_frm_start =
  218|  24.4k|        ps_svc_lyr_dec->pi2_il_residual_resample_chroma_base + 2 +
  219|  24.4k|        ps_svc_lyr_dec->u2_residual_resample_chroma_stride;
  220|  24.4k|    ps_svc_lyr_dec->u4_residual_resample_chroma_size =
  221|  24.4k|        (((4 + ps_dec->u2_pic_wd) * ((4 + ps_dec->u2_pic_ht) >> 1)) * sizeof(WORD16));
  222|       |
  223|       |    /* mv bank buffer : 16 elements per MB: each at 4x4 block level */
  224|  24.4k|    size = ((ps_dec->u2_pic_wd) * (ps_dec->u2_pic_ht >> 4)) * sizeof(mv_pred_t);
  225|  24.4k|    pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
  226|  24.4k|    RETURN_IF((NULL == pv_buf), IV_FAIL);
  ------------------
  |  |   44|  24.4k|#define RETURN_IF(cond, retval) if(cond) {return (retval);}
  |  |  ------------------
  |  |  |  Branch (44:36): [True: 0, False: 24.4k]
  |  |  ------------------
  ------------------
  227|  24.4k|    memset(pv_buf, 0, size);
  228|  24.4k|    ps_svc_lyr_dec->ps_il_pred_mv_bank_buf_base = pv_buf;
  229|       |
  230|       |    /*syntax for SVC related bin ctxt tables*/
  231|  24.4k|    {
  232|  24.4k|        bin_ctxt_model_t *const p_cabac_ctxt_table_t = ps_dec->p_cabac_ctxt_table_t;
  233|       |
  234|  24.4k|        ps_svc_lyr_dec->ps_base_mode_flag = p_cabac_ctxt_table_t + CABAC_BASE_MODE_FLAG;
  235|  24.4k|        ps_svc_lyr_dec->ps_motion_prediction_flag_l0 = p_cabac_ctxt_table_t + CABAC_MOT_PRED_FLAG0;
  236|  24.4k|        ps_svc_lyr_dec->ps_motion_prediction_flag_l1 = p_cabac_ctxt_table_t + CABAC_MOT_PRED_FLAG1;
  237|  24.4k|        ps_svc_lyr_dec->ps_residual_prediction_flag = p_cabac_ctxt_table_t + CABAC_RES_PRED_FLAG;
  238|  24.4k|    }
  239|  24.4k|    return (i16_status);
  240|  24.4k|}
isvcd_decode_pic_order_cnt:
  260|   136k|{
  261|   136k|    WORD64 i8_pic_msb;
  262|   136k|    WORD32 i4_top_field_order_cnt = 0, i4_bottom_field_order_cnt = 0;
  263|   136k|    dec_seq_params_t *ps_seq = ps_dec->ps_cur_sps;
  264|   136k|    WORD32 i4_prev_frame_num_ofst;
  265|       |
  266|   136k|    switch(ps_seq->u1_pic_order_cnt_type)
  267|   136k|    {
  268|   109k|        case 0:
  ------------------
  |  Branch (268:9): [True: 109k, False: 26.8k]
  ------------------
  269|       |            /* POC TYPE 0 */
  270|   109k|            if(u1_is_idr_slice)
  ------------------
  |  Branch (270:16): [True: 98.4k, False: 11.1k]
  ------------------
  271|  98.4k|            {
  272|  98.4k|                ps_prev_poc->i4_pic_order_cnt_msb = 0;
  273|  98.4k|                ps_prev_poc->i4_pic_order_cnt_lsb = 0;
  274|  98.4k|            }
  275|   109k|            if(ps_prev_poc->u1_mmco_equalto5)
  ------------------
  |  Branch (275:16): [True: 1.05k, False: 108k]
  ------------------
  276|  1.05k|            {
  277|  1.05k|                if(ps_prev_poc->u1_bot_field != 1)
  ------------------
  |  Branch (277:20): [True: 1.05k, False: 0]
  ------------------
  278|  1.05k|                {
  279|  1.05k|                    ps_prev_poc->i4_pic_order_cnt_msb = 0;
  280|  1.05k|                    ps_prev_poc->i4_pic_order_cnt_lsb = ps_prev_poc->i4_top_field_order_count;
  281|  1.05k|                }
  282|      0|                else
  283|      0|                {
  284|      0|                    ps_prev_poc->i4_pic_order_cnt_msb = 0;
  285|      0|                    ps_prev_poc->i4_pic_order_cnt_lsb = 0;
  286|      0|                }
  287|  1.05k|            }
  288|       |
  289|   109k|            if((ps_cur_poc->i4_pic_order_cnt_lsb < ps_prev_poc->i4_pic_order_cnt_lsb) &&
  ------------------
  |  Branch (289:16): [True: 4.28k, False: 105k]
  ------------------
  290|  4.28k|               ((ps_prev_poc->i4_pic_order_cnt_lsb - ps_cur_poc->i4_pic_order_cnt_lsb) >=
  ------------------
  |  Branch (290:16): [True: 2.83k, False: 1.44k]
  ------------------
  291|  4.28k|                (ps_seq->i4_max_pic_order_cntLsb >> 1)))
  292|  2.83k|            {
  293|  2.83k|                i8_pic_msb =
  294|  2.83k|                    (WORD64) ps_prev_poc->i4_pic_order_cnt_msb + ps_seq->i4_max_pic_order_cntLsb;
  295|  2.83k|            }
  296|   106k|            else if((ps_cur_poc->i4_pic_order_cnt_lsb > ps_prev_poc->i4_pic_order_cnt_lsb) &&
  ------------------
  |  Branch (296:21): [True: 79.3k, False: 27.5k]
  ------------------
  297|  79.3k|                    ((ps_cur_poc->i4_pic_order_cnt_lsb - ps_prev_poc->i4_pic_order_cnt_lsb) >=
  ------------------
  |  Branch (297:21): [True: 46.1k, False: 33.1k]
  ------------------
  298|  79.3k|                     (ps_seq->i4_max_pic_order_cntLsb >> 1)))
  299|  46.1k|            {
  300|  46.1k|                i8_pic_msb =
  301|  46.1k|                    (WORD64) ps_prev_poc->i4_pic_order_cnt_msb - ps_seq->i4_max_pic_order_cntLsb;
  302|  46.1k|            }
  303|  60.6k|            else
  304|  60.6k|            {
  305|  60.6k|                i8_pic_msb = ps_prev_poc->i4_pic_order_cnt_msb;
  306|  60.6k|            }
  307|       |
  308|   109k|            if(!u1_field_pic_flag || !u1_bottom_field_flag)
  ------------------
  |  Branch (308:16): [True: 109k, False: 0]
  |  Branch (308:38): [True: 0, False: 0]
  ------------------
  309|   109k|            {
  310|   109k|                WORD64 i8_result = i8_pic_msb + ps_cur_poc->i4_pic_order_cnt_lsb;
  311|   109k|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|   109k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 109k]
  |  |  |  Branch (58:54): [True: 0, False: 109k]
  |  |  ------------------
  ------------------
  312|      0|                {
  313|      0|                    return ERROR_INV_POC;
  314|      0|                }
  315|   109k|                i4_top_field_order_cnt = (WORD32) i8_result;
  316|   109k|            }
  317|       |
  318|   109k|            if(!u1_field_pic_flag)
  ------------------
  |  Branch (318:16): [True: 109k, False: 0]
  ------------------
  319|   109k|            {
  320|   109k|                WORD64 i8_result =
  321|   109k|                    (WORD64) i4_top_field_order_cnt + ps_cur_poc->i4_delta_pic_order_cnt_bottom;
  322|   109k|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|   109k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 68, False: 109k]
  |  |  |  Branch (58:54): [True: 75, False: 109k]
  |  |  ------------------
  ------------------
  323|    143|                {
  324|    143|                    return ERROR_INV_POC;
  325|    143|                }
  326|   109k|                i4_bottom_field_order_cnt = (WORD32) i8_result;
  327|   109k|            }
  328|      0|            else if(u1_bottom_field_flag)
  ------------------
  |  Branch (328:21): [True: 0, False: 0]
  ------------------
  329|      0|            {
  330|      0|                WORD64 i8_result = i8_pic_msb + ps_cur_poc->i4_pic_order_cnt_lsb;
  331|      0|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|      0|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 0]
  |  |  |  Branch (58:54): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  332|      0|                {
  333|      0|                    return ERROR_INV_POC;
  334|      0|                }
  335|      0|                i4_bottom_field_order_cnt = (WORD32) i8_result;
  336|      0|            }
  337|       |
  338|   109k|            if(IS_OUT_OF_RANGE_S32(i8_pic_msb))
  ------------------
  |  |   58|   109k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 109k]
  |  |  |  Branch (58:54): [True: 0, False: 109k]
  |  |  ------------------
  ------------------
  339|      0|            {
  340|      0|                return ERROR_INV_POC;
  341|      0|            }
  342|   109k|            ps_cur_poc->i4_pic_order_cnt_msb = (WORD32) i8_pic_msb;
  343|   109k|            break;
  344|       |
  345|  23.6k|        case 1:
  ------------------
  |  Branch (345:9): [True: 23.6k, False: 112k]
  ------------------
  346|  23.6k|        {
  347|       |            /* POC TYPE 1 */
  348|  23.6k|            UWORD8 i;
  349|  23.6k|            WORD32 prev_frame_num;
  350|  23.6k|            WORD32 frame_num_ofst;
  351|  23.6k|            WORD32 abs_frm_num;
  352|  23.6k|            WORD32 poc_cycle_cnt, frame_num_in_poc_cycle;
  353|  23.6k|            WORD64 i8_expected_delta_poc_cycle;
  354|  23.6k|            WORD32 expected_poc;
  355|  23.6k|            WORD64 i8_result;
  356|       |
  357|  23.6k|            prev_frame_num = (WORD32) ps_cur_slice->u2_frame_num;
  358|  23.6k|            if(!u1_is_idr_slice)
  ------------------
  |  Branch (358:16): [True: 11.0k, False: 12.6k]
  ------------------
  359|  11.0k|            {
  360|  11.0k|                if(ps_cur_slice->u1_mmco_equalto5)
  ------------------
  |  Branch (360:20): [True: 805, False: 10.2k]
  ------------------
  361|    805|                {
  362|    805|                    prev_frame_num = 0;
  363|    805|                    i4_prev_frame_num_ofst = 0;
  364|    805|                }
  365|  10.2k|                else
  366|  10.2k|                {
  367|  10.2k|                    i4_prev_frame_num_ofst = ps_prev_poc->i4_prev_frame_num_ofst;
  368|  10.2k|                }
  369|  11.0k|            }
  370|  12.6k|            else
  371|  12.6k|                i4_prev_frame_num_ofst = 0;
  372|       |
  373|       |            /* 1. Derivation for FrameNumOffset */
  374|  23.6k|            if(u1_is_idr_slice)
  ------------------
  |  Branch (374:16): [True: 12.6k, False: 11.0k]
  ------------------
  375|  12.6k|            {
  376|  12.6k|                frame_num_ofst = 0;
  377|  12.6k|                ps_cur_poc->i4_delta_pic_order_cnt[0] = 0;
  378|  12.6k|                ps_cur_poc->i4_delta_pic_order_cnt[1] = 0;
  379|  12.6k|            }
  380|  11.0k|            else if(prev_frame_num > ((WORD32) u2_frame_num))
  ------------------
  |  Branch (380:21): [True: 4.66k, False: 6.35k]
  ------------------
  381|  4.66k|            {
  382|  4.66k|                WORD64 i8_result =
  383|  4.66k|                    i4_prev_frame_num_ofst + (WORD64) ps_seq->u2_u4_max_pic_num_minus1 + 1;
  384|  4.66k|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|  4.66k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 4.66k]
  |  |  |  Branch (58:54): [True: 67, False: 4.60k]
  |  |  ------------------
  ------------------
  385|     67|                {
  386|     67|                    return ERROR_INV_FRAME_NUM;
  387|     67|                }
  388|  4.60k|                frame_num_ofst = (WORD32) i8_result;
  389|  4.60k|            }
  390|  6.35k|            else
  391|  6.35k|                frame_num_ofst = i4_prev_frame_num_ofst;
  392|       |
  393|       |            /* 2. Derivation for absFrameNum */
  394|  23.6k|            if(0 != ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle)
  ------------------
  |  Branch (394:16): [True: 15.9k, False: 7.65k]
  ------------------
  395|  15.9k|            {
  396|  15.9k|                WORD64 i8_result = frame_num_ofst + (WORD64) u2_frame_num;
  397|  15.9k|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|  15.9k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 15.9k]
  |  |  |  Branch (58:54): [True: 0, False: 15.9k]
  |  |  ------------------
  ------------------
  398|      0|                {
  399|      0|                    return ERROR_INV_FRAME_NUM;
  400|      0|                }
  401|  15.9k|                abs_frm_num = (WORD32) i8_result;
  402|  15.9k|            }
  403|  7.65k|            else
  404|  7.65k|                abs_frm_num = 0;
  405|  23.6k|            if((u1_nal_ref_idc == 0) && (abs_frm_num > 0)) abs_frm_num = abs_frm_num - 1;
  ------------------
  |  Branch (405:16): [True: 3.22k, False: 20.4k]
  |  Branch (405:41): [True: 1.59k, False: 1.63k]
  ------------------
  406|       |
  407|       |            /* 4. expectedDeltaPerPicOrderCntCycle is derived as */
  408|  23.6k|            i8_expected_delta_poc_cycle = 0;
  409|   144k|            for(i = 0; i < ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle; i++)
  ------------------
  |  Branch (409:24): [True: 120k, False: 23.6k]
  ------------------
  410|   120k|            {
  411|   120k|                i8_expected_delta_poc_cycle += ps_seq->i4_ofst_for_ref_frame[i];
  412|   120k|            }
  413|       |
  414|       |            /* 3. When absFrameNum > 0, picOrderCntCycleCnt and
  415|       |            frame_num_in_poc_cycle are derived as : */
  416|       |            /* 5. expectedPicOrderCnt is derived as : */
  417|  23.6k|            if(abs_frm_num > 0)
  ------------------
  |  Branch (417:16): [True: 15.9k, False: 7.69k]
  ------------------
  418|  15.9k|            {
  419|  15.9k|                poc_cycle_cnt =
  420|  15.9k|                    DIV((abs_frm_num - 1), ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle);
  ------------------
  |  |   65|  15.9k|#define DIV(x,y) ((x)/(y))
  ------------------
  421|  15.9k|                frame_num_in_poc_cycle =
  422|  15.9k|                    MOD((abs_frm_num - 1), ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle);
  ------------------
  |  |   64|  15.9k|#define MOD(x,y) ((x)%(y))
  ------------------
  423|       |
  424|  15.9k|                i8_result = poc_cycle_cnt * i8_expected_delta_poc_cycle;
  425|       |
  426|  81.1k|                for(i = 0; i <= frame_num_in_poc_cycle; i++)
  ------------------
  |  Branch (426:28): [True: 65.2k, False: 15.9k]
  ------------------
  427|  65.2k|                {
  428|  65.2k|                    i8_result = i8_result + ps_seq->i4_ofst_for_ref_frame[i];
  429|  65.2k|                }
  430|       |
  431|  15.9k|                if(IS_OUT_OF_RANGE_S32(i8_result)) return ERROR_INV_POC;
  ------------------
  |  |   58|  15.9k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 148, False: 15.7k]
  |  |  |  Branch (58:54): [True: 148, False: 15.6k]
  |  |  ------------------
  ------------------
  432|       |
  433|  15.6k|                expected_poc = (WORD32) i8_result;
  434|  15.6k|            }
  435|  7.69k|            else
  436|  7.69k|                expected_poc = 0;
  437|       |
  438|  23.3k|            if(u1_nal_ref_idc == 0)
  ------------------
  |  Branch (438:16): [True: 3.19k, False: 20.1k]
  ------------------
  439|  3.19k|            {
  440|  3.19k|                i8_result = (WORD64) expected_poc + ps_seq->i4_ofst_for_non_ref_pic;
  441|       |
  442|  3.19k|                if(IS_OUT_OF_RANGE_S32(i8_result)) return ERROR_INV_POC;
  ------------------
  |  |   58|  3.19k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 65, False: 3.12k]
  |  |  |  Branch (58:54): [True: 69, False: 3.05k]
  |  |  ------------------
  ------------------
  443|       |
  444|  3.05k|                expected_poc = (WORD32) i8_result;
  445|  3.05k|            }
  446|       |
  447|       |            /* 6. TopFieldOrderCnt or BottomFieldOrderCnt are derived as */
  448|  23.1k|            if(!u1_field_pic_flag)
  ------------------
  |  Branch (448:16): [True: 23.1k, False: 0]
  ------------------
  449|  23.1k|            {
  450|  23.1k|                i8_result = (WORD64) expected_poc + ps_cur_poc->i4_delta_pic_order_cnt[0];
  451|       |
  452|  23.1k|                if(IS_OUT_OF_RANGE_S32(i8_result)) return ERROR_INV_POC;
  ------------------
  |  |   58|  23.1k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 97, False: 23.0k]
  |  |  |  Branch (58:54): [True: 76, False: 23.0k]
  |  |  ------------------
  ------------------
  453|  23.0k|                i4_top_field_order_cnt = (WORD32) i8_result;
  454|       |
  455|  23.0k|                i8_result = (WORD64) i4_top_field_order_cnt +
  456|  23.0k|                            ps_seq->i4_ofst_for_top_to_bottom_field +
  457|  23.0k|                            ps_cur_poc->i4_delta_pic_order_cnt[1];
  458|       |
  459|  23.0k|                if(IS_OUT_OF_RANGE_S32(i8_result)) return ERROR_INV_POC;
  ------------------
  |  |   58|  23.0k|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 91, False: 22.9k]
  |  |  |  Branch (58:54): [True: 90, False: 22.8k]
  |  |  ------------------
  ------------------
  460|  22.8k|                i4_bottom_field_order_cnt = (WORD32) i8_result;
  461|  22.8k|            }
  462|      0|            else if(!u1_bottom_field_flag)
  ------------------
  |  Branch (462:21): [True: 0, False: 0]
  ------------------
  463|      0|            {
  464|      0|                i8_result = (WORD64) expected_poc + ps_cur_poc->i4_delta_pic_order_cnt[0];
  465|       |
  466|      0|                if(IS_OUT_OF_RANGE_S32(i8_result)) return ERROR_INV_POC;
  ------------------
  |  |   58|      0|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 0]
  |  |  |  Branch (58:54): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  467|      0|                i4_top_field_order_cnt = (WORD32) i8_result;
  468|      0|            }
  469|      0|            else
  470|      0|            {
  471|      0|                i8_result = (WORD64) expected_poc + ps_seq->i4_ofst_for_top_to_bottom_field +
  472|      0|                            ps_cur_poc->i4_delta_pic_order_cnt[0];
  473|       |
  474|      0|                if(IS_OUT_OF_RANGE_S32(i8_result)) return ERROR_INV_POC;
  ------------------
  |  |   58|      0|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 0]
  |  |  |  Branch (58:54): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  475|      0|                i4_bottom_field_order_cnt = (WORD32) i8_result;
  476|      0|            }
  477|       |            /* Copy the current POC info into Previous POC structure */
  478|  22.8k|            ps_cur_poc->i4_prev_frame_num_ofst = frame_num_ofst;
  479|  22.8k|        }
  480|       |
  481|      0|        break;
  482|  3.14k|        case 2:
  ------------------
  |  Branch (482:9): [True: 3.14k, False: 133k]
  ------------------
  483|  3.14k|        {
  484|       |            /* POC TYPE 2 */
  485|  3.14k|            WORD32 prev_frame_num;
  486|  3.14k|            WORD32 frame_num_ofst;
  487|  3.14k|            WORD32 tmp_poc;
  488|       |
  489|  3.14k|            prev_frame_num = (WORD32) ps_cur_slice->u2_frame_num;
  490|  3.14k|            if(!u1_is_idr_slice)
  ------------------
  |  Branch (490:16): [True: 1.38k, False: 1.76k]
  ------------------
  491|  1.38k|            {
  492|  1.38k|                if(ps_cur_slice->u1_mmco_equalto5)
  ------------------
  |  Branch (492:20): [True: 356, False: 1.02k]
  ------------------
  493|    356|                {
  494|    356|                    prev_frame_num = 0;
  495|    356|                    i4_prev_frame_num_ofst = 0;
  496|    356|                }
  497|  1.02k|                else
  498|  1.02k|                    i4_prev_frame_num_ofst = ps_prev_poc->i4_prev_frame_num_ofst;
  499|  1.38k|            }
  500|  1.76k|            else
  501|  1.76k|                i4_prev_frame_num_ofst = 0;
  502|       |
  503|       |            /* 1. Derivation for FrameNumOffset */
  504|  3.14k|            if(u1_is_idr_slice)
  ------------------
  |  Branch (504:16): [True: 1.76k, False: 1.38k]
  ------------------
  505|  1.76k|            {
  506|  1.76k|                frame_num_ofst = 0;
  507|  1.76k|                ps_cur_poc->i4_delta_pic_order_cnt[0] = 0;
  508|  1.76k|                ps_cur_poc->i4_delta_pic_order_cnt[1] = 0;
  509|  1.76k|            }
  510|  1.38k|            else if(prev_frame_num > ((WORD32) u2_frame_num))
  ------------------
  |  Branch (510:21): [True: 486, False: 899]
  ------------------
  511|    486|            {
  512|    486|                WORD64 i8_result =
  513|    486|                    i4_prev_frame_num_ofst + (WORD64) ps_seq->u2_u4_max_pic_num_minus1 + 1;
  514|    486|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|    486|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 486]
  |  |  |  Branch (58:54): [True: 70, False: 416]
  |  |  ------------------
  ------------------
  515|     70|                {
  516|     70|                    return ERROR_INV_FRAME_NUM;
  517|     70|                }
  518|    416|                frame_num_ofst = (WORD32) i8_result;
  519|    416|            }
  520|    899|            else
  521|    899|                frame_num_ofst = i4_prev_frame_num_ofst;
  522|       |
  523|       |            /* 2. Derivation for tempPicOrderCnt */
  524|  3.07k|            if(u1_is_idr_slice)
  ------------------
  |  Branch (524:16): [True: 1.76k, False: 1.31k]
  ------------------
  525|  1.76k|                tmp_poc = 0;
  526|  1.31k|            else if(u1_nal_ref_idc == 0)
  ------------------
  |  Branch (526:21): [True: 451, False: 864]
  ------------------
  527|    451|            {
  528|    451|                WORD64 i8_result = ((frame_num_ofst + (WORD64) u2_frame_num) << 1) - 1;
  529|    451|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|    451|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 451]
  |  |  |  Branch (58:54): [True: 0, False: 451]
  |  |  ------------------
  ------------------
  530|      0|                {
  531|      0|                    return ERROR_INV_POC;
  532|      0|                }
  533|    451|                tmp_poc = (WORD32) i8_result;
  534|    451|            }
  535|    864|            else
  536|    864|            {
  537|    864|                WORD64 i8_result = (frame_num_ofst + (WORD64) u2_frame_num) << 1;
  538|    864|                if(IS_OUT_OF_RANGE_S32(i8_result))
  ------------------
  |  |   58|    864|#define IS_OUT_OF_RANGE_S32(a) (((a) < INT32_MIN) || ((a) > INT32_MAX))
  |  |  ------------------
  |  |  |  Branch (58:33): [True: 0, False: 864]
  |  |  |  Branch (58:54): [True: 0, False: 864]
  |  |  ------------------
  ------------------
  539|      0|                {
  540|      0|                    return ERROR_INV_POC;
  541|      0|                }
  542|    864|                tmp_poc = (WORD32) i8_result;
  543|    864|            }
  544|       |
  545|       |            /* 6. TopFieldOrderCnt or BottomFieldOrderCnt are derived as */
  546|  3.07k|            if(!u1_field_pic_flag)
  ------------------
  |  Branch (546:16): [True: 3.07k, False: 0]
  ------------------
  547|  3.07k|            {
  548|  3.07k|                i4_top_field_order_cnt = tmp_poc;
  549|  3.07k|                i4_bottom_field_order_cnt = tmp_poc;
  550|  3.07k|            }
  551|      0|            else if(!u1_bottom_field_flag)
  ------------------
  |  Branch (551:21): [True: 0, False: 0]
  ------------------
  552|      0|                i4_top_field_order_cnt = tmp_poc;
  553|      0|            else
  554|      0|                i4_bottom_field_order_cnt = tmp_poc;
  555|       |
  556|       |            /* Copy the current POC info into Previous POC structure */
  557|  3.07k|            ps_prev_poc->i4_prev_frame_num_ofst = frame_num_ofst;
  558|  3.07k|            ps_cur_poc->i4_prev_frame_num_ofst = frame_num_ofst;
  559|  3.07k|        }
  560|      0|        break;
  561|      0|        default:
  ------------------
  |  Branch (561:9): [True: 0, False: 136k]
  ------------------
  562|      0|            return ERROR_INV_POC_TYPE_T;
  563|      0|            break;
  564|   136k|    }
  565|       |
  566|   135k|    if(!u1_field_pic_flag)  // or a complementary field pair
  ------------------
  |  Branch (566:8): [True: 135k, False: 0]
  ------------------
  567|   135k|    {
  568|   135k|        *pi4_poc = MIN(i4_top_field_order_cnt, i4_bottom_field_order_cnt);
  ------------------
  |  |   61|   135k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 25.1k, False: 110k]
  |  |  ------------------
  ------------------
  569|   135k|        ps_pps->i4_top_field_order_cnt = i4_top_field_order_cnt;
  570|   135k|        ps_pps->i4_bottom_field_order_cnt = i4_bottom_field_order_cnt;
  571|   135k|    }
  572|      0|    else if(!u1_bottom_field_flag)
  ------------------
  |  Branch (572:13): [True: 0, False: 0]
  ------------------
  573|      0|    {
  574|      0|        *pi4_poc = i4_top_field_order_cnt;
  575|      0|        ps_pps->i4_top_field_order_cnt = i4_top_field_order_cnt;
  576|      0|    }
  577|      0|    else
  578|      0|    {
  579|      0|        *pi4_poc = i4_bottom_field_order_cnt;
  580|      0|        ps_pps->i4_bottom_field_order_cnt = i4_bottom_field_order_cnt;
  581|      0|    }
  582|       |
  583|   135k|    ps_pps->i4_avg_poc = *pi4_poc;
  584|       |
  585|   135k|    return OK;
  ------------------
  |  |  114|   135k|#define OK        0
  ------------------
  586|   136k|}
isvcd_init_dpb_ref_bufs:
  761|   107k|{
  762|   107k|    UWORD8 i;
  763|   107k|    struct pic_buffer_t *ps_init_dpb;
  764|   107k|    ps_init_dpb = ps_dec->ps_dpb_mgr->ps_init_dpb[0][0];
  765|  7.01M|    for(i = 0; i < 2 * MAX_REF_BUFS; i++)
  ------------------
  |  |   75|  7.01M|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (765:16): [True: 6.90M, False: 107k]
  ------------------
  766|  6.90M|    {
  767|  6.90M|        memset(ps_init_dpb, 0, sizeof(struct pic_buffer_t));
  768|  6.90M|        ps_init_dpb->pu1_buf1 = NULL;
  769|  6.90M|        ps_init_dpb->u1_long_term_frm_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  6.90M|#define MAX_REF_BUFS    32
  ------------------
  770|  6.90M|        ps_dec->ps_dpb_mgr->ps_init_dpb[0][i] = ps_init_dpb;
  771|  6.90M|        ps_dec->ps_dpb_mgr->ps_mod_dpb[0][i] = ps_init_dpb;
  772|  6.90M|        ps_init_dpb++;
  773|  6.90M|    }
  774|       |
  775|   107k|    ps_init_dpb = ps_dec->ps_dpb_mgr->ps_init_dpb[1][0];
  776|  7.01M|    for(i = 0; i < 2 * MAX_REF_BUFS; i++)
  ------------------
  |  |   75|  7.01M|#define MAX_REF_BUFS    32
  ------------------
  |  Branch (776:16): [True: 6.90M, False: 107k]
  ------------------
  777|  6.90M|    {
  778|  6.90M|        memset(ps_init_dpb, 0, sizeof(struct pic_buffer_t));
  779|  6.90M|        ps_init_dpb->pu1_buf1 = NULL;
  780|  6.90M|        ps_init_dpb->u1_long_term_frm_idx = MAX_REF_BUFS + 1;
  ------------------
  |  |   75|  6.90M|#define MAX_REF_BUFS    32
  ------------------
  781|  6.90M|        ps_dec->ps_dpb_mgr->ps_init_dpb[1][i] = ps_init_dpb;
  782|  6.90M|        ps_dec->ps_dpb_mgr->ps_mod_dpb[1][i] = ps_init_dpb;
  783|  6.90M|        ps_init_dpb++;
  784|  6.90M|    }
  785|   107k|}
isvcd_init_pic:
  804|   134k|{
  805|   134k|    dec_struct_t *ps_dec = &ps_svc_lyr_dec->s_dec;
  806|   134k|    dec_seq_params_t *ps_seq = ps_dec->ps_cur_sps;
  807|   134k|    prev_seq_params_t *ps_prev_seq_params = &ps_dec->s_prev_seq_params;
  808|   134k|    WORD32 ret;
  809|       |
  810|   134k|    ps_dec->ps_cur_slice->u2_frame_num = u2_frame_num;
  811|   134k|    ps_dec->ps_cur_slice->i4_poc = i4_poc;
  812|   134k|    ps_dec->ps_cur_pps = ps_pps;
  813|   134k|    ps_dec->ps_cur_pps->pv_codec_handle = ps_dec;
  814|       |
  815|   134k|    ps_dec->ps_dpb_mgr->i4_max_frm_num = ps_seq->u2_u4_max_pic_num_minus1 + 1;
  816|       |
  817|   134k|    ps_dec->ps_dpb_mgr->u2_pic_ht = ps_dec->u2_pic_ht;
  818|   134k|    ps_dec->ps_dpb_mgr->u2_pic_wd = ps_dec->u2_pic_wd;
  819|   134k|    ps_dec->i4_pic_type = NA_SLICE;
  ------------------
  |  |  367|   134k|#define NA_SLICE -1
  ------------------
  820|   134k|    ps_dec->i4_frametype = IV_NA_FRAME;
  821|   134k|    ps_dec->i4_content_type = IV_CONTENTTYPE_NA;
  822|       |
  823|       |    /*--------------------------------------------------------------------*/
  824|       |    /* Get the value of MaxMbAddress and frmheight in Mbs                 */
  825|       |    /*--------------------------------------------------------------------*/
  826|   134k|    ps_seq->u4_max_mb_addr =
  827|   134k|        ((UWORD32)ps_seq->u2_frm_wd_in_mbs *
  828|   134k|         ((UWORD32)ps_dec->u2_pic_ht >> (4 + ps_dec->ps_cur_slice->u1_field_pic_flag))) -
  829|   134k|        1;
  830|   134k|    ps_dec->u2_frm_ht_in_mbs = (ps_dec->u2_pic_ht >> (4 + ps_dec->ps_cur_slice->u1_field_pic_flag));
  831|       |
  832|       |    /***************************************************************************/
  833|       |    /* If change in Level or the required PicBuffers i4_size is more than the  */
  834|       |    /* current one FREE the current PicBuffers and allocate affresh            */
  835|       |    /***************************************************************************/
  836|   134k|    if(!ps_dec->u1_init_dec_flag)
  ------------------
  |  Branch (836:8): [True: 24.4k, False: 109k]
  ------------------
  837|  24.4k|    {
  838|  24.4k|        ps_dec->u1_max_dec_frame_buffering = ih264d_get_dpb_size(ps_seq);
  839|       |
  840|  24.4k|        ps_dec->i4_display_delay = ps_dec->u1_max_dec_frame_buffering;
  841|  24.4k|        if((1 == ps_seq->u1_vui_parameters_present_flag) &&
  ------------------
  |  Branch (841:12): [True: 1.90k, False: 22.5k]
  ------------------
  842|  1.90k|           (1 == ps_seq->s_vui.u1_bitstream_restriction_flag))
  ------------------
  |  Branch (842:12): [True: 202, False: 1.70k]
  ------------------
  843|    202|        {
  844|    202|            if(ps_seq->u1_frame_mbs_only_flag == 1)
  ------------------
  |  Branch (844:16): [True: 202, False: 0]
  ------------------
  845|    202|                ps_dec->i4_display_delay = ps_seq->s_vui.u4_num_reorder_frames + 1;
  846|      0|            else
  847|      0|                ps_dec->i4_display_delay = ps_seq->s_vui.u4_num_reorder_frames * 2 + 2;
  848|    202|        }
  849|       |
  850|  24.4k|        if(IVD_DECODE_FRAME_OUT == ps_dec->e_frm_out_mode) ps_dec->i4_display_delay = 0;
  ------------------
  |  Branch (850:12): [True: 0, False: 24.4k]
  ------------------
  851|       |
  852|  24.4k|        if(ps_dec->u4_share_disp_buf == 0)
  ------------------
  |  Branch (852:12): [True: 24.4k, False: 0]
  ------------------
  853|  24.4k|        {
  854|  24.4k|            if(ps_seq->u1_frame_mbs_only_flag == 1)
  ------------------
  |  Branch (854:16): [True: 24.4k, False: 0]
  ------------------
  855|  24.4k|                ps_dec->u1_pic_bufs = ps_dec->i4_display_delay + ps_seq->u1_num_ref_frames + 1;
  856|      0|            else
  857|      0|                ps_dec->u1_pic_bufs = ps_dec->i4_display_delay + ps_seq->u1_num_ref_frames * 2 + 2;
  858|  24.4k|        }
  859|      0|        else
  860|      0|        {
  861|      0|            ps_dec->u1_pic_bufs = (WORD32) ps_dec->u4_num_disp_bufs;
  862|      0|        }
  863|       |
  864|       |        /* Ensure at least two buffers are allocated */
  865|  24.4k|        ps_dec->u1_pic_bufs = MAX(ps_dec->u1_pic_bufs, 2);
  ------------------
  |  |   60|  24.4k|#define MAX(a,b) ((a > b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (60:19): [True: 24.3k, False: 141]
  |  |  ------------------
  ------------------
  866|       |
  867|  24.4k|        if(ps_dec->u4_share_disp_buf == 0)
  ------------------
  |  Branch (867:12): [True: 24.4k, False: 0]
  ------------------
  868|  24.4k|            ps_dec->u1_pic_bufs = MIN(ps_dec->u1_pic_bufs, (H264_MAX_REF_PICS * 2));
  ------------------
  |  |   61|  24.4k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 23.9k, False: 484]
  |  |  ------------------
  ------------------
  869|       |
  870|  24.4k|        ps_dec->u1_max_dec_frame_buffering =
  871|  24.4k|            MIN(ps_dec->u1_max_dec_frame_buffering, ps_dec->u1_pic_bufs);
  ------------------
  |  |   61|  24.4k|#define MIN(a,b) ((a < b)?(a):(b))
  |  |  ------------------
  |  |  |  Branch (61:19): [True: 24.2k, False: 202]
  |  |  ------------------
  ------------------
  872|       |
  873|       |        /* Temporary hack to run Tractor Cav/Cab/MbAff Profiler streams  also for
  874|       |         * CAFI1_SVA_C.264 in conformance*/
  875|  24.4k|        if(ps_dec->u1_init_dec_flag)
  ------------------
  |  Branch (875:12): [True: 0, False: 24.4k]
  ------------------
  876|      0|        {
  877|      0|            ih264d_release_pics_in_dpb((void *) ps_dec, ps_dec->u1_pic_bufs);
  878|      0|            ih264d_release_display_bufs(ps_dec);
  879|      0|            ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
  880|      0|        }
  881|       |
  882|       |        /*********************************************************************/
  883|       |        /* Configuring decoder parameters based on level and then            */
  884|       |        /* fresh pointer initialisation in decoder scratch and state buffers */
  885|       |        /*********************************************************************/
  886|  24.4k|        if(!ps_dec->u1_init_dec_flag || ((ps_seq->u1_level_idc < H264_LEVEL_3_0) ^
  ------------------
  |  |  298|      0|#define H264_LEVEL_3_0     30
  ------------------
  |  Branch (886:12): [True: 24.4k, False: 0]
  |  Branch (886:41): [True: 0, False: 0]
  ------------------
  887|      0|                                         (ps_prev_seq_params->u1_level_idc < H264_LEVEL_3_0)))
  ------------------
  |  |  298|      0|#define H264_LEVEL_3_0     30
  ------------------
  888|  24.4k|        {
  889|  24.4k|            ret = ih264d_init_dec_mb_grp(ps_dec);
  890|  24.4k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  |  Branch (890:16): [True: 0, False: 24.4k]
  ------------------
  891|  24.4k|        }
  892|       |
  893|  24.4k|        ret = isvcd_allocate_dynamic_bufs(ps_svc_lyr_dec);
  894|       |
  895|  24.4k|        if(ret != OK)
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  |  Branch (895:12): [True: 0, False: 24.4k]
  ------------------
  896|      0|        {
  897|       |            /* Free any dynamic buffers that are allocated */
  898|      0|            isvcd_free_dynamic_bufs(ps_svc_lyr_dec);
  899|      0|            ps_dec->i4_error_code = IVD_MEM_ALLOC_FAILED;
  900|      0|            return IVD_MEM_ALLOC_FAILED;
  901|      0|        }
  902|       |
  903|  24.4k|        ih264d_init_ref_bufs((dpb_manager_t *)ps_dec->ps_dpb_mgr);
  904|  24.4k|        isvcd_init_dpb_ref_bufs(ps_dec);
  905|       |
  906|  24.4k|        ret = ih264d_create_pic_buffers(ps_dec->u1_pic_bufs, ps_dec);
  907|  24.4k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  |  Branch (907:12): [True: 0, False: 24.4k]
  ------------------
  908|       |
  909|  24.4k|        ret = ih264d_create_mv_bank(ps_dec, ps_dec->u2_pic_wd, ps_dec->u2_pic_ht);
  910|  24.4k|        if(ret != OK) return ret;
  ------------------
  |  |  114|  24.4k|#define OK        0
  ------------------
  |  Branch (910:12): [True: 0, False: 24.4k]
  ------------------
  911|       |
  912|       |        /* In shared mode, set all of them as used by display */
  913|  24.4k|        if(ps_dec->u4_share_disp_buf == 1)
  ------------------
  |  Branch (913:12): [True: 0, False: 24.4k]
  ------------------
  914|      0|        {
  915|      0|            WORD32 i;
  916|       |
  917|      0|            for(i = 0; i < ps_dec->u1_pic_bufs; i++)
  ------------------
  |  Branch (917:24): [True: 0, False: 0]
  ------------------
  918|      0|            {
  919|      0|                ih264_buf_mgr_set_status((buf_mgr_t *) ps_dec->pv_pic_buf_mgr, i, BUF_MGR_IO);
  ------------------
  |  |   53|      0|#define BUF_MGR_IO           (1 << 3)
  ------------------
  920|      0|            }
  921|      0|        }
  922|       |
  923|  24.4k|        ps_dec->u1_init_dec_flag = 1;
  924|  24.4k|        ps_prev_seq_params->u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
  925|  24.4k|        ps_prev_seq_params->u1_level_idc = ps_seq->u1_level_idc;
  926|  24.4k|        ps_prev_seq_params->u1_profile_idc = ps_seq->u1_profile_idc;
  927|  24.4k|        ps_prev_seq_params->u2_frm_ht_in_mbs = ps_seq->u2_frm_ht_in_mbs;
  928|  24.4k|        ps_prev_seq_params->u1_frame_mbs_only_flag = ps_seq->u1_frame_mbs_only_flag;
  929|  24.4k|        ps_prev_seq_params->u1_direct_8x8_inference_flag = ps_seq->u1_direct_8x8_inference_flag;
  930|       |
  931|  24.4k|        ps_dec->i4_cur_display_seq = 0;
  932|  24.4k|        ps_dec->i4_prev_max_display_seq = 0;
  933|  24.4k|        ps_dec->i4_max_poc = 0;
  934|       |
  935|  24.4k|        {
  936|       |            /* 0th entry of CtxtIncMbMap will be always be containing default values
  937|       |            for CABAC context representing MB not available */
  938|  24.4k|            ctxt_inc_mb_info_t *p_DefCtxt = ps_dec->p_ctxt_inc_mb_map - 1;
  939|  24.4k|            UWORD8 *pu1_temp;
  940|  24.4k|            WORD8 i;
  941|  24.4k|            p_DefCtxt->u1_mb_type = CAB_SKIP;
  ------------------
  |  |  402|  24.4k|#define CAB_SKIP          0x10 /* 0001 0000 */
  ------------------
  942|       |
  943|  24.4k|            p_DefCtxt->u1_cbp = 0x0f;
  944|  24.4k|            p_DefCtxt->u1_intra_chroma_pred_mode = 0;
  945|       |
  946|  24.4k|            p_DefCtxt->u1_yuv_dc_csbp = 0x7;
  947|       |
  948|  24.4k|            p_DefCtxt->u1_transform8x8_ctxt = 0;
  949|       |
  950|  24.4k|            pu1_temp = (UWORD8 *) p_DefCtxt->i1_ref_idx;
  951|   122k|            for(i = 0; i < 4; i++, pu1_temp++) (*pu1_temp) = 0;
  ------------------
  |  Branch (951:24): [True: 97.7k, False: 24.4k]
  ------------------
  952|  24.4k|            pu1_temp = (UWORD8 *) p_DefCtxt->u1_mv;
  953|   415k|            for(i = 0; i < 16; i++, pu1_temp++) (*pu1_temp) = 0;
  ------------------
  |  Branch (953:24): [True: 391k, False: 24.4k]
  ------------------
  954|  24.4k|            ps_dec->ps_def_ctxt_mb_info = p_DefCtxt;
  955|  24.4k|        }
  956|  24.4k|    }
  957|       |    /* reset DBP commands read u4_flag */
  958|   134k|    ps_dec->ps_dpb_cmds->u1_dpb_commands_read = 0;
  959|       |
  960|   134k|    return OK;
  ------------------
  |  |  114|   134k|#define OK        0
  ------------------
  961|   134k|}

isvcd_parse_vui_ext_parametres:
   68|  1.17k|{
   69|  1.17k|    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
   70|  1.17k|    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
   71|  1.17k|    WORD32 ret;
   72|  1.17k|    UWORD32 u4_i;
   73|       |
   74|  1.17k|    ps_svc_vui_ext->u4_vui_ext_num_entries_minus1 = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
   75|  1.17k|    if(ps_svc_vui_ext->u4_vui_ext_num_entries_minus1 > 1023)
  ------------------
  |  Branch (75:8): [True: 110, False: 1.06k]
  ------------------
   76|    110|    {
   77|    110|        return ERROR_INV_SPS_PPS_T;
   78|    110|    }
   79|       |
   80|  19.5k|    for(u4_i = 0; u4_i <= ps_svc_vui_ext->u4_vui_ext_num_entries_minus1; u4_i++)
  ------------------
  |  Branch (80:19): [True: 18.5k, False: 930]
  ------------------
   81|  18.5k|    {
   82|  18.5k|        ps_svc_vui_ext->u1_vui_ext_dependency_id[u4_i] = ih264d_get_bits_h264(ps_bitstrm, 3);
   83|  18.5k|        ps_svc_vui_ext->u1_vui_ext_quality_id[u4_i] = ih264d_get_bits_h264(ps_bitstrm, 4);
   84|  18.5k|        ps_svc_vui_ext->u1_vui_ext_temporal_id[u4_i] = ih264d_get_bits_h264(ps_bitstrm, 3);
   85|  18.5k|        ps_svc_vui_ext->u1_vui_ext_timing_info_present_flag[u4_i] = ih264d_get_bit_h264(ps_bitstrm);
   86|       |
   87|  18.5k|        if(1 == ps_svc_vui_ext->u1_vui_ext_timing_info_present_flag[u4_i])
  ------------------
  |  Branch (87:12): [True: 2.66k, False: 15.9k]
  ------------------
   88|  2.66k|        {
   89|  2.66k|            ps_svc_vui_ext->u4_vui_ext_num_units_in_tick[u4_i] =
   90|  2.66k|                ih264d_get_bits_h264(ps_bitstrm, 32);
   91|  2.66k|            ps_svc_vui_ext->u4_vui_ext_time_scale[u4_i] = ih264d_get_bits_h264(ps_bitstrm, 32);
   92|  2.66k|            ps_svc_vui_ext->u1_vui_ext_fixed_frame_rate_flag[u4_i] =
   93|  2.66k|                ih264d_get_bit_h264(ps_bitstrm);
   94|  2.66k|        }
   95|       |
   96|  18.5k|        ps_svc_vui_ext->u1_vui_ext_nal_hrd_params_present_flag[u4_i] =
   97|  18.5k|            ih264d_get_bit_h264(ps_bitstrm);
   98|  18.5k|        if(ps_svc_vui_ext->u1_vui_ext_nal_hrd_params_present_flag[u4_i])
  ------------------
  |  Branch (98:12): [True: 2.54k, False: 16.0k]
  ------------------
   99|  2.54k|        {
  100|  2.54k|            ret = ih264d_parse_hrd_parametres(&ps_svc_vui_ext->s_nal_hrd[u4_i], ps_bitstrm);
  101|  2.54k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  2.54k|#define OK        0
  ------------------
  |  Branch (101:16): [True: 69, False: 2.48k]
  ------------------
  102|  2.54k|        }
  103|  18.5k|        ps_svc_vui_ext->u1_vui_ext_vcl_hrd_params_present_flag[u4_i] =
  104|  18.5k|            ih264d_get_bit_h264(ps_bitstrm);
  105|  18.5k|        if(ps_svc_vui_ext->u1_vui_ext_vcl_hrd_params_present_flag[u4_i])
  ------------------
  |  Branch (105:12): [True: 2.54k, False: 15.9k]
  ------------------
  106|  2.54k|        {
  107|  2.54k|            ret = ih264d_parse_hrd_parametres(&ps_svc_vui_ext->s_vcl_hrd[u4_i], ps_bitstrm);
  108|  2.54k|            if(ret != OK) return ret;
  ------------------
  |  |  114|  2.54k|#define OK        0
  ------------------
  |  Branch (108:16): [True: 67, False: 2.47k]
  ------------------
  109|  2.54k|        }
  110|  18.4k|        if(ps_svc_vui_ext->u1_vui_ext_nal_hrd_params_present_flag[u4_i] ||
  ------------------
  |  Branch (110:12): [True: 2.42k, False: 16.0k]
  ------------------
  111|  16.0k|           ps_svc_vui_ext->u1_vui_ext_vcl_hrd_params_present_flag[u4_i])
  ------------------
  |  Branch (111:12): [True: 711, False: 15.3k]
  ------------------
  112|  3.13k|        {
  113|  3.13k|            ps_svc_vui_ext->u1_vui_ext_low_delay_hrd_flag[u4_i] = ih264d_get_bit_h264(ps_bitstrm);
  114|  3.13k|        }
  115|  18.4k|        ps_svc_vui_ext->u1_vui_ext_pic_struct_present_flag[u4_i] = ih264d_get_bit_h264(ps_bitstrm);
  116|  18.4k|    }
  117|    930|    return OK;
  ------------------
  |  |  114|    930|#define OK        0
  ------------------
  118|  1.06k|}

ih264d_init_arch:
   85|  83.4k|{
   86|  83.4k|#ifdef DEFAULT_ARCH
   87|  83.4k|#if DEFAULT_ARCH == D_ARCH_X86_SSE42
   88|  83.4k|    ps_codec->e_processor_arch = ARCH_X86_SSE42;
   89|       |#elif DEFAULT_ARCH == D_ARCH_X86_SSSE3
   90|       |    ps_codec->e_processor_arch = ARCH_X86_SSSE3;
   91|       |#elif DEFAULT_ARCH == D_ARCH_X86_AVX2
   92|       |    ps_codec->e_processor_arch = ARCH_X86_AVX2;
   93|       |#else
   94|       |    ps_codec->e_processor_arch = ARCH_X86_GENERIC;
   95|       |#endif
   96|       |#else
   97|       |    ps_codec->e_processor_arch = ARCH_X86_SSE42;
   98|       |#endif
   99|       |
  100|  83.4k|}

ih264d_init_function_ptr_sse42:
   83|   119k|{
   84|   119k|    ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_sse42;
   85|   119k|    ps_codec->pf_default_weighted_pred_chroma = ih264_default_weighted_pred_chroma_sse42;
   86|   119k|    ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma_sse42;
   87|   119k|    ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma_sse42;
   88|   119k|    ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma_sse42;
   89|   119k|    ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma_sse42;
   90|       |
   91|   119k|    ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_sse42;
   92|   119k|    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_sse42;
   93|   119k|    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42;
   94|   119k|    return;
   95|   119k|}

ih264d_init_function_ptr_ssse3:
   83|   126k|{
   84|       |
   85|       |
   86|       |
   87|       |    /* Init function pointers for intra pred leaf level functions luma
   88|       |     * Intra 16x16 */
   89|   126k|    ps_codec->apf_intra_pred_luma_16x16[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3;
   90|   126k|    ps_codec->apf_intra_pred_luma_16x16[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3;
   91|   126k|    ps_codec->apf_intra_pred_luma_16x16[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3;
   92|   126k|    ps_codec->apf_intra_pred_luma_16x16[3] = ih264_intra_pred_luma_16x16_mode_plane_ssse3;
   93|       |
   94|       |    /* Init function pointers for intra pred leaf level functions luma
   95|       |     * Intra 4x4 */
   96|   126k|    ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3;
   97|   126k|    ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3;
   98|   126k|    ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3;
   99|   126k|    ps_codec->apf_intra_pred_luma_4x4[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3;
  100|   126k|    ps_codec->apf_intra_pred_luma_4x4[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3;
  101|   126k|    ps_codec->apf_intra_pred_luma_4x4[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3;
  102|   126k|    ps_codec->apf_intra_pred_luma_4x4[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3;
  103|   126k|    ps_codec->apf_intra_pred_luma_4x4[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3;
  104|   126k|    ps_codec->apf_intra_pred_luma_4x4[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3;
  105|       |
  106|       |    /* Init function pointers for intra pred leaf level functions luma
  107|       |     * Intra 8x8 */
  108|   126k|    ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3;
  109|   126k|    ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz_ssse3;
  110|   126k|    ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3;
  111|   126k|    ps_codec->apf_intra_pred_luma_8x8[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3;
  112|   126k|    ps_codec->apf_intra_pred_luma_8x8[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3;
  113|   126k|    ps_codec->apf_intra_pred_luma_8x8[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3;
  114|   126k|    ps_codec->apf_intra_pred_luma_8x8[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3;
  115|   126k|    ps_codec->apf_intra_pred_luma_8x8[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3;
  116|   126k|    ps_codec->apf_intra_pred_luma_8x8[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3;
  117|       |
  118|   126k|    ps_codec->pf_intra_pred_ref_filtering = ih264_intra_pred_luma_8x8_mode_ref_filtering;
  119|       |
  120|       |    /* Init function pointers for intra pred leaf level functions chroma
  121|       |     * Intra 8x8 */
  122|   126k|    ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert_ssse3;
  123|   126k|    ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3;
  124|   126k|    ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc;
  125|   126k|    ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3;
  126|       |
  127|       |
  128|   126k|    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3;
  129|   126k|    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_ssse3;
  130|   126k|    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3;
  131|   126k|    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3;
  132|       |
  133|       |
  134|   126k|    ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_ssse3;
  135|   126k|    ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_ssse3;
  136|   126k|    ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8_ssse3;
  137|   126k|    ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = ih264_iquant_itrans_recon_8x8_dc_ssse3;
  138|       |
  139|   126k|    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
  140|       |
  141|       |    /* Init fn ptr luma deblocking */
  142|   126k|    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3;
  143|   126k|    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3;
  144|   126k|    ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff_ssse3;
  145|   126k|    ps_codec->pf_deblk_luma_vert_bslt4_mbaff = ih264_deblk_luma_vert_bslt4_mbaff_ssse3;
  146|       |
  147|   126k|    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3;
  148|   126k|    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3;
  149|       |
  150|       |    /* Init fn ptr chroma deblocking */
  151|   126k|    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3;
  152|   126k|    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3;
  153|   126k|    ps_codec->pf_deblk_chroma_vert_bs4_mbaff = ih264_deblk_chroma_vert_bs4_mbaff_ssse3;
  154|   126k|    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3;
  155|   126k|    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3;
  156|   126k|    ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = ih264_deblk_chroma_vert_bslt4_mbaff_ssse3;
  157|       |
  158|       |    /* Inter pred leaf level functions */
  159|       |
  160|   126k|    ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy_ssse3;
  161|   126k|    ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel_ssse3;
  162|   126k|    ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz_ssse3;
  163|   126k|    ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel_ssse3;
  164|   126k|    ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel_ssse3;
  165|   126k|    ps_codec->apf_inter_pred_luma[5] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
  166|   126k|    ps_codec->apf_inter_pred_luma[6] = ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
  167|   126k|    ps_codec->apf_inter_pred_luma[7] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
  168|   126k|    ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert_ssse3;
  169|   126k|    ps_codec->apf_inter_pred_luma[9] = ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
  170|   126k|    ps_codec->apf_inter_pred_luma[10] = ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3;
  171|   126k|    ps_codec->apf_inter_pred_luma[11] = ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
  172|   126k|    ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel_ssse3;
  173|   126k|    ps_codec->apf_inter_pred_luma[13] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
  174|   126k|    ps_codec->apf_inter_pred_luma[14] = ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
  175|   126k|    ps_codec->apf_inter_pred_luma[15] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
  176|       |
  177|   126k|    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3;
  178|       |
  179|       |
  180|   126k|    return;
  181|   126k|}

isvcd_init_function_ptr:
   69|   151k|{
   70|   151k|    isvcd_init_function_ptr_generic(ps_svc_lyr_dec);
   71|   151k|    switch(ps_svc_lyr_dec->s_dec.e_processor_arch)
   72|   151k|    {
   73|  25.0k|        case ARCH_X86_GENERIC:
  ------------------
  |  Branch (73:9): [True: 25.0k, False: 126k]
  ------------------
   74|  25.0k|            isvcd_init_function_ptr_generic(ps_svc_lyr_dec);
   75|  25.0k|            break;
   76|  7.11k|        case ARCH_X86_SSSE3:
  ------------------
  |  Branch (76:9): [True: 7.11k, False: 144k]
  ------------------
   77|  7.11k|            ih264d_init_function_ptr_ssse3(&ps_svc_lyr_dec->s_dec);
   78|  7.11k|            break;
   79|  89.9k|        case ARCH_X86_SSE42:
  ------------------
  |  Branch (79:9): [True: 89.9k, False: 61.3k]
  ------------------
   80|   119k|        default:
  ------------------
  |  Branch (80:9): [True: 29.1k, False: 122k]
  ------------------
   81|   119k|            ih264d_init_function_ptr_ssse3(&ps_svc_lyr_dec->s_dec);
   82|   119k|            isvcd_init_function_ptr_sse42(ps_svc_lyr_dec);
   83|   119k|            break;
   84|   151k|    }
   85|   151k|}

isvcd_init_function_ptr_sse42:
   73|   119k|{
   74|   119k|    dec_struct_t *ps_codec = &ps_svc_lyr_dec->s_dec;
   75|   119k|    residual_sampling_ctxt_t *ps_resd_samp_ctx;
   76|   119k|    intra_sampling_ctxt_t *ps_intra_samp_ctxt;
   77|       |    /* call default init func prt sse42 */
   78|   119k|    ih264d_init_function_ptr_sse42(ps_codec);
   79|       |
   80|   119k|    ps_resd_samp_ctx = (residual_sampling_ctxt_t *) ps_svc_lyr_dec->pv_residual_sample_ctxt;
   81|   119k|    ps_intra_samp_ctxt = (intra_sampling_ctxt_t *) ps_svc_lyr_dec->pv_intra_sample_ctxt;
   82|       |
   83|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_4x4 =
   84|   119k|        isvcd_iquant_itrans_residual_recon_4x4_sse42;
   85|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_4x4_dc =
   86|   119k|        isvcd_iquant_itrans_residual_recon_4x4_dc_sse42;
   87|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_8x8 =
   88|   119k|        isvcd_iquant_itrans_residual_recon_8x8_sse42;
   89|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_luma_8x8_dc =
   90|   119k|        isvcd_iquant_itrans_residual_recon_8x8_dc_sse42;
   91|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4 =
   92|   119k|        isvcd_iquant_itrans_residual_recon_chroma_4x4_sse42;
   93|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_recon_chroma_4x4_dc =
   94|   119k|        isvcd_iquant_itrans_residual_recon_chroma_4x4_dc_sse42;
   95|       |
   96|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_4x4 = isvcd_iquant_itrans_residual_4x4_sse42;
   97|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_4x4_dc =
   98|   119k|        isvcd_iquant_itrans_residual_4x4_dc_sse42;
   99|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_8x8 = isvcd_iquant_itrans_residual_8x8_sse42;
  100|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_luma_8x8_dc =
  101|   119k|        isvcd_iquant_itrans_residual_8x8_dc_sse42;
  102|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_chroma_4x4 =
  103|   119k|        isvcd_iquant_itrans_residual_chroma_4x4_sse42;
  104|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_residual_chroma_4x4_dc =
  105|   119k|        isvcd_iquant_itrans_residual_chroma_4x4_dc_sse42;
  106|       |
  107|   119k|    ps_svc_lyr_dec->pf_pred_residual_recon_luma_4x4 = isvcd_pred_residual_recon_4x4_sse42;
  108|   119k|    ps_svc_lyr_dec->pf_pred_residual_recon_luma_8x8 = isvcd_pred_residual_recon_8x8_sse42;
  109|   119k|    ps_svc_lyr_dec->pf_pred_residual_recon_luma_16x16 = isvcd_pred_residual_recon_16x16_sse42;
  110|   119k|    ps_svc_lyr_dec->pf_pred_residual_recon_chroma_4x4 = isvcd_pred_residual_recon_chroma_4x4_sse42;
  111|   119k|    ps_svc_lyr_dec->pf_pred_residual_recon_chroma_8x8 = isvcd_pred_residual_recon_chroma_8x8_sse42;
  112|       |
  113|   119k|    ps_svc_lyr_dec->pf_residual_luma_4x4 = isvcd_residual_luma_4x4_sse42;
  114|   119k|    ps_svc_lyr_dec->pf_residual_luma_8x8 = isvcd_residual_luma_8x8_sse42;
  115|   119k|    ps_svc_lyr_dec->pf_residual_luma_16x16 = isvcd_residual_luma_16x16_sse42;
  116|   119k|    ps_svc_lyr_dec->pf_residual_chroma_cb_cr_8x8 = isvcd_residual_chroma_cb_cr_8x8_sse42;
  117|       |
  118|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_4x4 = isvcd_iquant_itrans_4x4_sse42;
  119|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_4x4_dc = isvcd_iquant_itrans_4x4_dc_sse42;
  120|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_8x8 = isvcd_iquant_itrans_8x8_sse42;
  121|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_luma_8x8_dc = isvcd_iquant_itrans_8x8_dc_sse42;
  122|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4 = isvcd_iquant_itrans_chroma_4x4_sse42;
  123|   119k|    ps_svc_lyr_dec->pf_iquant_itrans_chroma_4x4_dc = isvcd_iquant_itrans_chroma_4x4_dc_sse42;
  124|       |
  125|   119k|    ps_intra_samp_ctxt->pf_interpolate_base_luma_dyadic = isvcd_interpolate_base_luma_dyadic_sse42;
  126|   119k|    ps_intra_samp_ctxt->pf_interpolate_intra_base = isvcd_interpolate_intra_base_sse42;
  127|       |
  128|   119k|    ps_intra_samp_ctxt->pf_vert_chroma_interpol[0] = isvcd_vert_interpol_chroma_dyadic_1_sse42;
  129|   119k|    ps_intra_samp_ctxt->pf_vert_chroma_interpol[1] = isvcd_vert_interpol_chroma_dyadic_2_sse42;
  130|   119k|    ps_intra_samp_ctxt->pf_vert_chroma_interpol[2] = isvcd_vert_interpol_chroma_dyadic_3_sse42;
  131|       |
  132|   119k|    ps_intra_samp_ctxt->pf_horz_chroma_interpol[0] = isvcd_horz_interpol_chroma_dyadic_1_sse42;
  133|   119k|    ps_intra_samp_ctxt->pf_horz_chroma_interpol[1] = isvcd_horz_interpol_chroma_dyadic_2_sse42;
  134|       |
  135|   119k|    ps_resd_samp_ctx->pf_residual_luma_dyadic = isvcd_residual_luma_dyadic_sse42;
  136|   119k|    ps_resd_samp_ctx->pf_interpolate_residual = isvcd_interpolate_residual_sse42;
  137|   119k|    ps_resd_samp_ctx->pf_residual_reflayer_const_non_boundary_mb =
  138|   119k|        isvcd_residual_reflayer_const_non_boundary_mb_sse42;
  139|       |
  140|   119k|    return;
  141|   119k|}

isvcd_interpolate_base_luma_dyadic_sse42:
   79|  25.2k|{
   80|  25.2k|    WORD32 i4_x, i4_y;
   81|  25.2k|    WORD32 i4_filt_stride, i4_src_stride;
   82|  25.2k|    UWORD8 *pu1_inp, *pu1_out;
   83|  25.2k|    WORD16 *pi2_tmp;
   84|       |
   85|  25.2k|    __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3;
   86|  25.2k|    __m128i i4_samp_8x16b_0, i4_samp_8x16b_1, i4_samp_8x16b_2, i4_samp_8x16b_3;
   87|  25.2k|    __m128i i4_res_8x16b_r1_1, i4_res_8x16b_r1_2, i4_res_8x16b_r1_3;
   88|  25.2k|    __m128i i4_res_8x16b_r2_1, i4_res_8x16b_r2_2, i4_res_8x16b_r2_3;
   89|       |
   90|       |    /* Filter coefficient values for phase 4 */
   91|  25.2k|    __m128i i4_coeff_8x16b_0 = _mm_set1_epi16(-3);
   92|  25.2k|    __m128i i4_coeff_8x16b_1 = _mm_set1_epi16(28);
   93|  25.2k|    i4_filt_stride = 12;
   94|  25.2k|    i4_src_stride = DYADIC_REF_W_Y;
  ------------------
  |  |   56|  25.2k|#define DYADIC_REF_W_Y 20
  ------------------
   95|       |
   96|  25.2k|    pu1_inp = pu1_inp_buf;
   97|  25.2k|    pi2_tmp = pi2_tmp_filt_buf;
   98|  25.2k|    pu1_out = pu1_out_buf;
   99|       |
  100|       |    /* Vertical interpolation */
  101|       |    /*First 64 bit */
  102|  50.5k|    for(i4_x = 0; i4_x < 1; i4_x++)
  ------------------
  |  Branch (102:19): [True: 25.2k, False: 25.2k]
  ------------------
  103|  25.2k|    {
  104|       |        /* y = 0, y_phase = 12 */
  105|  25.2k|        i4_samp_16x8b_0 = _mm_loadl_epi64((__m128i *) (pu1_inp));
  106|  25.2k|        i4_samp_16x8b_1 = _mm_loadl_epi64((__m128i *) (pu1_inp + i4_src_stride));
  107|  25.2k|        i4_samp_16x8b_2 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1)));
  108|  25.2k|        i4_samp_16x8b_3 =
  109|  25.2k|            _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
  110|  25.2k|        pu1_inp += (i4_src_stride << 2);
  111|  25.2k|        i4_samp_8x16b_0 = _mm_cvtepu8_epi16(i4_samp_16x8b_0);
  112|  25.2k|        i4_samp_8x16b_1 = _mm_cvtepu8_epi16(i4_samp_16x8b_1);
  113|  25.2k|        i4_samp_8x16b_2 = _mm_cvtepu8_epi16(i4_samp_16x8b_2);
  114|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(i4_samp_16x8b_3);
  115|       |
  116|       |        /* since y_phase 12 for y = 0 */
  117|       |        /*Multiply by 8 =>  left shift by 3*/
  118|  25.2k|        i4_res_8x16b_r1_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  119|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  120|  25.2k|        i4_res_8x16b_r1_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  121|       |
  122|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  123|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_0);
  124|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  125|       |
  126|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  127|  25.2k|        pi2_tmp += i4_filt_stride;
  128|       |
  129|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  130|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  131|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  132|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  133|       |
  134|       |        /* y_phase is 4 for odd values of y */
  135|       |        /* and 12 for even values of y        */
  136|       |        /*Multiply by 8 =>  left shift by 3*/
  137|       |
  138|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  139|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  140|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  141|       |
  142|  25.2k|        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  143|  25.2k|        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  144|  25.2k|        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  145|       |
  146|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  147|  25.2k|        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  148|       |
  149|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  150|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  151|       |
  152|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  153|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  154|       |
  155|       |        /* Storing the results */
  156|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  157|  25.2k|        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  158|  25.2k|        pi2_tmp += (i4_filt_stride << 1);
  159|  25.2k|        pu1_inp += i4_src_stride;
  160|       |
  161|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  162|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  163|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  164|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  165|       |
  166|       |        /* y_phase is 4 for odd values of y */
  167|       |        /* and 12 for even values of y        */
  168|       |        /*Multiply by 8 =>  left shift by 3*/
  169|       |
  170|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  171|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  172|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  173|       |
  174|  25.2k|        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  175|  25.2k|        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  176|  25.2k|        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  177|       |
  178|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  179|  25.2k|        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  180|       |
  181|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  182|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  183|       |
  184|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  185|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  186|       |
  187|       |        /* Storing the results */
  188|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  189|  25.2k|        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  190|  25.2k|        pi2_tmp += (i4_filt_stride << 1);
  191|  25.2k|        pu1_inp += i4_src_stride;
  192|       |
  193|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  194|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  195|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  196|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  197|       |
  198|       |        /* y_phase is 4 for odd values of y */
  199|       |        /* and 12 for even values of y        */
  200|       |        /*Multiply by 8 =>  left shift by 3*/
  201|       |
  202|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  203|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  204|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  205|       |
  206|  25.2k|        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  207|  25.2k|        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  208|  25.2k|        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  209|       |
  210|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  211|  25.2k|        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  212|       |
  213|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  214|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  215|       |
  216|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  217|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  218|       |
  219|       |        /* Storing the results */
  220|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  221|  25.2k|        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  222|  25.2k|        pi2_tmp += (i4_filt_stride << 1);
  223|  25.2k|        pu1_inp += i4_src_stride;
  224|       |
  225|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  226|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  227|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  228|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  229|       |
  230|       |        /* y_phase is 4 for odd values of y */
  231|       |        /* and 12 for even values of y        */
  232|       |        /*Multiply by 8 =>  left shift by 3*/
  233|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  234|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  235|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  236|       |
  237|  25.2k|        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  238|  25.2k|        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  239|  25.2k|        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  240|       |
  241|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  242|  25.2k|        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  243|       |
  244|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  245|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  246|       |
  247|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  248|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  249|       |
  250|       |        /* Storing the results */
  251|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  252|  25.2k|        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  253|  25.2k|        pi2_tmp += (i4_filt_stride << 1);
  254|  25.2k|        pu1_inp += i4_src_stride;
  255|       |
  256|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  257|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  258|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  259|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  260|       |        /* y_phase is 4 for odd values of y */
  261|       |        /* and 12 for even values of y        */
  262|       |        /*Multiply by 8 =>  left shift by 3*/
  263|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  264|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  265|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  266|       |
  267|  25.2k|        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  268|  25.2k|        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  269|  25.2k|        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  270|       |
  271|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  272|  25.2k|        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  273|       |
  274|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  275|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  276|       |
  277|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  278|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  279|       |
  280|       |        /* Storing the results */
  281|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  282|  25.2k|        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  283|  25.2k|        pi2_tmp += (i4_filt_stride << 1);
  284|  25.2k|        pu1_inp += i4_src_stride;
  285|       |
  286|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  287|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  288|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  289|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  290|       |        /* y_phase is 4 for odd values of y */
  291|       |        /* and 12 for even values of y        */
  292|       |        /*Multiply by 8 =>  left shift by 3*/
  293|       |
  294|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  295|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  296|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  297|       |
  298|  25.2k|        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  299|  25.2k|        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  300|  25.2k|        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  301|       |
  302|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  303|  25.2k|        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  304|       |
  305|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  306|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  307|       |
  308|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  309|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  310|       |
  311|       |        /* Storing the results */
  312|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  313|  25.2k|        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  314|  25.2k|        pi2_tmp += (i4_filt_stride << 1);
  315|  25.2k|        pu1_inp += i4_src_stride;
  316|       |
  317|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  318|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  319|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  320|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  321|       |        /* y_phase is 4 for odd values of y */
  322|       |        /* and 12 for even values of y        */
  323|       |        /*Multiply by 8 =>  left shift by 3*/
  324|       |
  325|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  326|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  327|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  328|       |
  329|  25.2k|        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  330|  25.2k|        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  331|  25.2k|        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  332|       |
  333|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  334|  25.2k|        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  335|       |
  336|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  337|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  338|       |
  339|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  340|  25.2k|        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  341|       |
  342|       |        /* Storing the results */
  343|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  344|  25.2k|        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  345|  25.2k|        pi2_tmp += (i4_filt_stride << 1);
  346|  25.2k|        pu1_inp += i4_src_stride;
  347|       |
  348|       |        /* y = 15, y_phase = 4 */
  349|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  350|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  351|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  352|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  353|       |
  354|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  355|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  356|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  357|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  358|       |
  359|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  360|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  361|       |
  362|       |        /* Store the output */
  363|  25.2k|        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  364|       |
  365|       |        /* Reinitializing the ptrs */
  366|  25.2k|        pu1_inp = pu1_inp_buf;
  367|  25.2k|        pi2_tmp = pi2_tmp_filt_buf;
  368|  25.2k|    } /* End of loop over x */
  369|       |
  370|       |    /*Remaining 32 bit */
  371|  25.2k|    pu1_inp += 8;
  372|  25.2k|    pi2_tmp += 8;
  373|  50.5k|    for(i4_x = 0; i4_x < 1; i4_x++)
  ------------------
  |  Branch (373:19): [True: 25.2k, False: 25.2k]
  ------------------
  374|  25.2k|    {
  375|       |        /* y = 0, y_phase = 12 */
  376|  25.2k|        i4_samp_16x8b_0 = _mm_loadl_epi64((__m128i *) (pu1_inp));
  377|  25.2k|        i4_samp_16x8b_1 = _mm_loadl_epi64((__m128i *) (pu1_inp + i4_src_stride));
  378|  25.2k|        i4_samp_16x8b_2 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1)));
  379|  25.2k|        i4_samp_16x8b_3 =
  380|  25.2k|            _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
  381|  25.2k|        pu1_inp += (i4_src_stride << 2);
  382|  25.2k|        i4_samp_8x16b_0 = _mm_cvtepu8_epi16(i4_samp_16x8b_0);
  383|  25.2k|        i4_samp_8x16b_1 = _mm_cvtepu8_epi16(i4_samp_16x8b_1);
  384|  25.2k|        i4_samp_8x16b_2 = _mm_cvtepu8_epi16(i4_samp_16x8b_2);
  385|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(i4_samp_16x8b_3);
  386|       |
  387|       |        /* since y_phase 12 for y = 0 */
  388|       |        /*Multiply by 8 =>  left shift by 3*/
  389|  25.2k|        i4_res_8x16b_r1_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  390|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  391|  25.2k|        i4_res_8x16b_r1_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  392|       |
  393|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  394|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_0);
  395|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  396|       |
  397|  25.2k|        _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  398|  25.2k|        pi2_tmp += i4_filt_stride;
  399|       |
  400|   202k|        for(i4_y = 1; i4_y < 15; i4_y += 2)
  ------------------
  |  Branch (400:23): [True: 176k, False: 25.2k]
  ------------------
  401|   176k|        {
  402|   176k|            i4_samp_8x16b_0 = i4_samp_8x16b_1;
  403|   176k|            i4_samp_8x16b_1 = i4_samp_8x16b_2;
  404|   176k|            i4_samp_8x16b_2 = i4_samp_8x16b_3;
  405|   176k|            i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  406|       |            /* y_phase is 4 for odd values of y */
  407|       |            /* and 12 for even values of y        */
  408|       |            /*Multiply by 8 =>  left shift by 3*/
  409|       |
  410|   176k|            i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  411|   176k|            i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  412|   176k|            i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  413|       |
  414|   176k|            i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
  415|   176k|            i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
  416|   176k|            i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
  417|       |
  418|   176k|            i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  419|   176k|            i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
  420|       |
  421|   176k|            i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  422|   176k|            i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
  423|       |
  424|   176k|            i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  425|   176k|            i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
  426|       |
  427|       |            /* Storing the results */
  428|   176k|            _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  429|   176k|            _mm_storel_epi64((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
  430|   176k|            pi2_tmp += (i4_filt_stride << 1);
  431|   176k|            pu1_inp += i4_src_stride;
  432|   176k|        } /* End of loop over y */
  433|       |
  434|       |        /* y = 15, y_phase = 4 */
  435|  25.2k|        i4_samp_8x16b_0 = i4_samp_8x16b_1;
  436|  25.2k|        i4_samp_8x16b_1 = i4_samp_8x16b_2;
  437|  25.2k|        i4_samp_8x16b_2 = i4_samp_8x16b_3;
  438|  25.2k|        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
  439|       |
  440|  25.2k|        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
  441|  25.2k|        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
  442|  25.2k|        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
  443|  25.2k|        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
  444|       |
  445|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
  446|  25.2k|        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
  447|       |
  448|       |        /* Store the output */
  449|  25.2k|        _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
  450|       |
  451|       |        /* Reinitializing the ptrs */
  452|  25.2k|        pu1_inp = pu1_inp_buf;
  453|  25.2k|        pi2_tmp = pi2_tmp_filt_buf;
  454|  25.2k|    }
  455|       |
  456|  25.2k|    {
  457|  25.2k|        __m128i coeff_c0_c1_8x16b = _mm_set_epi16(28, -3, 28, -3, 28, -3, 28, -3);
  458|  25.2k|        __m128i coeff_c2_c3_8x16b = _mm_set_epi16(-1, 8, -1, 8, -1, 8, -1, 8);
  459|  25.2k|        __m128i coeff_c3_c2_8x16b = _mm_set_epi16(8, -1, 8, -1, 8, -1, 8, -1);
  460|  25.2k|        __m128i coeff_c1_c0_8x16b = _mm_set_epi16(-3, 28, -3, 28, -3, 28, -3, 28);
  461|       |
  462|  25.2k|        __m128i i4_samp_8x16b_rpart1_0, i4_samp_8x16b_rpart2_0;
  463|  25.2k|        __m128i i4_samp_8x16b_rpart1_1, i4_samp_8x16b_rpart2_1;
  464|  25.2k|        __m128i i4_samp_8x16b_rpart1_2, i4_samp_8x16b_rpart2_2;
  465|  25.2k|        __m128i i4_samp_8x16b_rpart1_3, i4_samp_8x16b_rpart2_3;
  466|  25.2k|        __m128i i4_samp_8x16b_rpart1_4, i4_samp_8x16b_rpart2_4;
  467|       |
  468|  25.2k|        __m128i i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart2_0;
  469|  25.2k|        __m128i i4_res_4x32b_rpart1_1, i4_res_4x32b_rpart2_1;
  470|  25.2k|        __m128i i4_res_4x32b_rpart1_2, i4_res_4x32b_rpart2_2;
  471|  25.2k|        __m128i i4_res_4x32b_rpart1_3, i4_res_4x32b_rpart2_3;
  472|       |
  473|  25.2k|        __m128i res_512 = _mm_set1_epi32(512);
  474|       |        /* Horizontal interpolation */
  475|   429k|        for(i4_y = 0; i4_y < 16; i4_y++)
  ------------------
  |  Branch (475:23): [True: 404k, False: 25.2k]
  ------------------
  476|   404k|        {
  477|       |            // a0 a1 a2 a3 a4 a5 a6 a7
  478|   404k|            i4_samp_8x16b_rpart1_0 = _mm_loadu_si128((__m128i *) pi2_tmp);
  479|       |            // a4 a5 a6 a7 a8 a9 a10 a11
  480|   404k|            i4_samp_8x16b_rpart2_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 4));
  481|       |            // a1 a2 a3 a4 a5 a6 a7 0
  482|   404k|            i4_samp_8x16b_rpart1_1 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 2);
  483|       |            // a2 a3 a4 a5 a6 a7 0 0
  484|   404k|            i4_samp_8x16b_rpart1_2 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 4);
  485|       |            // a3 a4 a5 a6 a7 0 0 0
  486|   404k|            i4_samp_8x16b_rpart1_3 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 6);
  487|       |            // a4 a5 a6 a7 0 0 0 0
  488|   404k|            i4_samp_8x16b_rpart1_4 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 8);
  489|       |
  490|       |            // a5 a6 a7 a8 a9 a10 a11 0
  491|   404k|            i4_samp_8x16b_rpart2_1 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 2);
  492|       |            // a6 a7 a8 a9 a10 a11 0 0
  493|   404k|            i4_samp_8x16b_rpart2_2 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 4);
  494|       |            // a7 a8 a9 a10 a11 0 0 0
  495|   404k|            i4_samp_8x16b_rpart2_3 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 6);
  496|       |            // a8 a9 a10 a11 0 0 0 0
  497|   404k|            i4_samp_8x16b_rpart2_4 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 8);
  498|       |            // a0 a1  a1 a2  a2 a3  a3 a4
  499|   404k|            i4_samp_8x16b_rpart1_0 =
  500|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_0, i4_samp_8x16b_rpart1_1);
  501|       |            // a1 a2  a2 a3  a3 a4  a4 a5
  502|   404k|            i4_samp_8x16b_rpart1_1 =
  503|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_1, i4_samp_8x16b_rpart1_2);
  504|       |            // a2 a3  a3 a4  a4 a5  a5 a6
  505|   404k|            i4_samp_8x16b_rpart1_2 =
  506|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_2, i4_samp_8x16b_rpart1_3);
  507|       |            // a3 a4  a4 a5  a5 a6  a6 a7
  508|   404k|            i4_samp_8x16b_rpart1_3 =
  509|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_3, i4_samp_8x16b_rpart1_4);
  510|       |            // a4 a5  a5 a6  a6 a7  a7 a8
  511|   404k|            i4_samp_8x16b_rpart2_0 =
  512|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_0, i4_samp_8x16b_rpart2_1);
  513|       |            // a5 a6  a6 a7  a7 a8  a8 a9
  514|   404k|            i4_samp_8x16b_rpart2_1 =
  515|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_1, i4_samp_8x16b_rpart2_2);
  516|       |            // a6 a7  a7 a8  a8 a9  a9 a10
  517|   404k|            i4_samp_8x16b_rpart2_2 =
  518|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_2, i4_samp_8x16b_rpart2_3);
  519|       |            // a7 a8  a8 a9  a9 a10 a10 a11
  520|   404k|            i4_samp_8x16b_rpart2_3 =
  521|   404k|                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_3, i4_samp_8x16b_rpart2_4);
  522|       |            // a0c3+a1c2  a1c3+a2c2  a2c3+a3c2  a3c3+a4c2
  523|   404k|            i4_res_4x32b_rpart1_0 = _mm_madd_epi16(i4_samp_8x16b_rpart1_0, coeff_c3_c2_8x16b);
  524|       |            // a2c1+a3c0  a3c1+a4c0  a4c1+a5c0  a5c1+a6c0
  525|   404k|            i4_res_4x32b_rpart1_2 = _mm_madd_epi16(i4_samp_8x16b_rpart1_2, coeff_c1_c0_8x16b);
  526|       |            // a1c0+a2c1  a2c0+a3c1  a3c0+a4c1  a4c0+a5c1
  527|   404k|            i4_res_4x32b_rpart1_1 = _mm_madd_epi16(i4_samp_8x16b_rpart1_1, coeff_c0_c1_8x16b);
  528|       |            // a3c2+a4c3  a4c2+a5c3  a5c2+a6c3  a6c2+a7c3
  529|   404k|            i4_res_4x32b_rpart1_3 = _mm_madd_epi16(i4_samp_8x16b_rpart1_3, coeff_c2_c3_8x16b);
  530|       |            // a4c3+a5c2  a5c3+a6c2  a6c3+a7c2  a7c3+a8c2
  531|   404k|            i4_res_4x32b_rpart2_0 = _mm_madd_epi16(i4_samp_8x16b_rpart2_0, coeff_c3_c2_8x16b);
  532|       |            // a6c1+a7c0  a7c1+a8c0  a8c1+a9c0  a9c1+a10c0
  533|   404k|            i4_res_4x32b_rpart2_2 = _mm_madd_epi16(i4_samp_8x16b_rpart2_2, coeff_c1_c0_8x16b);
  534|       |            // a5c0+a6c1  a6c0+a7c1  a7c0+a8c1  a8c0+a9c1
  535|   404k|            i4_res_4x32b_rpart2_1 = _mm_madd_epi16(i4_samp_8x16b_rpart2_1, coeff_c0_c1_8x16b);
  536|       |            // a7c2+a8c3  a8c2+a9c3  a9c2+a10c3  a10c2+a11c3
  537|   404k|            i4_res_4x32b_rpart2_3 = _mm_madd_epi16(i4_samp_8x16b_rpart2_3, coeff_c2_c3_8x16b);
  538|       |            // a0c3+a1c2 + a2c1+a3c0  a1c3+a2c2 + a3c1+a4c0 a2c3+a3c2 + a4c1+a5c0
  539|       |            // a3c3+a4c2 +a5c1+a6c0
  540|   404k|            i4_res_4x32b_rpart1_0 = _mm_add_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_2);
  541|       |            // a1c0+a2c1 + a3c2+a4c3  a2c0+a3c1 + a4c2+a5c3 a3c0+a4c1 + a5c2+a6c3
  542|       |            // a4c0+a5c1 + a6c2+a7c3
  543|   404k|            i4_res_4x32b_rpart1_1 = _mm_add_epi32(i4_res_4x32b_rpart1_1, i4_res_4x32b_rpart1_3);
  544|       |            // a4c3+a5c2 + a6c1+a7c0  a5c3+a6c2 + a7c1+a8c0 a6c3+a7c2 + a8c1+a9c0
  545|       |            // a7c3+a8c2+ a9c1+a10c0
  546|   404k|            i4_res_4x32b_rpart2_0 = _mm_add_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_2);
  547|       |            // a5c0+a6c1 + a7c2+a8c3  a6c0+a7c1 + a8c2+a9c3 a7c0+a8c1 + a9c2+a10c3
  548|       |            // a8c0+a9c1 + a10c2+a11c3
  549|   404k|            i4_res_4x32b_rpart2_1 = _mm_add_epi32(i4_res_4x32b_rpart2_1, i4_res_4x32b_rpart2_3);
  550|       |
  551|   404k|            i4_res_4x32b_rpart1_2 =
  552|   404k|                _mm_unpacklo_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1);
  553|   404k|            i4_res_4x32b_rpart1_3 =
  554|   404k|                _mm_unpackhi_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1);
  555|       |
  556|   404k|            i4_res_4x32b_rpart2_2 =
  557|   404k|                _mm_unpacklo_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1);
  558|   404k|            i4_res_4x32b_rpart2_3 =
  559|   404k|                _mm_unpackhi_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1);
  560|       |
  561|   404k|            i4_res_4x32b_rpart1_0 = _mm_add_epi32(i4_res_4x32b_rpart1_2, res_512);
  562|   404k|            i4_res_4x32b_rpart1_1 = _mm_add_epi32(i4_res_4x32b_rpart1_3, res_512);
  563|       |
  564|   404k|            i4_res_4x32b_rpart1_0 = _mm_srai_epi32(i4_res_4x32b_rpart1_0, 10);
  565|   404k|            i4_res_4x32b_rpart1_1 = _mm_srai_epi32(i4_res_4x32b_rpart1_1, 10);
  566|       |
  567|   404k|            i4_res_4x32b_rpart2_0 = _mm_add_epi32(i4_res_4x32b_rpart2_2, res_512);
  568|   404k|            i4_res_4x32b_rpart2_1 = _mm_add_epi32(i4_res_4x32b_rpart2_3, res_512);
  569|       |
  570|   404k|            i4_res_4x32b_rpart2_0 = _mm_srai_epi32(i4_res_4x32b_rpart2_0, 10);
  571|   404k|            i4_res_4x32b_rpart2_1 = _mm_srai_epi32(i4_res_4x32b_rpart2_1, 10);
  572|       |
  573|   404k|            _mm_storeu_si128(
  574|   404k|                (__m128i *) pu1_out,
  575|   404k|                _mm_packus_epi16(_mm_packus_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1),
  576|   404k|                                 _mm_packus_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1)));
  577|       |
  578|   404k|            pi2_tmp += i4_filt_stride;
  579|   404k|            pu1_out += i4_out_stride;
  580|   404k|        } /* End of loop over y */
  581|  25.2k|    }
  582|  25.2k|} /* isvcd_interpolate_base_luma_dyadic */
isvcd_interpolate_intra_base_sse42:
  616|  41.6k|{
  617|       |    /* --------------------------------------------------------------------- */
  618|       |    /* Index Parameters                                                         */
  619|       |    /* --------------------------------------------------------------------- */
  620|  41.6k|    intra_sampling_ctxt_t *ps_ctxt;
  621|  41.6k|    intra_samp_map_ctxt_t *ps_map_ctxt;
  622|  41.6k|    intra_samp_lyr_ctxt *ps_lyr_ctxt;
  623|  41.6k|    WORD32 i4_x, i4_y;
  624|  41.6k|    WORD32 i4_frm_mb_x, i4_frm_mb_y;
  625|  41.6k|    UWORD8 *pu1_refarray = NULL;
  626|  41.6k|    ref_pixel_map_t *ps_x_pos_phase;
  627|  41.6k|    ref_pixel_map_t *ps_y_pos_phase;
  628|  41.6k|    WORD32 i4_temp_array_ht;
  629|  41.6k|    WORD32 *pi4_interp_buff;
  630|  41.6k|    WORD32 i4_mb_wd;
  631|  41.6k|    WORD32 i4_mb_ht;
  632|       |
  633|  41.6k|    WORD32 i4_x_min;
  634|  41.6k|    ref_min_max_map_t *ps_x_min_max;
  635|  41.6k|    WORD8 arr_y_ref_pos_luma[16] = {0};
  636|  41.6k|    WORD8 arr_x_ref_pos_luma[16] = {0};
  637|  41.6k|    WORD8 arr_x_ref_pos_luma_low[16] = {0};
  638|  41.6k|    WORD8 arr_x_ref_pos_luma_high[16] = {0};
  639|  41.6k|    WORD8 arr_phase_luma[32] = {0};
  640|  41.6k|    WORD8 *pi4_y_ref_pos_luma;
  641|  41.6k|    WORD8 *pi4_x_ref_pos_luma_low;
  642|  41.6k|    WORD8 *pi4_x_ref_pos_luma_high;
  643|  41.6k|    WORD8 *pi4_phase_luma;
  644|  41.6k|    UWORD8 *pu1_refarray_temp;
  645|       |
  646|       |    /* --------------------------------------------------------------------- */
  647|       |    /* Extracting pointers from the  context                                  */
  648|       |    /* --------------------------------------------------------------------- */
  649|  41.6k|    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
  650|  41.6k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
  651|       |
  652|  41.6k|    if(0 == i4_refarray_flag)
  ------------------
  |  Branch (652:8): [True: 27.7k, False: 13.8k]
  ------------------
  653|  27.7k|    {
  654|  27.7k|        pu1_refarray = ps_ctxt->pu1_refarray_buffer;
  655|  27.7k|    }
  656|  13.8k|    else if(1 == i4_refarray_flag)
  ------------------
  |  Branch (656:13): [True: 13.8k, False: 0]
  ------------------
  657|  13.8k|    {
  658|  13.8k|        pu1_refarray = ps_ctxt->pu1_refarray_cb;
  659|  13.8k|    }
  660|       |
  661|       |    /* --------------------------------------------------------------------- */
  662|       |    /* LUMA    or CHROMA */
  663|       |    /* --------------------------------------------------------------------- */
  664|       |
  665|  41.6k|    if(1 == i4_chroma_flag)
  ------------------
  |  Branch (665:8): [True: 27.7k, False: 13.8k]
  ------------------
  666|  27.7k|        ps_map_ctxt = &(ps_lyr_ctxt->s_chroma_map_ctxt);
  667|  13.8k|    else
  668|  13.8k|        ps_map_ctxt = &(ps_lyr_ctxt->s_luma_map_ctxt);
  669|       |
  670|  41.6k|    i4_mb_wd = MB_WIDTH >> i4_chroma_flag;
  ------------------
  |  |   67|  41.6k|#define MB_WIDTH 16
  ------------------
  671|  41.6k|    i4_mb_ht = MB_HEIGHT >> i4_chroma_flag;
  ------------------
  |  |   68|  41.6k|#define MB_HEIGHT 16
  ------------------
  672|       |
  673|  41.6k|    ps_x_min_max = ps_map_ctxt->ps_x_min_max;
  674|       |
  675|  41.6k|    i4_frm_mb_y = i4_mb_y * i4_mb_ht;
  676|  41.6k|    i4_frm_mb_x = i4_mb_x * i4_mb_wd;
  677|       |    /* get the min position */
  678|  41.6k|    i4_x_min = ps_x_min_max[i4_mb_x].i2_min_pos;
  679|       |
  680|       |    /* --------------------------------------------------------------------- */
  681|       |    /* Projected frame level pointers                                        */
  682|       |    /* --------------------------------------------------------------------- */
  683|  41.6k|    ps_x_pos_phase = ps_map_ctxt->ps_x_pos_phase;
  684|  41.6k|    ps_y_pos_phase = ps_map_ctxt->ps_y_pos_phase;
  685|       |
  686|       |    /* --------------------------------------------------------------------- */
  687|       |    /* Pointers and Dimenstion of the temporary buffer                         */
  688|       |    /* --------------------------------------------------------------------- */
  689|  41.6k|    i4_temp_array_ht = i4_mb_ht;
  690|  41.6k|    pi4_interp_buff = ps_ctxt->pi4_temp_interpolation_buffer;
  691|       |
  692|  41.6k|    if(i4_chroma_flag == 0)
  ------------------
  |  Branch (692:8): [True: 13.8k, False: 27.7k]
  ------------------
  693|  13.8k|    {
  694|       |        /* --------------------------------------------------------------------- */
  695|       |        /* Loop for interpolation in vertical direction */
  696|       |        /* --------------------------------------------------------------------- */
  697|  13.8k|        WORD16 *pi2_interp_buff_temp;
  698|  13.8k|        pi2_interp_buff_temp = (WORD16 *) pi4_interp_buff;
  699|  13.8k|        {
  700|  13.8k|            __m128i out_res_8x16b_0, out_res_8x16b_1;
  701|       |
  702|  13.8k|            __m128i inp_8x16b_r0, inp_8x16b_r01_0, phs_mask_16x8b_r0, phs_mask_16x8b_r01_0,
  703|  13.8k|                out_res_8x16b_r01_0;
  704|  13.8k|            __m128i inp_8x16b_r1, inp_8x16b_r23_0, phs_mask_16x8b_r1, phs_mask_16x8b_r23_0,
  705|  13.8k|                out_res_8x16b_r01_1;
  706|  13.8k|            __m128i inp_8x16b_r2, inp_8x16b_r01_1, phs_mask_16x8b_r2, phs_mask_16x8b_r01_1,
  707|  13.8k|                out_res_8x16b_r23_0;
  708|  13.8k|            __m128i inp_8x16b_r3, inp_8x16b_r23_1, phs_mask_16x8b_r3, phs_mask_16x8b_r23_1,
  709|  13.8k|                out_res_8x16b_r23_1;
  710|       |
  711|   236k|            for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (711:27): [True: 222k, False: 13.8k]
  ------------------
  712|   222k|            {
  713|   222k|                arr_phase_luma[i4_y] = (WORD8) ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_phase;
  714|   222k|                arr_y_ref_pos_luma[i4_y] = (WORD8) (ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_ref_pos);
  715|   222k|            }
  716|  13.8k|            pi4_y_ref_pos_luma = arr_y_ref_pos_luma;
  717|  13.8k|            pi4_phase_luma = arr_phase_luma;
  718|       |
  719|   236k|            for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (719:27): [True: 222k, False: 13.8k]
  ------------------
  720|   222k|            {
  721|   222k|                pu1_refarray_temp =
  722|   222k|                    pu1_refarray + (pi4_y_ref_pos_luma[i4_y] * i4_refarray_wd) + (i4_x_min - 1);
  723|   222k|                inp_8x16b_r0 = _mm_loadu_si128((__m128i *) (pu1_refarray_temp - i4_refarray_wd));
  724|   222k|                inp_8x16b_r1 = _mm_loadu_si128((__m128i *) (pu1_refarray_temp));
  725|   222k|                inp_8x16b_r2 = _mm_loadu_si128((__m128i *) (pu1_refarray_temp + i4_refarray_wd));
  726|   222k|                inp_8x16b_r3 =
  727|   222k|                    _mm_loadu_si128((__m128i *) (pu1_refarray_temp + 2 * i4_refarray_wd));
  728|       |
  729|   222k|                inp_8x16b_r01_0 = _mm_unpacklo_epi8(inp_8x16b_r0, inp_8x16b_r1);
  730|   222k|                inp_8x16b_r23_0 = _mm_unpacklo_epi8(inp_8x16b_r2, inp_8x16b_r3);
  731|   222k|                inp_8x16b_r01_1 = _mm_unpackhi_epi8(inp_8x16b_r0, inp_8x16b_r1);
  732|   222k|                inp_8x16b_r23_1 = _mm_unpackhi_epi8(inp_8x16b_r2, inp_8x16b_r3);
  733|       |
  734|   222k|                phs_mask_16x8b_r0 = _mm_set1_epi8(g_ai1_interp_filter_luma[pi4_phase_luma[i4_y]]);
  735|   222k|                phs_mask_16x8b_r1 =
  736|   222k|                    _mm_set1_epi8(g_ai1_interp_filter_luma[pi4_phase_luma[i4_y] + 16]);
  737|   222k|                phs_mask_16x8b_r2 =
  738|   222k|                    _mm_set1_epi8(g_ai1_interp_filter_luma[pi4_phase_luma[i4_y] + 32]);
  739|   222k|                phs_mask_16x8b_r3 =
  740|   222k|                    _mm_set1_epi8(g_ai1_interp_filter_luma[pi4_phase_luma[i4_y] + 48]);
  741|       |
  742|   222k|                phs_mask_16x8b_r01_0 = _mm_unpacklo_epi8(phs_mask_16x8b_r0, phs_mask_16x8b_r1);
  743|   222k|                phs_mask_16x8b_r23_0 = _mm_unpacklo_epi8(phs_mask_16x8b_r2, phs_mask_16x8b_r3);
  744|   222k|                phs_mask_16x8b_r01_1 = _mm_unpackhi_epi8(phs_mask_16x8b_r0, phs_mask_16x8b_r1);
  745|   222k|                phs_mask_16x8b_r23_1 = _mm_unpackhi_epi8(phs_mask_16x8b_r2, phs_mask_16x8b_r3);
  746|       |
  747|   222k|                out_res_8x16b_r01_0 = _mm_maddubs_epi16(inp_8x16b_r01_0, phs_mask_16x8b_r01_0);
  748|   222k|                out_res_8x16b_r01_1 = _mm_maddubs_epi16(inp_8x16b_r01_1, phs_mask_16x8b_r01_1);
  749|   222k|                out_res_8x16b_r23_0 = _mm_maddubs_epi16(inp_8x16b_r23_0, phs_mask_16x8b_r23_0);
  750|   222k|                out_res_8x16b_r23_1 = _mm_maddubs_epi16(inp_8x16b_r23_1, phs_mask_16x8b_r23_1);
  751|       |
  752|   222k|                out_res_8x16b_0 = _mm_add_epi16(out_res_8x16b_r01_0, out_res_8x16b_r23_0);
  753|   222k|                out_res_8x16b_1 = _mm_add_epi16(out_res_8x16b_r01_1, out_res_8x16b_r23_1);
  754|       |
  755|   222k|                _mm_storeu_si128(
  756|   222k|                    (__m128i *) (pi2_interp_buff_temp + (i4_y * i4_refarray_wd) + (i4_x_min - 1)),
  757|   222k|                    out_res_8x16b_0);
  758|   222k|                _mm_storeu_si128((__m128i *) (pi2_interp_buff_temp + (i4_y * i4_refarray_wd) +
  759|   222k|                                              (i4_x_min - 1) + 8),
  760|   222k|                                 out_res_8x16b_1);
  761|   222k|            }
  762|  13.8k|        }
  763|       |        /* --------------------------------------------------------------------- */
  764|       |        /* Loop for interpolation in horizontal direction                         */
  765|       |        /* --------------------------------------------------------------------- */
  766|  13.8k|        {
  767|  13.8k|            WORD32 strt_indx = 10, strt_indx_h = 0;
  768|       |
  769|  13.8k|            __m128i inp_8x16b_0;
  770|  13.8k|            __m128i inp_8x16b_1;
  771|       |
  772|  13.8k|            __m128i phs_mask_16x8b_0;
  773|  13.8k|            __m128i phs_mask_16x8b_1;
  774|  13.8k|            __m128i x_ref_pos_luma_mask_r0_0, x_ref_pos_luma_mask_r0_1, x_ref_pos_luma_mask_r1_0,
  775|  13.8k|                x_ref_pos_luma_mask_r1_1, x_ref_pos_luma_mask_r2_0, x_ref_pos_luma_mask_r2_1,
  776|  13.8k|                x_ref_pos_luma_mask_r3_0, x_ref_pos_luma_mask_r3_1;
  777|       |
  778|  13.8k|            __m128i inp_8x16b_2, inp_8x16b_3;
  779|       |
  780|  13.8k|            WORD32 i4_x2 = 0;
  781|  13.8k|            WORD32 i4_mb_wd_hlf = (i4_mb_wd >> 1);
  782|  13.8k|            __m128i twos = _mm_set1_epi8(2);
  783|       |
  784|  13.8k|            strt_indx = ps_x_pos_phase[0 + i4_frm_mb_x].i2_ref_pos - 1;
  785|  13.8k|            strt_indx_h = (ps_x_pos_phase[8 + i4_frm_mb_x].i2_ref_pos - strt_indx - 1);
  786|   236k|            for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
  ------------------
  |  Branch (786:27): [True: 222k, False: 13.8k]
  ------------------
  787|   222k|            {
  788|   222k|                arr_x_ref_pos_luma[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_ref_pos;
  789|   222k|                arr_phase_luma[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_phase;
  790|   222k|                arr_x_ref_pos_luma[i4_x] = arr_x_ref_pos_luma[i4_x] - strt_indx - 1;
  791|   222k|            }
  792|       |
  793|   125k|            for(i4_x = 0; i4_x < i4_mb_wd_hlf; i4_x++)
  ------------------
  |  Branch (793:27): [True: 111k, False: 13.8k]
  ------------------
  794|   111k|            {
  795|   111k|                i4_x2 = i4_x << 1;
  796|   111k|                arr_x_ref_pos_luma_low[i4_x2] = (arr_x_ref_pos_luma[i4_x]) << 1;
  797|   111k|                arr_x_ref_pos_luma_low[i4_x2 + 1] = arr_x_ref_pos_luma_low[i4_x2] + 1;
  798|   111k|            }
  799|   125k|            for(i4_x = i4_mb_wd_hlf; i4_x < i4_mb_wd; i4_x++)
  ------------------
  |  Branch (799:38): [True: 111k, False: 13.8k]
  ------------------
  800|   111k|            {
  801|   111k|                i4_x2 = (i4_x - i4_mb_wd_hlf) << 1;
  802|   111k|                arr_x_ref_pos_luma_high[i4_x2] = ((arr_x_ref_pos_luma[i4_x] - strt_indx_h) << 1);
  803|   111k|                arr_x_ref_pos_luma_high[i4_x2 + 1] = arr_x_ref_pos_luma_high[i4_x2] + 1;
  804|   111k|            }
  805|  13.8k|            pi4_x_ref_pos_luma_low = arr_x_ref_pos_luma_low;
  806|  13.8k|            pi4_x_ref_pos_luma_high = arr_x_ref_pos_luma_high;
  807|  13.8k|            pi4_phase_luma = arr_phase_luma;
  808|       |
  809|  13.8k|            phs_mask_16x8b_0 = _mm_loadu_si128((__m128i *) (pi4_phase_luma));
  810|  13.8k|            phs_mask_16x8b_1 = _mm_loadu_si128((__m128i *) (pi4_phase_luma + 8));
  811|       |
  812|  13.8k|            x_ref_pos_luma_mask_r0_0 = _mm_loadu_si128((__m128i *) (pi4_x_ref_pos_luma_low));
  813|  13.8k|            x_ref_pos_luma_mask_r0_1 = _mm_loadu_si128((__m128i *) (pi4_x_ref_pos_luma_high));
  814|  13.8k|            x_ref_pos_luma_mask_r1_0 = _mm_add_epi8(x_ref_pos_luma_mask_r0_0, twos);
  815|  13.8k|            x_ref_pos_luma_mask_r1_1 = _mm_add_epi8(x_ref_pos_luma_mask_r0_1, twos);
  816|  13.8k|            x_ref_pos_luma_mask_r2_0 = _mm_add_epi8(x_ref_pos_luma_mask_r1_0, twos);
  817|  13.8k|            x_ref_pos_luma_mask_r2_1 = _mm_add_epi8(x_ref_pos_luma_mask_r1_1, twos);
  818|  13.8k|            x_ref_pos_luma_mask_r3_0 = x_ref_pos_luma_mask_r0_0;
  819|  13.8k|            x_ref_pos_luma_mask_r3_1 = x_ref_pos_luma_mask_r0_1;
  820|       |
  821|  13.8k|            {
  822|  13.8k|                __m128i ip_filt_16x8b_r0, ip_filt_8x16b_r0_0, ip_filt_8x16b_r0_1,
  823|  13.8k|                    ip_filt_8x16b_r01_l_0, ip_filt_8x16b_r01_h_0;
  824|  13.8k|                __m128i ip_filt_16x8b_r1, ip_filt_8x16b_r1_0, ip_filt_8x16b_r1_1,
  825|  13.8k|                    ip_filt_8x16b_r23_l_0, ip_filt_8x16b_r23_h_0;
  826|  13.8k|                __m128i ip_filt_16x8b_r2, ip_filt_8x16b_r2_0, ip_filt_8x16b_r2_1,
  827|  13.8k|                    ip_filt_8x16b_r01_l_1, ip_filt_8x16b_r01_h_1;
  828|  13.8k|                __m128i ip_filt_16x8b_r3, ip_filt_8x16b_r3_0, ip_filt_8x16b_r3_1,
  829|  13.8k|                    ip_filt_8x16b_r23_l_1, ip_filt_8x16b_r23_h_1;
  830|       |
  831|  13.8k|                __m128i inp_8x16b_r0_0, inp_8x16b_r2_0, inp_8x16b_r01_l_0, inp_8x16b_r01_h_0,
  832|  13.8k|                    out_res_4x32b_r01_l_0, out_res_4x32b_r01_h_0;
  833|  13.8k|                __m128i inp_8x16b_r0_1, inp_8x16b_r2_1, inp_8x16b_r23_l_0, inp_8x16b_r23_h_0,
  834|  13.8k|                    out_res_4x32b_r01_l_1, out_res_4x32b_r01_h_1;
  835|  13.8k|                __m128i inp_8x16b_r1_0, inp_8x16b_r3_0, inp_8x16b_r01_l_1, inp_8x16b_r01_h_1,
  836|  13.8k|                    out_res_4x32b_r23_l_0, out_res_4x32b_r23_h_0;
  837|  13.8k|                __m128i inp_8x16b_r1_1, inp_8x16b_r3_1, inp_8x16b_r23_l_1, inp_8x16b_r23_h_1,
  838|  13.8k|                    out_res_4x32b_r23_l_1, out_res_4x32b_r23_h_1;
  839|       |
  840|  13.8k|                __m128i out_res_4x32b_l_0;
  841|  13.8k|                __m128i out_res_4x32b_l_1;
  842|  13.8k|                __m128i out_res_4x32b_h_0;
  843|  13.8k|                __m128i out_res_4x32b_h_1;
  844|       |
  845|  13.8k|                __m128i out_res_8x16b_l;
  846|  13.8k|                __m128i out_res_8x16b_h;
  847|       |
  848|  13.8k|                __m128i out_res_16x8b;
  849|  13.8k|                __m128i const_512 = _mm_set1_epi32(512);
  850|       |
  851|  13.8k|                ip_filt_16x8b_r0 = _mm_loadu_si128((__m128i *) (g_ai1_interp_filter_luma));
  852|  13.8k|                ip_filt_16x8b_r1 = _mm_loadu_si128((__m128i *) (g_ai1_interp_filter_luma + 16));
  853|  13.8k|                ip_filt_16x8b_r2 = _mm_loadu_si128((__m128i *) (g_ai1_interp_filter_luma + 32));
  854|  13.8k|                ip_filt_16x8b_r3 = _mm_loadu_si128((__m128i *) (g_ai1_interp_filter_luma + 48));
  855|       |
  856|  13.8k|                ip_filt_8x16b_r0_0 =
  857|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r0, phs_mask_16x8b_0));
  858|  13.8k|                ip_filt_8x16b_r1_0 =
  859|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r1, phs_mask_16x8b_0));
  860|  13.8k|                ip_filt_8x16b_r2_0 =
  861|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r2, phs_mask_16x8b_0));
  862|  13.8k|                ip_filt_8x16b_r3_0 =
  863|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r3, phs_mask_16x8b_0));
  864|       |
  865|  13.8k|                ip_filt_8x16b_r0_1 =
  866|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r0, phs_mask_16x8b_1));
  867|  13.8k|                ip_filt_8x16b_r1_1 =
  868|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r1, phs_mask_16x8b_1));
  869|  13.8k|                ip_filt_8x16b_r2_1 =
  870|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r2, phs_mask_16x8b_1));
  871|  13.8k|                ip_filt_8x16b_r3_1 =
  872|  13.8k|                    _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r3, phs_mask_16x8b_1));
  873|       |
  874|  13.8k|                ip_filt_8x16b_r01_l_0 = _mm_unpacklo_epi16(ip_filt_8x16b_r0_0, ip_filt_8x16b_r1_0);
  875|  13.8k|                ip_filt_8x16b_r23_l_0 = _mm_unpacklo_epi16(ip_filt_8x16b_r2_0, ip_filt_8x16b_r3_0);
  876|  13.8k|                ip_filt_8x16b_r01_l_1 = _mm_unpackhi_epi16(ip_filt_8x16b_r0_0, ip_filt_8x16b_r1_0);
  877|  13.8k|                ip_filt_8x16b_r23_l_1 = _mm_unpackhi_epi16(ip_filt_8x16b_r2_0, ip_filt_8x16b_r3_0);
  878|       |
  879|  13.8k|                ip_filt_8x16b_r01_h_0 = _mm_unpacklo_epi16(ip_filt_8x16b_r0_1, ip_filt_8x16b_r1_1);
  880|  13.8k|                ip_filt_8x16b_r23_h_0 = _mm_unpacklo_epi16(ip_filt_8x16b_r2_1, ip_filt_8x16b_r3_1);
  881|  13.8k|                ip_filt_8x16b_r01_h_1 = _mm_unpackhi_epi16(ip_filt_8x16b_r0_1, ip_filt_8x16b_r1_1);
  882|  13.8k|                ip_filt_8x16b_r23_h_1 = _mm_unpackhi_epi16(ip_filt_8x16b_r2_1, ip_filt_8x16b_r3_1);
  883|       |
  884|   236k|                for(i4_y = 0; i4_y < i4_temp_array_ht; i4_y++)
  ------------------
  |  Branch (884:31): [True: 222k, False: 13.8k]
  ------------------
  885|   222k|                {
  886|   222k|                    inp_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_interp_buff_temp + strt_indx));
  887|   222k|                    inp_8x16b_1 = _mm_loadu_si128(
  888|   222k|                        (__m128i *) (pi2_interp_buff_temp + strt_indx + strt_indx_h));
  889|   222k|                    inp_8x16b_2 =
  890|   222k|                        _mm_loadu_si128((__m128i *) (pi2_interp_buff_temp + strt_indx + 3));
  891|   222k|                    inp_8x16b_3 = _mm_loadu_si128(
  892|   222k|                        (__m128i *) (pi2_interp_buff_temp + strt_indx + strt_indx_h + 3));
  893|   222k|                    pi2_interp_buff_temp += i4_refarray_wd;
  894|       |
  895|   222k|                    inp_8x16b_r0_0 = _mm_shuffle_epi8(inp_8x16b_0, x_ref_pos_luma_mask_r0_0);
  896|   222k|                    inp_8x16b_r0_1 = _mm_shuffle_epi8(inp_8x16b_1, x_ref_pos_luma_mask_r0_1);
  897|   222k|                    inp_8x16b_r1_0 = _mm_shuffle_epi8(inp_8x16b_0, x_ref_pos_luma_mask_r1_0);
  898|   222k|                    inp_8x16b_r1_1 = _mm_shuffle_epi8(inp_8x16b_1, x_ref_pos_luma_mask_r1_1);
  899|       |
  900|   222k|                    inp_8x16b_r2_0 = _mm_shuffle_epi8(inp_8x16b_0, x_ref_pos_luma_mask_r2_0);
  901|   222k|                    inp_8x16b_r2_1 = _mm_shuffle_epi8(inp_8x16b_1, x_ref_pos_luma_mask_r2_1);
  902|   222k|                    inp_8x16b_r3_0 = _mm_shuffle_epi8(inp_8x16b_2, x_ref_pos_luma_mask_r3_0);
  903|   222k|                    inp_8x16b_r3_1 = _mm_shuffle_epi8(inp_8x16b_3, x_ref_pos_luma_mask_r3_1);
  904|       |
  905|   222k|                    inp_8x16b_r01_l_0 = _mm_unpacklo_epi16(inp_8x16b_r0_0, inp_8x16b_r1_0);
  906|   222k|                    inp_8x16b_r23_l_0 = _mm_unpacklo_epi16(inp_8x16b_r2_0, inp_8x16b_r3_0);
  907|   222k|                    inp_8x16b_r01_l_1 = _mm_unpackhi_epi16(inp_8x16b_r0_0, inp_8x16b_r1_0);
  908|   222k|                    inp_8x16b_r23_l_1 = _mm_unpackhi_epi16(inp_8x16b_r2_0, inp_8x16b_r3_0);
  909|       |
  910|   222k|                    inp_8x16b_r01_h_0 = _mm_unpacklo_epi16(inp_8x16b_r0_1, inp_8x16b_r1_1);
  911|   222k|                    inp_8x16b_r23_h_0 = _mm_unpacklo_epi16(inp_8x16b_r2_1, inp_8x16b_r3_1);
  912|   222k|                    inp_8x16b_r01_h_1 = _mm_unpackhi_epi16(inp_8x16b_r0_1, inp_8x16b_r1_1);
  913|   222k|                    inp_8x16b_r23_h_1 = _mm_unpackhi_epi16(inp_8x16b_r2_1, inp_8x16b_r3_1);
  914|       |
  915|   222k|                    out_res_4x32b_r01_l_0 =
  916|   222k|                        _mm_madd_epi16(inp_8x16b_r01_l_0, ip_filt_8x16b_r01_l_0);
  917|   222k|                    out_res_4x32b_r01_l_1 =
  918|   222k|                        _mm_madd_epi16(inp_8x16b_r01_l_1, ip_filt_8x16b_r01_l_1);
  919|   222k|                    out_res_4x32b_r23_l_0 =
  920|   222k|                        _mm_madd_epi16(inp_8x16b_r23_l_0, ip_filt_8x16b_r23_l_0);
  921|   222k|                    out_res_4x32b_r23_l_1 =
  922|   222k|                        _mm_madd_epi16(inp_8x16b_r23_l_1, ip_filt_8x16b_r23_l_1);
  923|       |
  924|   222k|                    out_res_4x32b_r01_h_0 =
  925|   222k|                        _mm_madd_epi16(inp_8x16b_r01_h_0, ip_filt_8x16b_r01_h_0);
  926|   222k|                    out_res_4x32b_r01_h_1 =
  927|   222k|                        _mm_madd_epi16(inp_8x16b_r01_h_1, ip_filt_8x16b_r01_h_1);
  928|   222k|                    out_res_4x32b_r23_h_0 =
  929|   222k|                        _mm_madd_epi16(inp_8x16b_r23_h_0, ip_filt_8x16b_r23_h_0);
  930|   222k|                    out_res_4x32b_r23_h_1 =
  931|   222k|                        _mm_madd_epi16(inp_8x16b_r23_h_1, ip_filt_8x16b_r23_h_1);
  932|       |
  933|   222k|                    out_res_4x32b_l_0 = _mm_add_epi32(out_res_4x32b_r01_l_0, out_res_4x32b_r23_l_0);
  934|   222k|                    out_res_4x32b_l_1 = _mm_add_epi32(out_res_4x32b_r01_l_1, out_res_4x32b_r23_l_1);
  935|   222k|                    out_res_4x32b_h_0 = _mm_add_epi32(out_res_4x32b_r01_h_0, out_res_4x32b_r23_h_0);
  936|   222k|                    out_res_4x32b_h_1 = _mm_add_epi32(out_res_4x32b_r01_h_1, out_res_4x32b_r23_h_1);
  937|       |
  938|   222k|                    out_res_4x32b_l_0 =
  939|   222k|                        _mm_srai_epi32(_mm_add_epi32(out_res_4x32b_l_0, const_512), 10);
  940|   222k|                    out_res_4x32b_l_1 =
  941|   222k|                        _mm_srai_epi32(_mm_add_epi32(out_res_4x32b_l_1, const_512), 10);
  942|   222k|                    out_res_4x32b_h_0 =
  943|   222k|                        _mm_srai_epi32(_mm_add_epi32(out_res_4x32b_h_0, const_512), 10);
  944|   222k|                    out_res_4x32b_h_1 =
  945|   222k|                        _mm_srai_epi32(_mm_add_epi32(out_res_4x32b_h_1, const_512), 10);
  946|       |
  947|   222k|                    out_res_8x16b_l = _mm_packs_epi32(out_res_4x32b_l_0, out_res_4x32b_l_1);
  948|   222k|                    out_res_8x16b_h = _mm_packs_epi32(out_res_4x32b_h_0, out_res_4x32b_h_1);
  949|       |
  950|   222k|                    out_res_16x8b = _mm_packus_epi16(out_res_8x16b_l, out_res_8x16b_h);
  951|   222k|                    _mm_storeu_si128((__m128i *) (pu1_out + (i4_y * i4_out_stride)), out_res_16x8b);
  952|   222k|                }
  953|  13.8k|            }
  954|  13.8k|        }
  955|  13.8k|    }
  956|  27.7k|    else
  957|  27.7k|    {
  958|  27.7k|        WORD16 *pi2_interp_buff_temp;
  959|  27.7k|        pi2_interp_buff_temp = (WORD16 *) pi4_interp_buff;
  960|       |
  961|  27.7k|        {
  962|  27.7k|            __m128i inp_8x16b_r0, inp_8x16b_r01_0, phs_mask_16x8b_r0, phs_mask_16x8b_r01_0,
  963|  27.7k|                out_res_8x16b_r01_0;
  964|  27.7k|            __m128i inp_8x16b_r1, phs_mask_16x8b_r1, out_res_8x16b_r01_1;
  965|  27.7k|            __m128i inp_8x16b_r01_1, phs_mask_16x8b_r01_1;
  966|       |
  967|   250k|            for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (967:27): [True: 222k, False: 27.7k]
  ------------------
  968|   222k|            {
  969|   222k|                arr_y_ref_pos_luma[i4_y] = (WORD8) ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_ref_pos;
  970|   222k|                arr_phase_luma[i4_y] = (WORD8) ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_phase;
  971|   222k|            }
  972|  27.7k|            pi4_y_ref_pos_luma = arr_y_ref_pos_luma;
  973|  27.7k|            pi4_phase_luma = arr_phase_luma;
  974|       |
  975|   250k|            for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (975:27): [True: 222k, False: 27.7k]
  ------------------
  976|   222k|            {
  977|   222k|                pu1_refarray_temp =
  978|   222k|                    pu1_refarray + (pi4_y_ref_pos_luma[i4_y] * i4_refarray_wd) + (i4_x_min - 1);
  979|   222k|                inp_8x16b_r0 = _mm_loadu_si128((__m128i *) (pu1_refarray_temp));
  980|   222k|                inp_8x16b_r1 = _mm_loadu_si128((__m128i *) (pu1_refarray_temp + i4_refarray_wd));
  981|       |
  982|   222k|                inp_8x16b_r01_0 = _mm_unpacklo_epi8(inp_8x16b_r0, inp_8x16b_r1);
  983|   222k|                inp_8x16b_r01_1 = _mm_unpackhi_epi8(inp_8x16b_r0, inp_8x16b_r1);
  984|       |
  985|   222k|                phs_mask_16x8b_r0 = _mm_set1_epi8(g_au1_interp_filter_chroma[pi4_phase_luma[i4_y]]);
  986|   222k|                phs_mask_16x8b_r1 =
  987|   222k|                    _mm_set1_epi8(g_au1_interp_filter_chroma[pi4_phase_luma[i4_y] + 16]);
  988|       |
  989|   222k|                phs_mask_16x8b_r01_0 = _mm_unpacklo_epi8(phs_mask_16x8b_r0, phs_mask_16x8b_r1);
  990|   222k|                phs_mask_16x8b_r01_1 = _mm_unpackhi_epi8(phs_mask_16x8b_r0, phs_mask_16x8b_r1);
  991|       |
  992|   222k|                out_res_8x16b_r01_0 = _mm_maddubs_epi16(inp_8x16b_r01_0, phs_mask_16x8b_r01_0);
  993|   222k|                out_res_8x16b_r01_1 = _mm_maddubs_epi16(inp_8x16b_r01_1, phs_mask_16x8b_r01_1);
  994|       |
  995|   222k|                _mm_storeu_si128(
  996|   222k|                    (__m128i *) (pi2_interp_buff_temp + (i4_y * i4_refarray_wd) + (i4_x_min - 1)),
  997|   222k|                    out_res_8x16b_r01_0);
  998|   222k|                _mm_storeu_si128((__m128i *) (pi2_interp_buff_temp + (i4_y * i4_refarray_wd) +
  999|   222k|                                              (i4_x_min - 1) + 8),
 1000|   222k|                                 out_res_8x16b_r01_1);
 1001|   222k|            }
 1002|  27.7k|        }
 1003|       |
 1004|  27.7k|        {
 1005|  27.7k|            WORD32 strt_indx = 10;
 1006|  27.7k|            __m128i inp_8x16b_0, inp_8x16b_r0_0;
 1007|  27.7k|            __m128i phs_mask_16x8b_0;
 1008|  27.7k|            __m128i x_ref_pos_luma_mask_r0_0, x_ref_pos_luma_mask_r1_0;
 1009|  27.7k|            __m128i ip_filt_16x8b_r0, ip_filt_8x16b_r0_0, ip_filt_8x16b_r01_l_0;
 1010|  27.7k|            __m128i ip_filt_16x8b_r1, ip_filt_8x16b_r1_0, ip_filt_8x16b_r01_l_1;
 1011|  27.7k|            __m128i inp_8x16b_r1_0, inp_8x16b_r01_l_0, out_res_4x32b_r01_l_0;
 1012|  27.7k|            __m128i inp_8x16b_r01_l_1, out_res_4x32b_r01_l_1;
 1013|       |
 1014|  27.7k|            __m128i out_res_4x32b_l_0;
 1015|  27.7k|            __m128i out_res_4x32b_l_1;
 1016|  27.7k|            __m128i out_res_8x16b_l;
 1017|  27.7k|            __m128i out_16x8b_r1;
 1018|  27.7k|            __m128i chroma_mask;
 1019|  27.7k|            __m128i const_512 = _mm_set1_epi32(512);
 1020|       |
 1021|  27.7k|            WORD32 i4_x2 = 0;
 1022|  27.7k|            __m128i twos = _mm_set1_epi8(2);
 1023|  27.7k|            strt_indx = ps_x_pos_phase[0 + i4_frm_mb_x].i2_ref_pos;
 1024|   250k|            for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
  ------------------
  |  Branch (1024:27): [True: 222k, False: 27.7k]
  ------------------
 1025|   222k|            {
 1026|   222k|                arr_x_ref_pos_luma[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_ref_pos;
 1027|   222k|                arr_phase_luma[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_phase;
 1028|   222k|                arr_x_ref_pos_luma[i4_x] = arr_x_ref_pos_luma[i4_x] - strt_indx;
 1029|   222k|                i4_x2 = i4_x << 1;
 1030|   222k|                arr_x_ref_pos_luma_low[i4_x2] = (arr_x_ref_pos_luma[i4_x]) << 1;
 1031|   222k|                arr_x_ref_pos_luma_low[i4_x2 + 1] = arr_x_ref_pos_luma_low[i4_x2] + 1;
 1032|   222k|            }
 1033|       |
 1034|  27.7k|            pi4_x_ref_pos_luma_low = arr_x_ref_pos_luma_low;
 1035|  27.7k|            pi4_phase_luma = arr_phase_luma;
 1036|  27.7k|            phs_mask_16x8b_0 = _mm_loadu_si128((__m128i *) (pi4_phase_luma));
 1037|  27.7k|            x_ref_pos_luma_mask_r0_0 = _mm_loadu_si128((__m128i *) (pi4_x_ref_pos_luma_low));
 1038|  27.7k|            x_ref_pos_luma_mask_r1_0 = _mm_add_epi8(x_ref_pos_luma_mask_r0_0, twos);
 1039|       |
 1040|  27.7k|            ip_filt_16x8b_r0 = _mm_loadu_si128((__m128i *) (g_au1_interp_filter_chroma));
 1041|  27.7k|            ip_filt_16x8b_r1 = _mm_loadu_si128((__m128i *) (g_au1_interp_filter_chroma + 16));
 1042|       |
 1043|  27.7k|            ip_filt_8x16b_r0_0 =
 1044|  27.7k|                _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r0, phs_mask_16x8b_0));
 1045|  27.7k|            ip_filt_8x16b_r1_0 =
 1046|  27.7k|                _mm_cvtepi8_epi16(_mm_shuffle_epi8(ip_filt_16x8b_r1, phs_mask_16x8b_0));
 1047|       |
 1048|  27.7k|            ip_filt_8x16b_r01_l_0 = _mm_unpacklo_epi16(ip_filt_8x16b_r0_0, ip_filt_8x16b_r1_0);
 1049|  27.7k|            ip_filt_8x16b_r01_l_1 = _mm_unpackhi_epi16(ip_filt_8x16b_r0_0, ip_filt_8x16b_r1_0);
 1050|       |
 1051|   250k|            for(i4_y = 0; i4_y < i4_temp_array_ht; i4_y++)
  ------------------
  |  Branch (1051:27): [True: 222k, False: 27.7k]
  ------------------
 1052|   222k|            {
 1053|   222k|                inp_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_interp_buff_temp + strt_indx));
 1054|   222k|                pi2_interp_buff_temp += i4_refarray_wd;
 1055|       |
 1056|   222k|                inp_8x16b_r0_0 = _mm_shuffle_epi8(inp_8x16b_0, x_ref_pos_luma_mask_r0_0);
 1057|   222k|                inp_8x16b_r1_0 = _mm_shuffle_epi8(inp_8x16b_0, x_ref_pos_luma_mask_r1_0);
 1058|       |
 1059|   222k|                inp_8x16b_r01_l_0 = _mm_unpacklo_epi16(inp_8x16b_r0_0, inp_8x16b_r1_0);
 1060|   222k|                inp_8x16b_r01_l_1 = _mm_unpackhi_epi16(inp_8x16b_r0_0, inp_8x16b_r1_0);
 1061|       |
 1062|   222k|                out_res_4x32b_r01_l_0 = _mm_madd_epi16(inp_8x16b_r01_l_0, ip_filt_8x16b_r01_l_0);
 1063|   222k|                out_res_4x32b_r01_l_1 = _mm_madd_epi16(inp_8x16b_r01_l_1, ip_filt_8x16b_r01_l_1);
 1064|       |
 1065|   222k|                out_res_4x32b_l_0 =
 1066|   222k|                    _mm_srai_epi32(_mm_add_epi32(out_res_4x32b_r01_l_0, const_512), 10);
 1067|   222k|                out_res_4x32b_l_1 =
 1068|   222k|                    _mm_srai_epi32(_mm_add_epi32(out_res_4x32b_r01_l_1, const_512), 10);
 1069|       |
 1070|   222k|                out_res_8x16b_l = _mm_packs_epi32(out_res_4x32b_l_0, out_res_4x32b_l_1);
 1071|       |
 1072|   222k|                chroma_mask = _mm_set1_epi16(0xFF00);
 1073|   222k|                out_16x8b_r1 = _mm_loadu_si128((__m128i *) (pu1_out + (i4_y * i4_out_stride)));
 1074|   222k|                out_16x8b_r1 = _mm_and_si128(out_16x8b_r1, chroma_mask);
 1075|   222k|                out_16x8b_r1 = _mm_add_epi8(out_res_8x16b_l, out_16x8b_r1);
 1076|   222k|                _mm_storeu_si128((__m128i *) (pu1_out + (i4_y * i4_out_stride)), out_16x8b_r1);
 1077|   222k|            }
 1078|  27.7k|        }
 1079|  27.7k|    }
 1080|  41.6k|    return;
 1081|  41.6k|} /* End of Interpolation Function */
isvcd_vert_interpol_chroma_dyadic_1_sse42:
 1112|  44.6k|{
 1113|  44.6k|    WORD8 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 1114|  44.6k|    WORD32 i4_filt_stride, i4_src_stride;
 1115|  44.6k|    UWORD8 *pu1_inp;
 1116|  44.6k|    WORD16 *pi2_tmp;
 1117|  44.6k|    __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3, i4_samp_16x8b_4,
 1118|  44.6k|        i4_samp_16x8b_5;
 1119|  44.6k|    __m128i i4_res_8x16b_r0, i4_res_8x16b_r1, i4_res_8x16b_r2, i4_res_8x16b_r3, i4_res_8x16b_r4,
 1120|  44.6k|        i4_res_8x16b_r5, i4_res_8x16b_r6, i4_res_8x16b_r7;
 1121|  44.6k|    __m128i i4_res_8x16b_r7_temp;
 1122|  44.6k|    __m128i i4_c0_c1_16x8b, i4_c2_c3_16x8b;
 1123|       |
 1124|  44.6k|    i4_coeff_0 = (WORD8) (8 - i4_phase_0);
 1125|  44.6k|    i4_coeff_1 = (WORD8) (i4_phase_0);
 1126|  44.6k|    i4_coeff_2 = (WORD8) (8 - i4_phase_1);
 1127|  44.6k|    i4_coeff_3 = (WORD8) (i4_phase_1);
 1128|       |
 1129|  44.6k|    i4_c0_c1_16x8b =
 1130|  44.6k|        _mm_set_epi8(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
 1131|  44.6k|                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
 1132|  44.6k|                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0);
 1133|  44.6k|    i4_c2_c3_16x8b =
 1134|  44.6k|        _mm_set_epi8(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
 1135|  44.6k|                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
 1136|  44.6k|                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2);
 1137|       |
 1138|  44.6k|    pu1_inp = pu1_inp_buf;
 1139|  44.6k|    pi2_tmp = pi2_tmp_filt_buf;
 1140|  44.6k|    i4_filt_stride = 6;
 1141|  44.6k|    i4_src_stride = DYADIC_REF_W_C;
  ------------------
  |  |   58|  44.6k|#define DYADIC_REF_W_C 10
  ------------------
 1142|       |
 1143|  44.6k|    i4_samp_16x8b_0 = _mm_loadl_epi64((__m128i *) (pu1_inp));
 1144|  44.6k|    i4_samp_16x8b_1 = _mm_loadl_epi64((__m128i *) (pu1_inp + i4_src_stride));
 1145|  44.6k|    i4_samp_16x8b_2 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1)));
 1146|  44.6k|    i4_samp_16x8b_3 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
 1147|  44.6k|    i4_samp_16x8b_4 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 2)));
 1148|  44.6k|    i4_samp_16x8b_5 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 2) + i4_src_stride));
 1149|       |
 1150|  44.6k|    i4_samp_16x8b_0 = _mm_unpacklo_epi8(i4_samp_16x8b_0, i4_samp_16x8b_1);
 1151|  44.6k|    i4_res_8x16b_r0 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c0_c1_16x8b);
 1152|  44.6k|    _mm_storeu_si128((__m128i *) (pi2_tmp), i4_res_8x16b_r0);
 1153|       |
 1154|  44.6k|    i4_samp_16x8b_1 = _mm_unpacklo_epi8(i4_samp_16x8b_1, i4_samp_16x8b_2);
 1155|  44.6k|    i4_res_8x16b_r1 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c2_c3_16x8b);
 1156|  44.6k|    _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r1);
 1157|       |
 1158|  44.6k|    i4_res_8x16b_r2 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c0_c1_16x8b);
 1159|  44.6k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1)), i4_res_8x16b_r2);
 1160|       |
 1161|  44.6k|    i4_samp_16x8b_2 = _mm_unpacklo_epi8(i4_samp_16x8b_2, i4_samp_16x8b_3);
 1162|  44.6k|    i4_res_8x16b_r3 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c2_c3_16x8b);
 1163|  44.6k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1) + i4_filt_stride),
 1164|  44.6k|                     i4_res_8x16b_r3);
 1165|       |
 1166|  44.6k|    i4_res_8x16b_r4 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c0_c1_16x8b);
 1167|  44.6k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2)), i4_res_8x16b_r4);
 1168|       |
 1169|  44.6k|    i4_samp_16x8b_3 = _mm_unpacklo_epi8(i4_samp_16x8b_3, i4_samp_16x8b_4);
 1170|  44.6k|    i4_res_8x16b_r5 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c2_c3_16x8b);
 1171|  44.6k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + i4_filt_stride),
 1172|  44.6k|                     i4_res_8x16b_r5);
 1173|       |
 1174|  44.6k|    i4_res_8x16b_r6 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c0_c1_16x8b);
 1175|  44.6k|    _mm_storel_epi64((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1)),
 1176|  44.6k|                     i4_res_8x16b_r6);
 1177|       |
 1178|  44.6k|    i4_res_8x16b_r6 = _mm_shuffle_epi32(i4_res_8x16b_r6, 78);
 1179|       |
 1180|  44.6k|    i4_samp_16x8b_4 = _mm_unpacklo_epi8(i4_samp_16x8b_4, i4_samp_16x8b_5);
 1181|       |
 1182|  44.6k|    i4_res_8x16b_r7 = _mm_maddubs_epi16(i4_samp_16x8b_4, i4_c2_c3_16x8b);
 1183|       |
 1184|  44.6k|    i4_res_8x16b_r7 = _mm_shuffle_epi32(i4_res_8x16b_r7, 147);
 1185|       |
 1186|  44.6k|    i4_res_8x16b_r7_temp = _mm_blend_epi16(i4_res_8x16b_r6, i4_res_8x16b_r7, 252);
 1187|       |
 1188|  44.6k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1) + 4),
 1189|  44.6k|                     i4_res_8x16b_r7_temp);
 1190|  44.6k|}
isvcd_vert_interpol_chroma_dyadic_2_sse42:
 1223|  1.76k|{
 1224|  1.76k|    WORD8 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 1225|  1.76k|    WORD32 i4_filt_stride, i4_src_stride;
 1226|  1.76k|    UWORD8 *pu1_inp;
 1227|  1.76k|    WORD16 *pi2_tmp;
 1228|  1.76k|    __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3, i4_samp_16x8b_4;
 1229|  1.76k|    __m128i i4_res_8x16b_r0, i4_res_8x16b_r1, i4_res_8x16b_r2, i4_res_8x16b_r3, i4_res_8x16b_r4,
 1230|  1.76k|        i4_res_8x16b_r5, i4_res_8x16b_r6, i4_res_8x16b_r7;
 1231|  1.76k|    __m128i i4_res_8x16b_r7_temp, i4_c0_c1_16x8b, i4_c2_c3_16x8b;
 1232|  1.76k|    i4_coeff_0 = (WORD8) (8 - i4_phase_0);
 1233|  1.76k|    i4_coeff_1 = (WORD8) (i4_phase_0);
 1234|  1.76k|    i4_coeff_2 = (WORD8) (8 - i4_phase_1);
 1235|  1.76k|    i4_coeff_3 = (WORD8) (i4_phase_1);
 1236|       |
 1237|  1.76k|    i4_c0_c1_16x8b =
 1238|  1.76k|        _mm_set_epi8(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
 1239|  1.76k|                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
 1240|  1.76k|                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0);
 1241|  1.76k|    i4_c2_c3_16x8b =
 1242|  1.76k|        _mm_set_epi8(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
 1243|  1.76k|                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
 1244|  1.76k|                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2);
 1245|       |
 1246|  1.76k|    pi2_tmp = pi2_tmp_filt_buf;
 1247|  1.76k|    i4_filt_stride = 6;
 1248|  1.76k|    i4_src_stride = DYADIC_REF_W_C;
  ------------------
  |  |   58|  1.76k|#define DYADIC_REF_W_C 10
  ------------------
 1249|  1.76k|    pu1_inp = pu1_inp_buf + i4_src_stride;
 1250|       |
 1251|  1.76k|    i4_samp_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_inp));
 1252|  1.76k|    i4_samp_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_inp + i4_src_stride));
 1253|  1.76k|    i4_samp_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1)));
 1254|  1.76k|    i4_samp_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
 1255|  1.76k|    i4_samp_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2)));
 1256|       |
 1257|  1.76k|    i4_samp_16x8b_0 = _mm_unpacklo_epi8(i4_samp_16x8b_0, i4_samp_16x8b_1);
 1258|  1.76k|    i4_res_8x16b_r0 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c0_c1_16x8b);
 1259|  1.76k|    _mm_storeu_si128((__m128i *) (pi2_tmp), i4_res_8x16b_r0);
 1260|       |
 1261|  1.76k|    i4_res_8x16b_r1 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c2_c3_16x8b);
 1262|  1.76k|    _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r1);
 1263|       |
 1264|  1.76k|    i4_samp_16x8b_1 = _mm_unpacklo_epi8(i4_samp_16x8b_1, i4_samp_16x8b_2);
 1265|  1.76k|    i4_res_8x16b_r2 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c0_c1_16x8b);
 1266|  1.76k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1)), i4_res_8x16b_r2);
 1267|       |
 1268|  1.76k|    i4_res_8x16b_r3 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c2_c3_16x8b);
 1269|  1.76k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1) + i4_filt_stride),
 1270|  1.76k|                     i4_res_8x16b_r3);
 1271|       |
 1272|  1.76k|    i4_samp_16x8b_2 = _mm_unpacklo_epi8(i4_samp_16x8b_2, i4_samp_16x8b_3);
 1273|  1.76k|    i4_res_8x16b_r4 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c0_c1_16x8b);
 1274|  1.76k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2)), i4_res_8x16b_r4);
 1275|       |
 1276|  1.76k|    i4_res_8x16b_r5 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c2_c3_16x8b);
 1277|  1.76k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + i4_filt_stride),
 1278|  1.76k|                     i4_res_8x16b_r5);
 1279|       |
 1280|  1.76k|    i4_samp_16x8b_3 = _mm_unpacklo_epi8(i4_samp_16x8b_3, i4_samp_16x8b_4);
 1281|  1.76k|    i4_res_8x16b_r6 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c0_c1_16x8b);
 1282|  1.76k|    _mm_storel_epi64((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1)),
 1283|  1.76k|                     i4_res_8x16b_r6);
 1284|       |
 1285|  1.76k|    i4_res_8x16b_r7 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c2_c3_16x8b);
 1286|  1.76k|    i4_res_8x16b_r6 = _mm_shuffle_epi32(i4_res_8x16b_r6, 78);
 1287|  1.76k|    i4_res_8x16b_r7 = _mm_shuffle_epi32(i4_res_8x16b_r7, 147);
 1288|  1.76k|    i4_res_8x16b_r7_temp = _mm_blend_epi16(i4_res_8x16b_r6, i4_res_8x16b_r7, 252);
 1289|       |
 1290|  1.76k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1) + 4),
 1291|  1.76k|                     i4_res_8x16b_r7_temp);
 1292|  1.76k|}
isvcd_vert_interpol_chroma_dyadic_3_sse42:
 1324|  4.10k|{
 1325|  4.10k|    WORD8 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 1326|  4.10k|    WORD32 i4_filt_stride, i4_src_stride;
 1327|  4.10k|    UWORD8 *pu1_inp;
 1328|  4.10k|    WORD16 *pi2_tmp;
 1329|  4.10k|    __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3, i4_samp_16x8b_4;
 1330|  4.10k|    __m128i i4_res_8x16b_r0, i4_res_8x16b_r1, i4_res_8x16b_r2, i4_res_8x16b_r3, i4_res_8x16b_r4,
 1331|  4.10k|        i4_res_8x16b_r5, i4_res_8x16b_r6, i4_res_8x16b_r7;
 1332|  4.10k|    __m128i i4_res_8x16b_r7_temp, i4_c0_c1_16x8b, i4_c2_c3_16x8b;
 1333|  4.10k|    i4_coeff_0 = (WORD8) (8 - i4_phase_0);
 1334|  4.10k|    i4_coeff_1 = (WORD8) (i4_phase_0);
 1335|  4.10k|    i4_coeff_2 = (WORD8) (8 - i4_phase_1);
 1336|  4.10k|    i4_coeff_3 = (WORD8) (i4_phase_1);
 1337|       |
 1338|  4.10k|    i4_c0_c1_16x8b =
 1339|  4.10k|        _mm_set_epi8(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
 1340|  4.10k|                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
 1341|  4.10k|                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0);
 1342|  4.10k|    i4_c2_c3_16x8b =
 1343|  4.10k|        _mm_set_epi8(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
 1344|  4.10k|                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
 1345|  4.10k|                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2);
 1346|       |
 1347|  4.10k|    pi2_tmp = pi2_tmp_filt_buf;
 1348|  4.10k|    i4_filt_stride = 6;
 1349|  4.10k|    i4_src_stride = DYADIC_REF_W_C;
  ------------------
  |  |   58|  4.10k|#define DYADIC_REF_W_C 10
  ------------------
 1350|  4.10k|    pu1_inp = pu1_inp_buf;
 1351|       |
 1352|  4.10k|    i4_samp_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_inp));
 1353|  4.10k|    i4_samp_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_inp + i4_src_stride));
 1354|  4.10k|    i4_samp_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1)));
 1355|  4.10k|    i4_samp_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
 1356|  4.10k|    i4_samp_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2)));
 1357|       |
 1358|  4.10k|    i4_samp_16x8b_0 = _mm_unpacklo_epi8(i4_samp_16x8b_0, i4_samp_16x8b_1);
 1359|  4.10k|    i4_res_8x16b_r0 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c0_c1_16x8b);
 1360|  4.10k|    _mm_storeu_si128((__m128i *) (pi2_tmp), i4_res_8x16b_r0);
 1361|       |
 1362|  4.10k|    i4_res_8x16b_r1 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c2_c3_16x8b);
 1363|  4.10k|    _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r1);
 1364|       |
 1365|  4.10k|    i4_samp_16x8b_1 = _mm_unpacklo_epi8(i4_samp_16x8b_1, i4_samp_16x8b_2);
 1366|  4.10k|    i4_res_8x16b_r2 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c0_c1_16x8b);
 1367|  4.10k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1)), i4_res_8x16b_r2);
 1368|       |
 1369|  4.10k|    i4_res_8x16b_r3 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c2_c3_16x8b);
 1370|  4.10k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1) + i4_filt_stride),
 1371|  4.10k|                     i4_res_8x16b_r3);
 1372|       |
 1373|  4.10k|    i4_samp_16x8b_2 = _mm_unpacklo_epi8(i4_samp_16x8b_2, i4_samp_16x8b_3);
 1374|  4.10k|    i4_res_8x16b_r4 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c0_c1_16x8b);
 1375|  4.10k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2)), i4_res_8x16b_r4);
 1376|       |
 1377|  4.10k|    i4_res_8x16b_r5 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c2_c3_16x8b);
 1378|  4.10k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + i4_filt_stride),
 1379|  4.10k|                     i4_res_8x16b_r5);
 1380|       |
 1381|  4.10k|    i4_samp_16x8b_3 = _mm_unpacklo_epi8(i4_samp_16x8b_3, i4_samp_16x8b_4);
 1382|  4.10k|    i4_res_8x16b_r6 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c0_c1_16x8b);
 1383|  4.10k|    _mm_storel_epi64((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1)),
 1384|  4.10k|                     i4_res_8x16b_r6);
 1385|       |
 1386|  4.10k|    i4_res_8x16b_r7 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c2_c3_16x8b);
 1387|  4.10k|    i4_res_8x16b_r6 = _mm_shuffle_epi32(i4_res_8x16b_r6, 78);
 1388|  4.10k|    i4_res_8x16b_r7 = _mm_shuffle_epi32(i4_res_8x16b_r7, 147);
 1389|       |    i4_res_8x16b_r7_temp = _mm_blend_epi16(i4_res_8x16b_r6, i4_res_8x16b_r7, 252);
 1390|  4.10k|    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1) + 4),
 1391|  4.10k|                     i4_res_8x16b_r7_temp);
 1392|  4.10k|}
isvcd_horz_interpol_chroma_dyadic_1_sse42:
 1425|  29.5k|{
 1426|  29.5k|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 1427|  29.5k|    WORD32 i4_dst_stride, i4_dst_stride2, i4_dst_stride4;
 1428|  29.5k|    UWORD8 *pu1_out;
 1429|  29.5k|    WORD16 *pi2_tmp;
 1430|       |
 1431|  29.5k|    __m128i i4_samp_8x16b_r1_0, i4_samp_8x16b_r1_1, i4_samp_8x16b_r1_2;
 1432|  29.5k|    __m128i i4_samp_8x16b_r2_0, i4_samp_8x16b_r2_1, i4_samp_8x16b_r2_2;
 1433|  29.5k|    __m128i i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1, i4_samp_8x16b_r3_2;
 1434|  29.5k|    __m128i i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1, i4_samp_8x16b_r4_2;
 1435|  29.5k|    __m128i i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1, i4_samp_8x16b_r5_2;
 1436|  29.5k|    __m128i i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1, i4_samp_8x16b_r6_2;
 1437|  29.5k|    __m128i i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1, i4_samp_8x16b_r7_2;
 1438|  29.5k|    __m128i i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1, i4_samp_8x16b_r8_2;
 1439|       |
 1440|  29.5k|    __m128i i4_res_4x32b_r1_0, i4_res_4x32b_r1_1;
 1441|  29.5k|    __m128i i4_res_4x32b_r2_0, i4_res_4x32b_r2_1;
 1442|  29.5k|    __m128i i4_res_4x32b_r3_0, i4_res_4x32b_r3_1;
 1443|  29.5k|    __m128i i4_res_4x32b_r4_0, i4_res_4x32b_r4_1;
 1444|  29.5k|    __m128i i4_res_4x32b_r5_0, i4_res_4x32b_r5_1;
 1445|  29.5k|    __m128i i4_res_4x32b_r6_0, i4_res_4x32b_r6_1;
 1446|  29.5k|    __m128i i4_res_4x32b_r7_0, i4_res_4x32b_r7_1;
 1447|  29.5k|    __m128i i4_res_4x32b_r8_0, i4_res_4x32b_r8_1;
 1448|       |
 1449|  29.5k|    __m128i i4_res_final_8x16b_r1;
 1450|  29.5k|    __m128i i4_res_final_8x16b_r2;
 1451|  29.5k|    __m128i i4_res_final_8x16b_r3;
 1452|  29.5k|    __m128i i4_res_final_8x16b_r4;
 1453|  29.5k|    __m128i i4_res_final_8x16b_r5;
 1454|  29.5k|    __m128i i4_res_final_8x16b_r6;
 1455|  29.5k|    __m128i i4_res_final_8x16b_r7;
 1456|  29.5k|    __m128i i4_res_final_8x16b_r8;
 1457|       |
 1458|  29.5k|    __m128i out_16x8b_r1;
 1459|  29.5k|    __m128i out_16x8b_r2;
 1460|  29.5k|    __m128i out_16x8b_r3;
 1461|  29.5k|    __m128i out_16x8b_r4;
 1462|  29.5k|    __m128i out_16x8b_r5;
 1463|  29.5k|    __m128i out_16x8b_r6;
 1464|  29.5k|    __m128i out_16x8b_r7;
 1465|  29.5k|    __m128i out_16x8b_r8;
 1466|  29.5k|    __m128i i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1;
 1467|  29.5k|    __m128i i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1;
 1468|  29.5k|    __m128i i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1;
 1469|  29.5k|    __m128i i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1;
 1470|  29.5k|    __m128i chroma_mask, chroma_mask2;
 1471|  29.5k|    __m128i coeff_c0_c1_8x16b, coeff_c2_c3_8x16b, res_32;
 1472|       |
 1473|  29.5k|    i4_coeff_0 = 8 - i4_phase_0;
 1474|  29.5k|    i4_coeff_1 = i4_phase_0;
 1475|  29.5k|    i4_coeff_2 = 8 - i4_phase_1;
 1476|  29.5k|    i4_coeff_3 = i4_phase_1;
 1477|  29.5k|    coeff_c0_c1_8x16b = _mm_set_epi16(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1,
 1478|  29.5k|                                      i4_coeff_0, i4_coeff_1, i4_coeff_0);
 1479|  29.5k|    coeff_c2_c3_8x16b = _mm_set_epi16(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3,
 1480|  29.5k|                                      i4_coeff_2, i4_coeff_3, i4_coeff_2);
 1481|  29.5k|    res_32 = _mm_set1_epi32(32);
 1482|  29.5k|    pu1_out = pu1_out_buf;
 1483|  29.5k|    pi2_tmp = pi2_tmp_filt_buf;
 1484|  29.5k|    i4_dst_stride = i4_out_stride;
 1485|       |
 1486|  29.5k|    i4_dst_stride2 = i4_dst_stride << 1;
 1487|  29.5k|    i4_dst_stride4 = i4_dst_stride << 2;
 1488|       |
 1489|       |    /* Horizontal interpolation */
 1490|       |    /* x = 0, x_phase = phase_0 */
 1491|  29.5k|    i4_samp_8x16b_r1_0 = _mm_loadu_si128((__m128i *) pi2_tmp);         // a0 a1 a2 a3 a4 a5 a6 a7
 1492|  29.5k|    i4_samp_8x16b_r2_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 6));   // b0 b1 b2 b3 b4 b5 b6 b7
 1493|  29.5k|    i4_samp_8x16b_r3_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 12));  // b0 b1 b2 b3 b4 b5 b6 b7
 1494|  29.5k|    i4_samp_8x16b_r4_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 18));  // b0 b1 b2 b3 b4 b5 b6 b7
 1495|  29.5k|    i4_samp_8x16b_r5_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 24));  // b0 b1 b2 b3 b4 b5 b6 b7
 1496|  29.5k|    i4_samp_8x16b_r6_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 30));  // b0 b1 b2 b3 b4 b5 b6 b7
 1497|  29.5k|    i4_samp_8x16b_r7_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 36));  // b0 b1 b2 b3 b4 b5 b6 b7
 1498|  29.5k|    i4_samp_8x16b_r8_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 42));  // b0 b1 b2 b3 b4 b5 b6 b7
 1499|       |
 1500|  29.5k|    i4_samp_8x16b_r1_1 = _mm_srli_si128(i4_samp_8x16b_r1_0, 2);        // a1 a2 a3 a4 a5 a6 a7 0
 1501|  29.5k|    i4_samp_8x16b_r1_2 = _mm_srli_si128(i4_samp_8x16b_r1_0, 4);        // a2 a3 a4 a5 a6 a7 0 0
 1502|       |
 1503|  29.5k|    i4_samp_8x16b_r2_1 = _mm_srli_si128(i4_samp_8x16b_r2_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1504|  29.5k|    i4_samp_8x16b_r2_2 = _mm_srli_si128(i4_samp_8x16b_r2_0, 4);        // b2 b3 b4 b5 b6 b7 0 0
 1505|       |
 1506|  29.5k|    i4_samp_8x16b_r3_1 = _mm_srli_si128(i4_samp_8x16b_r3_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1507|  29.5k|    i4_samp_8x16b_r3_2 = _mm_srli_si128(i4_samp_8x16b_r3_0, 4);        // b2 b3 b4 b5 b6 b7 0 0
 1508|       |
 1509|  29.5k|    i4_samp_8x16b_r4_1 = _mm_srli_si128(i4_samp_8x16b_r4_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1510|  29.5k|    i4_samp_8x16b_r4_2 = _mm_srli_si128(i4_samp_8x16b_r4_0, 4);        // b2 b3 b4 b5 b6 b7 0 0
 1511|       |
 1512|  29.5k|    i4_samp_8x16b_r5_1 = _mm_srli_si128(i4_samp_8x16b_r5_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1513|  29.5k|    i4_samp_8x16b_r5_2 = _mm_srli_si128(i4_samp_8x16b_r5_0, 4);        // b2 b3 b4 b5 b6 b7 0 0
 1514|       |
 1515|  29.5k|    i4_samp_8x16b_r6_1 = _mm_srli_si128(i4_samp_8x16b_r6_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1516|  29.5k|    i4_samp_8x16b_r6_2 = _mm_srli_si128(i4_samp_8x16b_r6_0, 4);        // b2 b3 b4 b5 b6 b7 0 0
 1517|       |
 1518|  29.5k|    i4_samp_8x16b_r7_1 = _mm_srli_si128(i4_samp_8x16b_r7_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1519|  29.5k|    i4_samp_8x16b_r7_2 = _mm_srli_si128(i4_samp_8x16b_r7_0, 4);        // b2 b3 b4 b5 b6 b7 0 0
 1520|       |
 1521|  29.5k|    i4_samp_8x16b_r8_1 = _mm_srli_si128(i4_samp_8x16b_r8_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1522|  29.5k|    i4_samp_8x16b_r8_2 = _mm_srli_si128(i4_samp_8x16b_r8_0, 4);        // b2 b3 b4 b5 b6 b7 0 0
 1523|       |
 1524|  29.5k|    i4_samp_8x16b_r1_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r1_0,
 1525|  29.5k|                                            i4_samp_8x16b_r1_1);  // a0 a1  a1 a2  a2 a3  a3 a4
 1526|  29.5k|    i4_samp_8x16b_r2_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r2_0,
 1527|  29.5k|                                            i4_samp_8x16b_r2_1);  // b0 b1  b1 b2  b2 b3  b3 b4
 1528|  29.5k|    i4_samp_8x16b_r3_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1);
 1529|  29.5k|    i4_samp_8x16b_r4_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1);
 1530|  29.5k|    i4_samp_8x16b_r5_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1);
 1531|  29.5k|    i4_samp_8x16b_r6_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1);
 1532|  29.5k|    i4_samp_8x16b_r7_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1);
 1533|  29.5k|    i4_samp_8x16b_r8_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1);
 1534|       |
 1535|  29.5k|    i4_samp_8x16b_r1_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r1_1,
 1536|  29.5k|                                            i4_samp_8x16b_r1_2);  // a1 a2  a2 a3  a3 a4  a4 a5
 1537|  29.5k|    i4_samp_8x16b_r2_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r2_1,
 1538|  29.5k|                                            i4_samp_8x16b_r2_2);  // b1 b2  b2 b3  b3 b4  b4 b5
 1539|  29.5k|    i4_samp_8x16b_r3_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r3_1, i4_samp_8x16b_r3_2);
 1540|  29.5k|    i4_samp_8x16b_r4_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r4_1, i4_samp_8x16b_r4_2);
 1541|  29.5k|    i4_samp_8x16b_r5_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r5_1, i4_samp_8x16b_r5_2);
 1542|  29.5k|    i4_samp_8x16b_r6_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r6_1, i4_samp_8x16b_r6_2);
 1543|  29.5k|    i4_samp_8x16b_r7_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r7_1, i4_samp_8x16b_r7_2);
 1544|  29.5k|    i4_samp_8x16b_r8_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r8_1, i4_samp_8x16b_r8_2);
 1545|       |
 1546|       |    // a0c0+a1c1  a1c0+a2c1  a2c0+a3c1  a3c0+a4c1
 1547|  29.5k|    i4_res_4x32b_r1_0 = _mm_madd_epi16(i4_samp_8x16b_r1_0, coeff_c0_c1_8x16b);
 1548|       |    // b0c0+b1c1  b1c0+b2c1  b2c0+b3c1  b3c0+b4c1
 1549|  29.5k|    i4_res_4x32b_r2_0 = _mm_madd_epi16(i4_samp_8x16b_r2_0, coeff_c0_c1_8x16b);
 1550|  29.5k|    i4_res_4x32b_r3_0 = _mm_madd_epi16(i4_samp_8x16b_r3_0, coeff_c0_c1_8x16b);
 1551|  29.5k|    i4_res_4x32b_r4_0 = _mm_madd_epi16(i4_samp_8x16b_r4_0, coeff_c0_c1_8x16b);
 1552|  29.5k|    i4_res_4x32b_r5_0 = _mm_madd_epi16(i4_samp_8x16b_r5_0, coeff_c0_c1_8x16b);
 1553|  29.5k|    i4_res_4x32b_r6_0 = _mm_madd_epi16(i4_samp_8x16b_r6_0, coeff_c0_c1_8x16b);
 1554|  29.5k|    i4_res_4x32b_r7_0 = _mm_madd_epi16(i4_samp_8x16b_r7_0, coeff_c0_c1_8x16b);
 1555|  29.5k|    i4_res_4x32b_r8_0 = _mm_madd_epi16(i4_samp_8x16b_r8_0, coeff_c0_c1_8x16b);
 1556|       |
 1557|       |    // a1c2+a2c3  a2c2+a3c3  a3c2+a4c3  a4c2+a5c3
 1558|  29.5k|    i4_res_4x32b_r1_1 = _mm_madd_epi16(i4_samp_8x16b_r1_1, coeff_c2_c3_8x16b);
 1559|       |    // b1c2+b2c3  b2c2+b3c3  b3c2+b4c3  b4c2+b5c3
 1560|  29.5k|    i4_res_4x32b_r2_1 = _mm_madd_epi16(i4_samp_8x16b_r2_1, coeff_c2_c3_8x16b);
 1561|  29.5k|    i4_res_4x32b_r3_1 = _mm_madd_epi16(i4_samp_8x16b_r3_1, coeff_c2_c3_8x16b);
 1562|  29.5k|    i4_res_4x32b_r4_1 = _mm_madd_epi16(i4_samp_8x16b_r4_1, coeff_c2_c3_8x16b);
 1563|  29.5k|    i4_res_4x32b_r5_1 = _mm_madd_epi16(i4_samp_8x16b_r5_1, coeff_c2_c3_8x16b);
 1564|  29.5k|    i4_res_4x32b_r6_1 = _mm_madd_epi16(i4_samp_8x16b_r6_1, coeff_c2_c3_8x16b);
 1565|  29.5k|    i4_res_4x32b_r7_1 = _mm_madd_epi16(i4_samp_8x16b_r7_1, coeff_c2_c3_8x16b);
 1566|  29.5k|    i4_res_4x32b_r8_1 = _mm_madd_epi16(i4_samp_8x16b_r8_1, coeff_c2_c3_8x16b);
 1567|       |
 1568|  29.5k|    i4_res_4x32b_r1_0 = _mm_add_epi32(i4_res_4x32b_r1_0, res_32);
 1569|  29.5k|    i4_res_4x32b_r2_0 = _mm_add_epi32(i4_res_4x32b_r2_0, res_32);
 1570|  29.5k|    i4_res_4x32b_r3_0 = _mm_add_epi32(i4_res_4x32b_r3_0, res_32);
 1571|  29.5k|    i4_res_4x32b_r4_0 = _mm_add_epi32(i4_res_4x32b_r4_0, res_32);
 1572|  29.5k|    i4_res_4x32b_r5_0 = _mm_add_epi32(i4_res_4x32b_r5_0, res_32);
 1573|  29.5k|    i4_res_4x32b_r6_0 = _mm_add_epi32(i4_res_4x32b_r6_0, res_32);
 1574|  29.5k|    i4_res_4x32b_r7_0 = _mm_add_epi32(i4_res_4x32b_r7_0, res_32);
 1575|  29.5k|    i4_res_4x32b_r8_0 = _mm_add_epi32(i4_res_4x32b_r8_0, res_32);
 1576|       |
 1577|  29.5k|    i4_res_4x32b_r1_1 = _mm_add_epi32(i4_res_4x32b_r1_1, res_32);
 1578|  29.5k|    i4_res_4x32b_r2_1 = _mm_add_epi32(i4_res_4x32b_r2_1, res_32);
 1579|  29.5k|    i4_res_4x32b_r3_1 = _mm_add_epi32(i4_res_4x32b_r3_1, res_32);
 1580|  29.5k|    i4_res_4x32b_r4_1 = _mm_add_epi32(i4_res_4x32b_r4_1, res_32);
 1581|  29.5k|    i4_res_4x32b_r5_1 = _mm_add_epi32(i4_res_4x32b_r5_1, res_32);
 1582|  29.5k|    i4_res_4x32b_r6_1 = _mm_add_epi32(i4_res_4x32b_r6_1, res_32);
 1583|  29.5k|    i4_res_4x32b_r7_1 = _mm_add_epi32(i4_res_4x32b_r7_1, res_32);
 1584|  29.5k|    i4_res_4x32b_r8_1 = _mm_add_epi32(i4_res_4x32b_r8_1, res_32);
 1585|       |
 1586|  29.5k|    i4_res_4x32b_r1_0 = _mm_srai_epi32(i4_res_4x32b_r1_0, 6);
 1587|  29.5k|    i4_res_4x32b_r2_0 = _mm_srai_epi32(i4_res_4x32b_r2_0, 6);
 1588|  29.5k|    i4_res_4x32b_r3_0 = _mm_srai_epi32(i4_res_4x32b_r3_0, 6);
 1589|  29.5k|    i4_res_4x32b_r4_0 = _mm_srai_epi32(i4_res_4x32b_r4_0, 6);
 1590|  29.5k|    i4_res_4x32b_r5_0 = _mm_srai_epi32(i4_res_4x32b_r5_0, 6);
 1591|  29.5k|    i4_res_4x32b_r6_0 = _mm_srai_epi32(i4_res_4x32b_r6_0, 6);
 1592|  29.5k|    i4_res_4x32b_r7_0 = _mm_srai_epi32(i4_res_4x32b_r7_0, 6);
 1593|  29.5k|    i4_res_4x32b_r8_0 = _mm_srai_epi32(i4_res_4x32b_r8_0, 6);
 1594|       |
 1595|  29.5k|    i4_res_4x32b_r1_1 = _mm_srai_epi32(i4_res_4x32b_r1_1, 6);
 1596|  29.5k|    i4_res_4x32b_r2_1 = _mm_srai_epi32(i4_res_4x32b_r2_1, 6);
 1597|  29.5k|    i4_res_4x32b_r3_1 = _mm_srai_epi32(i4_res_4x32b_r3_1, 6);
 1598|  29.5k|    i4_res_4x32b_r4_1 = _mm_srai_epi32(i4_res_4x32b_r4_1, 6);
 1599|  29.5k|    i4_res_4x32b_r5_1 = _mm_srai_epi32(i4_res_4x32b_r5_1, 6);
 1600|  29.5k|    i4_res_4x32b_r6_1 = _mm_srai_epi32(i4_res_4x32b_r6_1, 6);
 1601|  29.5k|    i4_res_4x32b_r7_1 = _mm_srai_epi32(i4_res_4x32b_r7_1, 6);
 1602|  29.5k|    i4_res_4x32b_r8_1 = _mm_srai_epi32(i4_res_4x32b_r8_1, 6);
 1603|       |
 1604|  29.5k|    i4_res_final_8x16b_r12_0 = _mm_packs_epi32(i4_res_4x32b_r1_0, i4_res_4x32b_r2_0);
 1605|  29.5k|    i4_res_final_8x16b_r34_0 = _mm_packs_epi32(i4_res_4x32b_r3_0, i4_res_4x32b_r4_0);
 1606|  29.5k|    i4_res_final_8x16b_r56_0 = _mm_packs_epi32(i4_res_4x32b_r5_0, i4_res_4x32b_r6_0);
 1607|  29.5k|    i4_res_final_8x16b_r67_0 = _mm_packs_epi32(i4_res_4x32b_r7_0, i4_res_4x32b_r8_0);
 1608|       |
 1609|  29.5k|    i4_res_final_8x16b_r12_1 = _mm_packs_epi32(i4_res_4x32b_r1_1, i4_res_4x32b_r2_1);
 1610|  29.5k|    i4_res_final_8x16b_r34_1 = _mm_packs_epi32(i4_res_4x32b_r3_1, i4_res_4x32b_r4_1);
 1611|  29.5k|    i4_res_final_8x16b_r56_1 = _mm_packs_epi32(i4_res_4x32b_r5_1, i4_res_4x32b_r6_1);
 1612|  29.5k|    i4_res_final_8x16b_r67_1 = _mm_packs_epi32(i4_res_4x32b_r7_1, i4_res_4x32b_r8_1);
 1613|       |
 1614|  29.5k|    i4_res_final_8x16b_r1 = _mm_unpacklo_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1);
 1615|  29.5k|    i4_res_final_8x16b_r2 = _mm_unpackhi_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1);
 1616|  29.5k|    i4_res_final_8x16b_r3 = _mm_unpacklo_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1);
 1617|  29.5k|    i4_res_final_8x16b_r4 = _mm_unpackhi_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1);
 1618|  29.5k|    i4_res_final_8x16b_r5 = _mm_unpacklo_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1);
 1619|  29.5k|    i4_res_final_8x16b_r6 = _mm_unpackhi_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1);
 1620|  29.5k|    i4_res_final_8x16b_r7 = _mm_unpacklo_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1);
 1621|  29.5k|    i4_res_final_8x16b_r8 = _mm_unpackhi_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1);
 1622|       |
 1623|  29.5k|    chroma_mask = _mm_set1_epi16(0xFF00);
 1624|  29.5k|    chroma_mask2 = _mm_set1_epi16(0x00FF);
 1625|  29.5k|    out_16x8b_r1 = _mm_loadu_si128((__m128i *) (&pu1_out[0]));
 1626|  29.5k|    out_16x8b_r2 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride]));
 1627|  29.5k|    out_16x8b_r3 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2]));
 1628|  29.5k|    out_16x8b_r4 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2 + i4_dst_stride]));
 1629|  29.5k|    out_16x8b_r5 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4]));
 1630|  29.5k|    out_16x8b_r6 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride]));
 1631|  29.5k|    out_16x8b_r7 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2]));
 1632|  29.5k|    out_16x8b_r8 =
 1633|  29.5k|        _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2 + i4_dst_stride]));
 1634|       |
 1635|  29.5k|    out_16x8b_r1 = _mm_and_si128(out_16x8b_r1, chroma_mask);
 1636|  29.5k|    out_16x8b_r2 = _mm_and_si128(out_16x8b_r2, chroma_mask);
 1637|  29.5k|    out_16x8b_r3 = _mm_and_si128(out_16x8b_r3, chroma_mask);
 1638|  29.5k|    out_16x8b_r4 = _mm_and_si128(out_16x8b_r4, chroma_mask);
 1639|  29.5k|    out_16x8b_r5 = _mm_and_si128(out_16x8b_r5, chroma_mask);
 1640|  29.5k|    out_16x8b_r6 = _mm_and_si128(out_16x8b_r6, chroma_mask);
 1641|  29.5k|    out_16x8b_r7 = _mm_and_si128(out_16x8b_r7, chroma_mask);
 1642|  29.5k|    out_16x8b_r8 = _mm_and_si128(out_16x8b_r8, chroma_mask);
 1643|       |
 1644|  29.5k|    i4_res_final_8x16b_r1 = _mm_and_si128(i4_res_final_8x16b_r1, chroma_mask2);
 1645|  29.5k|    i4_res_final_8x16b_r2 = _mm_and_si128(i4_res_final_8x16b_r2, chroma_mask2);
 1646|  29.5k|    i4_res_final_8x16b_r3 = _mm_and_si128(i4_res_final_8x16b_r3, chroma_mask2);
 1647|  29.5k|    i4_res_final_8x16b_r4 = _mm_and_si128(i4_res_final_8x16b_r4, chroma_mask2);
 1648|  29.5k|    i4_res_final_8x16b_r5 = _mm_and_si128(i4_res_final_8x16b_r5, chroma_mask2);
 1649|  29.5k|    i4_res_final_8x16b_r6 = _mm_and_si128(i4_res_final_8x16b_r6, chroma_mask2);
 1650|  29.5k|    i4_res_final_8x16b_r7 = _mm_and_si128(i4_res_final_8x16b_r7, chroma_mask2);
 1651|  29.5k|    i4_res_final_8x16b_r8 = _mm_and_si128(i4_res_final_8x16b_r8, chroma_mask2);
 1652|       |
 1653|  29.5k|    out_16x8b_r1 = _mm_add_epi8(i4_res_final_8x16b_r1, out_16x8b_r1);
 1654|  29.5k|    out_16x8b_r2 = _mm_add_epi8(i4_res_final_8x16b_r2, out_16x8b_r2);
 1655|  29.5k|    out_16x8b_r3 = _mm_add_epi8(i4_res_final_8x16b_r3, out_16x8b_r3);
 1656|  29.5k|    out_16x8b_r4 = _mm_add_epi8(i4_res_final_8x16b_r4, out_16x8b_r4);
 1657|  29.5k|    out_16x8b_r5 = _mm_add_epi8(i4_res_final_8x16b_r5, out_16x8b_r5);
 1658|  29.5k|    out_16x8b_r6 = _mm_add_epi8(i4_res_final_8x16b_r6, out_16x8b_r6);
 1659|  29.5k|    out_16x8b_r7 = _mm_add_epi8(i4_res_final_8x16b_r7, out_16x8b_r7);
 1660|  29.5k|    out_16x8b_r8 = _mm_add_epi8(i4_res_final_8x16b_r8, out_16x8b_r8);
 1661|       |
 1662|  29.5k|    _mm_storeu_si128((__m128i *) pu1_out, out_16x8b_r1);
 1663|  29.5k|    _mm_storeu_si128((__m128i *) (pu1_out + i4_dst_stride), out_16x8b_r2);
 1664|  29.5k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride << 1)), out_16x8b_r3);
 1665|  29.5k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 3)), out_16x8b_r4);
 1666|  29.5k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride << 2)), out_16x8b_r5);
 1667|  29.5k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 5)), out_16x8b_r6);
 1668|  29.5k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 6)), out_16x8b_r7);
 1669|  29.5k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 7)), out_16x8b_r8);
 1670|       |    /* End of loop over x */
 1671|  29.5k|} /* isvcd_horz_interpol_chroma_dyadic_1_sse42 */
isvcd_horz_interpol_chroma_dyadic_2_sse42:
 1704|  21.0k|{
 1705|  21.0k|    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
 1706|  21.0k|    WORD32 i4_dst_stride, i4_dst_stride2, i4_dst_stride4;
 1707|  21.0k|    UWORD8 *pu1_out;
 1708|  21.0k|    WORD16 *pi2_tmp;
 1709|       |
 1710|  21.0k|    __m128i i4_samp_8x16b_r1_0, i4_samp_8x16b_r1_1;
 1711|  21.0k|    __m128i i4_samp_8x16b_r2_0, i4_samp_8x16b_r2_1;
 1712|  21.0k|    __m128i i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1;
 1713|  21.0k|    __m128i i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1;
 1714|  21.0k|    __m128i i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1;
 1715|  21.0k|    __m128i i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1;
 1716|  21.0k|    __m128i i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1;
 1717|  21.0k|    __m128i i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1;
 1718|       |
 1719|  21.0k|    __m128i i4_res_4x32b_r1_0, i4_res_4x32b_r1_1;
 1720|  21.0k|    __m128i i4_res_4x32b_r2_0, i4_res_4x32b_r2_1;
 1721|  21.0k|    __m128i i4_res_4x32b_r3_0, i4_res_4x32b_r3_1;
 1722|  21.0k|    __m128i i4_res_4x32b_r4_0, i4_res_4x32b_r4_1;
 1723|  21.0k|    __m128i i4_res_4x32b_r5_0, i4_res_4x32b_r5_1;
 1724|  21.0k|    __m128i i4_res_4x32b_r6_0, i4_res_4x32b_r6_1;
 1725|  21.0k|    __m128i i4_res_4x32b_r7_0, i4_res_4x32b_r7_1;
 1726|  21.0k|    __m128i i4_res_4x32b_r8_0, i4_res_4x32b_r8_1;
 1727|       |
 1728|  21.0k|    __m128i i4_res_final_8x16b_r1;
 1729|  21.0k|    __m128i i4_res_final_8x16b_r2;
 1730|  21.0k|    __m128i i4_res_final_8x16b_r3;
 1731|  21.0k|    __m128i i4_res_final_8x16b_r4;
 1732|  21.0k|    __m128i i4_res_final_8x16b_r5;
 1733|  21.0k|    __m128i i4_res_final_8x16b_r6;
 1734|  21.0k|    __m128i i4_res_final_8x16b_r7;
 1735|  21.0k|    __m128i i4_res_final_8x16b_r8;
 1736|       |
 1737|  21.0k|    __m128i out_16x8b_r1;
 1738|  21.0k|    __m128i out_16x8b_r2;
 1739|  21.0k|    __m128i out_16x8b_r3;
 1740|  21.0k|    __m128i out_16x8b_r4;
 1741|  21.0k|    __m128i out_16x8b_r5;
 1742|  21.0k|    __m128i out_16x8b_r6;
 1743|  21.0k|    __m128i out_16x8b_r7;
 1744|  21.0k|    __m128i out_16x8b_r8;
 1745|  21.0k|    __m128i i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1;
 1746|  21.0k|    __m128i i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1;
 1747|  21.0k|    __m128i i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1;
 1748|  21.0k|    __m128i i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1;
 1749|  21.0k|    __m128i chroma_mask, chroma_mask2;
 1750|  21.0k|    __m128i coeff_c0_c1_8x16b, coeff_c2_c3_8x16b, res_32;
 1751|       |
 1752|  21.0k|    i4_coeff_0 = 8 - i4_phase_0;
 1753|  21.0k|    i4_coeff_1 = i4_phase_0;
 1754|  21.0k|    i4_coeff_2 = 8 - i4_phase_1;
 1755|  21.0k|    i4_coeff_3 = i4_phase_1;
 1756|  21.0k|    coeff_c0_c1_8x16b = _mm_set_epi16(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1,
 1757|  21.0k|                                      i4_coeff_0, i4_coeff_1, i4_coeff_0);
 1758|  21.0k|    coeff_c2_c3_8x16b = _mm_set_epi16(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3,
 1759|  21.0k|                                      i4_coeff_2, i4_coeff_3, i4_coeff_2);
 1760|  21.0k|    res_32 = _mm_set1_epi32(32);
 1761|  21.0k|    pu1_out = pu1_out_buf;
 1762|  21.0k|    pi2_tmp = pi2_tmp_filt_buf + 1;
 1763|  21.0k|    i4_dst_stride = i4_out_stride;
 1764|       |
 1765|  21.0k|    i4_dst_stride2 = i4_dst_stride << 1;
 1766|  21.0k|    i4_dst_stride4 = i4_dst_stride << 2;
 1767|       |
 1768|       |    /* Horizontal interpolation */
 1769|       |    /* x = 0, x_phase = phase_0 */
 1770|  21.0k|    i4_samp_8x16b_r1_0 = _mm_loadu_si128((__m128i *) pi2_tmp);         // a0 a1 a2 a3 a4 a5 a6 a7
 1771|  21.0k|    i4_samp_8x16b_r2_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 6));   // b0 b1 b2 b3 b4 b5 b6 b7
 1772|  21.0k|    i4_samp_8x16b_r3_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 12));  // b0 b1 b2 b3 b4 b5 b6 b7
 1773|  21.0k|    i4_samp_8x16b_r4_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 18));  // b0 b1 b2 b3 b4 b5 b6 b7
 1774|  21.0k|    i4_samp_8x16b_r5_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 24));  // b0 b1 b2 b3 b4 b5 b6 b7
 1775|  21.0k|    i4_samp_8x16b_r6_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 30));  // b0 b1 b2 b3 b4 b5 b6 b7
 1776|  21.0k|    i4_samp_8x16b_r7_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 36));  // b0 b1 b2 b3 b4 b5 b6 b7
 1777|  21.0k|    i4_samp_8x16b_r8_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 42));  // b0 b1 b2 b3 b4 b5 b6 b7
 1778|       |
 1779|  21.0k|    i4_samp_8x16b_r1_1 = _mm_srli_si128(i4_samp_8x16b_r1_0, 2);        // a1 a2 a3 a4 a5 a6 a7 0
 1780|  21.0k|    i4_samp_8x16b_r2_1 = _mm_srli_si128(i4_samp_8x16b_r2_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1781|  21.0k|    i4_samp_8x16b_r3_1 = _mm_srli_si128(i4_samp_8x16b_r3_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1782|  21.0k|    i4_samp_8x16b_r4_1 = _mm_srli_si128(i4_samp_8x16b_r4_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1783|  21.0k|    i4_samp_8x16b_r5_1 = _mm_srli_si128(i4_samp_8x16b_r5_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1784|  21.0k|    i4_samp_8x16b_r6_1 = _mm_srli_si128(i4_samp_8x16b_r6_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1785|  21.0k|    i4_samp_8x16b_r7_1 = _mm_srli_si128(i4_samp_8x16b_r7_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1786|  21.0k|    i4_samp_8x16b_r8_1 = _mm_srli_si128(i4_samp_8x16b_r8_0, 2);        // b1 b2 b3 b4 b5 b6 b7 0
 1787|       |
 1788|  21.0k|    i4_samp_8x16b_r1_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r1_0,
 1789|  21.0k|                                            i4_samp_8x16b_r1_1);  // a0 a1  a1 a2  a2 a3  a3 a4
 1790|  21.0k|    i4_samp_8x16b_r2_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r2_0,
 1791|  21.0k|                                            i4_samp_8x16b_r2_1);  // b0 b1  b1 b2  b2 b3  b3 b4
 1792|  21.0k|    i4_samp_8x16b_r3_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1);
 1793|  21.0k|    i4_samp_8x16b_r4_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1);
 1794|  21.0k|    i4_samp_8x16b_r5_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1);
 1795|  21.0k|    i4_samp_8x16b_r6_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1);
 1796|  21.0k|    i4_samp_8x16b_r7_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1);
 1797|  21.0k|    i4_samp_8x16b_r8_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1);
 1798|       |
 1799|       |    // a0c0+a1c1  a1c0+a2c1  a2c0+a3c1  a3c0+a4c1
 1800|  21.0k|    i4_res_4x32b_r1_0 = _mm_madd_epi16(i4_samp_8x16b_r1_0, coeff_c0_c1_8x16b);
 1801|       |    // b0c0+b1c1  b1c0+b2c1  b2c0+b3c1  b3c0+b4c1
 1802|  21.0k|    i4_res_4x32b_r2_0 = _mm_madd_epi16(i4_samp_8x16b_r2_0, coeff_c0_c1_8x16b);
 1803|  21.0k|    i4_res_4x32b_r3_0 = _mm_madd_epi16(i4_samp_8x16b_r3_0, coeff_c0_c1_8x16b);
 1804|  21.0k|    i4_res_4x32b_r4_0 = _mm_madd_epi16(i4_samp_8x16b_r4_0, coeff_c0_c1_8x16b);
 1805|  21.0k|    i4_res_4x32b_r5_0 = _mm_madd_epi16(i4_samp_8x16b_r5_0, coeff_c0_c1_8x16b);
 1806|  21.0k|    i4_res_4x32b_r6_0 = _mm_madd_epi16(i4_samp_8x16b_r6_0, coeff_c0_c1_8x16b);
 1807|  21.0k|    i4_res_4x32b_r7_0 = _mm_madd_epi16(i4_samp_8x16b_r7_0, coeff_c0_c1_8x16b);
 1808|  21.0k|    i4_res_4x32b_r8_0 = _mm_madd_epi16(i4_samp_8x16b_r8_0, coeff_c0_c1_8x16b);
 1809|       |
 1810|       |    // a1c2+a2c3  a2c2+a3c3  a3c2+a4c3  a4c2+a5c3
 1811|  21.0k|    i4_res_4x32b_r1_1 = _mm_madd_epi16(i4_samp_8x16b_r1_0, coeff_c2_c3_8x16b);
 1812|       |    // b1c2+b2c3  b2c2+b3c3  b3c2+b4c3  b4c2+b5c3
 1813|  21.0k|    i4_res_4x32b_r2_1 = _mm_madd_epi16(i4_samp_8x16b_r2_0, coeff_c2_c3_8x16b);
 1814|  21.0k|    i4_res_4x32b_r3_1 = _mm_madd_epi16(i4_samp_8x16b_r3_0, coeff_c2_c3_8x16b);
 1815|  21.0k|    i4_res_4x32b_r4_1 = _mm_madd_epi16(i4_samp_8x16b_r4_0, coeff_c2_c3_8x16b);
 1816|  21.0k|    i4_res_4x32b_r5_1 = _mm_madd_epi16(i4_samp_8x16b_r5_0, coeff_c2_c3_8x16b);
 1817|  21.0k|    i4_res_4x32b_r6_1 = _mm_madd_epi16(i4_samp_8x16b_r6_0, coeff_c2_c3_8x16b);
 1818|  21.0k|    i4_res_4x32b_r7_1 = _mm_madd_epi16(i4_samp_8x16b_r7_0, coeff_c2_c3_8x16b);
 1819|  21.0k|    i4_res_4x32b_r8_1 = _mm_madd_epi16(i4_samp_8x16b_r8_0, coeff_c2_c3_8x16b);
 1820|       |
 1821|  21.0k|    i4_res_4x32b_r1_0 = _mm_add_epi32(i4_res_4x32b_r1_0, res_32);
 1822|  21.0k|    i4_res_4x32b_r2_0 = _mm_add_epi32(i4_res_4x32b_r2_0, res_32);
 1823|  21.0k|    i4_res_4x32b_r3_0 = _mm_add_epi32(i4_res_4x32b_r3_0, res_32);
 1824|  21.0k|    i4_res_4x32b_r4_0 = _mm_add_epi32(i4_res_4x32b_r4_0, res_32);
 1825|  21.0k|    i4_res_4x32b_r5_0 = _mm_add_epi32(i4_res_4x32b_r5_0, res_32);
 1826|  21.0k|    i4_res_4x32b_r6_0 = _mm_add_epi32(i4_res_4x32b_r6_0, res_32);
 1827|  21.0k|    i4_res_4x32b_r7_0 = _mm_add_epi32(i4_res_4x32b_r7_0, res_32);
 1828|  21.0k|    i4_res_4x32b_r8_0 = _mm_add_epi32(i4_res_4x32b_r8_0, res_32);
 1829|       |
 1830|  21.0k|    i4_res_4x32b_r1_1 = _mm_add_epi32(i4_res_4x32b_r1_1, res_32);
 1831|  21.0k|    i4_res_4x32b_r2_1 = _mm_add_epi32(i4_res_4x32b_r2_1, res_32);
 1832|  21.0k|    i4_res_4x32b_r3_1 = _mm_add_epi32(i4_res_4x32b_r3_1, res_32);
 1833|  21.0k|    i4_res_4x32b_r4_1 = _mm_add_epi32(i4_res_4x32b_r4_1, res_32);
 1834|  21.0k|    i4_res_4x32b_r5_1 = _mm_add_epi32(i4_res_4x32b_r5_1, res_32);
 1835|  21.0k|    i4_res_4x32b_r6_1 = _mm_add_epi32(i4_res_4x32b_r6_1, res_32);
 1836|  21.0k|    i4_res_4x32b_r7_1 = _mm_add_epi32(i4_res_4x32b_r7_1, res_32);
 1837|  21.0k|    i4_res_4x32b_r8_1 = _mm_add_epi32(i4_res_4x32b_r8_1, res_32);
 1838|       |
 1839|  21.0k|    i4_res_4x32b_r1_0 = _mm_srai_epi32(i4_res_4x32b_r1_0, 6);
 1840|  21.0k|    i4_res_4x32b_r2_0 = _mm_srai_epi32(i4_res_4x32b_r2_0, 6);
 1841|  21.0k|    i4_res_4x32b_r3_0 = _mm_srai_epi32(i4_res_4x32b_r3_0, 6);
 1842|  21.0k|    i4_res_4x32b_r4_0 = _mm_srai_epi32(i4_res_4x32b_r4_0, 6);
 1843|  21.0k|    i4_res_4x32b_r5_0 = _mm_srai_epi32(i4_res_4x32b_r5_0, 6);
 1844|  21.0k|    i4_res_4x32b_r6_0 = _mm_srai_epi32(i4_res_4x32b_r6_0, 6);
 1845|  21.0k|    i4_res_4x32b_r7_0 = _mm_srai_epi32(i4_res_4x32b_r7_0, 6);
 1846|  21.0k|    i4_res_4x32b_r8_0 = _mm_srai_epi32(i4_res_4x32b_r8_0, 6);
 1847|       |
 1848|  21.0k|    i4_res_4x32b_r1_1 = _mm_srai_epi32(i4_res_4x32b_r1_1, 6);
 1849|  21.0k|    i4_res_4x32b_r2_1 = _mm_srai_epi32(i4_res_4x32b_r2_1, 6);
 1850|  21.0k|    i4_res_4x32b_r3_1 = _mm_srai_epi32(i4_res_4x32b_r3_1, 6);
 1851|  21.0k|    i4_res_4x32b_r4_1 = _mm_srai_epi32(i4_res_4x32b_r4_1, 6);
 1852|  21.0k|    i4_res_4x32b_r5_1 = _mm_srai_epi32(i4_res_4x32b_r5_1, 6);
 1853|  21.0k|    i4_res_4x32b_r6_1 = _mm_srai_epi32(i4_res_4x32b_r6_1, 6);
 1854|  21.0k|    i4_res_4x32b_r7_1 = _mm_srai_epi32(i4_res_4x32b_r7_1, 6);
 1855|  21.0k|    i4_res_4x32b_r8_1 = _mm_srai_epi32(i4_res_4x32b_r8_1, 6);
 1856|       |
 1857|  21.0k|    i4_res_final_8x16b_r12_0 = _mm_packs_epi32(i4_res_4x32b_r1_0, i4_res_4x32b_r2_0);
 1858|  21.0k|    i4_res_final_8x16b_r34_0 = _mm_packs_epi32(i4_res_4x32b_r3_0, i4_res_4x32b_r4_0);
 1859|  21.0k|    i4_res_final_8x16b_r56_0 = _mm_packs_epi32(i4_res_4x32b_r5_0, i4_res_4x32b_r6_0);
 1860|  21.0k|    i4_res_final_8x16b_r67_0 = _mm_packs_epi32(i4_res_4x32b_r7_0, i4_res_4x32b_r8_0);
 1861|       |
 1862|  21.0k|    i4_res_final_8x16b_r12_1 = _mm_packs_epi32(i4_res_4x32b_r1_1, i4_res_4x32b_r2_1);
 1863|  21.0k|    i4_res_final_8x16b_r34_1 = _mm_packs_epi32(i4_res_4x32b_r3_1, i4_res_4x32b_r4_1);
 1864|  21.0k|    i4_res_final_8x16b_r56_1 = _mm_packs_epi32(i4_res_4x32b_r5_1, i4_res_4x32b_r6_1);
 1865|  21.0k|    i4_res_final_8x16b_r67_1 = _mm_packs_epi32(i4_res_4x32b_r7_1, i4_res_4x32b_r8_1);
 1866|       |
 1867|  21.0k|    i4_res_final_8x16b_r1 = _mm_unpacklo_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1);
 1868|  21.0k|    i4_res_final_8x16b_r2 = _mm_unpackhi_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1);
 1869|  21.0k|    i4_res_final_8x16b_r3 = _mm_unpacklo_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1);
 1870|  21.0k|    i4_res_final_8x16b_r4 = _mm_unpackhi_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1);
 1871|  21.0k|    i4_res_final_8x16b_r5 = _mm_unpacklo_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1);
 1872|  21.0k|    i4_res_final_8x16b_r6 = _mm_unpackhi_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1);
 1873|  21.0k|    i4_res_final_8x16b_r7 = _mm_unpacklo_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1);
 1874|  21.0k|    i4_res_final_8x16b_r8 = _mm_unpackhi_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1);
 1875|       |
 1876|  21.0k|    chroma_mask = _mm_set1_epi16(0xFF00);
 1877|  21.0k|    chroma_mask2 = _mm_set1_epi16(0x00FF);
 1878|  21.0k|    out_16x8b_r1 = _mm_loadu_si128((__m128i *) (&pu1_out[0]));
 1879|  21.0k|    out_16x8b_r2 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride]));
 1880|  21.0k|    out_16x8b_r3 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2]));
 1881|  21.0k|    out_16x8b_r4 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2 + i4_dst_stride]));
 1882|  21.0k|    out_16x8b_r5 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4]));
 1883|  21.0k|    out_16x8b_r6 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride]));
 1884|  21.0k|    out_16x8b_r7 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2]));
 1885|  21.0k|    out_16x8b_r8 =
 1886|  21.0k|        _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2 + i4_dst_stride]));
 1887|       |
 1888|  21.0k|    out_16x8b_r1 = _mm_and_si128(out_16x8b_r1, chroma_mask);
 1889|  21.0k|    out_16x8b_r2 = _mm_and_si128(out_16x8b_r2, chroma_mask);
 1890|  21.0k|    out_16x8b_r3 = _mm_and_si128(out_16x8b_r3, chroma_mask);
 1891|  21.0k|    out_16x8b_r4 = _mm_and_si128(out_16x8b_r4, chroma_mask);
 1892|  21.0k|    out_16x8b_r5 = _mm_and_si128(out_16x8b_r5, chroma_mask);
 1893|  21.0k|    out_16x8b_r6 = _mm_and_si128(out_16x8b_r6, chroma_mask);
 1894|  21.0k|    out_16x8b_r7 = _mm_and_si128(out_16x8b_r7, chroma_mask);
 1895|  21.0k|    out_16x8b_r8 = _mm_and_si128(out_16x8b_r8, chroma_mask);
 1896|       |
 1897|  21.0k|    i4_res_final_8x16b_r1 = _mm_and_si128(i4_res_final_8x16b_r1, chroma_mask2);
 1898|  21.0k|    i4_res_final_8x16b_r2 = _mm_and_si128(i4_res_final_8x16b_r2, chroma_mask2);
 1899|  21.0k|    i4_res_final_8x16b_r3 = _mm_and_si128(i4_res_final_8x16b_r3, chroma_mask2);
 1900|  21.0k|    i4_res_final_8x16b_r4 = _mm_and_si128(i4_res_final_8x16b_r4, chroma_mask2);
 1901|  21.0k|    i4_res_final_8x16b_r5 = _mm_and_si128(i4_res_final_8x16b_r5, chroma_mask2);
 1902|  21.0k|    i4_res_final_8x16b_r6 = _mm_and_si128(i4_res_final_8x16b_r6, chroma_mask2);
 1903|  21.0k|    i4_res_final_8x16b_r7 = _mm_and_si128(i4_res_final_8x16b_r7, chroma_mask2);
 1904|  21.0k|    i4_res_final_8x16b_r8 = _mm_and_si128(i4_res_final_8x16b_r8, chroma_mask2);
 1905|       |
 1906|  21.0k|    out_16x8b_r1 = _mm_add_epi8(i4_res_final_8x16b_r1, out_16x8b_r1);
 1907|  21.0k|    out_16x8b_r2 = _mm_add_epi8(i4_res_final_8x16b_r2, out_16x8b_r2);
 1908|  21.0k|    out_16x8b_r3 = _mm_add_epi8(i4_res_final_8x16b_r3, out_16x8b_r3);
 1909|  21.0k|    out_16x8b_r4 = _mm_add_epi8(i4_res_final_8x16b_r4, out_16x8b_r4);
 1910|  21.0k|    out_16x8b_r5 = _mm_add_epi8(i4_res_final_8x16b_r5, out_16x8b_r5);
 1911|  21.0k|    out_16x8b_r6 = _mm_add_epi8(i4_res_final_8x16b_r6, out_16x8b_r6);
 1912|  21.0k|    out_16x8b_r7 = _mm_add_epi8(i4_res_final_8x16b_r7, out_16x8b_r7);
 1913|  21.0k|    out_16x8b_r8 = _mm_add_epi8(i4_res_final_8x16b_r8, out_16x8b_r8);
 1914|       |
 1915|  21.0k|    _mm_storeu_si128((__m128i *) pu1_out, out_16x8b_r1);
 1916|  21.0k|    _mm_storeu_si128((__m128i *) (pu1_out + i4_dst_stride), out_16x8b_r2);
 1917|  21.0k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride << 1)), out_16x8b_r3);
 1918|  21.0k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 3)), out_16x8b_r4);
 1919|  21.0k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride << 2)), out_16x8b_r5);
 1920|  21.0k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 5)), out_16x8b_r6);
 1921|  21.0k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 6)), out_16x8b_r7);
 1922|  21.0k|    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 7)), out_16x8b_r8);
 1923|       |
 1924|       |    /* End of loop over x */
 1925|  21.0k|}

/*
 * isvcd_iquant_itrans_residual_recon_4x4_sse42 (coverage listing of the body;
 * the signature is not shown here, so parameter roles are read off the body).
 *
 * Steps performed, in order:
 *   1. Dequantise 16 coefficients from pi2_src: each is multiplied by
 *      pu2_iscal_mat[i] * pu2_weigh_mat[i] (16-bit product) and scaled by
 *      2^(u4_qp_div_6 - 4), with rounding when that is a right shift.
 *   2. If iq_start_idx == 1, element (0,0) is replaced by pi2_dc_ld_addr[0].
 *   3. 4-point inverse transform on rows, then on columns, followed by
 *      (x + 32) >> 6.
 *   4. Add the residual read from pi2_rsd (row stride rsd_strd), clamp with
 *      RSD_MAX / RSD_MIN, add the prediction read from pu1_pred (row stride
 *      pred_strd), clip to 0..255 and write 4 bytes per row to pu1_out
 *      (row stride out_strd).
 *
 * Returns 1 if any value produced by step 3 is non-zero, otherwise 0.
 * pi2_tmp is unused.
 *
 * NOTE(review): the RSD_MAX / RSD_MIN clamp uses 16-bit min/max on registers
 * holding 32-bit lanes; that equals a 32-bit clamp only while every lane fits
 * in a signed 16-bit value -- confirm the range of (transform + rsd).
 */
isvcd_iquant_itrans_residual_recon_4x4_sse42:
   82|  23.7k|{
   83|  23.7k|    WORD32 i4_nnz = 0;
   84|  23.7k|    WORD32 row_0, row_1, row_2, row_3;
   85|  23.7k|    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
   86|  23.7k|    __m128i src_r0_r1, src_r2_r3;
   87|  23.7k|    __m128i src_r0, src_r1, src_r2, src_r3;
   88|  23.7k|    __m128i scalemat_r0_r1, scalemat_r2_r3;
   89|  23.7k|    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
   90|  23.7k|    __m128i rsd_r0, rsd_r1, rsd_r2, rsd_r3;
   91|  23.7k|    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
   92|  23.7k|    __m128i zero_8x16b = _mm_setzero_si128();  // all bits reset to zero
   93|  23.7k|    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
   94|  23.7k|    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
                  // Rounding term for the right-shift path below: half of 2^(4 - u4_qp_div_6); 0 when shifting left
   95|  23.7k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (95:41): [True: 12.5k, False: 11.1k]
  ------------------
   96|  23.7k|    __m128i value_32 = _mm_set1_epi32(32);
   97|  23.7k|    __m128i dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  23.7k|#define RSD_MAX 255
  ------------------
   98|  23.7k|    __m128i dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  23.7k|#define RSD_MIN -255
  ------------------
   99|       |
  100|  23.7k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  23.7k|#define UNUSED(x) ((void)(x))
  ------------------
  101|       |
  102|       |    /*************************************************************/
  103|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  104|       |    /* operations on platform                                    */
  105|       |    /*************************************************************/
  106|       |    // a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
  107|  23.7k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));
  108|       |    // a20 a21 a22 a23 a30 a31 a32 a33 --the source matrix 2nd,3rd row
  109|  23.7k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
  110|       |    // b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
  111|  23.7k|    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat));
  112|       |    // b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
  113|  23.7k|    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8));
  114|       |    // q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
  115|  23.7k|    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat));
  116|       |    // q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
  117|  23.7k|    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8));
  118|       |    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  119|  23.7k|    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1);
  120|       |    // b20*q20 b21*q21 b22*q22 b23*q23 b30*q30 b31*q31 b32*q32 b33*q33 -- 16 bit result
  121|  23.7k|    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3);
  122|       |    // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  123|  23.7k|    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b);
  124|       |    // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  125|  23.7k|    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b);
  126|       |    // b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long
  127|  23.7k|    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b);
  128|       |    // b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long
  129|  23.7k|    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b);
  130|  23.7k|    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  131|  23.7k|    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b);  // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
  132|  23.7k|    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b);  // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
  133|  23.7k|    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b);  // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
  134|       |    // a00*b00*q00 a01*b01*q01 a02*b02*q02 a03*b03*q03 -- 32 bits long (temp5..temp7: rows 1..3)
                  // The interleaved zero lanes make each madd pair sum reduce to a single product.
  135|  23.7k|    temp4 = _mm_madd_epi16(src_r0, temp4);
  136|  23.7k|    temp5 = _mm_madd_epi16(src_r1, temp5);
  137|  23.7k|    temp6 = _mm_madd_epi16(src_r2, temp6);
  138|  23.7k|    temp7 = _mm_madd_epi16(src_r3, temp7);
  139|       |
                  // Scale by 2^(u4_qp_div_6 - 4): plain left shift when the exponent is >= 0,
                  // otherwise add add_rshift and shift right arithmetically.
  140|  23.7k|    if(u4_qp_div_6 >= 4)
  ------------------
  |  Branch (140:8): [True: 11.1k, False: 12.5k]
  ------------------
  141|  11.1k|    {
  142|  11.1k|        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
  143|  11.1k|        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
  144|  11.1k|        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
  145|  11.1k|        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
  146|  11.1k|    }
  147|  12.5k|    else
  148|  12.5k|    {
  149|  12.5k|        temp4 = _mm_add_epi32(temp4, add_rshift);
  150|  12.5k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  151|  12.5k|        temp6 = _mm_add_epi32(temp6, add_rshift);
  152|  12.5k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  153|  12.5k|        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
  154|  12.5k|        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
  155|  12.5k|        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
  156|  12.5k|        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
  157|  12.5k|    }
  158|       |
                  // With iq_start_idx == 1 the dequantised element (0,0) is discarded and
                  // replaced by the value at pi2_dc_ld_addr[0].
  159|  23.7k|    if(iq_start_idx == 1) resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_ld_addr[0], 0);
  ------------------
  |  Branch (159:8): [True: 0, False: 23.7k]
  ------------------
  160|       |    /* Perform Inverse transform */
  161|       |    /*-------------------------------------------------------------*/
  162|       |    /* IDCT [ Horizontal transformation ]                          */
  163|       |    /*-------------------------------------------------------------*/
  164|       |    // Matrix transpose
  165|       |    /*
  166|       |     *  a0 a1 a2 a3
  167|       |     *  b0 b1 b2 b3
  168|       |     *  c0 c1 c2 c3
  169|       |     *  d0 d1 d2 d3
  170|       |     */
  171|  23.7k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 b0 a1 b1
  172|  23.7k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // c0 d0 c1 d1
  173|  23.7k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // a2 b2 a3 b3
  174|  23.7k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 d2 c3 d3
  175|  23.7k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 b0 c0 d0
  176|  23.7k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // a1 b1 c1 d1
  177|  23.7k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // a2 b2 c2 d2
  178|  23.7k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // a3 b3 c3 d3
  179|       |    // Transform starts -- horizontal transform
  180|       |    /*------------------------------------------------------------------*/
  181|       |    /* z0 = w0 + w2                                             */
  182|  23.7k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  183|       |    /* z1 = w0 - w2                                             */
  184|  23.7k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  185|       |    /* z2 = (w1 >> 1) - w3                                      */
  186|  23.7k|    temp2 = _mm_srai_epi32(resq_r1, 1);     //(w1>>1)
  187|  23.7k|    temp2 = _mm_sub_epi32(temp2, resq_r3);  //(w1>>1) - w3
  188|       |    /* z3 = w1 + (w3 >> 1)                                      */
  189|  23.7k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(w3>>1)
  190|  23.7k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  191|       |    /*----------------------------------------------------------*/
  192|       |    /* x0 = z0 + z3                                             */
  193|  23.7k|    resq_r0 = _mm_add_epi32(temp0, temp3);
  194|       |    /* x1 = z1 + z2                                             */
  195|  23.7k|    resq_r1 = _mm_add_epi32(temp1, temp2);
  196|       |    /* x2 = z1 - z2                                             */
  197|  23.7k|    resq_r2 = _mm_sub_epi32(temp1, temp2);
  198|       |    /* x3 = z0 - z3                                             */
  199|  23.7k|    resq_r3 = _mm_sub_epi32(temp0, temp3);
  200|       |    // Matrix transpose
  201|       |    /*
  202|       |     *  a0 b0 c0 d0
  203|       |     *  a1 b1 c1 d1
  204|       |     *  a2 b2 c2 d2
  205|       |     *  a3 b3 c3 d3
  206|       |     */
  207|  23.7k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 a1 b0 b1
  208|  23.7k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // a2 a3 b2 b3
  209|  23.7k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // c0 c1 d0 d1
  210|  23.7k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 c3 d2 d3
  211|  23.7k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 a1 a2 a3
  212|  23.7k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // b0 b1 b2 b3
  213|  23.7k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // c0 c1 c2 c3
  214|  23.7k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // d0 d1 d2 d3
  215|       |    // Transform ends -- horizontal transform
  216|       |
  217|       |    // Load pred buffer
                  // NOTE(review): each load below reads 8 bytes although only the low 4 are
                  // consumed by _mm_cvtepu8_epi32 -- confirm 4 bytes past each row are readable.
  218|       |    // p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
  219|  23.7k|    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
  220|       |    // p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
  221|  23.7k|    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd]));
  222|       |    // p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
  223|  23.7k|    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd]));
  224|       |    // p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
  225|  23.7k|    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd]));
  226|       |
  227|  23.7k|    pred_r0 = _mm_cvtepu8_epi32(pred_r0);  // p00 p01 p02 p03 -- all 32 bits
  228|  23.7k|    pred_r1 = _mm_cvtepu8_epi32(pred_r1);  // p10 p11 p12 p13 -- all 32 bits
  229|  23.7k|    pred_r2 = _mm_cvtepu8_epi32(pred_r2);  // p20 p21 p22 p23 -- all 32 bits
  230|  23.7k|    pred_r3 = _mm_cvtepu8_epi32(pred_r3);  // p30 p31 p32 p33 -- all 32 bits
  231|       |
  232|       |    // Load resd buffer: four 16-bit residuals per row, sign-extended to 32 bits below
  233|  23.7k|    rsd_r0 = _mm_loadl_epi64((__m128i *) (&pi2_rsd[0]));
  234|  23.7k|    rsd_r1 = _mm_loadl_epi64((__m128i *) (&pi2_rsd[rsd_strd]));
  235|  23.7k|    rsd_r2 = _mm_loadl_epi64((__m128i *) (&pi2_rsd[2 * rsd_strd]));
  236|  23.7k|    rsd_r3 = _mm_loadl_epi64((__m128i *) (&pi2_rsd[3 * rsd_strd]));
  237|       |
  238|  23.7k|    rsd_r0 = _mm_cvtepi16_epi32(rsd_r0);
  239|  23.7k|    rsd_r1 = _mm_cvtepi16_epi32(rsd_r1);
  240|  23.7k|    rsd_r2 = _mm_cvtepi16_epi32(rsd_r2);
  241|  23.7k|    rsd_r3 = _mm_cvtepi16_epi32(rsd_r3);
  242|       |
  243|       |    /*--------------------------------------------------------------*/
  244|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
  245|       |    /*                                                              */
  246|       |    /* Add the prediction and store it back to same buffer          */
  247|       |    /*--------------------------------------------------------------*/
  248|       |    /* z0j = y0j + y2j                                                        */
  249|  23.7k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  250|       |    /* z1j = y0j - y2j                                                        */
  251|  23.7k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  252|       |    /* z2j = (y1j>>1) - y3j */
  253|  23.7k|    temp2 = _mm_srai_epi32(resq_r1, 1);  //(y1j>>1)
  254|  23.7k|    temp2 = _mm_sub_epi32(temp2, resq_r3);
  255|       |    /* z3j = y1j + (y3j>>1) */
  256|  23.7k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(y3j>>1)
  257|  23.7k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  258|       |
                  // Each output row below follows the same sequence: round with (x + 32) >> 6,
                  // record whether the row is entirely zero (row_k, taken before rsd/pred are
                  // added), add the residual, clamp with RSD_MAX / RSD_MIN, add the prediction.
                  // NOTE(review): the clamp is a 16-bit min/max applied to 32-bit lanes; it
                  // matches a 32-bit clamp only while each lane fits in int16 -- confirm.
  259|       |    /* x0j = z0j + z3j                                                        */
  260|  23.7k|    temp4 = _mm_add_epi32(temp0, temp3);
  261|  23.7k|    temp4 = _mm_add_epi32(temp4, value_32);
  262|  23.7k|    temp4 = _mm_srai_epi32(temp4, 6);
  263|       |
  264|  23.7k|    row_0 = _mm_test_all_ones(_mm_cmpeq_epi32(temp4, zero_8x16b));  // return 1 if all zeros, else 0
  265|  23.7k|    temp4 = _mm_add_epi32(temp4, rsd_r0);
  266|  23.7k|    temp4 = _mm_min_epi16(dupmax_8x16b, temp4);
  267|  23.7k|    temp4 = _mm_max_epi16(dupmin_8x16b, temp4);
  268|  23.7k|    temp4 = _mm_add_epi32(temp4, pred_r0);
  269|       |    /* x1j = z1j + z2j                                                        */
  270|  23.7k|    temp5 = _mm_add_epi32(temp1, temp2);
  271|  23.7k|    temp5 = _mm_add_epi32(temp5, value_32);
  272|  23.7k|    temp5 = _mm_srai_epi32(temp5, 6);
  273|       |
  274|  23.7k|    row_1 = _mm_test_all_ones(_mm_cmpeq_epi32(temp5, zero_8x16b));  // return 1 if all zeros, else 0
  275|  23.7k|    temp5 = _mm_add_epi32(temp5, rsd_r1);
  276|  23.7k|    temp5 = _mm_min_epi16(dupmax_8x16b, temp5);
  277|  23.7k|    temp5 = _mm_max_epi16(dupmin_8x16b, temp5);
  278|  23.7k|    temp5 = _mm_add_epi32(temp5, pred_r1);
  279|       |    /* x2j = z1j - z2j                                                        */
  280|  23.7k|    temp6 = _mm_sub_epi32(temp1, temp2);
  281|  23.7k|    temp6 = _mm_add_epi32(temp6, value_32);
  282|  23.7k|    temp6 = _mm_srai_epi32(temp6, 6);
  283|       |
  284|  23.7k|    row_2 = _mm_test_all_ones(_mm_cmpeq_epi32(temp6, zero_8x16b));  // return 1 if all zeros, else 0
  285|  23.7k|    temp6 = _mm_add_epi32(temp6, rsd_r2);
  286|  23.7k|    temp6 = _mm_min_epi16(dupmax_8x16b, temp6);
  287|  23.7k|    temp6 = _mm_max_epi16(dupmin_8x16b, temp6);
  288|  23.7k|    temp6 = _mm_add_epi32(temp6, pred_r2);
  289|       |    /* x3j = z0j - z3j                                                        */
  290|  23.7k|    temp7 = _mm_sub_epi32(temp0, temp3);
  291|  23.7k|    temp7 = _mm_add_epi32(temp7, value_32);
  292|  23.7k|    temp7 = _mm_srai_epi32(temp7, 6);
  293|       |
  294|  23.7k|    row_3 = _mm_test_all_ones(_mm_cmpeq_epi32(temp7, zero_8x16b));  // return 1 if all zeros, else 0
  295|  23.7k|    temp7 = _mm_add_epi32(temp7, rsd_r3);
  296|  23.7k|    temp7 = _mm_min_epi16(dupmax_8x16b, temp7);
  297|  23.7k|    temp7 = _mm_max_epi16(dupmin_8x16b, temp7);
  298|  23.7k|    temp7 = _mm_add_epi32(temp7, pred_r3);
  299|       |
  300|       |    // 32-bit to 16-bit conversion (temp0 = rows 0,1; temp1 = rows 2,3)
  301|  23.7k|    temp0 = _mm_packs_epi32(temp4, temp5);
  302|  23.7k|    temp1 = _mm_packs_epi32(temp6, temp7);
  303|       |    /*------------------------------------------------------------------*/
  304|       |    // Clipping the results to 8 bits: the compare mask zeroes every lane that is
  304|       |    // not > 0, and the unsigned-saturating pack below caps the rest at 255
  305|  23.7k|    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b);  // sign check
  306|  23.7k|    temp0 = _mm_and_si128(temp0, sign_reg);
  307|  23.7k|    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
  308|  23.7k|    temp1 = _mm_and_si128(temp1, sign_reg);
  309|       |
                  // After the pack, resq_r0 holds the 16 output bytes row by row; each 4-byte
                  // byte-shift moves the next row into the low 32 bits.
  310|  23.7k|    resq_r0 = _mm_packus_epi16(temp0, temp1);
  311|  23.7k|    resq_r1 = _mm_srli_si128(resq_r0, 4);
  312|  23.7k|    resq_r2 = _mm_srli_si128(resq_r1, 4);
  313|  23.7k|    resq_r3 = _mm_srli_si128(resq_r2, 4);
  314|       |
                  // One 32-bit store per output row, advancing pu1_out by out_strd bytes.
                  // NOTE(review): the stores go through a UWORD32 * derived from pu1_out --
                  // presumably unaligned 32-bit stores are acceptable on the target; confirm.
  315|  23.7k|    *pu4_out = _mm_cvtsi128_si32(resq_r0);
  316|  23.7k|    pu1_out += out_strd;
  317|  23.7k|    pu4_out = (UWORD32 *) (pu1_out);
  318|  23.7k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r1);
  319|  23.7k|    pu1_out += out_strd;
  320|  23.7k|    pu4_out = (UWORD32 *) (pu1_out);
  321|  23.7k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r2);
  322|  23.7k|    pu1_out += out_strd;
  323|  23.7k|    pu4_out = (UWORD32 *) (pu1_out);
  324|  23.7k|    *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
  325|       |
                  // i4_nnz is 0 only when all four row_k flags are set, i.e. the whole rounded
                  // transform output was zero; otherwise 1.
  326|  23.7k|    i4_nnz = !(row_0 && row_1 && row_2 && row_3);
  ------------------
  |  Branch (326:16): [True: 2.22k, False: 21.5k]
  |  Branch (326:25): [True: 1.61k, False: 610]
  |  Branch (326:34): [True: 747, False: 867]
  |  Branch (326:43): [True: 708, False: 39]
  ------------------
  327|  23.7k|    return i4_nnz;
  328|  23.7k|}
isvcd_iquant_itrans_residual_recon_8x8_sse42:
  357|  33.9k|{
  358|  33.9k|    __m128i rsd_r01_b0, rsd_r23_b0, rsd_r45_b2, rsd_r67_b2;
  359|  33.9k|    __m128i rsd_r01_b1, rsd_r23_b1, rsd_r45_b3, rsd_r67_b3;
  360|       |
  361|  33.9k|    WORD32 row_01_b0, row_23_b0, row_45_b2, row_67_b2;
  362|  33.9k|    WORD32 row_01_b1, row_23_b1, row_45_b3, row_67_b3;
  363|  33.9k|    WORD32 i4_nnz, i4_nnz_b0, i4_nnz_b1, i4_nnz_b2, i4_nnz_b3;
  364|  33.9k|    __m128i src_r0;
  365|  33.9k|    __m128i scalemat_r0;
  366|  33.9k|    __m128i zero_8x16b = _mm_setzero_si128();  // all bits reset to zero
  367|  33.9k|    __m128i value_32 = _mm_set1_epi32(32);
  368|  33.9k|    __m128i add_rshift = _mm_set1_epi32((qp_div < 6) ? (1 << (5 - qp_div)) : 0);
  ------------------
  |  Branch (368:41): [True: 21.6k, False: 12.2k]
  ------------------
  369|  33.9k|    __m128i dequant_r0;
  370|  33.9k|    __m128i predload_r;
  371|  33.9k|    __m128i pred_r0_1, pred_r1_1, pred_r2_1, pred_r3_1, pred_r4_1, pred_r5_1, pred_r6_1, pred_r7_1;
  372|       |
  373|  33.9k|    __m128i rsd_r0, rsd_r1, rsd_r2, rsd_r3, rsd_r4, rsd_r5, rsd_r6, rsd_r7;
  374|  33.9k|    __m128i sign_reg;
  375|  33.9k|    __m128i src_r0_1, src_r0_2;
  376|  33.9k|    __m128i scalemat_r0_1, scalemat_r0_2;
  377|  33.9k|    __m128i temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  378|  33.9k|    __m128i temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18, temp19, temp20;
  379|       |    // To store dequantization results
  380|  33.9k|    __m128i resq_r0_1, resq_r0_2, resq_r1_1, resq_r1_2, resq_r2_1, resq_r2_2, resq_r3_1, resq_r3_2,
  381|  33.9k|        resq_r4_1, resq_r4_2, resq_r5_1, resq_r5_2, resq_r6_1, resq_r6_2, resq_r7_1, resq_r7_2;
  382|  33.9k|    __m128i dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  33.9k|#define RSD_MAX 255
  ------------------
  383|  33.9k|    __m128i dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  33.9k|#define RSD_MIN -255
  ------------------
  384|       |
  385|  33.9k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  33.9k|#define UNUSED(x) ((void)(x))
  ------------------
  386|  33.9k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  33.9k|#define UNUSED(x) ((void)(x))
  ------------------
  387|  33.9k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  33.9k|#define UNUSED(x) ((void)(x))
  ------------------
  388|       |
  389|       |    /*************************************************************/
  390|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  391|       |    /* operations on platform. Note : DC coeff is not scaled     */
  392|       |    /*************************************************************/
  393|       |
  394|       |    // Row 0 processing
  395|       |    // a00 a01 a02 a03 a04 a05 a06 a07 -- the source matrix 0th row
  396|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src));
  397|       |    // b00 b01 b02 b03 b04 b05 b06 b07 -- the scaling matrix 0th row
  398|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat));
  399|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  400|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[0]));
  401|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  402|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  403|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  404|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  405|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  406|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  407|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  408|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  409|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  410|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  411|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  412|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  413|       |
  414|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (414:8): [True: 12.2k, False: 21.6k]
  ------------------
  415|  12.2k|    {
  416|  12.2k|        resq_r0_1 = _mm_slli_epi32(temp5, qp_div - 6);
  417|  12.2k|        resq_r0_2 = _mm_slli_epi32(temp7, qp_div - 6);
  418|  12.2k|    }
  419|  21.6k|    else
  420|  21.6k|    {
  421|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  422|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  423|  21.6k|        resq_r0_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  424|  21.6k|        resq_r0_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  425|  21.6k|    }
  426|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16
  427|       |    // bit long
  428|  33.9k|    resq_r0_1 = _mm_packs_epi32(resq_r0_1, resq_r0_2);
  429|       |    // Row 1 processing
  430|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --the source matrix 1st row
  431|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
  432|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 1st row
  433|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 8));
  434|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  435|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[8]));
  436|       |    // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  437|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);
  438|       |    // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  439|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);
  440|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  441|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  442|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  443|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  444|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  445|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  446|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  447|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  448|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  449|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  450|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (450:8): [True: 12.2k, False: 21.6k]
  ------------------
  451|  12.2k|    {
  452|  12.2k|        resq_r1_1 = _mm_slli_epi32(temp5, qp_div - 6);
  453|  12.2k|        resq_r1_2 = _mm_slli_epi32(temp7, qp_div - 6);
  454|  12.2k|    }
  455|  21.6k|    else
  456|  21.6k|    {
  457|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  458|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  459|  21.6k|        resq_r1_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  460|  21.6k|        resq_r1_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  461|  21.6k|    }
  462|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16
  463|       |    // bit long
  464|  33.9k|    resq_r1_1 = _mm_packs_epi32(resq_r1_1, resq_r1_2);
  465|       |    // Row 2 processing
  466|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --the source matrix 2nd row
  467|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 16));
  468|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 2nd row
  469|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 16));
  470|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  471|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[16]));
  472|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  473|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  474|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  475|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  476|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  477|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  478|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  479|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  480|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  481|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  482|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  483|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  484|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (484:8): [True: 12.2k, False: 21.6k]
  ------------------
  485|  12.2k|    {
  486|  12.2k|        resq_r2_1 = _mm_slli_epi32(temp5, qp_div - 6);
  487|  12.2k|        resq_r2_2 = _mm_slli_epi32(temp7, qp_div - 6);
  488|  12.2k|    }
  489|  21.6k|    else
  490|  21.6k|    {
  491|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  492|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  493|  21.6k|        resq_r2_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  494|  21.6k|        resq_r2_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  495|  21.6k|    }
  496|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 -- 16 bit long
  497|  33.9k|    resq_r2_1 = _mm_packs_epi32(resq_r2_1, resq_r2_2);
  498|       |    // Row 3 processing
  499|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --the source matrix 3rd row
  500|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 24));
  501|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 3rd row
  502|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 24));
  503|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  504|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[24]));
  505|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  506|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  507|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  508|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  509|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  510|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  511|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  512|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  513|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 - 32 bits long
  514|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  515|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  516|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  517|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (517:8): [True: 12.2k, False: 21.6k]
  ------------------
  518|  12.2k|    {
  519|  12.2k|        resq_r3_1 = _mm_slli_epi32(temp5, qp_div - 6);
  520|  12.2k|        resq_r3_2 = _mm_slli_epi32(temp7, qp_div - 6);
  521|  12.2k|    }
  522|  21.6k|    else
  523|  21.6k|    {
  524|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  525|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  526|  21.6k|        resq_r3_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  527|  21.6k|        resq_r3_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  528|  21.6k|    }
  529|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 .... -- 16 bit long
  530|  33.9k|    resq_r3_1 = _mm_packs_epi32(resq_r3_1, resq_r3_2);
  531|       |    // Row 4 processing
  532|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --the source matrix 4th row
  533|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 32));
  534|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 4th row
  535|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 32));
  536|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  537|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[32]));
  538|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  539|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  540|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  541|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  542|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  543|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  544|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  545|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  546|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  547|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  548|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  549|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  550|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (550:8): [True: 12.2k, False: 21.6k]
  ------------------
  551|  12.2k|    {
  552|  12.2k|        resq_r4_1 = _mm_slli_epi32(temp5, qp_div - 6);
  553|  12.2k|        resq_r4_2 = _mm_slli_epi32(temp7, qp_div - 6);
  554|  12.2k|    }
  555|  21.6k|    else
  556|  21.6k|    {
  557|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  558|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  559|  21.6k|        resq_r4_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  560|  21.6k|        resq_r4_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  561|  21.6k|    }
  562|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5  -- 16 bit long
  563|  33.9k|    resq_r4_1 = _mm_packs_epi32(resq_r4_1, resq_r4_2);
  564|       |    // Row 5 processing
  565|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --the source matrix 5th row
  566|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 40));
  567|       |    //
  568|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 5th row
  569|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 40));
  570|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  571|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[40]));
  572|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  573|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  574|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  575|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  576|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  577|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  578|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  579|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  580|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  581|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  582|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  583|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  584|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (584:8): [True: 12.2k, False: 21.6k]
  ------------------
  585|  12.2k|    {
  586|  12.2k|        resq_r5_1 = _mm_slli_epi32(temp5, qp_div - 6);
  587|  12.2k|        resq_r5_2 = _mm_slli_epi32(temp7, qp_div - 6);
  588|  12.2k|    }
  589|  21.6k|    else
  590|  21.6k|    {
  591|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  592|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  593|  21.6k|        resq_r5_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  594|  21.6k|        resq_r5_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  595|  21.6k|    }
  596|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 -- 16 bit long
  597|  33.9k|    resq_r5_1 = _mm_packs_epi32(resq_r5_1, resq_r5_2);
  598|       |    // Row 6 processing
  599|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --the source matrix 6th row
  600|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 48));
  601|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 6th row
  602|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 48));
  603|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  604|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[48]));
  605|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  606|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  607|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  608|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  609|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  610|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  611|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  612|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  613|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  614|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  615|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  616|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  617|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (617:8): [True: 12.2k, False: 21.6k]
  ------------------
  618|  12.2k|    {
  619|  12.2k|        resq_r6_1 = _mm_slli_epi32(temp5, qp_div - 6);
  620|  12.2k|        resq_r6_2 = _mm_slli_epi32(temp7, qp_div - 6);
  621|  12.2k|    }
  622|  21.6k|    else
  623|  21.6k|    {
  624|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  625|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  626|  21.6k|        resq_r6_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  627|  21.6k|        resq_r6_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  628|  21.6k|    }
  629|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 -- 16 bit long
  630|  33.9k|    resq_r6_1 = _mm_packs_epi32(resq_r6_1, resq_r6_2);
  631|       |    // Row 7 processing
  632|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --the source matrix 7th row
  633|  33.9k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 56));
  634|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 7th row
  635|  33.9k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 56));
  636|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  637|  33.9k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[56]));
  638|  33.9k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  639|  33.9k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  640|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  641|  33.9k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  642|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  643|  33.9k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  644|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  645|  33.9k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  646|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  647|  33.9k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  648|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  649|  33.9k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  650|  33.9k|    if(qp_div >= 6)
  ------------------
  |  Branch (650:8): [True: 12.2k, False: 21.6k]
  ------------------
  651|  12.2k|    {
  652|  12.2k|        resq_r7_1 = _mm_slli_epi32(temp5, qp_div - 6);
  653|  12.2k|        resq_r7_2 = _mm_slli_epi32(temp7, qp_div - 6);
  654|  12.2k|    }
  655|  21.6k|    else
  656|  21.6k|    {
  657|  21.6k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  658|  21.6k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  659|  21.6k|        resq_r7_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  660|  21.6k|        resq_r7_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  661|  21.6k|    }
  662|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 -- 16 bit long
  663|  33.9k|    resq_r7_1 = _mm_packs_epi32(resq_r7_1, resq_r7_2);
  664|       |
  665|       |    /* Perform Inverse transform */
  666|       |    /*--------------------------------------------------------------------*/
  667|       |    /* IDCT [ Horizontal transformation ]                                 */
  668|       |    /*--------------------------------------------------------------------*/
  669|       |    // Matrix transpose
  670|       |    /*
  671|       |     *  a0 a1 a2 a3 a4 a5 a6 a7
  672|       |     *  b0 b1 b2 b3 b4 b5 b6 b7
  673|       |     *  c0 c1 c2 c3 c4 c5 c6 c7
  674|       |     *  d0 d1 d2 d3 d4 d5 d6 d7
  675|       |     */
  676|  33.9k|    temp1 = _mm_unpacklo_epi16(resq_r0_1, resq_r1_1);  // a0 b0 a1 b1 a2 b2 a3 b3
  677|  33.9k|    temp3 = _mm_unpacklo_epi16(resq_r2_1, resq_r3_1);  // c0 d0 c1 d1 c2 d2 c3 d3
  678|  33.9k|    temp2 = _mm_unpackhi_epi16(resq_r0_1, resq_r1_1);  // a4 b4 a5 b5 a6 b6 a7 b7
  679|  33.9k|    temp4 = _mm_unpackhi_epi16(resq_r2_1, resq_r3_1);  // c4 d4 c5 d5 c6 d6 c7 d7
  680|  33.9k|    resq_r0_1 = _mm_unpacklo_epi32(temp1, temp3);      // a0 b0 c0 d0 a1 b1 c1 d1
  681|  33.9k|    resq_r1_1 = _mm_unpackhi_epi32(temp1, temp3);      // a2 b2 c2 d2 a3 b3 c3 d3
  682|  33.9k|    resq_r2_1 = _mm_unpacklo_epi32(temp2, temp4);      // a4 b4 c4 d4 a5 b5 c5 d5
  683|  33.9k|    resq_r3_1 = _mm_unpackhi_epi32(temp2, temp4);      // a6 b6 c6 d6 a7 b7 c7 d7
  684|       |    /*
  685|       |     * e0 e1 e2 e3 e4 e5 e6 e7
  686|       |     * f0 f1 f2 f3 f4 f5 f6 f7
  687|       |     * g0 g1 g2 g3 g4 g5 g6 g7
  688|       |     * h0 h1 h2 h3 h4 h5 h6 h7
  689|       |     */
  690|  33.9k|    temp1 = _mm_unpacklo_epi16(resq_r4_1, resq_r5_1);  // e0 f0 e1 f1 e2 f2 e3 f3
  691|  33.9k|    temp3 = _mm_unpacklo_epi16(resq_r6_1, resq_r7_1);  // g0 h0 g1 h1 g2 h2 g3 h3
  692|  33.9k|    temp2 = _mm_unpackhi_epi16(resq_r4_1, resq_r5_1);  // e4 f4 e5 f5 e6 f6 e7 f7
  693|  33.9k|    temp4 = _mm_unpackhi_epi16(resq_r6_1, resq_r7_1);  // g4 h4 g5 h5 g6 h6 g7 h7
  694|  33.9k|    resq_r4_1 = _mm_unpacklo_epi32(temp1, temp3);      // e0 f0 g0 h0 e1 f1 g1 h1
  695|  33.9k|    resq_r5_1 = _mm_unpackhi_epi32(temp1, temp3);      // e2 f2 g2 h2 e3 f3 g3 h3
  696|  33.9k|    resq_r6_1 = _mm_unpacklo_epi32(temp2, temp4);      // e4 f4 g4 h4 e5 f5 g5 h5
  697|  33.9k|    resq_r7_1 = _mm_unpackhi_epi32(temp2, temp4);      // e6 f6 g6 h6 e7 f7 g7 h7
  698|       |    /*
  699|       |     * a0 b0 c0 d0 a1 b1 c1 d1
  700|       |     * a2 b2 c2 d2 a3 b3 c3 d3
  701|       |     * a4 b4 c4 d4 a5 b5 c5 d5
  702|       |     * a6 b6 c6 d6 a7 b7 c7 d7
  703|       |     * e0 f0 g0 h0 e1 f1 g1 h1
  704|       |     * e2 f2 g2 h2 e3 f3 g3 h3
  705|       |     * e4 f4 g4 h4 e5 f5 g5 h5
  706|       |     * e6 f6 g6 h6 e7 f7 g7 h7
  707|       |     */
  708|  33.9k|    resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1);  // a0 b0 c0 d0 e0 f0 g0 h0
  709|  33.9k|    resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1);  // a1 b1 c1 d1 e1 f1 g1 h1
  710|  33.9k|    resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1);  // a2 b2 c2 d2 e2 f2 g2 h2
  711|  33.9k|    resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1);  // a3 b3 c3 d3 e3 f3 g3 h3
  712|  33.9k|    resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1);  // a4 b4 c4 d4 e4 f4 g4 h4
  713|  33.9k|    resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1);  // a5 b5 c5 d5 e5 f5 g5 h5
  714|  33.9k|    resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1);  // a6 b6 c6 d6 e6 f6 g6 h6
  715|  33.9k|    resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1);  // a7 b7 c7 d7 e7 f7 g7 h7
  716|       |
  717|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
  718|  33.9k|    resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg);  // a1 b1 c1 d1 -- 32 bit
  719|  33.9k|    resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg);  // e1 f1 g1 h1 -- 32 bit
  720|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
  721|  33.9k|    resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg);  // a3 b3 c3 d3 -- 32 bit
  722|  33.9k|    resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg);  // e3 f3 g3 h3 -- 32 bit
  723|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
  724|  33.9k|    resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg);  // a5 b5 c5 d5 -- 32 bit
  725|  33.9k|    resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg);  // e5 f5 g5 h5 -- 32 bit
  726|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
  727|  33.9k|    resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg);  // a7 b7 c7 d7 -- 32 bit
  728|  33.9k|    resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg);  // e7 f7 g7 h7 -- 32 bit
  729|       |    // Transform starts -- horizontal transform
  730|       |    /*------------------------------------------------------------------*/
  731|       |    /* y0 = w0 + w4                                                     */
  732|  33.9k|    temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
  733|       |    /* y2 = w0 - w4                                                      */
  734|  33.9k|    temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
  735|       |    /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
  736|  33.9k|    temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1);  //-w3+w5
  737|  33.9k|    temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
  738|  33.9k|    temp4 = _mm_sub_epi32(temp2, resq_r7_1);      //-w3+w5-w7
  739|  33.9k|    temp12 = _mm_sub_epi32(temp10, resq_r7_2);
  740|  33.9k|    temp5 = _mm_srai_epi32(resq_r7_1, 1);         // w7>>1
  741|  33.9k|    temp13 = _mm_srai_epi32(resq_r7_2, 1);
  742|  33.9k|    temp2 = _mm_sub_epi32(temp4, temp5);          //-w3+w5-w7 -(w7>>1)
  743|  33.9k|    temp10 = _mm_sub_epi32(temp12, temp13);
  744|  33.9k|    temp2 = _mm_packs_epi32(temp2, temp10);
  745|       |    /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
  746|  33.9k|    temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1);  // w1+w7
  747|  33.9k|    temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
  748|  33.9k|    temp4 = _mm_sub_epi32(temp4, resq_r3_1);      // w1+w7-w3
  749|  33.9k|    temp12 = _mm_sub_epi32(temp12, resq_r3_2);
  750|  33.9k|    temp5 = _mm_srai_epi32(resq_r3_1, 1);         // w3>>1
  751|  33.9k|    temp13 = _mm_srai_epi32(resq_r3_2, 1);
  752|  33.9k|    temp4 = _mm_sub_epi32(temp4, temp5);          // w1+w7-w3-(w3>>1)
  753|  33.9k|    temp12 = _mm_sub_epi32(temp12, temp13);
  754|  33.9k|    temp4 = _mm_packs_epi32(temp4, temp12);
  755|       |    /* y4 = (w2 >> 1) - w6                                              */
  756|  33.9k|    temp5 = _mm_srai_epi16(resq_r2_2, 1);     // w2>>1
  757|  33.9k|    temp5 = _mm_sub_epi16(temp5, resq_r6_2);  //(w2>>1)-w6
  758|       |    /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
  759|  33.9k|    temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1);  // w7-w1
  760|  33.9k|    temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
  761|  33.9k|    temp6 = _mm_add_epi32(temp6, resq_r5_1);      // w7-w1+w5
  762|  33.9k|    temp14 = _mm_add_epi32(temp14, resq_r5_2);
  763|  33.9k|    temp7 = _mm_srai_epi32(resq_r5_1, 1);         // w5>>1
  764|  33.9k|    temp15 = _mm_srai_epi32(resq_r5_2, 1);
  765|  33.9k|    temp6 = _mm_add_epi32(temp6, temp7);          // w7-w1+w5+(w5>>1)
  766|  33.9k|    temp14 = _mm_add_epi32(temp14, temp15);
  767|  33.9k|    temp6 = _mm_packs_epi32(temp6, temp14);
  768|       |    /* y6 = w2 + (w6 >> 1)                                              */
  769|  33.9k|    temp7 = _mm_srai_epi16(resq_r6_2, 1);     // w6>>1
  770|  33.9k|    temp7 = _mm_add_epi16(temp7, resq_r2_2);  //(w6>>1)+w2
  771|       |    /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
  772|  33.9k|    temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1);  // w3+w5
  773|  33.9k|    temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
  774|  33.9k|    temp8 = _mm_add_epi32(temp8, resq_r1_1);      // w3+w5+w1
  775|  33.9k|    temp16 = _mm_add_epi32(temp16, resq_r1_2);
  776|  33.9k|    temp17 = _mm_srai_epi32(resq_r1_1, 1);        // w1>>1
  777|  33.9k|    temp18 = _mm_srai_epi32(resq_r1_2, 1);
  778|  33.9k|    temp8 = _mm_add_epi32(temp8, temp17);         // w3+w5+w1+(w1>>1)
  779|  33.9k|    temp16 = _mm_add_epi32(temp16, temp18);
  780|  33.9k|    temp8 = _mm_packs_epi32(temp8, temp16);
  781|       |    /*------------------------------------------------------------------*/
  782|       |    /*------------------------------------------------------------------*/
  783|       |    /* z0 = y0 + y6                                                        */
  784|  33.9k|    resq_r0_1 = _mm_add_epi16(temp1, temp7);
  785|       |    /* z1 = y1 + (y7 >> 2)                                                */
  786|  33.9k|    resq_r1_1 = _mm_srai_epi16(temp8, 2);
  787|  33.9k|    resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
  788|       |    /* z2 = y2 + y4                                                        */
  789|  33.9k|    resq_r2_1 = _mm_add_epi16(temp3, temp5);
  790|       |    /* z3 = y3 + (y5 >> 2)                                                */
  791|  33.9k|    resq_r3_1 = _mm_srai_epi16(temp6, 2);
  792|  33.9k|    resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
  793|       |    /* z4 = y2 - y4                                                        */
  794|  33.9k|    resq_r4_1 = _mm_sub_epi16(temp3, temp5);
  795|       |    /* z5 = (y3 >> 2) - y5                                                 */
  796|  33.9k|    resq_r5_1 = _mm_srai_epi16(temp4, 2);
  797|  33.9k|    resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
  798|       |    /* z6 = y0 - y6                                                     */
  799|  33.9k|    resq_r6_1 = _mm_sub_epi16(temp1, temp7);
  800|       |    /* z7 = y7 - (y1 >> 2)                                                 */
  801|  33.9k|    resq_r7_1 = _mm_srai_epi16(temp2, 2);
  802|  33.9k|    resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
  803|       |    /*------------------------------------------------------------------*/
  804|       |    /*------------------------------------------------------------------*/
  805|       |    /* x0 = z0 + z7                                                        */
  806|  33.9k|    temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
  807|       |    /* x1 = z2 + z5                                                        */
  808|  33.9k|    temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
  809|       |    /* x2 = z4 + z3                                                        */
  810|  33.9k|    temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
  811|       |    /* x3 = z6 + z1                                                        */
  812|  33.9k|    temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
  813|       |    /* x4 = z6 - z1                                                        */
  814|  33.9k|    temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
  815|       |    /* x5 = z4 - z3                                                        */
  816|  33.9k|    temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
  817|       |    /* x6 = z2 - z5                                                        */
  818|  33.9k|    temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
  819|       |    /* x7 = z0 - z7                                                        */
  820|  33.9k|    temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
  821|       |    /*------------------------------------------------------------------*/
  822|       |    // Matrix transpose
  823|       |    /*
  824|       |     *  a0 b0 c0 d0 e0 f0 g0 h0
  825|       |     *  a1 b1 c1 d1 e1 f1 g1 h1
  826|       |     *  a2 b2 c2 d2 e2 f2 g2 h2
  827|       |     *  a3 b3 c3 d3 e3 f3 g3 h3
  828|       |     */
  829|  33.9k|    temp17 = _mm_unpacklo_epi16(temp1, temp2);       // a0 a1 b0 b1 c0 c1 d0 d1
  830|  33.9k|    temp19 = _mm_unpacklo_epi16(temp3, temp4);       // a2 a3 b2 b3 c2 c3 d2 d3
  831|  33.9k|    temp18 = _mm_unpackhi_epi16(temp1, temp2);       // e0 e1 f0 f1 g0 g1 h0 h1
  832|  33.9k|    temp20 = _mm_unpackhi_epi16(temp3, temp4);       // e2 e3 f2 f3 g2 g3 h2 h3
  833|       |
  834|  33.9k|    resq_r0_1 = _mm_unpacklo_epi32(temp17, temp19);  // a0 a1 a2 a3 b0 b1 b2 b3
  835|  33.9k|    resq_r1_1 = _mm_unpackhi_epi32(temp17, temp19);  // c0 c1 c2 c3 d0 d1 d2 d3
  836|  33.9k|    resq_r2_1 = _mm_unpacklo_epi32(temp18, temp20);  // e0 e1 e2 e3 f0 f1 f2 f3
  837|  33.9k|    resq_r3_1 = _mm_unpackhi_epi32(temp18, temp20);  // g0 g1 g2 g3 h0 h1 h2 h3
  838|       |    /*
  839|       |     *  a4 b4 c4 d4 e4 f4 g4 h4
  840|       |     *  a5 b5 c5 d5 e5 f5 g5 h5
  841|       |     *  a6 b6 c6 d6 e6 f6 g6 h6
  842|       |     *  a7 b7 c7 d7 e7 f7 g7 h7
  843|       |     */
  844|  33.9k|    temp17 = _mm_unpacklo_epi16(temp5, temp6);       // a4 a5 b4 b5 c4 c5 d4 d5
  845|  33.9k|    temp19 = _mm_unpacklo_epi16(temp7, temp8);       // a6 a7 b6 b7 c6 c7 d6 d7
  846|  33.9k|    temp18 = _mm_unpackhi_epi16(temp5, temp6);       // e4 e5 f4 f5 g4 g5 h4 h5
  847|  33.9k|    temp20 = _mm_unpackhi_epi16(temp7, temp8);       // e6 e7 f6 f7 g6 g7 h6 h7
  848|       |
  849|  33.9k|    resq_r4_1 = _mm_unpacklo_epi32(temp17, temp19);  // a4 a5 a6 a7 b4 b5 b6 b7
  850|  33.9k|    resq_r5_1 = _mm_unpackhi_epi32(temp17, temp19);  // c4 c5 c6 c7 d4 d5 d6 d7
  851|  33.9k|    resq_r6_1 = _mm_unpacklo_epi32(temp18, temp20);  // e4 e5 e6 e7 f4 f5 f6 f7
  852|  33.9k|    resq_r7_1 = _mm_unpackhi_epi32(temp18, temp20);  // g4 g5 g6 g7 h4 h5 h6 h7
  853|       |    /*  a0 a1 a2 a3 b0 b1 b2 b3
  854|       |     *  c0 c1 c2 c3 d0 d1 d2 d3
  855|       |     *  e0 e1 e2 e3 f0 f1 f2 f3
  856|       |     *  g0 g1 g2 g3 h0 h1 h2 h3
  857|       |     *  a4 a5 a6 a7 b4 b5 b6 b7
  858|       |     *  c4 c5 c6 c7 d4 d5 d6 d7
  859|       |     *  e4 e5 e6 e7 f4 f5 f6 f7
  860|       |     *  g4 g5 g6 g7 h4 h5 h6 h7
  861|       |     */
  862|  33.9k|    resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1);  // a0 a1 a2 a3 a4 a5 a6 a7
  863|  33.9k|    resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1);  // b0 b1 b2 b3 b4 b5 b6 b7
  864|  33.9k|    resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1);  // c0 c1 c2 c3 c4 c5 c6 c7
  865|  33.9k|    resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1);  // d0 d1 d2 d3 d4 d5 d6 d7
  866|  33.9k|    resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1);  // e0 e1 e2 e3 e4 e5 e6 e7
  867|  33.9k|    resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1);  // f0 f1 f2 f3 f4 f5 f6 f7
  868|  33.9k|    resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1);  // g0 g1 g2 g3 g4 g5 g6 g7
  869|  33.9k|    resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1);  // h0 h1 h2 h3 h4 h5 h6 h7
  870|       |
  871|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
  872|  33.9k|    resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg);  // b0 b1 b2 b3 -- 32 bit
  873|  33.9k|    resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg);  // b4 b5 b6 b7 -- 32 bit
  874|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
  875|  33.9k|    resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg);  // d0 d1 d2 d3 -- 32 bit
  876|  33.9k|    resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg);  // d4 d5 d6 d7 -- 32 bit
  877|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
  878|  33.9k|    resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg);  // f0 f1 f2 f3 -- 32 bit
  879|  33.9k|    resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg);  // f4 f5 f6 f7 -- 32 bit
  880|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
  881|  33.9k|    resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg);  // h0 h1 h2 h3 -- 32 bit
  882|  33.9k|    resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg);  // h4 h5 h6 h7 -- 32 bit
  883|       |
  884|  33.9k|    zero_8x16b = _mm_setzero_si128();                     // all bits reset to zero
  885|       |    // Load pred buffer row 0
  886|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  887|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
  888|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  889|  33.9k|    pred_r0_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  890|       |    // Load pred buffer row 1
  891|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  892|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd]));
  893|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  894|  33.9k|    pred_r1_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  895|       |    // Load pred buffer row 2
  896|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  897|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd]));
  898|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  899|  33.9k|    pred_r2_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  900|       |    // Load pred buffer row 3
  901|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  902|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd]));
  903|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  904|  33.9k|    pred_r3_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  905|       |    // Load pred buffer row 4
  906|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  907|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[4 * pred_strd]));
  908|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  909|  33.9k|    pred_r4_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  910|       |    // Load pred buffer row 5
  911|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bit
  912|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[5 * pred_strd]));
  913|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  914|  33.9k|    pred_r5_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  915|       |    // Load pred buffer row 6
  916|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  917|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[6 * pred_strd]));
  918|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  919|  33.9k|    pred_r6_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  920|       |    // Load pred buffer row 7
  921|       |    // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
  922|  33.9k|    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[7 * pred_strd]));
  923|       |    // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
  924|  33.9k|    pred_r7_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);
  925|       |
  926|  33.9k|    rsd_r0 = _mm_loadu_si128((__m128i *) (&pi2_rsd[0]));
  927|  33.9k|    rsd_r1 = _mm_loadu_si128((__m128i *) (&pi2_rsd[1 * rsd_strd]));
  928|  33.9k|    rsd_r2 = _mm_loadu_si128((__m128i *) (&pi2_rsd[2 * rsd_strd]));
  929|  33.9k|    rsd_r3 = _mm_loadu_si128((__m128i *) (&pi2_rsd[3 * rsd_strd]));
  930|  33.9k|    rsd_r4 = _mm_loadu_si128((__m128i *) (&pi2_rsd[4 * rsd_strd]));
  931|  33.9k|    rsd_r5 = _mm_loadu_si128((__m128i *) (&pi2_rsd[5 * rsd_strd]));
  932|  33.9k|    rsd_r6 = _mm_loadu_si128((__m128i *) (&pi2_rsd[6 * rsd_strd]));
  933|  33.9k|    rsd_r7 = _mm_loadu_si128((__m128i *) (&pi2_rsd[7 * rsd_strd]));
  934|       |
  935|       |    /*--------------------------------------------------------------------*/
  936|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
  937|       |    /*                                                                    */
  938|       |    /* Add the prediction and store it back to reconstructed frame buffer */
  939|       |    /* [Prediction buffer itself in this case]                            */
  940|       |    /*--------------------------------------------------------------------*/
  941|       |
  942|       |    /* y0j = w0j + w4j                                                     */
  943|  33.9k|    temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
  944|       |    /* y2j = w0j - w4j                                                      */
  945|  33.9k|    temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
  946|       |    /* y1j = -w3j + w5j - w7j - (w7j >> 1)                                   */
  947|  33.9k|    temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1);  //-w3+w5
  948|  33.9k|    temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
  949|  33.9k|    temp4 = _mm_sub_epi32(temp2, resq_r7_1);      //-w3+w5-w7
  950|  33.9k|    temp12 = _mm_sub_epi32(temp10, resq_r7_2);
  951|  33.9k|    temp5 = _mm_srai_epi32(resq_r7_1, 1);         // w7>>1
  952|  33.9k|    temp13 = _mm_srai_epi32(resq_r7_2, 1);
  953|  33.9k|    temp2 = _mm_sub_epi32(temp4, temp5);          //-w3+w5-w7 -(w7>>1)
  954|  33.9k|    temp10 = _mm_sub_epi32(temp12, temp13);
  955|  33.9k|    temp2 = _mm_packs_epi32(temp2, temp10);
  956|       |    /* y3j = w1j + w7j - w3j - (w3j >> 1)                                    */
  957|  33.9k|    temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1);  // w1+w7
  958|  33.9k|    temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
  959|  33.9k|    temp4 = _mm_sub_epi32(temp4, resq_r3_1);      // w1+w7-w3
  960|  33.9k|    temp12 = _mm_sub_epi32(temp12, resq_r3_2);
  961|  33.9k|    temp5 = _mm_srai_epi32(resq_r3_1, 1);         // w3>>1
  962|  33.9k|    temp13 = _mm_srai_epi32(resq_r3_2, 1);
  963|  33.9k|    temp4 = _mm_sub_epi32(temp4, temp5);          // w1+w7-w3-(w3>>1)
  964|  33.9k|    temp12 = _mm_sub_epi32(temp12, temp13);
  965|  33.9k|    temp4 = _mm_packs_epi32(temp4, temp12);
  966|       |    /* y4j = (w2j >> 1) - w6j                                              */
  967|  33.9k|    temp5 = _mm_srai_epi16(resq_r2_2, 1);     // w2>>1
  968|  33.9k|    temp5 = _mm_sub_epi16(temp5, resq_r6_2);  //(w2>>1)-w6
  969|       |    /* y5j = -w1j + w7j + w5j + (w5j >> 1)                                   */
  970|  33.9k|    temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1);  // w7-w1
  971|  33.9k|    temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
  972|  33.9k|    temp6 = _mm_add_epi32(temp6, resq_r5_1);      // w7-w1+w5
  973|  33.9k|    temp14 = _mm_add_epi32(temp14, resq_r5_2);
  974|  33.9k|    temp7 = _mm_srai_epi32(resq_r5_1, 1);         // w5>>1
  975|  33.9k|    temp15 = _mm_srai_epi32(resq_r5_2, 1);
  976|  33.9k|    temp6 = _mm_add_epi32(temp6, temp7);          // w7-w1_w5+(w5>>1)
  977|  33.9k|    temp14 = _mm_add_epi32(temp14, temp15);
  978|  33.9k|    temp6 = _mm_packs_epi32(temp6, temp14);
  979|       |    /* y6j = w2j + (w6j >> 1)                                              */
  980|  33.9k|    temp7 = _mm_srai_epi16(resq_r6_2, 1);     // w6>>1
  981|  33.9k|    temp7 = _mm_add_epi16(temp7, resq_r2_2);  //(w6>>1)+w2
  982|       |    /* y7j = w3j + w5j + w1j + (w1j >> 1)                                    */
  983|  33.9k|    temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1);  // w3+w5
  984|  33.9k|    temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
  985|  33.9k|    temp8 = _mm_add_epi32(temp8, resq_r1_1);      // w3+w5+w1
  986|  33.9k|    temp16 = _mm_add_epi32(temp16, resq_r1_2);
  987|  33.9k|    temp17 = _mm_srai_epi32(resq_r1_1, 1);        // w1>>1
  988|  33.9k|    temp18 = _mm_srai_epi32(resq_r1_2, 1);
  989|  33.9k|    temp8 = _mm_add_epi32(temp8, temp17);         // w3+w5+w1+(w1>>1)
  990|  33.9k|    temp16 = _mm_add_epi32(temp16, temp18);
  991|  33.9k|    temp8 = _mm_packs_epi32(temp8, temp16);
  992|       |    /*------------------------------------------------------------------*/
  993|       |    /*------------------------------------------------------------------*/
  994|       |    /* z0j = y0j + y6j                                                        */
  995|  33.9k|    resq_r0_1 = _mm_add_epi16(temp1, temp7);
  996|       |    /* z1j = y1j + (y7j >> 2)                                                */
  997|  33.9k|    resq_r1_1 = _mm_srai_epi16(temp8, 2);
  998|  33.9k|    resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
  999|       |    /* z2j = y2j + y4j                                                        */
 1000|  33.9k|    resq_r2_1 = _mm_add_epi16(temp3, temp5);
 1001|       |    /* z3j = y3j + (y5j >> 2)                                                */
 1002|  33.9k|    resq_r3_1 = _mm_srai_epi16(temp6, 2);
 1003|  33.9k|    resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
 1004|       |    /* z4j = y2j - y4j                                                        */
 1005|  33.9k|    resq_r4_1 = _mm_sub_epi16(temp3, temp5);
 1006|       |    /* z5j = (y3j >> 2) - y5j                                                 */
 1007|  33.9k|    resq_r5_1 = _mm_srai_epi16(temp4, 2);
 1008|  33.9k|    resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
 1009|       |    /* z6j = y0j - y6j                                                     */
 1010|  33.9k|    resq_r6_1 = _mm_sub_epi16(temp1, temp7);
 1011|       |    /* z7j = y7j - (y1j >> 2)                                                 */
 1012|  33.9k|    resq_r7_1 = _mm_srai_epi16(temp2, 2);
 1013|  33.9k|    resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
 1014|       |    /*------------------------------------------------------------------*/
 1015|       |
 1016|       |    /*------------------------------------------------------------------*/
 1017|       |    /* x0j = z0j + z7j                                                        */
 1018|  33.9k|    temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
 1019|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp1);
 1020|  33.9k|    temp10 = _mm_unpacklo_epi16(temp1, sign_reg);
 1021|  33.9k|    temp11 = _mm_unpackhi_epi16(temp1, sign_reg);
 1022|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1023|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1024|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1025|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1026|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1027|  33.9k|    rsd_r0 = _mm_add_epi16(temp10, rsd_r0);
 1028|  33.9k|    rsd_r0 = _mm_min_epi16(dupmax_8x16b, rsd_r0);
 1029|  33.9k|    rsd_r0 = _mm_max_epi16(dupmin_8x16b, rsd_r0);
 1030|  33.9k|    temp1 = _mm_add_epi16(rsd_r0, pred_r0_1);
 1031|       |    /* x1j = z2j + z5j                                                        */
 1032|  33.9k|    temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
 1033|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp2);
 1034|  33.9k|    temp10 = _mm_unpacklo_epi16(temp2, sign_reg);
 1035|  33.9k|    temp11 = _mm_unpackhi_epi16(temp2, sign_reg);
 1036|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1037|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1038|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1039|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1040|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1041|  33.9k|    rsd_r1 = _mm_add_epi16(temp10, rsd_r1);
 1042|  33.9k|    rsd_r1 = _mm_min_epi16(dupmax_8x16b, rsd_r1);
 1043|  33.9k|    rsd_r1 = _mm_max_epi16(dupmin_8x16b, rsd_r1);
 1044|  33.9k|    temp2 = _mm_add_epi16(rsd_r1, pred_r1_1);
 1045|       |    /* x2j = z4j + z3j                                                        */
 1046|  33.9k|    temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
 1047|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp3);
 1048|  33.9k|    temp10 = _mm_unpacklo_epi16(temp3, sign_reg);
 1049|  33.9k|    temp11 = _mm_unpackhi_epi16(temp3, sign_reg);
 1050|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1051|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1052|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1053|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1054|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1055|  33.9k|    rsd_r2 = _mm_add_epi16(temp10, rsd_r2);
 1056|  33.9k|    rsd_r2 = _mm_min_epi16(dupmax_8x16b, rsd_r2);
 1057|  33.9k|    rsd_r2 = _mm_max_epi16(dupmin_8x16b, rsd_r2);
 1058|  33.9k|    temp3 = _mm_add_epi16(rsd_r2, pred_r2_1);
 1059|       |    /* x3j = z6j + z1j                                                        */
 1060|  33.9k|    temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
 1061|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp4);
 1062|  33.9k|    temp10 = _mm_unpacklo_epi16(temp4, sign_reg);
 1063|  33.9k|    temp11 = _mm_unpackhi_epi16(temp4, sign_reg);
 1064|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1065|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1066|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1067|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1068|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1069|  33.9k|    rsd_r3 = _mm_add_epi16(temp10, rsd_r3);
 1070|  33.9k|    rsd_r3 = _mm_min_epi16(dupmax_8x16b, rsd_r3);
 1071|  33.9k|    rsd_r3 = _mm_max_epi16(dupmin_8x16b, rsd_r3);
 1072|  33.9k|    temp4 = _mm_add_epi16(rsd_r3, pred_r3_1);
 1073|       |    /* x4j = z6j - z1j                                                        */
 1074|  33.9k|    temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
 1075|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp5);
 1076|  33.9k|    temp10 = _mm_unpacklo_epi16(temp5, sign_reg);
 1077|  33.9k|    temp11 = _mm_unpackhi_epi16(temp5, sign_reg);
 1078|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1079|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1080|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1081|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1082|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1083|  33.9k|    rsd_r4 = _mm_add_epi16(temp10, rsd_r4);
 1084|  33.9k|    rsd_r4 = _mm_min_epi16(dupmax_8x16b, rsd_r4);
 1085|  33.9k|    rsd_r4 = _mm_max_epi16(dupmin_8x16b, rsd_r4);
 1086|  33.9k|    temp5 = _mm_add_epi16(rsd_r4, pred_r4_1);
 1087|       |    /* x5j = z4j - z3j                                                        */
 1088|  33.9k|    temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
 1089|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp6);
 1090|  33.9k|    temp10 = _mm_unpacklo_epi16(temp6, sign_reg);
 1091|  33.9k|    temp11 = _mm_unpackhi_epi16(temp6, sign_reg);
 1092|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1093|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1094|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1095|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1096|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1097|  33.9k|    rsd_r5 = _mm_add_epi16(temp10, rsd_r5);
 1098|  33.9k|    rsd_r5 = _mm_min_epi16(dupmax_8x16b, rsd_r5);
 1099|  33.9k|    rsd_r5 = _mm_max_epi16(dupmin_8x16b, rsd_r5);
 1100|  33.9k|    temp6 = _mm_add_epi16(rsd_r5, pred_r5_1);
 1101|       |    /* x6j = z2j - z5j                                                        */
 1102|  33.9k|    temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
 1103|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp7);
 1104|  33.9k|    temp10 = _mm_unpacklo_epi16(temp7, sign_reg);
 1105|  33.9k|    temp11 = _mm_unpackhi_epi16(temp7, sign_reg);
 1106|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1107|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1108|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1109|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1110|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1111|  33.9k|    rsd_r6 = _mm_add_epi16(temp10, rsd_r6);
 1112|  33.9k|    rsd_r6 = _mm_min_epi16(dupmax_8x16b, rsd_r6);
 1113|  33.9k|    rsd_r6 = _mm_max_epi16(dupmin_8x16b, rsd_r6);
 1114|  33.9k|    temp7 = _mm_add_epi16(rsd_r6, pred_r6_1);
 1115|       |    /* x7j = z0j - z7j                                                        */
 1116|  33.9k|    temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
 1117|  33.9k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp8);
 1118|  33.9k|    temp10 = _mm_unpacklo_epi16(temp8, sign_reg);
 1119|  33.9k|    temp11 = _mm_unpackhi_epi16(temp8, sign_reg);
 1120|  33.9k|    temp10 = _mm_add_epi32(temp10, value_32);
 1121|  33.9k|    temp11 = _mm_add_epi32(temp11, value_32);
 1122|  33.9k|    temp10 = _mm_srai_epi32(temp10, 6);
 1123|  33.9k|    temp11 = _mm_srai_epi32(temp11, 6);
 1124|  33.9k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1125|  33.9k|    rsd_r7 = _mm_add_epi16(temp10, rsd_r7);
 1126|  33.9k|    rsd_r7 = _mm_min_epi16(dupmax_8x16b, rsd_r7);
 1127|  33.9k|    rsd_r7 = _mm_max_epi16(dupmin_8x16b, rsd_r7);
 1128|  33.9k|    temp8 = _mm_add_epi16(rsd_r7, pred_r7_1);
 1129|       |
 1130|  33.9k|    rsd_r01_b0 = _mm_unpacklo_epi64(rsd_r0, rsd_r1);
 1131|  33.9k|    rsd_r23_b0 = _mm_unpacklo_epi64(rsd_r2, rsd_r3);
 1132|  33.9k|    rsd_r45_b2 = _mm_unpacklo_epi64(rsd_r4, rsd_r5);
 1133|  33.9k|    rsd_r67_b2 = _mm_unpacklo_epi64(rsd_r6, rsd_r7);
 1134|       |
 1135|  33.9k|    rsd_r01_b1 = _mm_unpackhi_epi64(rsd_r0, rsd_r1);
 1136|  33.9k|    rsd_r23_b1 = _mm_unpackhi_epi64(rsd_r2, rsd_r3);
 1137|  33.9k|    rsd_r45_b3 = _mm_unpackhi_epi64(rsd_r4, rsd_r5);
 1138|  33.9k|    rsd_r67_b3 = _mm_unpackhi_epi64(rsd_r6, rsd_r7);
 1139|       |
 1140|  33.9k|    row_01_b0 = _mm_test_all_ones(
 1141|  33.9k|        _mm_cmpeq_epi16(rsd_r01_b0, zero_8x16b));  // return 1 if all zeros, else 0
 1142|  33.9k|    row_23_b0 = _mm_test_all_ones(
 1143|  33.9k|        _mm_cmpeq_epi16(rsd_r23_b0, zero_8x16b));  // return 1 if all zeros, else 0
 1144|  33.9k|    row_45_b2 = _mm_test_all_ones(
 1145|  33.9k|        _mm_cmpeq_epi16(rsd_r45_b2, zero_8x16b));  // return 1 if all zeros, else 0
 1146|  33.9k|    row_67_b2 = _mm_test_all_ones(
 1147|  33.9k|        _mm_cmpeq_epi16(rsd_r67_b2, zero_8x16b));  // return 1 if all zeros, else 0
 1148|       |
 1149|  33.9k|    row_01_b1 = _mm_test_all_ones(
 1150|  33.9k|        _mm_cmpeq_epi16(rsd_r01_b1, zero_8x16b));  // return 1 if all zeros, else 0
 1151|  33.9k|    row_23_b1 = _mm_test_all_ones(
 1152|  33.9k|        _mm_cmpeq_epi16(rsd_r23_b1, zero_8x16b));  // return 1 if all zeros, else 0
 1153|  33.9k|    row_45_b3 = _mm_test_all_ones(
 1154|  33.9k|        _mm_cmpeq_epi16(rsd_r45_b3, zero_8x16b));  // return 1 if all zeros, else 0
 1155|  33.9k|    row_67_b3 = _mm_test_all_ones(
 1156|  33.9k|        _mm_cmpeq_epi16(rsd_r67_b3, zero_8x16b));  // return 1 if all zeros, else 0
 1157|       |
 1158|       |    /*------------------------------------------------------------------*/
 1159|       |    // Clipping the results to 8 bits
 1160|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);  // sign check
 1161|  33.9k|    temp1 = _mm_and_si128(temp1, sign_reg);
 1162|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b);  // sign check
 1163|  33.9k|    temp2 = _mm_and_si128(temp2, sign_reg);
 1164|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b);  // sign check
 1165|  33.9k|    temp3 = _mm_and_si128(temp3, sign_reg);
 1166|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b);  // sign check
 1167|  33.9k|    temp4 = _mm_and_si128(temp4, sign_reg);
 1168|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b);  // sign check
 1169|  33.9k|    temp5 = _mm_and_si128(temp5, sign_reg);
 1170|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b);  // sign check
 1171|  33.9k|    temp6 = _mm_and_si128(temp6, sign_reg);
 1172|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b);  // sign check
 1173|  33.9k|    temp7 = _mm_and_si128(temp7, sign_reg);
 1174|  33.9k|    sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b);  // sign check
 1175|  33.9k|    temp8 = _mm_and_si128(temp8, sign_reg);
 1176|       |
 1177|  33.9k|    resq_r0_2 = _mm_packus_epi16(temp1, zero_8x16b);
 1178|  33.9k|    resq_r1_2 = _mm_packus_epi16(temp2, zero_8x16b);
 1179|  33.9k|    resq_r2_2 = _mm_packus_epi16(temp3, zero_8x16b);
 1180|  33.9k|    resq_r3_2 = _mm_packus_epi16(temp4, zero_8x16b);
 1181|  33.9k|    resq_r4_2 = _mm_packus_epi16(temp5, zero_8x16b);
 1182|  33.9k|    resq_r5_2 = _mm_packus_epi16(temp6, zero_8x16b);
 1183|  33.9k|    resq_r6_2 = _mm_packus_epi16(temp7, zero_8x16b);
 1184|  33.9k|    resq_r7_2 = _mm_packus_epi16(temp8, zero_8x16b);
 1185|       |
 1186|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[0]), resq_r0_2);
 1187|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[out_strd]), resq_r1_2);
 1188|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[2 * out_strd]), resq_r2_2);
 1189|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[3 * out_strd]), resq_r3_2);
 1190|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[4 * out_strd]), resq_r4_2);
 1191|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[5 * out_strd]), resq_r5_2);
 1192|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[6 * out_strd]), resq_r6_2);
 1193|  33.9k|    _mm_storel_epi64((__m128i *) (&pu1_out[7 * out_strd]), resq_r7_2);
 1194|       |
 1195|  33.9k|    i4_nnz_b0 = (!(row_01_b0 && row_23_b0));
  ------------------
  |  Branch (1195:20): [True: 2.74k, False: 31.1k]
  |  Branch (1195:33): [True: 1.22k, False: 1.52k]
  ------------------
 1196|  33.9k|    i4_nnz_b1 = (!(row_01_b1 && row_23_b1)) << 1;
  ------------------
  |  Branch (1196:20): [True: 2.88k, False: 31.0k]
  |  Branch (1196:33): [True: 1.54k, False: 1.34k]
  ------------------
 1197|  33.9k|    i4_nnz_b2 = (!(row_45_b2 && row_67_b2)) << 4;
  ------------------
  |  Branch (1197:20): [True: 1.48k, False: 32.4k]
  |  Branch (1197:33): [True: 1.42k, False: 56]
  ------------------
 1198|  33.9k|    i4_nnz_b3 = (!(row_45_b3 && row_67_b3)) << 5;
  ------------------
  |  Branch (1198:20): [True: 1.25k, False: 32.6k]
  |  Branch (1198:33): [True: 1.20k, False: 49]
  ------------------
 1199|       |
 1200|  33.9k|    i4_nnz = (i4_nnz_b0 | i4_nnz_b1 | i4_nnz_b2 | i4_nnz_b3);
 1201|  33.9k|    return i4_nnz;
 1202|  33.9k|}
isvcd_iquant_itrans_residual_recon_4x4_dc_sse42:
 1231|  1.79k|{  // DC-only 4x4 path: adds one DC term to every residual sample, clamps, adds prediction, stores 4x4 pixels, returns a non-zero flag (signature is not part of this listing)
 1232|  1.79k|    __m128i pred_16x8b_0, pred_8x16b_0, rsd_8x16b_0, out_8x16b_0, out_16x8b_0;
 1233|  1.79k|    __m128i pred_16x8b_1, pred_8x16b_1, rsd_8x16b_1, out_8x16b_1, out_16x8b_1;
 1234|  1.79k|    __m128i pred_16x8b_2, pred_8x16b_2, rsd_8x16b_2, out_8x16b_2, out_16x8b_2;
 1235|  1.79k|    __m128i pred_16x8b_3, pred_8x16b_3, rsd_8x16b_3, out_8x16b_3, out_16x8b_3;
 1236|  1.79k|    __m128i rsd_8x16b_01, rsd_8x16b_23;
 1237|       |    // Broadcast DC term, clamp bounds and an all-zero register used for compares and packing
 1238|  1.79k|    __m128i i_macro_8x16b, dupmax_8x16b, dupmin_8x16b;
 1239|  1.79k|    __m128i zero_8x16b = _mm_setzero_si128();
 1240|  1.79k|    WORD32 i4_nnz, row_01, row_23;
 1241|  1.79k|    WORD32 q0;
 1242|  1.79k|    WORD16 i_macro;
 1243|  1.79k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;  // rounding term 2^(3 - qp_div_6) for the >> 4 inside INV_QUANT; zero once qp_div_6 >= 4
  ------------------
  |  Branch (1243:23): [True: 368, False: 1.42k]
  ------------------
 1244|  1.79k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  1.79k|#define UNUSED(x) ((void)(x))
  ------------------
 1245|       |    // iq_start_idx == 0: dequantize the DC coefficient read from pi2_src[0]; otherwise take q0 from pi2_dc_ld_addr[0]
 1246|  1.79k|    if(iq_start_idx == 0)
  ------------------
  |  Branch (1246:8): [True: 1.79k, False: 0]
  ------------------
 1247|  1.79k|    {
 1248|  1.79k|        q0 = pi2_src[0];
 1249|  1.79k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);  // q0 = ((q0 * iscal * weigh + rnd_fact) << qp_div_6) >> 4, per the expansion below
  ------------------
  |  |  103|  1.79k|                {\
  |  |  104|  1.79k|                    i4_value *= quant_scale;\
  |  |  105|  1.79k|                    i4_value *= weight_scale;\
  |  |  106|  1.79k|                    i4_value += rndfactor;\
  |  |  107|  1.79k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  1.79k|                    i4_value >>= qbits;\
  |  |  109|  1.79k|                }
  ------------------
 1250|  1.79k|    }
 1251|      0|    else
 1252|      0|    {
 1253|      0|        q0 = pi2_dc_ld_addr[0];  // Restoring dc value for intra case3
 1254|      0|    }
 1255|  1.79k|    i_macro = ((q0 + 32) >> 6);  // add 32 then shift right by 6; NOTE(review): result is narrowed to WORD16, assumes it fits - confirm
 1256|       |    // Replicate the DC term and the clamp limits (RSD_MAX = 255, RSD_MIN = -255) across all eight 16-bit lanes
 1257|  1.79k|    i_macro_8x16b = _mm_set1_epi16(i_macro);
 1258|  1.79k|    dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  1.79k|#define RSD_MAX 255
  ------------------
 1259|  1.79k|    dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  1.79k|#define RSD_MIN -255
  ------------------
 1260|       |    // Load 16 bytes from each of the 4 prediction rows; only the low 8 are widened below and only 4 per row reach the output. NOTE(review): reads past the 4 block pixels, presumably the buffers are padded - confirm
 1261|  1.79k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
 1262|  1.79k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
 1263|  1.79k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + (pred_strd << 1)));
 1264|  1.79k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + (pred_strd << 1) + pred_strd));
 1265|       |    // Zero-extend the low 8 prediction bytes of each row to 16-bit lanes
 1266|  1.79k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
 1267|  1.79k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
 1268|  1.79k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
 1269|  1.79k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
 1270|       |    // Load 16 bytes from each of the 4 residual rows, handled below as eight signed 16-bit lanes per row
 1271|  1.79k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
 1272|  1.79k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
 1273|  1.79k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + (rsd_strd << 1)));
 1274|  1.79k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + (rsd_strd << 1) + rsd_strd));
 1275|       |    // Add the same DC term to every residual lane
 1276|  1.79k|    rsd_8x16b_0 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_0);
 1277|  1.79k|    rsd_8x16b_1 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_1);
 1278|  1.79k|    rsd_8x16b_2 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_2);
 1279|  1.79k|    rsd_8x16b_3 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_3);
 1280|       |    // Clamp every lane to [RSD_MIN, RSD_MAX]: min against the upper bound first, then max against the lower bound
 1281|  1.79k|    rsd_8x16b_0 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_0);
 1282|  1.79k|    rsd_8x16b_0 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_0);
 1283|  1.79k|    rsd_8x16b_1 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_1);
 1284|  1.79k|    rsd_8x16b_1 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_1);
 1285|  1.79k|    rsd_8x16b_2 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_2);
 1286|  1.79k|    rsd_8x16b_2 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_2);
 1287|  1.79k|    rsd_8x16b_3 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_3);
 1288|  1.79k|    rsd_8x16b_3 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_3);
 1289|       |    // Join the low four lanes of rows 0/1 and of rows 2/3 so the zero test covers only the 4x4 block, not lanes 4..7
 1290|  1.79k|    rsd_8x16b_01 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
 1291|  1.79k|    rsd_8x16b_23 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
 1292|       |    // cmpeq sets a lane to all ones where it equals zero; test_all_ones then yields 1 only if every lane was zero
 1293|  1.79k|    row_01 = _mm_test_all_ones(
 1294|  1.79k|        _mm_cmpeq_epi16(rsd_8x16b_01, zero_8x16b));  // return 1 if all zeros, else 0
 1295|  1.79k|    row_23 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23, zero_8x16b));
 1296|       |    // Reconstruct in 16-bit lanes: prediction + clamped residual
 1297|  1.79k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
 1298|  1.79k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
 1299|  1.79k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
 1300|  1.79k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
 1301|       |    // Pack to bytes with unsigned saturation (0..255); the upper 8 bytes come from the zero register
 1302|  1.79k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
 1303|  1.79k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
 1304|  1.79k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
 1305|  1.79k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
 1306|       |    // Write only the low 4 bytes (4 pixels) of each row through a WORD32 store. NOTE(review): assumes such stores to pu1_out are acceptable on the target - confirm
 1307|  1.79k|    *((WORD32 *) (pu1_out)) = _mm_cvtsi128_si32(out_16x8b_0);
 1308|  1.79k|    *((WORD32 *) (pu1_out + out_strd)) = _mm_cvtsi128_si32(out_16x8b_1);
 1309|  1.79k|    *((WORD32 *) (pu1_out + (out_strd << 1))) = _mm_cvtsi128_si32(out_16x8b_2);
 1310|  1.79k|    *((WORD32 *) (pu1_out + (out_strd * 3))) = _mm_cvtsi128_si32(out_16x8b_3);
 1311|       |    // Return value: 1 if any clamped residual sample of the 4x4 block is non-zero, 0 if all sixteen are zero
 1312|  1.79k|    i4_nnz = !(row_01 && row_23);
  ------------------
  |  Branch (1312:16): [True: 152, False: 1.64k]
  |  Branch (1312:26): [True: 152, False: 0]
  ------------------
 1313|  1.79k|    return i4_nnz;
 1314|  1.79k|}
isvcd_iquant_itrans_residual_recon_8x8_dc_sse42:
 1343|  2.02k|{
 1344|  2.02k|    __m128i pred_16x8b_0, pred_8x16b_0, rsd_8x16b_0, out_8x16b_0, out_16x8b_0;
 1345|  2.02k|    __m128i pred_16x8b_1, pred_8x16b_1, rsd_8x16b_1, out_8x16b_1, out_16x8b_1;
 1346|  2.02k|    __m128i pred_16x8b_2, pred_8x16b_2, rsd_8x16b_2, out_8x16b_2, out_16x8b_2;
 1347|  2.02k|    __m128i pred_16x8b_3, pred_8x16b_3, rsd_8x16b_3, out_8x16b_3, out_16x8b_3;
 1348|  2.02k|    __m128i pred_16x8b_4, pred_8x16b_4, rsd_8x16b_4, out_8x16b_4, out_16x8b_4;
 1349|  2.02k|    __m128i pred_16x8b_5, pred_8x16b_5, rsd_8x16b_5, out_8x16b_5, out_16x8b_5;
 1350|  2.02k|    __m128i pred_16x8b_6, pred_8x16b_6, rsd_8x16b_6, out_8x16b_6, out_16x8b_6;
 1351|  2.02k|    __m128i pred_16x8b_7, pred_8x16b_7, rsd_8x16b_7, out_8x16b_7, out_16x8b_7;
 1352|  2.02k|    __m128i rsd_8x16b_01_b0, rsd_8x16b_23_b0, rsd_8x16b_45_b2, rsd_8x16b_67_b2;
 1353|  2.02k|    __m128i rsd_8x16b_01_b1, rsd_8x16b_23_b1, rsd_8x16b_45_b3, rsd_8x16b_67_b3;
 1354|       |
 1355|  2.02k|    WORD32 row_01_b0, row_23_b0, row_45_b2, row_67_b2;
 1356|  2.02k|    WORD32 row_01_b1, row_23_b1, row_45_b3, row_67_b3;
 1357|  2.02k|    WORD32 i4_nnz, i4_nnz_b0, i4_nnz_b1, i4_nnz_b2, i4_nnz_b3;
 1358|       |
 1359|  2.02k|    __m128i zero_8x16b = _mm_setzero_si128();
 1360|       |
 1361|  2.02k|    WORD32 pred_strd2 = (pred_strd << 1);
 1362|  2.02k|    WORD32 pred_strd4 = (pred_strd << 2);
 1363|  2.02k|    WORD32 rsd_strd2 = (rsd_strd << 1);
 1364|  2.02k|    WORD32 rsd_strd4 = (rsd_strd << 2);
 1365|  2.02k|    WORD32 out_strd2 = (out_strd << 1);
 1366|  2.02k|    WORD32 out_strd4 = (out_strd << 2);
 1367|       |
 1368|  2.02k|    __m128i i_macro_8x16b, dupmax_8x16b, dupmin_8x16b;
 1369|  2.02k|    WORD32 q;
 1370|  2.02k|    WORD16 i_macro;
 1371|  2.02k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (1371:23): [True: 1.27k, False: 754]
  ------------------
 1372|  2.02k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  2.02k|#define UNUSED(x) ((void)(x))
  ------------------
 1373|  2.02k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  2.02k|#define UNUSED(x) ((void)(x))
  ------------------
 1374|  2.02k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  2.02k|#define UNUSED(x) ((void)(x))
  ------------------
 1375|       |    /*************************************************************/
 1376|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
 1377|       |    /* operations on platform. Note : DC coeff is not scaled     */
 1378|       |    /*************************************************************/
 1379|  2.02k|    q = pi2_src[0];
 1380|  2.02k|    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|  2.02k|                {\
  |  |  104|  2.02k|                    i4_value *= quant_scale;\
  |  |  105|  2.02k|                    i4_value *= weight_scale;\
  |  |  106|  2.02k|                    i4_value += rndfactor;\
  |  |  107|  2.02k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  2.02k|                    i4_value >>= qbits;\
  |  |  109|  2.02k|                }
  ------------------
 1381|  2.02k|    i_macro = (q + 32) >> 6;
 1382|       |
 1383|  2.02k|    i_macro_8x16b = _mm_set1_epi16(i_macro);
 1384|  2.02k|    dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  2.02k|#define RSD_MAX 255
  ------------------
 1385|  2.02k|    dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  2.02k|#define RSD_MIN -255
  ------------------
 1386|       |
 1387|  2.02k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
 1388|  2.02k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
 1389|  2.02k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2));
 1390|  2.02k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2 + pred_strd));
 1391|  2.02k|    pred_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4));
 1392|  2.02k|    pred_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd));
 1393|  2.02k|    pred_16x8b_6 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2));
 1394|  2.02k|    pred_16x8b_7 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2 + pred_strd));
 1395|       |
 1396|  2.02k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
 1397|  2.02k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
 1398|  2.02k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
 1399|  2.02k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
 1400|  2.02k|    pred_8x16b_4 = _mm_cvtepu8_epi16(pred_16x8b_4);
 1401|  2.02k|    pred_8x16b_5 = _mm_cvtepu8_epi16(pred_16x8b_5);
 1402|  2.02k|    pred_8x16b_6 = _mm_cvtepu8_epi16(pred_16x8b_6);
 1403|  2.02k|    pred_8x16b_7 = _mm_cvtepu8_epi16(pred_16x8b_7);
 1404|       |
 1405|  2.02k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
 1406|  2.02k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
 1407|  2.02k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2));
 1408|  2.02k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2 + rsd_strd));
 1409|  2.02k|    rsd_8x16b_4 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4));
 1410|  2.02k|    rsd_8x16b_5 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd));
 1411|  2.02k|    rsd_8x16b_6 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2));
 1412|  2.02k|    rsd_8x16b_7 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2 + rsd_strd));
 1413|       |
 1414|  2.02k|    rsd_8x16b_0 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_0);
 1415|  2.02k|    rsd_8x16b_1 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_1);
 1416|  2.02k|    rsd_8x16b_2 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_2);
 1417|  2.02k|    rsd_8x16b_3 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_3);
 1418|  2.02k|    rsd_8x16b_4 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_4);
 1419|  2.02k|    rsd_8x16b_5 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_5);
 1420|  2.02k|    rsd_8x16b_6 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_6);
 1421|  2.02k|    rsd_8x16b_7 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_7);
 1422|       |
 1423|  2.02k|    rsd_8x16b_0 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_0);
 1424|  2.02k|    rsd_8x16b_0 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_0);
 1425|  2.02k|    rsd_8x16b_1 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_1);
 1426|  2.02k|    rsd_8x16b_1 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_1);
 1427|  2.02k|    rsd_8x16b_2 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_2);
 1428|  2.02k|    rsd_8x16b_2 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_2);
 1429|  2.02k|    rsd_8x16b_3 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_3);
 1430|  2.02k|    rsd_8x16b_3 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_3);
 1431|  2.02k|    rsd_8x16b_4 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_4);
 1432|  2.02k|    rsd_8x16b_4 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_4);
 1433|  2.02k|    rsd_8x16b_5 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_5);
 1434|  2.02k|    rsd_8x16b_5 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_5);
 1435|  2.02k|    rsd_8x16b_6 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_6);
 1436|  2.02k|    rsd_8x16b_6 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_6);
 1437|  2.02k|    rsd_8x16b_7 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_7);
 1438|  2.02k|    rsd_8x16b_7 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_7);
 1439|       |
 1440|  2.02k|    rsd_8x16b_01_b0 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
 1441|  2.02k|    rsd_8x16b_23_b0 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
 1442|  2.02k|    rsd_8x16b_01_b1 = _mm_unpackhi_epi64(rsd_8x16b_0, rsd_8x16b_1);
 1443|  2.02k|    rsd_8x16b_23_b1 = _mm_unpackhi_epi64(rsd_8x16b_2, rsd_8x16b_3);
 1444|       |
 1445|  2.02k|    rsd_8x16b_45_b2 = _mm_unpacklo_epi64(rsd_8x16b_4, rsd_8x16b_5);
 1446|  2.02k|    rsd_8x16b_67_b2 = _mm_unpacklo_epi64(rsd_8x16b_6, rsd_8x16b_7);
 1447|  2.02k|    rsd_8x16b_45_b3 = _mm_unpackhi_epi64(rsd_8x16b_4, rsd_8x16b_5);
 1448|  2.02k|    rsd_8x16b_67_b3 = _mm_unpackhi_epi64(rsd_8x16b_6, rsd_8x16b_7);
 1449|       |
 1450|  2.02k|    row_01_b0 = _mm_test_all_ones(rsd_8x16b_01_b0);  // return 1 if all zeros, else 0
 1451|  2.02k|    row_23_b0 = _mm_test_all_ones(rsd_8x16b_23_b0);
 1452|  2.02k|    row_01_b1 = _mm_test_all_ones(rsd_8x16b_01_b1);
 1453|  2.02k|    row_23_b1 = _mm_test_all_ones(rsd_8x16b_23_b1);
 1454|       |
 1455|  2.02k|    row_45_b2 = _mm_test_all_ones(rsd_8x16b_45_b2);
 1456|  2.02k|    row_67_b2 = _mm_test_all_ones(rsd_8x16b_67_b2);
 1457|  2.02k|    row_45_b3 = _mm_test_all_ones(rsd_8x16b_45_b3);
 1458|  2.02k|    row_67_b3 = _mm_test_all_ones(rsd_8x16b_67_b3);
 1459|       |
 1460|  2.02k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
 1461|  2.02k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
 1462|  2.02k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
 1463|  2.02k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
 1464|  2.02k|    out_8x16b_4 = _mm_add_epi16(pred_8x16b_4, rsd_8x16b_4);
 1465|  2.02k|    out_8x16b_5 = _mm_add_epi16(pred_8x16b_5, rsd_8x16b_5);
 1466|  2.02k|    out_8x16b_6 = _mm_add_epi16(pred_8x16b_6, rsd_8x16b_6);
 1467|  2.02k|    out_8x16b_7 = _mm_add_epi16(pred_8x16b_7, rsd_8x16b_7);
 1468|       |
 1469|  2.02k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
 1470|  2.02k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
 1471|  2.02k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
 1472|  2.02k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
 1473|  2.02k|    out_16x8b_4 = _mm_packus_epi16(out_8x16b_4, zero_8x16b);
 1474|  2.02k|    out_16x8b_5 = _mm_packus_epi16(out_8x16b_5, zero_8x16b);
 1475|  2.02k|    out_16x8b_6 = _mm_packus_epi16(out_8x16b_6, zero_8x16b);
 1476|  2.02k|    out_16x8b_7 = _mm_packus_epi16(out_8x16b_7, zero_8x16b);
 1477|       |
 1478|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
 1479|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
 1480|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2), out_16x8b_2);
 1481|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2 + out_strd), out_16x8b_3);
 1482|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4), out_16x8b_4);
 1483|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd), out_16x8b_5);
 1484|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2), out_16x8b_6);
 1485|  2.02k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2 + out_strd), out_16x8b_7);
 1486|       |
 1487|  2.02k|    i4_nnz_b0 = (!(row_01_b0 && row_23_b0));
  ------------------
  |  Branch (1487:20): [True: 424, False: 1.60k]
  |  Branch (1487:33): [True: 424, False: 0]
  ------------------
 1488|  2.02k|    i4_nnz_b1 = (!(row_01_b1 && row_23_b1)) << 1;
  ------------------
  |  Branch (1488:20): [True: 424, False: 1.60k]
  |  Branch (1488:33): [True: 424, False: 0]
  ------------------
 1489|  2.02k|    i4_nnz_b2 = (!(row_45_b2 && row_67_b2)) << 4;
  ------------------
  |  Branch (1489:20): [True: 424, False: 1.60k]
  |  Branch (1489:33): [True: 424, False: 0]
  ------------------
 1490|  2.02k|    i4_nnz_b3 = (!(row_45_b3 && row_67_b3)) << 5;
  ------------------
  |  Branch (1490:20): [True: 424, False: 1.60k]
  |  Branch (1490:33): [True: 424, False: 0]
  ------------------
 1491|       |
 1492|  2.02k|    i4_nnz = (i4_nnz_b0 | i4_nnz_b1 | i4_nnz_b2 | i4_nnz_b3);
 1493|  2.02k|    return i4_nnz;
 1494|  2.02k|}
isvcd_iquant_itrans_residual_recon_chroma_4x4_dc_sse42:
 1523|  42.2k|{
 1524|  42.2k|    __m128i pred_16x8b_0, pred_8x16b_0, rsd_8x16b_0, rsd_16x8b_0, out_16x8b_0;
 1525|  42.2k|    __m128i pred_16x8b_1, pred_8x16b_1, rsd_8x16b_1, rsd_16x8b_1, out_16x8b_1;
 1526|  42.2k|    __m128i pred_16x8b_2, pred_8x16b_2, rsd_8x16b_2, rsd_16x8b_2, out_16x8b_2;
 1527|  42.2k|    __m128i pred_16x8b_3, pred_8x16b_3, rsd_8x16b_3, rsd_16x8b_3, out_16x8b_3;
 1528|       |
 1529|  42.2k|    __m128i i_macro_8x16b, dupmax_8x16b, dupmin_8x16b;
 1530|  42.2k|    __m128i chroma_mask, chroma_mask2;
 1531|  42.2k|    __m128i zero_8x16b = _mm_setzero_si128();
 1532|  42.2k|    WORD32 q0;
 1533|  42.2k|    WORD16 i_macro;
 1534|  42.2k|    UNUSED(pi2_src);
  ------------------
  |  |   45|  42.2k|#define UNUSED(x) ((void)(x))
  ------------------
 1535|  42.2k|    UNUSED(pu2_iscal_mat);
  ------------------
  |  |   45|  42.2k|#define UNUSED(x) ((void)(x))
  ------------------
 1536|  42.2k|    UNUSED(pu2_weigh_mat);
  ------------------
  |  |   45|  42.2k|#define UNUSED(x) ((void)(x))
  ------------------
 1537|  42.2k|    UNUSED(u4_qp_div_6);
  ------------------
  |  |   45|  42.2k|#define UNUSED(x) ((void)(x))
  ------------------
 1538|  42.2k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  42.2k|#define UNUSED(x) ((void)(x))
  ------------------
 1539|       |
 1540|  42.2k|    q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
 1541|  42.2k|    i_macro = ((q0 + 32) >> 6);
 1542|       |
 1543|  42.2k|    i_macro_8x16b = _mm_set1_epi16(i_macro);
 1544|  42.2k|    dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  42.2k|#define RSD_MAX 255
  ------------------
 1545|  42.2k|    dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  42.2k|#define RSD_MIN -255
  ------------------
 1546|       |
 1547|  42.2k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
 1548|  42.2k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
 1549|  42.2k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + (pred_strd << 1)));
 1550|  42.2k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + (pred_strd << 1) + pred_strd));
 1551|       |
 1552|  42.2k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
 1553|  42.2k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
 1554|  42.2k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
 1555|  42.2k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
 1556|       |
 1557|  42.2k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
 1558|  42.2k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
 1559|  42.2k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + (rsd_strd << 1)));
 1560|  42.2k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + (rsd_strd << 1) + rsd_strd));
 1561|       |
 1562|  42.2k|    rsd_8x16b_0 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_0);
 1563|  42.2k|    rsd_8x16b_1 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_1);
 1564|  42.2k|    rsd_8x16b_2 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_2);
 1565|  42.2k|    rsd_8x16b_3 = _mm_add_epi16(i_macro_8x16b, rsd_8x16b_3);
 1566|       |
 1567|  42.2k|    rsd_8x16b_0 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_0);
 1568|  42.2k|    rsd_8x16b_0 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_0);
 1569|  42.2k|    rsd_8x16b_1 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_1);
 1570|  42.2k|    rsd_8x16b_1 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_1);
 1571|  42.2k|    rsd_8x16b_2 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_2);
 1572|  42.2k|    rsd_8x16b_2 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_2);
 1573|  42.2k|    rsd_8x16b_3 = _mm_min_epi16(dupmax_8x16b, rsd_8x16b_3);
 1574|  42.2k|    rsd_8x16b_3 = _mm_max_epi16(dupmin_8x16b, rsd_8x16b_3);
 1575|       |
 1576|  42.2k|    rsd_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
 1577|  42.2k|    rsd_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
 1578|  42.2k|    rsd_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
 1579|  42.2k|    rsd_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
 1580|       |
 1581|  42.2k|    chroma_mask = _mm_set1_epi16(0xFF00);
 1582|  42.2k|    chroma_mask2 = _mm_set1_epi16(0x00FF);
 1583|  42.2k|    out_16x8b_0 = _mm_loadu_si128((__m128i *) (&pu1_out[0]));
 1584|  42.2k|    out_16x8b_1 = _mm_loadu_si128((__m128i *) (&pu1_out[out_strd]));
 1585|  42.2k|    out_16x8b_2 = _mm_loadu_si128((__m128i *) (&pu1_out[(out_strd << 1)]));
 1586|  42.2k|    out_16x8b_3 = _mm_loadu_si128((__m128i *) (&pu1_out[(out_strd << 1) + out_strd]));
 1587|       |
 1588|  42.2k|    out_16x8b_0 = _mm_and_si128(out_16x8b_0, chroma_mask);
 1589|  42.2k|    out_16x8b_1 = _mm_and_si128(out_16x8b_1, chroma_mask);
 1590|  42.2k|    out_16x8b_2 = _mm_and_si128(out_16x8b_2, chroma_mask);
 1591|  42.2k|    out_16x8b_3 = _mm_and_si128(out_16x8b_3, chroma_mask);
 1592|       |
 1593|  42.2k|    rsd_16x8b_0 = _mm_packus_epi16(rsd_8x16b_0, zero_8x16b);
 1594|  42.2k|    rsd_16x8b_1 = _mm_packus_epi16(rsd_8x16b_1, zero_8x16b);
 1595|  42.2k|    rsd_16x8b_2 = _mm_packus_epi16(rsd_8x16b_2, zero_8x16b);
 1596|  42.2k|    rsd_16x8b_3 = _mm_packus_epi16(rsd_8x16b_3, zero_8x16b);
 1597|       |
 1598|  42.2k|    rsd_8x16b_0 = _mm_and_si128(rsd_16x8b_0, chroma_mask2);
 1599|  42.2k|    rsd_8x16b_1 = _mm_and_si128(rsd_16x8b_1, chroma_mask2);
 1600|  42.2k|    rsd_8x16b_2 = _mm_and_si128(rsd_16x8b_2, chroma_mask2);
 1601|  42.2k|    rsd_8x16b_3 = _mm_and_si128(rsd_16x8b_3, chroma_mask2);
 1602|       |
 1603|  42.2k|    out_16x8b_0 = _mm_add_epi8(rsd_8x16b_0, out_16x8b_0);
 1604|  42.2k|    out_16x8b_1 = _mm_add_epi8(rsd_8x16b_1, out_16x8b_1);
 1605|  42.2k|    out_16x8b_2 = _mm_add_epi8(rsd_8x16b_2, out_16x8b_2);
 1606|  42.2k|    out_16x8b_3 = _mm_add_epi8(rsd_8x16b_3, out_16x8b_3);
 1607|       |
 1608|  42.2k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
 1609|  42.2k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
 1610|  42.2k|    _mm_storel_epi64((__m128i *) (pu1_out + (out_strd << 1)), out_16x8b_2);
 1611|  42.2k|    _mm_storel_epi64((__m128i *) (pu1_out + (out_strd * 3)), out_16x8b_3);
 1612|  42.2k|}
isvcd_iquant_itrans_residual_recon_chroma_4x4_sse42:
 1641|  4.04k|{
 1642|  4.04k|    __m128i src_r0_r1, src_r2_r3;
 1643|  4.04k|    __m128i src_r0, src_r1, src_r2, src_r3;
 1644|  4.04k|    __m128i scalemat_r0_r1, scalemat_r2_r3;
 1645|  4.04k|    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
 1646|  4.04k|    __m128i rsd_r0, rsd_r1, rsd_r2, rsd_r3;
 1647|  4.04k|    __m128i dequant_r0_r1, dequant_r2_r3;
 1648|  4.04k|    __m128i zero_8x16b = _mm_setzero_si128();  // all bits reset to zero
 1649|  4.04k|    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 1650|  4.04k|    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
 1651|  4.04k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (1651:41): [True: 2.42k, False: 1.61k]
  ------------------
 1652|  4.04k|    __m128i value_32 = _mm_set1_epi32(32);
 1653|  4.04k|    __m128i chroma_mask = _mm_set1_epi16(0xFF00);
 1654|  4.04k|    __m128i chroma_mask2 = _mm_set1_epi16(0x00FF);
 1655|  4.04k|    __m128i dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  4.04k|#define RSD_MAX 255
  ------------------
 1656|  4.04k|    __m128i dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  4.04k|#define RSD_MIN -255
  ------------------
 1657|       |
 1658|  4.04k|    __m128i out_16x8b_0, out_16x8b_1, out_16x8b_2, out_16x8b_3;
 1659|       |
 1660|  4.04k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  4.04k|#define UNUSED(x) ((void)(x))
  ------------------
 1661|       |
 1662|       |    /*************************************************************/
 1663|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
 1664|       |    /* operations on platform                                    */
 1665|       |    /*************************************************************/
 1666|       |    // a00 a01 a02 a03 a10 a11 a12 a13  -- the source matrix 0th,1st row
 1667|  4.04k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));
 1668|       |    // a20 a21 a22 a23 a30 a31 a32 a33 --  the source matrix 2nd,3rd row
 1669|  4.04k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
 1670|       |    // b00 b01 b02 b03 b10 b11 b12 b13 -- the  scaling matrix 0th,1st row
 1671|  4.04k|    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat));
 1672|       |    // b20 b21 b22 b23 b30 b31 b32 b33 -- the  scaling matrix 2nd,3rd row
 1673|  4.04k|    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8));
 1674|       |    // q00 q01 q02 q03 q10 q11  q12 q13 -- all 16 bits
 1675|  4.04k|    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat));
 1676|       |    // q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
 1677|  4.04k|    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8));
 1678|       |
 1679|       |    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10  b11*q11 b12*q12 b13*q13 -- 16 bit result
 1680|  4.04k|    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1);
 1681|       |    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
 1682|  4.04k|    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3);
 1683|       |    // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
 1684|  4.04k|    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b);
 1685|       |    // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
 1686|  4.04k|    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b);
 1687|       |    // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
 1688|  4.04k|    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b);
 1689|       |    // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
 1690|  4.04k|    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b);
 1691|       |
 1692|  4.04k|    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
 1693|  4.04k|    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b);  // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
 1694|  4.04k|    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b);  // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
 1695|  4.04k|    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b);  // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
 1696|       |
 1697|       |    // a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long
 1698|  4.04k|    temp4 = _mm_madd_epi16(src_r0, temp4);
 1699|  4.04k|    temp5 = _mm_madd_epi16(src_r1, temp5);
 1700|  4.04k|    temp6 = _mm_madd_epi16(src_r2, temp6);
 1701|  4.04k|    temp7 = _mm_madd_epi16(src_r3, temp7);
 1702|       |
 1703|  4.04k|    if(u4_qp_div_6 >= 4)
  ------------------
  |  Branch (1703:8): [True: 1.61k, False: 2.42k]
  ------------------
 1704|  1.61k|    {
 1705|  1.61k|        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
 1706|  1.61k|        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
 1707|  1.61k|        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
 1708|  1.61k|        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
 1709|  1.61k|    }
 1710|  2.42k|    else
 1711|  2.42k|    {
 1712|  2.42k|        temp4 = _mm_add_epi32(temp4, add_rshift);
 1713|  2.42k|        temp5 = _mm_add_epi32(temp5, add_rshift);
 1714|  2.42k|        temp6 = _mm_add_epi32(temp6, add_rshift);
 1715|  2.42k|        temp7 = _mm_add_epi32(temp7, add_rshift);
 1716|  2.42k|        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
 1717|  2.42k|        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
 1718|  2.42k|        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
 1719|  2.42k|        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
 1720|  2.42k|    }
 1721|       |
 1722|  4.04k|    resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0);
 1723|       |    /* Perform Inverse transform */
 1724|       |    /*-------------------------------------------------------------*/
 1725|       |    /* IDCT [ Horizontal transformation ]                          */
 1726|       |    /*-------------------------------------------------------------*/
 1727|       |    // Matrix transpose
 1728|       |    /*
 1729|       |     *  a0 a1 a2 a3
 1730|       |     *  b0 b1 b2 b3
 1731|       |     *  c0 c1 c2 c3
 1732|       |     *  d0 d1 d2 d3
 1733|       |     */
 1734|  4.04k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 b0 a1 b1
 1735|  4.04k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // c0 d0 c1 d1
 1736|  4.04k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // a2 b2 a3 b3
 1737|  4.04k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 d2 c3 d3
 1738|  4.04k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 b0 c0 d0
 1739|  4.04k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // a1 b1 c1 d1
 1740|  4.04k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // a2 b2 c2 d2
 1741|  4.04k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // a3 b3 c3 d3
 1742|       |    // Transform starts -- horizontal transform
 1743|       |    /*------------------------------------------------------------------*/
 1744|       |    /* z0 = w0 + w2                                             */
 1745|  4.04k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
 1746|       |    /* z1 = w0 - w2                                             */
 1747|  4.04k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
 1748|       |    /* z2 = (w1 >> 1) - w3                                      */
 1749|  4.04k|    temp2 = _mm_srai_epi32(resq_r1, 1);     //(w1>>1)
 1750|  4.04k|    temp2 = _mm_sub_epi32(temp2, resq_r3);  //(w1>>1) - w3
 1751|       |    /* z3 = w1 + (w3 >> 1)                                      */
 1752|  4.04k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(w3>>1) + w1
 1753|  4.04k|    temp3 = _mm_add_epi32(temp3, resq_r1);
 1754|       |    /*----------------------------------------------------------*/
 1755|       |    /* x0 = z0 + z3                                             */
 1756|  4.04k|    resq_r0 = _mm_add_epi32(temp0, temp3);
 1757|       |    /* x1 = z1 + z2                                             */
 1758|  4.04k|    resq_r1 = _mm_add_epi32(temp1, temp2);
 1759|       |    /* x2 = z1 - z2                                             */
 1760|  4.04k|    resq_r2 = _mm_sub_epi32(temp1, temp2);
 1761|       |    /* x3 = z0 - z3                                             */
 1762|  4.04k|    resq_r3 = _mm_sub_epi32(temp0, temp3);
 1763|       |    // Matrix transpose
 1764|       |    /*
 1765|       |     *  a0 b0 c0 d0
 1766|       |     *  a1 b1 c1 d1
 1767|       |     *  a2 b2 c2 d2
 1768|       |     *  a3 b3 c3 d3
 1769|       |     */
 1770|  4.04k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 a1 b0 b1
 1771|  4.04k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // a2 a3 b2 b3
 1772|  4.04k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // c0 c1 d0 d1
 1773|  4.04k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 c3 d2 d3
 1774|  4.04k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 a1 a2 a3
 1775|  4.04k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // b0 b1 b2 b3
 1776|  4.04k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // c0 c1 c2 c3
 1777|  4.04k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // d0 d1 d2 d3
 1778|       |    // Transform ends -- horizontal transform
 1779|       |
 1780|       |    // Load pred buffer
 1781|       |    // p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
 1782|  4.04k|    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
 1783|       |    // p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
 1784|  4.04k|    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd]));
 1785|       |    // p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
 1786|  4.04k|    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd]));
 1787|       |    // p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
 1788|  4.04k|    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd]));
 1789|       |
 1790|  4.04k|    pred_r0 = _mm_cvtepu8_epi16(pred_r0);  // p00 p01 p02 p03 -- all 16 bits
 1791|  4.04k|    pred_r1 = _mm_cvtepu8_epi16(pred_r1);  // p10 p11 p12 p13 -- all 16 bits
 1792|  4.04k|    pred_r2 = _mm_cvtepu8_epi16(pred_r2);  // p20 p21 p22 p23 -- all 16 bits
 1793|  4.04k|    pred_r3 = _mm_cvtepu8_epi16(pred_r3);  // p30 p31 p32 p33 -- all 16 bits
 1794|       |
 1795|       |    // Load resd buffer
 1796|  4.04k|    rsd_r0 = _mm_loadu_si128((__m128i *) (&pi2_rsd[0]));
 1797|  4.04k|    rsd_r1 = _mm_loadu_si128((__m128i *) (&pi2_rsd[rsd_strd]));
 1798|  4.04k|    rsd_r2 = _mm_loadu_si128((__m128i *) (&pi2_rsd[2 * rsd_strd]));
 1799|  4.04k|    rsd_r3 = _mm_loadu_si128((__m128i *) (&pi2_rsd[3 * rsd_strd]));
 1800|       |
 1801|       |    /*--------------------------------------------------------------*/
 1802|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
 1803|       |    /*                                                              */
 1804|       |    /* Add the prediction and store it back to same buffer          */
 1805|       |    /*--------------------------------------------------------------*/
 1806|       |    /* z0j = y0j + y2j                                                        */
 1807|  4.04k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
 1808|       |    /* z1j = y0j - y2j                                                        */
 1809|  4.04k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
 1810|       |    /* z2j = (y1j>>1) - y3j */
 1811|  4.04k|    temp2 = _mm_srai_epi32(resq_r1, 1);  //(y1j>>1)
 1812|  4.04k|    temp2 = _mm_sub_epi32(temp2, resq_r3);
 1813|       |    /* z3j = y1j + (y3j>>1) */
 1814|  4.04k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(y3j>>1)
 1815|  4.04k|    temp3 = _mm_add_epi32(temp3, resq_r1);
 1816|       |
 1817|       |    /* x0j = z0j + z3j                                                        */
 1818|  4.04k|    temp4 = _mm_add_epi32(temp0, temp3);
 1819|  4.04k|    temp4 = _mm_add_epi32(temp4, value_32);
 1820|  4.04k|    temp4 = _mm_srai_epi32(temp4, 6);
 1821|  4.04k|    temp4 = _mm_add_epi16(temp4, rsd_r0);
 1822|  4.04k|    temp4 = _mm_min_epi16(dupmax_8x16b, temp4);
 1823|  4.04k|    temp4 = _mm_max_epi16(dupmin_8x16b, temp4);
 1824|  4.04k|    temp4 = _mm_add_epi16(temp4, pred_r0);
 1825|       |    /* x1j = z1j + z2j                                                        */
 1826|  4.04k|    temp5 = _mm_add_epi32(temp1, temp2);
 1827|  4.04k|    temp5 = _mm_add_epi32(temp5, value_32);
 1828|  4.04k|    temp5 = _mm_srai_epi32(temp5, 6);
 1829|  4.04k|    temp5 = _mm_add_epi16(temp5, rsd_r1);
 1830|  4.04k|    temp5 = _mm_min_epi16(dupmax_8x16b, temp5);
 1831|  4.04k|    temp5 = _mm_max_epi16(dupmin_8x16b, temp5);
 1832|  4.04k|    temp5 = _mm_add_epi16(temp5, pred_r1);
 1833|       |    /* x2j = z1j - z2j                                                        */
 1834|  4.04k|    temp6 = _mm_sub_epi32(temp1, temp2);
 1835|  4.04k|    temp6 = _mm_add_epi32(temp6, value_32);
 1836|  4.04k|    temp6 = _mm_srai_epi32(temp6, 6);
 1837|  4.04k|    temp6 = _mm_add_epi16(temp6, rsd_r2);
 1838|  4.04k|    temp6 = _mm_min_epi16(dupmax_8x16b, temp6);
 1839|  4.04k|    temp6 = _mm_max_epi16(dupmin_8x16b, temp6);
 1840|  4.04k|    temp6 = _mm_add_epi16(temp6, pred_r2);
 1841|       |    /* x3j = z0j - z3j                                                        */
 1842|  4.04k|    temp7 = _mm_sub_epi32(temp0, temp3);
 1843|  4.04k|    temp7 = _mm_add_epi32(temp7, value_32);
 1844|  4.04k|    temp7 = _mm_srai_epi32(temp7, 6);
 1845|  4.04k|    temp7 = _mm_add_epi16(temp7, rsd_r3);
 1846|  4.04k|    temp7 = _mm_min_epi16(dupmax_8x16b, temp7);
 1847|  4.04k|    temp7 = _mm_max_epi16(dupmin_8x16b, temp7);
 1848|  4.04k|    temp7 = _mm_add_epi16(temp7, pred_r3);
 1849|       |
 1850|  4.04k|    out_16x8b_0 = _mm_loadu_si128((__m128i *) (&pu1_out[0]));
 1851|  4.04k|    out_16x8b_1 = _mm_loadu_si128((__m128i *) (&pu1_out[out_strd]));
 1852|  4.04k|    out_16x8b_2 = _mm_loadu_si128((__m128i *) (&pu1_out[(out_strd << 1)]));
 1853|  4.04k|    out_16x8b_3 = _mm_loadu_si128((__m128i *) (&pu1_out[(out_strd << 1) + out_strd]));
 1854|       |
 1855|  4.04k|    out_16x8b_0 = _mm_and_si128(out_16x8b_0, chroma_mask);
 1856|  4.04k|    out_16x8b_1 = _mm_and_si128(out_16x8b_1, chroma_mask);
 1857|  4.04k|    out_16x8b_2 = _mm_and_si128(out_16x8b_2, chroma_mask);
 1858|  4.04k|    out_16x8b_3 = _mm_and_si128(out_16x8b_3, chroma_mask);
 1859|       |
 1860|  4.04k|    temp4 = _mm_packus_epi16(temp4, zero_8x16b);
 1861|  4.04k|    temp5 = _mm_packus_epi16(temp5, zero_8x16b);
 1862|  4.04k|    temp6 = _mm_packus_epi16(temp6, zero_8x16b);
 1863|  4.04k|    temp7 = _mm_packus_epi16(temp7, zero_8x16b);
 1864|       |
 1865|  4.04k|    temp4 = _mm_and_si128(temp4, chroma_mask2);
 1866|  4.04k|    temp5 = _mm_and_si128(temp5, chroma_mask2);
 1867|  4.04k|    temp6 = _mm_and_si128(temp6, chroma_mask2);
 1868|  4.04k|    temp7 = _mm_and_si128(temp7, chroma_mask2);
 1869|       |
 1870|  4.04k|    out_16x8b_0 = _mm_add_epi8(temp4, out_16x8b_0);
 1871|  4.04k|    out_16x8b_1 = _mm_add_epi8(temp5, out_16x8b_1);
 1872|  4.04k|    out_16x8b_2 = _mm_add_epi8(temp6, out_16x8b_2);
 1873|  4.04k|    out_16x8b_3 = _mm_add_epi8(temp7, out_16x8b_3);
 1874|       |
 1875|  4.04k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
 1876|  4.04k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
 1877|  4.04k|    _mm_storel_epi64((__m128i *) (pu1_out + (out_strd << 1)), out_16x8b_2);
 1878|  4.04k|    _mm_storel_epi64((__m128i *) (pu1_out + (out_strd * 3)), out_16x8b_3);
 1879|  4.04k|}

isvcd_iquant_itrans_4x4_dc_sse42:
   84|  2.13k|{
   85|  2.13k|    WORD32 q0;
   86|  2.13k|    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
  ------------------
  |  Branch (86:23): [True: 559, False: 1.57k]
  ------------------
   87|  2.13k|    __m128i dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  2.13k|#define RSD_MAX 255
  ------------------
   88|  2.13k|    __m128i dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  2.13k|#define RSD_MIN -255
  ------------------
   89|  2.13k|    __m128i i_macro;
   90|  2.13k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  2.13k|#define UNUSED(x) ((void)(x))
  ------------------
   91|       |
   92|  2.13k|    if(iq_start_idx == 0)
  ------------------
  |  Branch (92:8): [True: 2.13k, False: 0]
  ------------------
   93|  2.13k|    {
   94|  2.13k|        q0 = pi2_src[0];
   95|  2.13k|        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
  ------------------
  |  |  103|  2.13k|                {\
  |  |  104|  2.13k|                    i4_value *= quant_scale;\
  |  |  105|  2.13k|                    i4_value *= weight_scale;\
  |  |  106|  2.13k|                    i4_value += rndfactor;\
  |  |  107|  2.13k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  2.13k|                    i4_value >>= qbits;\
  |  |  109|  2.13k|                }
  ------------------
   96|  2.13k|    }
   97|      0|    else
   98|      0|    {
   99|      0|        q0 = pi2_dc_ld_addr[0];  // Restoring dc value for intra case3
  100|      0|    }
  101|       |
  102|  2.13k|    i_macro = _mm_set1_epi16((q0 + 32) >> 6);
  103|  2.13k|    i_macro = _mm_min_epi16(dupmax_8x16b, i_macro);
  104|  2.13k|    i_macro = _mm_max_epi16(dupmin_8x16b, i_macro);
  105|       |
  106|  2.13k|    _mm_storel_epi64((__m128i *) pi2_out, i_macro);
  107|  2.13k|    pi2_out += out_strd;
  108|  2.13k|    _mm_storel_epi64((__m128i *) pi2_out, i_macro);
  109|  2.13k|    pi2_out += out_strd;
  110|  2.13k|    _mm_storel_epi64((__m128i *) pi2_out, i_macro);
  111|  2.13k|    pi2_out += out_strd;
  112|  2.13k|    _mm_storel_epi64((__m128i *) pi2_out, i_macro);
  113|  2.13k|}
isvcd_iquant_itrans_8x8_dc_sse42:
  142|  1.00k|{
  143|  1.00k|    WORD32 q;
  144|  1.00k|    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
  ------------------
  |  Branch (144:23): [True: 546, False: 460]
  ------------------
  145|  1.00k|    __m128i dupmin_8x16b, dupmax_8x16b, i_macro;
  146|       |
  147|  1.00k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  1.00k|#define UNUSED(x) ((void)(x))
  ------------------
  148|  1.00k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  1.00k|#define UNUSED(x) ((void)(x))
  ------------------
  149|  1.00k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  1.00k|#define UNUSED(x) ((void)(x))
  ------------------
  150|       |
  151|  1.00k|    q = pi2_src[0];
  152|  1.00k|    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
  ------------------
  |  |  103|  1.00k|                {\
  |  |  104|  1.00k|                    i4_value *= quant_scale;\
  |  |  105|  1.00k|                    i4_value *= weight_scale;\
  |  |  106|  1.00k|                    i4_value += rndfactor;\
  |  |  107|  1.00k|                    i4_value <<= u4_qp_div_6;\
  |  |  108|  1.00k|                    i4_value >>= qbits;\
  |  |  109|  1.00k|                }
  ------------------
  153|       |
  154|  1.00k|    i_macro = _mm_set1_epi16((q + 32) >> 6);
  155|  1.00k|    dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  1.00k|#define RSD_MAX 255
  ------------------
  156|  1.00k|    dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  1.00k|#define RSD_MIN -255
  ------------------
  157|       |
  158|  1.00k|    i_macro = _mm_min_epi16(dupmax_8x16b, i_macro);
  159|  1.00k|    i_macro = _mm_max_epi16(dupmin_8x16b, i_macro);
  160|       |
  161|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  162|  1.00k|    pi2_out += out_strd;
  163|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  164|  1.00k|    pi2_out += out_strd;
  165|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  166|  1.00k|    pi2_out += out_strd;
  167|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  168|  1.00k|    pi2_out += out_strd;
  169|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  170|  1.00k|    pi2_out += out_strd;
  171|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  172|  1.00k|    pi2_out += out_strd;
  173|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  174|  1.00k|    pi2_out += out_strd;
  175|  1.00k|    _mm_storeu_si128((__m128i *) pi2_out, i_macro);
  176|  1.00k|}
isvcd_iquant_itrans_chroma_4x4_dc_sse42:
  204|  4.22k|{
  205|  4.22k|    __m128i src_r0, src_r1, src_r2, src_r3;
  206|  4.22k|    __m128i i_macro = _mm_set1_epi16((pi2_dc_src[0] + 32) >> 6);
  207|  4.22k|    __m128i chroma_mask_even, chroma_mask_odd;
  208|  4.22k|    __m128i dupmax_8x16b = _mm_set1_epi16(RSD_MAX);
  ------------------
  |  |  772|  4.22k|#define RSD_MAX 255
  ------------------
  209|  4.22k|    __m128i dupmin_8x16b = _mm_set1_epi16(RSD_MIN);
  ------------------
  |  |  773|  4.22k|#define RSD_MIN -255
  ------------------
  210|       |
  211|  4.22k|    UNUSED(pi2_src);
  ------------------
  |  |   45|  4.22k|#define UNUSED(x) ((void)(x))
  ------------------
  212|  4.22k|    UNUSED(pu2_iscal_mat);
  ------------------
  |  |   45|  4.22k|#define UNUSED(x) ((void)(x))
  ------------------
  213|  4.22k|    UNUSED(pu2_weigh_mat);
  ------------------
  |  |   45|  4.22k|#define UNUSED(x) ((void)(x))
  ------------------
  214|  4.22k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  4.22k|#define UNUSED(x) ((void)(x))
  ------------------
  215|  4.22k|    UNUSED(u4_qp_div_6);
  ------------------
  |  |   45|  4.22k|#define UNUSED(x) ((void)(x))
  ------------------
  216|       |
  217|  4.22k|    i_macro = _mm_min_epi16(dupmax_8x16b, i_macro);
  218|  4.22k|    i_macro = _mm_max_epi16(dupmin_8x16b, i_macro);
  219|       |
  220|       |    // a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
  221|  4.22k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_out));
  222|       |    // a20 a21 a22 a23 a30 a31 a32 a33  -- the source matrix 2nd,3rd row
  223|  4.22k|    src_r1 = _mm_loadu_si128((__m128i *) (pi2_out + (1 * out_strd)));
  224|  4.22k|    src_r2 = _mm_loadu_si128((__m128i *) (pi2_out + (2 * out_strd)));
  225|  4.22k|    src_r3 = _mm_loadu_si128((__m128i *) (pi2_out + (3 * out_strd)));
  226|       |
  227|  4.22k|    chroma_mask_even =
  228|  4.22k|        _mm_set_epi16(0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff);
  229|  4.22k|    chroma_mask_odd = _mm_set_epi16(0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000);
  230|       |
  231|  4.22k|    src_r0 = _mm_and_si128(src_r0, chroma_mask_odd);  // 0 src1 0 src2 0 ...
  232|  4.22k|    src_r1 = _mm_and_si128(src_r1, chroma_mask_odd);
  233|  4.22k|    src_r2 = _mm_and_si128(src_r2, chroma_mask_odd);
  234|  4.22k|    src_r3 = _mm_and_si128(src_r3, chroma_mask_odd);
  235|       |
  236|  4.22k|    i_macro = _mm_and_si128(i_macro, chroma_mask_even);  // macro 0 macro 0 ..
  237|       |
  238|  4.22k|    src_r0 = _mm_add_epi16(src_r0, i_macro);             // macro  src1 macro src2 macro ...
  239|  4.22k|    src_r1 = _mm_add_epi16(src_r1, i_macro);
  240|  4.22k|    src_r2 = _mm_add_epi16(src_r2, i_macro);
  241|  4.22k|    src_r3 = _mm_add_epi16(src_r3, i_macro);
  242|       |
  243|  4.22k|    _mm_storeu_si128((__m128i *) (&pi2_out[0]), src_r0);
  244|  4.22k|    _mm_storeu_si128((__m128i *) (&pi2_out[out_strd]), src_r1);
  245|  4.22k|    _mm_storeu_si128((__m128i *) (&pi2_out[2 * out_strd]), src_r2);
  246|  4.22k|    _mm_storeu_si128((__m128i *) (&pi2_out[3 * out_strd]), src_r3);
  247|  4.22k|}
isvcd_iquant_itrans_4x4_sse42:
  275|  10.3k|{
  276|  10.3k|    __m128i src_r0_r1, src_r2_r3;
  277|  10.3k|    __m128i src_r0, src_r1, src_r2, src_r3;
  278|  10.3k|    __m128i scalemat_r0_r1, scalemat_r2_r3;
  279|  10.3k|    __m128i dequant_r0_r1, dequant_r2_r3;
  280|  10.3k|    __m128i zero_8x16b = _mm_setzero_si128();  // all bits reset to zero
  281|  10.3k|    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  282|  10.3k|    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
  283|  10.3k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (283:41): [True: 6.69k, False: 3.69k]
  ------------------
  284|  10.3k|    __m128i value_32 = _mm_set1_epi32(32);
  285|  10.3k|    __m128i dupmax_4x32b = _mm_set1_epi32(RSD_MAX);
  ------------------
  |  |  772|  10.3k|#define RSD_MAX 255
  ------------------
  286|  10.3k|    __m128i dupmin_4x32b = _mm_set1_epi32(RSD_MIN);
  ------------------
  |  |  773|  10.3k|#define RSD_MIN -255
  ------------------
  287|  10.3k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  10.3k|#define UNUSED(x) ((void)(x))
  ------------------
  288|       |
  289|       |    /*************************************************************/
  290|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  291|       |    /* operations on platform                                    */
  292|       |    /*************************************************************/
  293|       |    // a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
  294|  10.3k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));
  295|       |    // a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
  296|  10.3k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
  297|       |    // b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
  298|  10.3k|    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat));
  299|       |    // b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
  300|  10.3k|    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8));
  301|       |    // q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
  302|  10.3k|    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat));
  303|       |    // q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
  304|  10.3k|    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8));
  305|       |
  306|       |    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  307|  10.3k|    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1);
  308|       |    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
  309|  10.3k|    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3);
  310|       |
  311|       |    // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  312|  10.3k|    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b);
  313|       |    // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  314|  10.3k|    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b);
  315|       |    // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
  316|  10.3k|    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b);
  317|       |    // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
  318|  10.3k|    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b);
  319|       |
  320|  10.3k|    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  321|  10.3k|    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b);  // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
  322|  10.3k|    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b);  // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
  323|  10.3k|    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b);  // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
  324|       |
  325|       |    // a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long
  326|  10.3k|    temp4 = _mm_madd_epi16(src_r0, temp4);
  327|  10.3k|    temp5 = _mm_madd_epi16(src_r1, temp5);
  328|  10.3k|    temp6 = _mm_madd_epi16(src_r2, temp6);
  329|  10.3k|    temp7 = _mm_madd_epi16(src_r3, temp7);
  330|       |
  331|  10.3k|    if(u4_qp_div_6 >= 4)
  ------------------
  |  Branch (331:8): [True: 3.69k, False: 6.69k]
  ------------------
  332|  3.69k|    {
  333|  3.69k|        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
  334|  3.69k|        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
  335|  3.69k|        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
  336|  3.69k|        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
  337|  3.69k|    }
  338|  6.69k|    else
  339|  6.69k|    {
  340|  6.69k|        temp4 = _mm_add_epi32(temp4, add_rshift);
  341|  6.69k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  342|  6.69k|        temp6 = _mm_add_epi32(temp6, add_rshift);
  343|  6.69k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  344|  6.69k|        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
  345|  6.69k|        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
  346|  6.69k|        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
  347|  6.69k|        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
  348|  6.69k|    }
  349|       |
  350|  10.3k|    if(iq_start_idx == 1) resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_ld_addr[0], 0);
  ------------------
  |  Branch (350:8): [True: 0, False: 10.3k]
  ------------------
  351|       |    /* Perform Inverse transform */
  352|       |    /*-------------------------------------------------------------*/
  353|       |    /* IDCT [ Horizontal transformation ]                          */
  354|       |    /*-------------------------------------------------------------*/
  355|       |    // Matrix transpose
  356|       |    /*
  357|       |     *  a0 a1 a2 a3
  358|       |     *  b0 b1 b2 b3
  359|       |     *  c0 c1 c2 c3
  360|       |     *  d0 d1 d2 d3
  361|       |     */
  362|  10.3k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 b0 a1 b1
  363|  10.3k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // c0 d0 c1 d1
  364|  10.3k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // a2 b2 a3 b3
  365|  10.3k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 d2 c3 d3
  366|  10.3k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 b0 c0 d0
  367|  10.3k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // a1 b1 c1 d1
  368|  10.3k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // a2 b2 c2 d2
  369|  10.3k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // a3 b3 c3 d3
  370|       |    // Transform starts -- horizontal transform
  371|       |    /*------------------------------------------------------------------*/
  372|       |    /* z0 = w0 + w2                                             */
  373|  10.3k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  374|       |    /* z1 = w0 - w2                                             */
  375|  10.3k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  376|       |    /* z2 = (w1 >> 1) - w3                                      */
  377|  10.3k|    temp2 = _mm_srai_epi32(resq_r1, 1);     //(w1>>1)
  378|  10.3k|    temp2 = _mm_sub_epi32(temp2, resq_r3);  //(w1>>1) - w3
  379|       |    /* z3 = w1 + (w3 >> 1)                                      */
  380|  10.3k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(w3>>1) + w1
  381|  10.3k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  382|       |    /*----------------------------------------------------------*/
  383|       |    /* x0 = z0 + z3                                             */
  384|  10.3k|    resq_r0 = _mm_add_epi32(temp0, temp3);
  385|       |    /* x1 = z1 + z2                                             */
  386|  10.3k|    resq_r1 = _mm_add_epi32(temp1, temp2);
  387|       |    /* x2 = z1 - z2                                             */
  388|  10.3k|    resq_r2 = _mm_sub_epi32(temp1, temp2);
  389|       |    /* x3 = z0 - z3                                             */
  390|  10.3k|    resq_r3 = _mm_sub_epi32(temp0, temp3);
  391|       |    // Matrix transpose
  392|       |    /*
  393|       |     *  a0 b0 c0 d0
  394|       |     *  a1 b1 c1 d1
  395|       |     *  a2 b2 c2 d2
  396|       |     *  a3 b3 c3 d3
  397|       |     */
  398|  10.3k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 a1 b0 b1
  399|  10.3k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // a2 a3 b2 b3
  400|  10.3k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // c0 c1 d0 d1
  401|  10.3k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 c3 d2 d3
  402|  10.3k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 a1 a2 a3
  403|  10.3k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // b0 b1 b2 b3
  404|  10.3k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // c0 c1 c2 c3
  405|  10.3k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // d0 d1 d2 d3
  406|       |    // Transform ends -- horizontal transform
  407|       |
  408|       |    /*--------------------------------------------------------------*/
  409|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
  410|       |    /*                                                              */
  411|       |    /* Add the prediction and store it back to same buffer          */
  412|       |    /*--------------------------------------------------------------*/
  413|       |    /* z0j = y0j + y2j                                                        */
  414|  10.3k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
  415|       |    /* z1j = y0j - y2j                                                        */
  416|  10.3k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
  417|       |    /* z2j = (y1j>>1) - y3j */
  418|  10.3k|    temp2 = _mm_srai_epi32(resq_r1, 1);  //(y1j>>1)
  419|  10.3k|    temp2 = _mm_sub_epi32(temp2, resq_r3);
  420|       |    /* z3j = y1j + (y3j>>1) */
  421|  10.3k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(y3j>>1)
  422|  10.3k|    temp3 = _mm_add_epi32(temp3, resq_r1);
  423|       |
  424|       |    /* x0j = z0j + z3j                                                        */
  425|  10.3k|    temp4 = _mm_add_epi32(temp0, temp3);
  426|  10.3k|    temp4 = _mm_add_epi32(temp4, value_32);
  427|  10.3k|    temp4 = _mm_srai_epi32(temp4, 6);
  428|  10.3k|    temp4 = _mm_min_epi32(dupmax_4x32b, temp4);
  429|  10.3k|    temp4 = _mm_max_epi32(dupmin_4x32b, temp4);
  430|       |
  431|       |    /* x1j = z1j + z2j                                                        */
  432|  10.3k|    temp5 = _mm_add_epi32(temp1, temp2);
  433|  10.3k|    temp5 = _mm_add_epi32(temp5, value_32);
  434|  10.3k|    temp5 = _mm_srai_epi32(temp5, 6);
  435|  10.3k|    temp5 = _mm_min_epi32(dupmax_4x32b, temp5);
  436|  10.3k|    temp5 = _mm_max_epi32(dupmin_4x32b, temp5);
  437|       |
  438|       |    /* x2j = z1j - z2j                                                        */
  439|  10.3k|    temp6 = _mm_sub_epi32(temp1, temp2);
  440|  10.3k|    temp6 = _mm_add_epi32(temp6, value_32);
  441|  10.3k|    temp6 = _mm_srai_epi32(temp6, 6);
  442|  10.3k|    temp6 = _mm_min_epi32(dupmax_4x32b, temp6);
  443|  10.3k|    temp6 = _mm_max_epi32(dupmin_4x32b, temp6);
  444|       |
  445|       |    /* x3j = z0j - z3j                                                        */
  446|  10.3k|    temp7 = _mm_sub_epi32(temp0, temp3);
  447|  10.3k|    temp7 = _mm_add_epi32(temp7, value_32);
  448|  10.3k|    temp7 = _mm_srai_epi32(temp7, 6);
  449|  10.3k|    temp7 = _mm_min_epi32(dupmax_4x32b, temp7);
  450|  10.3k|    temp7 = _mm_max_epi32(dupmin_4x32b, temp7);
  451|       |
  452|       |    // 32-bit to 16-bit conversion
  453|  10.3k|    temp0 = _mm_packs_epi32(temp4, temp5);
  454|  10.3k|    temp1 = _mm_packs_epi32(temp6, temp7);
  455|       |
  456|  10.3k|    resq_r0 = temp0;
  457|  10.3k|    resq_r1 = _mm_srli_si128(temp0, 8);
  458|  10.3k|    resq_r2 = temp1;
  459|  10.3k|    resq_r3 = _mm_srli_si128(temp1, 8);
  460|       |
  461|  10.3k|    _mm_storel_epi64((__m128i *) pi2_out, resq_r0);
  462|  10.3k|    pi2_out += out_strd;
  463|  10.3k|    _mm_storel_epi64((__m128i *) pi2_out, resq_r1);
  464|  10.3k|    pi2_out += out_strd;
  465|  10.3k|    _mm_storel_epi64((__m128i *) pi2_out, resq_r2);
  466|  10.3k|    pi2_out += out_strd;
  467|  10.3k|    _mm_storel_epi64((__m128i *) pi2_out, resq_r3);
  468|  10.3k|}
isvcd_iquant_itrans_8x8_sse42:
  497|  3.17k|{
  498|  3.17k|    __m128i src_r0;
  499|  3.17k|    __m128i scalemat_r0;
  500|  3.17k|    __m128i zero_8x16b = _mm_setzero_si128();  // all bits reset to zero
  501|  3.17k|    __m128i value_32 = _mm_set1_epi32(32);
  502|  3.17k|    __m128i add_rshift = _mm_set1_epi32((qp_div < 6) ? (1 << (5 - qp_div)) : 0);
  ------------------
  |  Branch (502:41): [True: 2.08k, False: 1.09k]
  ------------------
  503|  3.17k|    __m128i dequant_r0;
  504|  3.17k|    __m128i sign_reg;
  505|  3.17k|    __m128i src_r0_1, src_r0_2;
  506|  3.17k|    __m128i scalemat_r0_1, scalemat_r0_2;
  507|  3.17k|    __m128i temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  508|  3.17k|    __m128i temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18, temp19, temp20;
  509|       |    // To store dequantization results
  510|  3.17k|    __m128i resq_r0_1, resq_r0_2, resq_r1_1, resq_r1_2, resq_r2_1, resq_r2_2, resq_r3_1, resq_r3_2,
  511|  3.17k|        resq_r4_1, resq_r4_2, resq_r5_1, resq_r5_2, resq_r6_1, resq_r6_2, resq_r7_1, resq_r7_2;
  512|  3.17k|    __m128i dupmax_4x32b = _mm_set1_epi32(RSD_MAX);
  ------------------
  |  |  772|  3.17k|#define RSD_MAX 255
  ------------------
  513|  3.17k|    __m128i dupmin_4x32b = _mm_set1_epi32(RSD_MIN);
  ------------------
  |  |  773|  3.17k|#define RSD_MIN -255
  ------------------
  514|       |
  515|  3.17k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  3.17k|#define UNUSED(x) ((void)(x))
  ------------------
  516|  3.17k|    UNUSED(iq_start_idx);
  ------------------
  |  |   45|  3.17k|#define UNUSED(x) ((void)(x))
  ------------------
  517|  3.17k|    UNUSED(pi2_dc_ld_addr);
  ------------------
  |  |   45|  3.17k|#define UNUSED(x) ((void)(x))
  ------------------
  518|       |
  519|       |    /*************************************************************/
  520|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
  521|       |    /* operations on platform. Note : DC coeff is not scaled     */
  522|       |    /*************************************************************/
  523|       |
  524|       |    // Row 0 processing
  525|       |    // a00 a01 a02 a03 a04 a05 a06 a07 -- the source matrix 0th row
  526|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src));
  527|       |    // b00 b01 b02 b03 b04 b05 b06 b07 -- the scaling matrix 0th row
  528|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat));
  529|  3.17k|    dequant_r0 =
  530|  3.17k|        _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[0]));  // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  531|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);     // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  532|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);     // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  533|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  534|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  535|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  536|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  537|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  538|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  539|       |
  540|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  541|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  542|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6  a07*b07*q7 -- 32 bits long
  543|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  544|       |
  545|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (545:8): [True: 1.09k, False: 2.08k]
  ------------------
  546|  1.09k|    {
  547|  1.09k|        resq_r0_1 = _mm_slli_epi32(temp5, qp_div - 6);
  548|  1.09k|        resq_r0_2 = _mm_slli_epi32(temp7, qp_div - 6);
  549|  1.09k|    }
  550|  2.08k|    else
  551|  2.08k|    {
  552|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  553|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  554|  2.08k|        resq_r0_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  555|  2.08k|        resq_r0_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  556|  2.08k|    }
  557|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16
  558|       |    // bit long
  559|  3.17k|    resq_r0_1 = _mm_packs_epi32(resq_r0_1, resq_r0_2);
  560|       |    // Row 1 processing
  561|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 1st row
  562|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
  563|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 1st row
  564|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 8));
  565|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  566|  3.17k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[8]));
  567|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  568|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  569|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  570|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  571|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  572|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  573|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  574|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  575|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  576|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  577|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  578|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  579|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (579:8): [True: 1.09k, False: 2.08k]
  ------------------
  580|  1.09k|    {
  581|  1.09k|        resq_r1_1 = _mm_slli_epi32(temp5, qp_div - 6);
  582|  1.09k|        resq_r1_2 = _mm_slli_epi32(temp7, qp_div - 6);
  583|  1.09k|    }
  584|  2.08k|    else
  585|  2.08k|    {
  586|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  587|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  588|  2.08k|        resq_r1_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  589|  2.08k|        resq_r1_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  590|  2.08k|    }
  591|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7
  592|  3.17k|    resq_r1_1 = _mm_packs_epi32(resq_r1_1, resq_r1_2);
  593|       |    // Row 2 processing
  594|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 2nd row
  595|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 16));
  596|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08-- the scaling matrix 2nd row
  597|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 16));
  598|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  599|  3.17k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[16]));
  600|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  601|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  602|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  603|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  604|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  605|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  606|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  607|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  608|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  609|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  610|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  611|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  612|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (612:8): [True: 1.09k, False: 2.08k]
  ------------------
  613|  1.09k|    {
  614|  1.09k|        resq_r2_1 = _mm_slli_epi32(temp5, qp_div - 6);
  615|  1.09k|        resq_r2_2 = _mm_slli_epi32(temp7, qp_div - 6);
  616|  1.09k|    }
  617|  2.08k|    else
  618|  2.08k|    {
  619|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  620|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  621|  2.08k|        resq_r2_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  622|  2.08k|        resq_r2_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  623|  2.08k|    }
  624|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7
  625|  3.17k|    resq_r2_1 = _mm_packs_epi32(resq_r2_1, resq_r2_2);
  626|       |    // Row 3 processing
  627|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 3rd row
  628|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 24));
  629|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 3rd row
  630|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 24));
  631|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  632|  3.17k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[24]));
  633|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  634|       |    // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  635|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);
  636|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4  b05*q5 b06*q6 b07*q7 -- 16 bit result
  637|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  638|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  639|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  640|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  641|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  642|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 - 32 bits long
  643|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  644|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  645|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  646|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (646:8): [True: 1.09k, False: 2.08k]
  ------------------
  647|  1.09k|    {
  648|  1.09k|        resq_r3_1 = _mm_slli_epi32(temp5, qp_div - 6);
  649|  1.09k|        resq_r3_2 = _mm_slli_epi32(temp7, qp_div - 6);
  650|  1.09k|    }
  651|  2.08k|    else
  652|  2.08k|    {
  653|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  654|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  655|  2.08k|        resq_r3_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  656|  2.08k|        resq_r3_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  657|  2.08k|    }
  658|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4  a05*b05*q5 a06*b06*q6 a07*b07*q7
  659|  3.17k|    resq_r3_1 = _mm_packs_epi32(resq_r3_1, resq_r3_2);
  660|       |    // Row 4 processing
  661|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 --  the source matrix 4th row
  662|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 32));
  663|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 4th row
  664|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 32));
  665|  3.17k|    dequant_r0 = _mm_loadu_si128(
  666|  3.17k|        (__m128i *) (&pu2_weigh_mat[32]));              // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  667|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  668|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  669|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
  670|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  671|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  672|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  673|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  674|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  675|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  676|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  677|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  678|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  679|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (679:8): [True: 1.09k, False: 2.08k]
  ------------------
  680|  1.09k|    {
  681|  1.09k|        resq_r4_1 = _mm_slli_epi32(temp5, qp_div - 6);
  682|  1.09k|        resq_r4_2 = _mm_slli_epi32(temp7, qp_div - 6);
  683|  1.09k|    }
  684|  2.08k|    else
  685|  2.08k|    {
  686|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  687|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  688|  2.08k|        resq_r4_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  689|  2.08k|        resq_r4_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  690|  2.08k|    }
  691|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7
  692|  3.17k|    resq_r4_1 = _mm_packs_epi32(resq_r4_1, resq_r4_2);
  693|       |    // Row 5 processing
  694|       |    // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 5th row
  695|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 40));
  696|       |    // b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 5th row
  697|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 40));
  698|       |    // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  699|  3.17k|    dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[40]));
  700|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  701|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  702|       |    // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4  b05*q5 b06*q6 b07*q7 -- 16 bit result
  703|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  704|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  705|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  706|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  707|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  708|       |    // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
  709|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  710|       |    // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
  711|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  712|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (712:8): [True: 1.09k, False: 2.08k]
  ------------------
  713|  1.09k|    {
  714|  1.09k|        resq_r5_1 = _mm_slli_epi32(temp5, qp_div - 6);
  715|  1.09k|        resq_r5_2 = _mm_slli_epi32(temp7, qp_div - 6);
  716|  1.09k|    }
  717|  2.08k|    else
  718|  2.08k|    {
  719|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  720|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  721|  2.08k|        resq_r5_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  722|  2.08k|        resq_r5_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  723|  2.08k|    }
  724|       |    /* a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 */
  725|  3.17k|    resq_r5_1 = _mm_packs_epi32(resq_r5_1, resq_r5_2);
  726|       |    // Row 6 processing
  727|       |    /* a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 6th row */
  728|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 48));
  729|       |    /* b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 6th row */
  730|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 48));
  731|  3.17k|    dequant_r0 = _mm_loadu_si128(
  732|  3.17k|        (__m128i *) (&pu2_weigh_mat[48]));              // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  733|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  734|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  735|       |    /* b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result */
  736|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  737|       |    // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
  738|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  739|       |    // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
  740|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  741|       |    /* a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long */
  742|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  743|       |    /* a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long */
  744|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  745|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (745:8): [True: 1.09k, False: 2.08k]
  ------------------
  746|  1.09k|    {
  747|  1.09k|        resq_r6_1 = _mm_slli_epi32(temp5, qp_div - 6);
  748|  1.09k|        resq_r6_2 = _mm_slli_epi32(temp7, qp_div - 6);
  749|  1.09k|    }
  750|  2.08k|    else
  751|  2.08k|    {
  752|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  753|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  754|  2.08k|        resq_r6_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  755|  2.08k|        resq_r6_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  756|  2.08k|    }
  757|       |    /* a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 */
  758|  3.17k|    resq_r6_1 = _mm_packs_epi32(resq_r6_1, resq_r6_2);
  759|       |    // Row 7 processing
  760|       |    /* a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 7th row */
  761|  3.17k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 56));
  762|       |    /* b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 7th row */
  763|  3.17k|    scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 56));
  764|  3.17k|    dequant_r0 = _mm_loadu_si128(
  765|  3.17k|        (__m128i *) (&pu2_weigh_mat[56]));              // q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
  766|  3.17k|    src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
  767|  3.17k|    src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b);  // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
  768|       |
  769|       |    /* b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result */
  770|  3.17k|    temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0);
  771|       |    /* b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long */
  772|  3.17k|    scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b);
  773|       |    /* b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long */
  774|  3.17k|    scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b);
  775|       |    /* a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long */
  776|  3.17k|    temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1);
  777|       |    /* a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long */
  778|  3.17k|    temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2);
  779|  3.17k|    if(qp_div >= 6)
  ------------------
  |  Branch (779:8): [True: 1.09k, False: 2.08k]
  ------------------
  780|  1.09k|    {
  781|  1.09k|        resq_r7_1 = _mm_slli_epi32(temp5, qp_div - 6);
  782|  1.09k|        resq_r7_2 = _mm_slli_epi32(temp7, qp_div - 6);
  783|  1.09k|    }
  784|  2.08k|    else
  785|  2.08k|    {
  786|  2.08k|        temp5 = _mm_add_epi32(temp5, add_rshift);
  787|  2.08k|        temp7 = _mm_add_epi32(temp7, add_rshift);
  788|  2.08k|        resq_r7_1 = _mm_srai_epi32(temp5, 6 - qp_div);
  789|  2.08k|        resq_r7_2 = _mm_srai_epi32(temp7, 6 - qp_div);
  790|  2.08k|    }
  791|       |    /* a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 */
  792|  3.17k|    resq_r7_1 = _mm_packs_epi32(resq_r7_1, resq_r7_2);
  793|       |    /* Perform Inverse transform */
  794|       |    /*--------------------------------------------------------------------*/
  795|       |    /* IDCT [ Horizontal transformation ]                                 */
  796|       |    /*--------------------------------------------------------------------*/
  797|       |    // Matrix transpose
  798|       |    /*
  799|       |     *  a0 a1 a2 a3 a4 a5 a6 a7
  800|       |     *  b0 b1 b2 b3 b4 b5 b6 b7
  801|       |     *  c0 c1 c2 c3 c4 c5 c6 c7
  802|       |     *  d0 d1 d2 d3 d4 d5 d6 d7
  803|       |     */
  804|  3.17k|    temp1 = _mm_unpacklo_epi16(resq_r0_1, resq_r1_1);  // a0 b0 a1 b1 a2 b2 a3 b3
  805|  3.17k|    temp3 = _mm_unpacklo_epi16(resq_r2_1, resq_r3_1);  // c0 d0 c1 d1 c2 d2 c3 d3
  806|  3.17k|    temp2 = _mm_unpackhi_epi16(resq_r0_1, resq_r1_1);  // a4 b4 a5 b5 a6 b6 a7 b7
  807|  3.17k|    temp4 = _mm_unpackhi_epi16(resq_r2_1, resq_r3_1);  // c4 d4 c5 d5 c6 d6 c7 d7
  808|  3.17k|    resq_r0_1 = _mm_unpacklo_epi32(temp1, temp3);      // a0 b0 c0 d0 a1 b1 c1 d1
  809|  3.17k|    resq_r1_1 = _mm_unpackhi_epi32(temp1, temp3);      // a2 b2 c2 d2 a3 b3 c3 d3
  810|  3.17k|    resq_r2_1 = _mm_unpacklo_epi32(temp2, temp4);      // a4 b4 c4 d4 a5 b5 c5 d5
  811|  3.17k|    resq_r3_1 = _mm_unpackhi_epi32(temp2, temp4);      // a6 b6 c6 d6 a7 b7 c7 d7
  812|       |    /*
  813|       |     * e0 e1 e2 e3 e4 e5 e6 e7
  814|       |     * f0 f1 f2 f3 f4 f5 f6 f7
  815|       |     * g0 g1 g2 g3 g4 g5 g6 g7
  816|       |     * h0 h1 h2 h3 h4 h5 h6 h7
  817|       |     */
  818|  3.17k|    temp1 = _mm_unpacklo_epi16(resq_r4_1, resq_r5_1);  // e0 f0 e1 f1 e2 f2 e3 f3
  819|  3.17k|    temp3 = _mm_unpacklo_epi16(resq_r6_1, resq_r7_1);  // g0 h0 g1 h1 g2 h2 g3 h3
  820|  3.17k|    temp2 = _mm_unpackhi_epi16(resq_r4_1, resq_r5_1);  // e4 f4 e5 f5 e6 f6 e7 f7
  821|  3.17k|    temp4 = _mm_unpackhi_epi16(resq_r6_1, resq_r7_1);  // g4 h4 g5 h5 g6 h6 g7 h7
  822|  3.17k|    resq_r4_1 = _mm_unpacklo_epi32(temp1, temp3);      // e0 f0 g0 h0 e1 f1 g1 h1
  823|  3.17k|    resq_r5_1 = _mm_unpackhi_epi32(temp1, temp3);      // e2 f2 g2 h2 e3 f3 g3 h3
  824|  3.17k|    resq_r6_1 = _mm_unpacklo_epi32(temp2, temp4);      // e4 f4 g4 h4 e5 f5 g5 h5
  825|  3.17k|    resq_r7_1 = _mm_unpackhi_epi32(temp2, temp4);      // e6 f6 g6 h6 e7 f7 g7 h7
  826|       |    /*
  827|       |     * a0 b0 c0 d0 a1 b1 c1 d1
  828|       |     * a2 b2 c2 d2 a3 b3 c3 d3
  829|       |     * a4 b4 c4 d4 a5 b5 c5 d5
  830|       |     * a6 b6 c6 d6 a7 b7 c7 d7
  831|       |     * e0 f0 g0 h0 e1 f1 g1 h1
  832|       |     * e2 f2 g2 h2 e3 f3 g3 h3
  833|       |     * e4 f4 g4 h4 e5 f5 g5 h5
  834|       |     * e6 f6 g6 h6 e7 f7 g7 h7
  835|       |     */
  836|  3.17k|    resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1);  // a0 b0 c0 d0 e0 f0 g0 h0
  837|  3.17k|    resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1);  // a1 b1 c1 d1 e1 f1 g1 h1
  838|  3.17k|    resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1);  // a2 b2 c2 d2 e2 f2 g2 h2
  839|  3.17k|    resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1);  // a3 b3 c3 d3 e3 f3 g3 h3
  840|  3.17k|    resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1);  // a4 b4 c4 d4 e4 f4 g4 h4
  841|  3.17k|    resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1);  // a5 b5 c5 d5 e5 f5 g5 h5
  842|  3.17k|    resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1);  // a6 b6 c6 d6 e6 f6 g6 h6
  843|  3.17k|    resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1);  // a7 b7 c7 d7 e7 f7 g7 h7
  844|       |
  845|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
  846|  3.17k|    resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg);  // a1 b1 c1 d1 -- 32 bit
  847|  3.17k|    resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg);  // e1 f1 g1 h1 -- 32 bit
  848|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
  849|  3.17k|    resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg);  // a3 b3 c3 d3 -- 32 bit
  850|  3.17k|    resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg);  // e3 f3 g3 h3 -- 32 bit
  851|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
  852|  3.17k|    resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg);  // a5 b5 c5 d5 -- 32 bit
  853|  3.17k|    resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg);  // e5 f5 g5 h5 -- 32 bit
  854|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
  855|  3.17k|    resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg);  // a7 b7 c7 d7 -- 32 bit
  856|  3.17k|    resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg);  // e7 f7 g7 h7 -- 32 bit
  857|       |    // Transform starts -- horizontal transform
  858|       |    /*------------------------------------------------------------------*/
  859|       |    /* y0 = w0 + w4                                                     */
  860|  3.17k|    temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
  861|       |    /* y2 = w0 - w4                                                      */
  862|  3.17k|    temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
  863|       |    /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
  864|  3.17k|    temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1);  //-w3+w5
  865|  3.17k|    temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
  866|  3.17k|    temp4 = _mm_sub_epi32(temp2, resq_r7_1);      //-w3+w5-w7
  867|  3.17k|    temp12 = _mm_sub_epi32(temp10, resq_r7_2);
  868|  3.17k|    temp5 = _mm_srai_epi32(resq_r7_1, 1);         // w7>>1
  869|  3.17k|    temp13 = _mm_srai_epi32(resq_r7_2, 1);
  870|  3.17k|    temp2 = _mm_sub_epi32(temp4, temp5);          //-w3+w5-w7 -(w7>>1)
  871|  3.17k|    temp10 = _mm_sub_epi32(temp12, temp13);
  872|  3.17k|    temp2 = _mm_packs_epi32(temp2, temp10);
  873|       |    /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
  874|  3.17k|    temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1);  // w1+w7
  875|  3.17k|    temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
  876|  3.17k|    temp4 = _mm_sub_epi32(temp4, resq_r3_1);      // w1+w7-w3
  877|  3.17k|    temp12 = _mm_sub_epi32(temp12, resq_r3_2);
  878|  3.17k|    temp5 = _mm_srai_epi32(resq_r3_1, 1);         // w3>>1
  879|  3.17k|    temp13 = _mm_srai_epi32(resq_r3_2, 1);
  880|  3.17k|    temp4 = _mm_sub_epi32(temp4, temp5);          // w1+w7-w3-(w3>>1)
  881|  3.17k|    temp12 = _mm_sub_epi32(temp12, temp13);
  882|  3.17k|    temp4 = _mm_packs_epi32(temp4, temp12);
  883|       |    /* y4 = (w2 >> 1) - w6                                              */
  884|  3.17k|    temp5 = _mm_srai_epi16(resq_r2_2, 1);     // w2>>1
  885|  3.17k|    temp5 = _mm_sub_epi16(temp5, resq_r6_2);  //(w2>>1)-w6
  886|       |    /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
  887|  3.17k|    temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1);  // w7-w1
  888|  3.17k|    temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
  889|  3.17k|    temp6 = _mm_add_epi32(temp6, resq_r5_1);      // w7-w1+w5
  890|  3.17k|    temp14 = _mm_add_epi32(temp14, resq_r5_2);
  891|  3.17k|    temp7 = _mm_srai_epi32(resq_r5_1, 1);         // w5>>1
  892|  3.17k|    temp15 = _mm_srai_epi32(resq_r5_2, 1);
  893|  3.17k|    temp6 = _mm_add_epi32(temp6, temp7);          // w7-w1+w5+(w5>>1)
  894|  3.17k|    temp14 = _mm_add_epi32(temp14, temp15);
  895|  3.17k|    temp6 = _mm_packs_epi32(temp6, temp14);
  896|       |    /* y6 = w2 + (w6 >> 1)                                              */
  897|  3.17k|    temp7 = _mm_srai_epi16(resq_r6_2, 1);     // w6>>1
  898|  3.17k|    temp7 = _mm_add_epi16(temp7, resq_r2_2);  //(w6>>1)+w2
  899|       |    /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
  900|  3.17k|    temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1);  // w3+w5
  901|  3.17k|    temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
  902|  3.17k|    temp8 = _mm_add_epi32(temp8, resq_r1_1);      // w3+w5+w1
  903|  3.17k|    temp16 = _mm_add_epi32(temp16, resq_r1_2);
  904|  3.17k|    temp17 = _mm_srai_epi32(resq_r1_1, 1);        // w1>>1
  905|  3.17k|    temp18 = _mm_srai_epi32(resq_r1_2, 1);
  906|  3.17k|    temp8 = _mm_add_epi32(temp8, temp17);         // w3+w5+w1+(w1>>1)
  907|  3.17k|    temp16 = _mm_add_epi32(temp16, temp18);
  908|  3.17k|    temp8 = _mm_packs_epi32(temp8, temp16);
  909|       |    /*------------------------------------------------------------------*/
  910|       |    /*------------------------------------------------------------------*/
  911|       |    /* z0 = y0 + y6                                                        */
  912|  3.17k|    resq_r0_1 = _mm_add_epi16(temp1, temp7);
  913|       |    /* z1 = y1 + (y7 >> 2)                                                */
  914|  3.17k|    resq_r1_1 = _mm_srai_epi16(temp8, 2);
  915|  3.17k|    resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
  916|       |    /* z2 = y2 + y4                                                        */
  917|  3.17k|    resq_r2_1 = _mm_add_epi16(temp3, temp5);
  918|       |    /* z3 = y3 + (y5 >> 2)                                                */
  919|  3.17k|    resq_r3_1 = _mm_srai_epi16(temp6, 2);
  920|  3.17k|    resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
  921|       |    /* z4 = y2 - y4                                                        */
  922|  3.17k|    resq_r4_1 = _mm_sub_epi16(temp3, temp5);
  923|       |    /* z5 = (y3 >> 2) - y5                                                 */
  924|  3.17k|    resq_r5_1 = _mm_srai_epi16(temp4, 2);
  925|  3.17k|    resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
  926|       |    /* z6 = y0 - y6                                                     */
  927|  3.17k|    resq_r6_1 = _mm_sub_epi16(temp1, temp7);
  928|       |    /* z7 = y7 - (y1 >> 2)                                                 */
  929|  3.17k|    resq_r7_1 = _mm_srai_epi16(temp2, 2);
  930|  3.17k|    resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
  931|       |    /*------------------------------------------------------------------*/
  932|       |    /*------------------------------------------------------------------*/
  933|       |    /* x0 = z0 + z7                                                        */
  934|  3.17k|    temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
  935|       |    /* x1 = z2 + z5                                                        */
  936|  3.17k|    temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
  937|       |    /* x2 = z4 + z3                                                        */
  938|  3.17k|    temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
  939|       |    /* x3 = z6 + z1                                                        */
  940|  3.17k|    temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
  941|       |    /* x4 = z6 - z1                                                        */
  942|  3.17k|    temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
  943|       |    /* x5 = z4 - z3                                                        */
  944|  3.17k|    temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
  945|       |    /* x6 = z2 - z5                                                        */
  946|  3.17k|    temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
  947|       |    /* x7 = z0 - z7                                                        */
  948|  3.17k|    temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
  949|       |    /*------------------------------------------------------------------*/
  950|       |    // Matrix transpose
  951|       |    /*
  952|       |     *  a0 b0 c0 d0 e0 f0 g0 h0
  953|       |     *  a1 b1 c1 d1 e1 f1 g1 h1
  954|       |     *  a2 b2 c2 d2 e2 f2 g2 h2
  955|       |     *  a3 b3 c3 d3 e3 f3 g3 h3
  956|       |     */
  957|  3.17k|    temp17 = _mm_unpacklo_epi16(temp1, temp2);       // a0 a1 b0 b1 c0 c1 d0 d1
  958|  3.17k|    temp19 = _mm_unpacklo_epi16(temp3, temp4);       // a2 a3 b2 b3 c2 c3 d2 d3
  959|  3.17k|    temp18 = _mm_unpackhi_epi16(temp1, temp2);       // e0 e1 f0 f1 g0 g1 h0 h1
  960|  3.17k|    temp20 = _mm_unpackhi_epi16(temp3, temp4);       // e2 e3 f2 f3 g2 g3 h2 h3
  961|       |
  962|  3.17k|    resq_r0_1 = _mm_unpacklo_epi32(temp17, temp19);  // a0 a1 a2 a3 b0 b1 b2 b3
  963|  3.17k|    resq_r1_1 = _mm_unpackhi_epi32(temp17, temp19);  // c0 c1 c2 c3 d0 d1 d2 d3
  964|  3.17k|    resq_r2_1 = _mm_unpacklo_epi32(temp18, temp20);  // e0 e1 e2 e3 f0 f1 f2 f3
  965|  3.17k|    resq_r3_1 = _mm_unpackhi_epi32(temp18, temp20);  // g0 g1 g2 g3 h0 h1 h2 h3
  966|       |    /*
  967|       |     *  a4 b4 c4 d4 e4 f4 g4 h4
  968|       |     *  a5 b5 c5 d5 e5 f5 g5 h5
  969|       |     *  a6 b6 c6 d6 e6 f6 g6 h6
  970|       |     *  a7 b7 c7 d7 e7 f7 g7 h7
  971|       |     */
  972|  3.17k|    temp17 = _mm_unpacklo_epi16(temp5, temp6);       // a4 a5 b4 b5 c4 c5 d4 d5
  973|  3.17k|    temp19 = _mm_unpacklo_epi16(temp7, temp8);       // a6 a7 b6 b7 c6 c7 d6 d7
  974|  3.17k|    temp18 = _mm_unpackhi_epi16(temp5, temp6);       // e4 e5 f4 f5 g4 g5 h4 h5
  975|  3.17k|    temp20 = _mm_unpackhi_epi16(temp7, temp8);       // e6 e7 f6 f7 g6 g7 h6 h7
  976|       |
  977|  3.17k|    resq_r4_1 = _mm_unpacklo_epi32(temp17, temp19);  // a4 a5 a6 a7 b4 b5 b6 b7
  978|  3.17k|    resq_r5_1 = _mm_unpackhi_epi32(temp17, temp19);  // c4 c5 c6 c7 d4 d5 d6 d7
  979|  3.17k|    resq_r6_1 = _mm_unpacklo_epi32(temp18, temp20);  // e4 e5 e6 e7 f4 f5 f6 f7
  980|  3.17k|    resq_r7_1 = _mm_unpackhi_epi32(temp18, temp20);  // g4 g5 g6 g7 h4 h5 h6 h7
  981|       |    /*  a0 a1 a2 a3 b0 b1 b2 b3
  982|       |     *  c0 c1 c2 c3 d0 d1 d2 d3
  983|       |     *  e0 e1 e2 e3 f0 f1 f2 f3
  984|       |     *  g0 g1 g2 g3 h0 h1 h2 h3
  985|       |     *  a4 a5 a6 a7 b4 b5 b6 b7
  986|       |     *  c4 c5 c6 c7 d4 d5 d6 d7
  987|       |     *  e4 e5 e6 e7 f4 f5 f6 f7
  988|       |     *  g4 g5 g6 g7 h4 h5 h6 h7
  989|       |     */
  990|  3.17k|    resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1);  // a0 a1 a2 a3 a4 a5 a6 a7
  991|  3.17k|    resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1);  // b0 b1 b2 b3 b4 b5 b6 b7
  992|  3.17k|    resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1);  // c0 c1 c2 c3 c4 c5 c6 c7
  993|  3.17k|    resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1);  // d0 d1 d2 d3 d4 d5 d6 d7
  994|  3.17k|    resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1);  // e0 e1 e2 e3 e4 e5 e6 e7
  995|  3.17k|    resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1);  // f0 f1 f2 f3 f4 f5 f6 f7
  996|  3.17k|    resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1);  // g0 g1 g2 g3 g4 g5 g6 g7
  997|  3.17k|    resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1);  // h0 h1 h2 h3 h4 h5 h6 h7
  998|       |
  999|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
 1000|  3.17k|    resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg);  // a1 b1 c1 d1 -- 32 bit
 1001|  3.17k|    resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg);  // e1 f1 g1 h1 -- 32 bit
 1002|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
 1003|  3.17k|    resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg);  // a3 b3 c3 d3 -- 32 bit
 1004|  3.17k|    resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg);  // e3 f3 g3 h3 -- 32 bit
 1005|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
 1006|  3.17k|    resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg);  // a5 b5 c5 d5 -- 32 bit
 1007|  3.17k|    resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg);  // e5 f5 g5 h5 -- 32 bit
 1008|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
 1009|  3.17k|    resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg);  // a7 b7 c7 d7 -- 32 bit
 1010|  3.17k|    resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg);  // e7 f7 g7 h7 -- 32 bit
 1011|       |
 1012|       |    /*--------------------------------------------------------------------*/
 1013|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
 1014|       |    /*                                                                    */
 1015|       |
 1016|       |    /* y0j = w0j + w4j                                                     */
 1017|  3.17k|    temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
 1018|       |    /* y2j = w0j - w4j                                                      */
 1019|  3.17k|    temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
 1020|       |    /* y1j = -w3j + w5j - w7j - (w7j >> 1)                                   */
 1021|  3.17k|    temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1);  //-w3+w5
 1022|  3.17k|    temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
 1023|  3.17k|    temp4 = _mm_sub_epi32(temp2, resq_r7_1);      //-w3+w5-w7
 1024|  3.17k|    temp12 = _mm_sub_epi32(temp10, resq_r7_2);
 1025|  3.17k|    temp5 = _mm_srai_epi32(resq_r7_1, 1);         // w7>>1
 1026|  3.17k|    temp13 = _mm_srai_epi32(resq_r7_2, 1);
 1027|  3.17k|    temp2 = _mm_sub_epi32(temp4, temp5);          //-w3+w5-w7 -(w7>>1)
 1028|  3.17k|    temp10 = _mm_sub_epi32(temp12, temp13);
 1029|  3.17k|    temp2 = _mm_packs_epi32(temp2, temp10);
 1030|       |    /* y3j = w1j + w7j - w3j - (w3j >> 1)                                    */
 1031|  3.17k|    temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1);  // w1+w7
 1032|  3.17k|    temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
 1033|  3.17k|    temp4 = _mm_sub_epi32(temp4, resq_r3_1);      // w1+w7-w3
 1034|  3.17k|    temp12 = _mm_sub_epi32(temp12, resq_r3_2);
 1035|  3.17k|    temp5 = _mm_srai_epi32(resq_r3_1, 1);         // w3>>1
 1036|  3.17k|    temp13 = _mm_srai_epi32(resq_r3_2, 1);
 1037|  3.17k|    temp4 = _mm_sub_epi32(temp4, temp5);          // w1+w7-w3-(w3>>1)
 1038|  3.17k|    temp12 = _mm_sub_epi32(temp12, temp13);
 1039|  3.17k|    temp4 = _mm_packs_epi32(temp4, temp12);
 1040|       |    /* y4j = (w2j >> 1) - w6j                                              */
 1041|  3.17k|    temp5 = _mm_srai_epi16(resq_r2_2, 1);     // w2>>1
 1042|  3.17k|    temp5 = _mm_sub_epi16(temp5, resq_r6_2);  //(w2>>1)-w6
 1043|       |    /* y5j = -w1j + w7j + w5j + (w5j >> 1)                                   */
 1044|  3.17k|    temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1);  // w7-w1
 1045|  3.17k|    temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
 1046|  3.17k|    temp6 = _mm_add_epi32(temp6, resq_r5_1);      // w7-w1+w5
 1047|  3.17k|    temp14 = _mm_add_epi32(temp14, resq_r5_2);
 1048|  3.17k|    temp7 = _mm_srai_epi32(resq_r5_1, 1);         // w5>>1
 1049|  3.17k|    temp15 = _mm_srai_epi32(resq_r5_2, 1);
 1050|  3.17k|    temp6 = _mm_add_epi32(temp6, temp7);          // w7-w1+w5+(w5>>1)
 1051|  3.17k|    temp14 = _mm_add_epi32(temp14, temp15);
 1052|  3.17k|    temp6 = _mm_packs_epi32(temp6, temp14);
 1053|       |    /* y6j = w2j + (w6j >> 1)                                              */
 1054|  3.17k|    temp7 = _mm_srai_epi16(resq_r6_2, 1);     // w6>>1
 1055|  3.17k|    temp7 = _mm_add_epi16(temp7, resq_r2_2);  //(w6>>1)+w2
 1056|       |    /* y7j = w3j + w5j + w1j + (w1j >> 1)                                    */
 1057|  3.17k|    temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1);  // w3+w5
 1058|  3.17k|    temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
 1059|  3.17k|    temp8 = _mm_add_epi32(temp8, resq_r1_1);      // w3+w5+w1
 1060|  3.17k|    temp16 = _mm_add_epi32(temp16, resq_r1_2);
 1061|  3.17k|    temp17 = _mm_srai_epi32(resq_r1_1, 1);        // w1>>1
 1062|  3.17k|    temp18 = _mm_srai_epi32(resq_r1_2, 1);
 1063|  3.17k|    temp8 = _mm_add_epi32(temp8, temp17);         // w3+w5+w1+(w1>>1)
 1064|  3.17k|    temp16 = _mm_add_epi32(temp16, temp18);
 1065|  3.17k|    temp8 = _mm_packs_epi32(temp8, temp16);
 1066|       |    /*------------------------------------------------------------------*/
 1067|       |    /*------------------------------------------------------------------*/
 1068|       |    /* z0j = y0j + y6j                                                        */
 1069|  3.17k|    resq_r0_1 = _mm_add_epi16(temp1, temp7);
 1070|       |    /* z1j = y1j + (y7j >> 2)                                                */
 1071|  3.17k|    resq_r1_1 = _mm_srai_epi16(temp8, 2);
 1072|  3.17k|    resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
 1073|       |    /* z2j = y2j + y4j                                                        */
 1074|  3.17k|    resq_r2_1 = _mm_add_epi16(temp3, temp5);
 1075|       |    /* z3j = y3j + (y5j >> 2)                                                */
 1076|  3.17k|    resq_r3_1 = _mm_srai_epi16(temp6, 2);
 1077|  3.17k|    resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
 1078|       |    /* z4j = y2j - y4j                                                        */
 1079|  3.17k|    resq_r4_1 = _mm_sub_epi16(temp3, temp5);
 1080|       |    /* z5j = (y3j >> 2) - y5j                                                 */
 1081|  3.17k|    resq_r5_1 = _mm_srai_epi16(temp4, 2);
 1082|  3.17k|    resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
 1083|       |    /* z6j = y0j - y6j                                                     */
 1084|  3.17k|    resq_r6_1 = _mm_sub_epi16(temp1, temp7);
 1085|       |    /* z7j = y7j - (y1j >> 2)                                                 */
 1086|  3.17k|    resq_r7_1 = _mm_srai_epi16(temp2, 2);
 1087|  3.17k|    resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
 1088|       |    /*------------------------------------------------------------------*/
 1089|       |
 1090|       |    /*------------------------------------------------------------------*/
 1091|       |    /* x0j = z0j + z7j                                                        */
 1092|  3.17k|    temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
 1093|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp1);
 1094|  3.17k|    temp10 = _mm_unpacklo_epi16(temp1, sign_reg);
 1095|  3.17k|    temp11 = _mm_unpackhi_epi16(temp1, sign_reg);
 1096|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1097|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1098|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1099|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1100|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1101|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1102|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1103|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1104|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1105|  3.17k|    temp1 = temp10;  //_mm_add_epi16(temp10, pred_r0_1);
 1106|       |    /* x1j = z2j + z5j                                                        */
 1107|  3.17k|    temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
 1108|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp2);
 1109|  3.17k|    temp10 = _mm_unpacklo_epi16(temp2, sign_reg);
 1110|  3.17k|    temp11 = _mm_unpackhi_epi16(temp2, sign_reg);
 1111|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1112|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1113|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1114|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1115|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1116|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1117|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1118|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1119|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1120|  3.17k|    temp2 = temp10;  //_mm_add_epi16(temp10, pred_r1_1);
 1121|       |    /* x2j = z4j + z3j                                                        */
 1122|  3.17k|    temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
 1123|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp3);
 1124|  3.17k|    temp10 = _mm_unpacklo_epi16(temp3, sign_reg);
 1125|  3.17k|    temp11 = _mm_unpackhi_epi16(temp3, sign_reg);
 1126|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1127|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1128|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1129|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1130|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1131|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1132|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1133|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1134|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1135|  3.17k|    temp3 = temp10;  //_mm_add_epi16(temp10, pred_r2_1);
 1136|       |    /* x3j = z6j + z1j                                                        */
 1137|  3.17k|    temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
 1138|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp4);
 1139|  3.17k|    temp10 = _mm_unpacklo_epi16(temp4, sign_reg);
 1140|  3.17k|    temp11 = _mm_unpackhi_epi16(temp4, sign_reg);
 1141|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1142|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1143|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1144|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1145|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1146|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1147|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1148|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1149|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1150|  3.17k|    temp4 = temp10;  //_mm_add_epi16(temp10, pred_r3_1);
 1151|       |    /* x4j = z6j - z1j                                                        */
 1152|  3.17k|    temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
 1153|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp5);
 1154|  3.17k|    temp10 = _mm_unpacklo_epi16(temp5, sign_reg);
 1155|  3.17k|    temp11 = _mm_unpackhi_epi16(temp5, sign_reg);
 1156|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1157|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1158|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1159|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1160|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1161|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1162|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1163|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1164|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1165|  3.17k|    temp5 = temp10;  //_mm_add_epi16(temp10, pred_r4_1);
 1166|       |    /* x5j = z4j - z3j                                                        */
 1167|  3.17k|    temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
 1168|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp6);
 1169|  3.17k|    temp10 = _mm_unpacklo_epi16(temp6, sign_reg);
 1170|  3.17k|    temp11 = _mm_unpackhi_epi16(temp6, sign_reg);
 1171|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1172|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1173|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1174|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1175|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1176|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1177|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1178|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1179|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1180|  3.17k|    temp6 = temp10;  //_mm_add_epi16(temp10, pred_r5_1);
 1181|       |    /* x6j = z2j - z5j                                                        */
 1182|  3.17k|    temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
 1183|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp7);
 1184|  3.17k|    temp10 = _mm_unpacklo_epi16(temp7, sign_reg);
 1185|  3.17k|    temp11 = _mm_unpackhi_epi16(temp7, sign_reg);
 1186|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1187|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1188|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1189|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1190|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1191|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1192|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1193|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1194|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1195|  3.17k|    temp7 = temp10;  //_mm_add_epi16(temp10, pred_r6_1);
 1196|       |    /* x7j = z0j - z7j                                                        */
 1197|  3.17k|    temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
 1198|  3.17k|    sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp8);
 1199|  3.17k|    temp10 = _mm_unpacklo_epi16(temp8, sign_reg);
 1200|  3.17k|    temp11 = _mm_unpackhi_epi16(temp8, sign_reg);
 1201|  3.17k|    temp10 = _mm_add_epi32(temp10, value_32);
 1202|  3.17k|    temp11 = _mm_add_epi32(temp11, value_32);
 1203|  3.17k|    temp10 = _mm_srai_epi32(temp10, 6);
 1204|  3.17k|    temp10 = _mm_min_epi32(dupmax_4x32b, temp10);
 1205|  3.17k|    temp10 = _mm_max_epi32(dupmin_4x32b, temp10);
 1206|  3.17k|    temp11 = _mm_srai_epi32(temp11, 6);
 1207|  3.17k|    temp11 = _mm_min_epi32(dupmax_4x32b, temp11);
 1208|  3.17k|    temp11 = _mm_max_epi32(dupmin_4x32b, temp11);
 1209|  3.17k|    temp10 = _mm_packs_epi32(temp10, temp11);
 1210|  3.17k|    temp8 = temp10;  //_mm_add_epi16(temp10, pred_r7_1);
 1211|       |
 1212|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp1);
 1213|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[out_strd]), temp2);
 1214|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[2 * out_strd]), temp3);
 1215|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[3 * out_strd]), temp4);
 1216|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[4 * out_strd]), temp5);
 1217|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[5 * out_strd]), temp6);
 1218|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[6 * out_strd]), temp7);
 1219|  3.17k|    _mm_storeu_si128((__m128i *) (&pi2_out[7 * out_strd]), temp8);
 1220|  3.17k|}
isvcd_iquant_itrans_chroma_4x4_sse42:
 1249|  3.30k|{
 1250|  3.30k|    __m128i src_r0_r1, src_r2_r3;
 1251|  3.30k|    __m128i src_r0, src_r1, src_r2, src_r3;
 1252|  3.30k|    __m128i scalemat_r0_r1, scalemat_r2_r3;
 1253|  3.30k|    __m128i dequant_r0_r1, dequant_r2_r3;
 1254|  3.30k|    __m128i zero_8x16b = _mm_setzero_si128();  // all bits reset to zero
 1255|  3.30k|    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 1256|  3.30k|    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
 1257|  3.30k|    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
  ------------------
  |  Branch (1257:41): [True: 2.41k, False: 895]
  ------------------
 1258|  3.30k|    __m128i value_32 = _mm_set1_epi32(32);
 1259|  3.30k|    __m128i dupmax_4x32b = _mm_set1_epi32(RSD_MAX);
  ------------------
  |  |  772|  3.30k|#define RSD_MAX 255
  ------------------
 1260|  3.30k|    __m128i dupmin_4x32b = _mm_set1_epi32(RSD_MIN);
  ------------------
  |  |  773|  3.30k|#define RSD_MIN -255
  ------------------
 1261|       |
 1262|  3.30k|    __m128i chroma_mask_even =
 1263|  3.30k|        _mm_set_epi16(0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff);
 1264|  3.30k|    __m128i chroma_mask_odd =
 1265|  3.30k|        _mm_set_epi16(0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000);
 1266|       |
 1267|  3.30k|    UNUSED(pi2_tmp);
  ------------------
  |  |   45|  3.30k|#define UNUSED(x) ((void)(x))
  ------------------
 1268|       |
 1269|       |    /*************************************************************/
 1270|       |    /* Dequantization of coefficients. Will be replaced by SIMD  */
 1271|       |    /* operations on platform                                    */
 1272|       |    /*************************************************************/
 1273|       |    // a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
 1274|  3.30k|    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));
 1275|       |    // a20 a21 a22 a23 a30 a31 a32 a33 --the source matrix 2nd,3rd row
 1276|  3.30k|    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
 1277|       |    // b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
 1278|  3.30k|    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat));
 1279|       |    // b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
 1280|  3.30k|    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8));
 1281|       |    // q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
 1282|  3.30k|    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat));
 1283|       |    // q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
 1284|  3.30k|    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8));
 1285|       |    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
 1286|  3.30k|    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1);
 1287|       |    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
 1288|  3.30k|    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3);
 1289|       |    // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
 1290|  3.30k|    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b);
 1291|       |    // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
 1292|  3.30k|    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b);
 1293|       |    // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
 1294|  3.30k|    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b);
 1295|       |    // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
 1296|  3.30k|    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b);
 1297|       |
 1298|  3.30k|    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
 1299|  3.30k|    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b);  // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
 1300|  3.30k|    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b);  // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
 1301|  3.30k|    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b);  // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
 1302|       |
 1303|       |    // a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long
 1304|  3.30k|    temp4 = _mm_madd_epi16(src_r0, temp4);
 1305|  3.30k|    temp5 = _mm_madd_epi16(src_r1, temp5);
 1306|  3.30k|    temp6 = _mm_madd_epi16(src_r2, temp6);
 1307|  3.30k|    temp7 = _mm_madd_epi16(src_r3, temp7);
 1308|       |
 1309|  3.30k|    if(u4_qp_div_6 >= 4)
  ------------------
  |  Branch (1309:8): [True: 895, False: 2.41k]
  ------------------
 1310|    895|    {
 1311|    895|        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
 1312|    895|        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
 1313|    895|        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
 1314|    895|        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
 1315|    895|    }
 1316|  2.41k|    else
 1317|  2.41k|    {
 1318|  2.41k|        temp4 = _mm_add_epi32(temp4, add_rshift);
 1319|  2.41k|        temp5 = _mm_add_epi32(temp5, add_rshift);
 1320|  2.41k|        temp6 = _mm_add_epi32(temp6, add_rshift);
 1321|  2.41k|        temp7 = _mm_add_epi32(temp7, add_rshift);
 1322|  2.41k|        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
 1323|  2.41k|        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
 1324|  2.41k|        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
 1325|  2.41k|        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
 1326|  2.41k|    }
 1327|       |
 1328|  3.30k|    resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0);
 1329|       |    /* Perform Inverse transform */
 1330|       |    /*-------------------------------------------------------------*/
 1331|       |    /* IDCT [ Horizontal transformation ]                          */
 1332|       |    /*-------------------------------------------------------------*/
 1333|       |    // Matrix transpose
 1334|       |    /*
 1335|       |     *  a0 a1 a2 a3
 1336|       |     *  b0 b1 b2 b3
 1337|       |     *  c0 c1 c2 c3
 1338|       |     *  d0 d1 d2 d3
 1339|       |     */
 1340|  3.30k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 b0 a1 b1
 1341|  3.30k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // c0 d0 c1 d1
 1342|  3.30k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // a2 b2 a3 b3
 1343|  3.30k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 d2 c3 d3
 1344|  3.30k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 b0 c0 d0
 1345|  3.30k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // a1 b1 c1 d1
 1346|  3.30k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // a2 b2 c2 d2
 1347|  3.30k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // a3 b3 c3 d3
 1348|       |    // Transform starts -- horizontal transform
 1349|       |    /*------------------------------------------------------------------*/
 1350|       |    /* z0 = w0 + w2                                             */
 1351|  3.30k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
 1352|       |    /* z1 = w0 - w2                                             */
 1353|  3.30k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
 1354|       |    /* z2 = (w1 >> 1) - w3                                      */
 1355|  3.30k|    temp2 = _mm_srai_epi32(resq_r1, 1);     //(w1>>1)
 1356|  3.30k|    temp2 = _mm_sub_epi32(temp2, resq_r3);  //(w1>>1) - w3
 1357|       |    /* z3 = w1 + (w3 >> 1)                                      */
 1358|  3.30k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(w3>>1) + w1
 1359|  3.30k|    temp3 = _mm_add_epi32(temp3, resq_r1);
 1360|       |    /*----------------------------------------------------------*/
 1361|       |    /* x0 = z0 + z3                                             */
 1362|  3.30k|    resq_r0 = _mm_add_epi32(temp0, temp3);
 1363|       |    /* x1 = z1 + z2                                             */
 1364|  3.30k|    resq_r1 = _mm_add_epi32(temp1, temp2);
 1365|       |    /* x2 = z1 - z2                                             */
 1366|  3.30k|    resq_r2 = _mm_sub_epi32(temp1, temp2);
 1367|       |    /* x3 = z0 - z3                                             */
 1368|  3.30k|    resq_r3 = _mm_sub_epi32(temp0, temp3);
 1369|       |    // Matrix transpose
 1370|       |    /*
 1371|       |     *  a0 b0 c0 d0
 1372|       |     *  a1 b1 c1 d1
 1373|       |     *  a2 b2 c2 d2
 1374|       |     *  a3 b3 c3 d3
 1375|       |     */
 1376|  3.30k|    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 a1 b0 b1
 1377|  3.30k|    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // a2 a3 b2 b3
 1378|  3.30k|    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // c0 c1 d0 d1
 1379|  3.30k|    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 c3 d2 d3
 1380|  3.30k|    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 a1 a2 a3
 1381|  3.30k|    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // b0 b1 b2 b3
 1382|  3.30k|    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // c0 c1 c2 c3
 1383|  3.30k|    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // d0 d1 d2 d3
 1384|       |    // Transform ends -- horizontal transform
 1385|       |
 1386|       |    /*--------------------------------------------------------------*/
 1387|       |    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
 1388|       |    /* Add the prediction and store it back to same buffer          */
 1389|       |    /*--------------------------------------------------------------*/
 1390|       |    /* z0j = y0j + y2j                                                        */
 1391|  3.30k|    temp0 = _mm_add_epi32(resq_r0, resq_r2);
 1392|       |    /* z1j = y0j - y2j                                                        */
 1393|  3.30k|    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
 1394|       |    /* z2j = (y1j>>1) - y3j */
 1395|  3.30k|    temp2 = _mm_srai_epi32(resq_r1, 1);  //(y1j>>1)
 1396|  3.30k|    temp2 = _mm_sub_epi32(temp2, resq_r3);
 1397|       |    /* z3j = y1j + (y3j>>1) */
 1398|  3.30k|    temp3 = _mm_srai_epi32(resq_r3, 1);  //(y3j>>1)
 1399|  3.30k|    temp3 = _mm_add_epi32(temp3, resq_r1);
 1400|       |
 1401|       |    /* x0j = z0j + z3j                                                        */
 1402|  3.30k|    temp4 = _mm_add_epi32(temp0, temp3);
 1403|  3.30k|    temp4 = _mm_add_epi32(temp4, value_32);
 1404|  3.30k|    temp4 = _mm_srai_epi32(temp4, 6);
 1405|  3.30k|    temp4 = _mm_min_epi32(dupmax_4x32b, temp4);
 1406|  3.30k|    temp4 = _mm_max_epi32(dupmin_4x32b, temp4);
 1407|       |
 1408|       |    /* x1j = z1j + z2j                                                        */
 1409|  3.30k|    temp5 = _mm_add_epi32(temp1, temp2);
 1410|  3.30k|    temp5 = _mm_add_epi32(temp5, value_32);
 1411|  3.30k|    temp5 = _mm_srai_epi32(temp5, 6);
 1412|  3.30k|    temp5 = _mm_min_epi32(dupmax_4x32b, temp5);
 1413|  3.30k|    temp5 = _mm_max_epi32(dupmin_4x32b, temp5);
 1414|       |
 1415|       |    /* x2j = z1j - z2j                                                        */
 1416|  3.30k|    temp6 = _mm_sub_epi32(temp1, temp2);
 1417|  3.30k|    temp6 = _mm_add_epi32(temp6, value_32);
 1418|  3.30k|    temp6 = _mm_srai_epi32(temp6, 6);
 1419|  3.30k|    temp6 = _mm_min_epi32(dupmax_4x32b, temp6);
 1420|  3.30k|    temp6 = _mm_max_epi32(dupmin_4x32b, temp6);
 1421|       |
 1422|       |    /* x3j = z0j - z3j                                                        */
 1423|  3.30k|    temp7 = _mm_sub_epi32(temp0, temp3);
 1424|  3.30k|    temp7 = _mm_add_epi32(temp7, value_32);
 1425|  3.30k|    temp7 = _mm_srai_epi32(temp7, 6);
 1426|  3.30k|    temp7 = _mm_min_epi32(dupmax_4x32b, temp7);
 1427|  3.30k|    temp7 = _mm_max_epi32(dupmin_4x32b, temp7);
 1428|       |
 1429|       |    // 32-bit to 16-bit conversion
 1430|  3.30k|    temp0 = _mm_packs_epi32(temp4, temp5);
 1431|  3.30k|    temp1 = _mm_packs_epi32(temp6, temp7);
 1432|       |
 1433|  3.30k|    resq_r0 = temp0;
 1434|  3.30k|    resq_r1 = _mm_srli_si128(temp0, 8);
 1435|  3.30k|    resq_r2 = temp1;
 1436|  3.30k|    resq_r3 = _mm_srli_si128(temp1, 8);
 1437|       |
 1438|       |    // a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
 1439|  3.30k|    src_r0 = _mm_loadu_si128((__m128i *) (pi2_out));
 1440|       |    // a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
 1441|  3.30k|    src_r1 = _mm_loadu_si128((__m128i *) (pi2_out + (1 * out_strd)));
 1442|       |
 1443|  3.30k|    src_r2 = _mm_loadu_si128((__m128i *) (pi2_out + (2 * out_strd)));
 1444|  3.30k|    src_r3 = _mm_loadu_si128((__m128i *) (pi2_out + (3 * out_strd)));
 1445|       |
 1446|  3.30k|    resq_r0 = _mm_and_si128(temp4, chroma_mask_even);  // macro 0 macro 0 ..
 1447|  3.30k|    resq_r1 = _mm_and_si128(temp5, chroma_mask_even);
 1448|  3.30k|    resq_r2 = _mm_and_si128(temp6, chroma_mask_even);
 1449|  3.30k|    resq_r3 = _mm_and_si128(temp7, chroma_mask_even);
 1450|       |
 1451|  3.30k|    src_r0 = _mm_and_si128(src_r0, chroma_mask_odd);  // 0 src1 0 src2 0 ...
 1452|  3.30k|    src_r1 = _mm_and_si128(src_r1, chroma_mask_odd);
 1453|  3.30k|    src_r2 = _mm_and_si128(src_r2, chroma_mask_odd);
 1454|  3.30k|    src_r3 = _mm_and_si128(src_r3, chroma_mask_odd);
 1455|       |
 1456|  3.30k|    src_r0 = _mm_add_epi16(src_r0, resq_r0);  // macro  src1 macro src2 macro ...
 1457|  3.30k|    src_r1 = _mm_add_epi16(src_r1, resq_r1);
 1458|  3.30k|    src_r2 = _mm_add_epi16(src_r2, resq_r2);
 1459|  3.30k|    src_r3 = _mm_add_epi16(src_r3, resq_r3);
 1460|       |
 1461|  3.30k|    _mm_storeu_si128((__m128i *) (&pi2_out[0]), src_r0);
 1462|  3.30k|    _mm_storeu_si128((__m128i *) (&pi2_out[out_strd]), src_r1);
 1463|  3.30k|    _mm_storeu_si128((__m128i *) (&pi2_out[2 * out_strd]), src_r2);
 1464|  3.30k|    _mm_storeu_si128((__m128i *) (&pi2_out[3 * out_strd]), src_r3);
 1465|  3.30k|}

isvcd_pred_residual_recon_4x4_sse42:
   83|  74.0k|{
   84|  74.0k|    __m128i pred_16x8b_0, pred_8x16b_0, rsd_8x16b_0, out_8x16b_0, out_16x8b_0;
   85|  74.0k|    __m128i pred_16x8b_1, pred_8x16b_1, rsd_8x16b_1, out_8x16b_1, out_16x8b_1;
   86|  74.0k|    __m128i pred_16x8b_2, pred_8x16b_2, rsd_8x16b_2, out_8x16b_2, out_16x8b_2;
   87|  74.0k|    __m128i pred_16x8b_3, pred_8x16b_3, rsd_8x16b_3, out_8x16b_3, out_16x8b_3;
   88|  74.0k|    __m128i rsd_8x16b_01, rsd_8x16b_23;
   89|       |
   90|  74.0k|    __m128i zero_8x16b = _mm_setzero_si128();
   91|  74.0k|    WORD32 i4_nnz, row_01, row_23;
   92|       |
   93|  74.0k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
   94|  74.0k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
   95|  74.0k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + (pred_strd << 1)));
   96|  74.0k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + (pred_strd << 1) + pred_strd));
   97|       |
   98|  74.0k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
   99|  74.0k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
  100|  74.0k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
  101|  74.0k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
  102|       |
  103|  74.0k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  104|  74.0k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
  105|  74.0k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + (rsd_strd << 1)));
  106|  74.0k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + (rsd_strd << 1) + rsd_strd));
  107|       |
  108|  74.0k|    rsd_8x16b_01 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
  109|  74.0k|    rsd_8x16b_23 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
  110|       |
  111|  74.0k|    row_01 = _mm_test_all_ones(
  112|  74.0k|        _mm_cmpeq_epi16(rsd_8x16b_01, zero_8x16b));  // return 1 if all zeros, else 0
  113|  74.0k|    row_23 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23, zero_8x16b));
  114|       |
  115|  74.0k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
  116|  74.0k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
  117|  74.0k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
  118|  74.0k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
  119|       |
  120|  74.0k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
  121|  74.0k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
  122|  74.0k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
  123|  74.0k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
  124|       |
  125|  74.0k|    *((WORD32 *) (pu1_out)) = _mm_cvtsi128_si32(out_16x8b_0);
  126|  74.0k|    *((WORD32 *) (pu1_out + out_strd)) = _mm_cvtsi128_si32(out_16x8b_1);
  127|  74.0k|    *((WORD32 *) (pu1_out + (out_strd << 1))) = _mm_cvtsi128_si32(out_16x8b_2);
  128|  74.0k|    *((WORD32 *) (pu1_out + (out_strd * 3))) = _mm_cvtsi128_si32(out_16x8b_3);
  129|  74.0k|    i4_nnz = !(row_01 && row_23);
  ------------------
  |  Branch (129:16): [True: 73.1k, False: 898]
  |  Branch (129:26): [True: 72.9k, False: 234]
  ------------------
  130|       |
  131|  74.0k|    return i4_nnz;
  132|  74.0k|}
isvcd_pred_residual_recon_8x8_sse42:
  158|  6.25k|{
  159|  6.25k|    __m128i pred_16x8b_0, pred_8x16b_0, rsd_8x16b_0, out_8x16b_0, out_16x8b_0;
  160|  6.25k|    __m128i pred_16x8b_1, pred_8x16b_1, rsd_8x16b_1, out_8x16b_1, out_16x8b_1;
  161|  6.25k|    __m128i pred_16x8b_2, pred_8x16b_2, rsd_8x16b_2, out_8x16b_2, out_16x8b_2;
  162|  6.25k|    __m128i pred_16x8b_3, pred_8x16b_3, rsd_8x16b_3, out_8x16b_3, out_16x8b_3;
  163|  6.25k|    __m128i pred_16x8b_4, pred_8x16b_4, rsd_8x16b_4, out_8x16b_4, out_16x8b_4;
  164|  6.25k|    __m128i pred_16x8b_5, pred_8x16b_5, rsd_8x16b_5, out_8x16b_5, out_16x8b_5;
  165|  6.25k|    __m128i pred_16x8b_6, pred_8x16b_6, rsd_8x16b_6, out_8x16b_6, out_16x8b_6;
  166|  6.25k|    __m128i pred_16x8b_7, pred_8x16b_7, rsd_8x16b_7, out_8x16b_7, out_16x8b_7;
  167|  6.25k|    __m128i rsd_8x16b_01_b0, rsd_8x16b_23_b0, rsd_8x16b_45_b2, rsd_8x16b_67_b2;
  168|  6.25k|    __m128i rsd_8x16b_01_b1, rsd_8x16b_23_b1, rsd_8x16b_45_b3, rsd_8x16b_67_b3;
  169|       |
  170|  6.25k|    WORD32 row_01_b0, row_23_b0, row_45_b2, row_67_b2;
  171|  6.25k|    WORD32 row_01_b1, row_23_b1, row_45_b3, row_67_b3;
  172|  6.25k|    WORD32 i4_nnz, i4_nnz_b0, i4_nnz_b1, i4_nnz_b2, i4_nnz_b3;
  173|       |
  174|  6.25k|    __m128i zero_8x16b = _mm_setzero_si128();
  175|       |
  176|  6.25k|    WORD32 pred_strd2 = (pred_strd << 1);
  177|  6.25k|    WORD32 pred_strd4 = (pred_strd << 2);
  178|  6.25k|    WORD32 rsd_strd2 = (rsd_strd << 1);
  179|  6.25k|    WORD32 rsd_strd4 = (rsd_strd << 2);
  180|  6.25k|    WORD32 out_strd2 = (out_strd << 1);
  181|  6.25k|    WORD32 out_strd4 = (out_strd << 2);
  182|       |
  183|  6.25k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
  184|  6.25k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
  185|  6.25k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2));
  186|  6.25k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2 + pred_strd));
  187|  6.25k|    pred_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4));
  188|  6.25k|    pred_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd));
  189|  6.25k|    pred_16x8b_6 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2));
  190|  6.25k|    pred_16x8b_7 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2 + pred_strd));
  191|       |
  192|  6.25k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
  193|  6.25k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
  194|  6.25k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
  195|  6.25k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
  196|  6.25k|    pred_8x16b_4 = _mm_cvtepu8_epi16(pred_16x8b_4);
  197|  6.25k|    pred_8x16b_5 = _mm_cvtepu8_epi16(pred_16x8b_5);
  198|  6.25k|    pred_8x16b_6 = _mm_cvtepu8_epi16(pred_16x8b_6);
  199|  6.25k|    pred_8x16b_7 = _mm_cvtepu8_epi16(pred_16x8b_7);
  200|       |
  201|  6.25k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  202|  6.25k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
  203|  6.25k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2));
  204|  6.25k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2 + rsd_strd));
  205|  6.25k|    rsd_8x16b_4 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4));
  206|  6.25k|    rsd_8x16b_5 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd));
  207|  6.25k|    rsd_8x16b_6 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2));
  208|  6.25k|    rsd_8x16b_7 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2 + rsd_strd));
  209|       |
  210|  6.25k|    rsd_8x16b_01_b0 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
  211|  6.25k|    rsd_8x16b_23_b0 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
  212|  6.25k|    rsd_8x16b_01_b1 = _mm_unpackhi_epi64(rsd_8x16b_0, rsd_8x16b_1);
  213|  6.25k|    rsd_8x16b_23_b1 = _mm_unpackhi_epi64(rsd_8x16b_2, rsd_8x16b_3);
  214|       |
  215|  6.25k|    rsd_8x16b_45_b2 = _mm_unpacklo_epi64(rsd_8x16b_4, rsd_8x16b_5);
  216|  6.25k|    rsd_8x16b_67_b2 = _mm_unpacklo_epi64(rsd_8x16b_6, rsd_8x16b_7);
  217|  6.25k|    rsd_8x16b_45_b3 = _mm_unpackhi_epi64(rsd_8x16b_4, rsd_8x16b_5);
  218|  6.25k|    rsd_8x16b_67_b3 = _mm_unpackhi_epi64(rsd_8x16b_6, rsd_8x16b_7);
  219|       |
  220|  6.25k|    row_01_b0 = _mm_test_all_ones(
  221|  6.25k|        _mm_cmpeq_epi16(rsd_8x16b_01_b0, zero_8x16b));  // return 1 if all zeros, else 0
  222|  6.25k|    row_23_b0 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b0, zero_8x16b));
  223|  6.25k|    row_01_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_01_b1, zero_8x16b));
  224|  6.25k|    row_23_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b1, zero_8x16b));
  225|  6.25k|    row_45_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b2, zero_8x16b));
  226|  6.25k|    row_67_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b2, zero_8x16b));
  227|  6.25k|    row_45_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b3, zero_8x16b));
  228|  6.25k|    row_67_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b3, zero_8x16b));
  229|       |
  230|  6.25k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
  231|  6.25k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
  232|  6.25k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
  233|  6.25k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
  234|  6.25k|    out_8x16b_4 = _mm_add_epi16(pred_8x16b_4, rsd_8x16b_4);
  235|  6.25k|    out_8x16b_5 = _mm_add_epi16(pred_8x16b_5, rsd_8x16b_5);
  236|  6.25k|    out_8x16b_6 = _mm_add_epi16(pred_8x16b_6, rsd_8x16b_6);
  237|  6.25k|    out_8x16b_7 = _mm_add_epi16(pred_8x16b_7, rsd_8x16b_7);
  238|       |
  239|  6.25k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
  240|  6.25k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
  241|  6.25k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
  242|  6.25k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
  243|  6.25k|    out_16x8b_4 = _mm_packus_epi16(out_8x16b_4, zero_8x16b);
  244|  6.25k|    out_16x8b_5 = _mm_packus_epi16(out_8x16b_5, zero_8x16b);
  245|  6.25k|    out_16x8b_6 = _mm_packus_epi16(out_8x16b_6, zero_8x16b);
  246|  6.25k|    out_16x8b_7 = _mm_packus_epi16(out_8x16b_7, zero_8x16b);
  247|       |
  248|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
  249|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
  250|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2), out_16x8b_2);
  251|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2 + out_strd), out_16x8b_3);
  252|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4), out_16x8b_4);
  253|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd), out_16x8b_5);
  254|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2), out_16x8b_6);
  255|  6.25k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2 + out_strd), out_16x8b_7);
  256|       |
  257|  6.25k|    i4_nnz_b0 = (!(row_01_b0 && row_23_b0));
  ------------------
  |  Branch (257:20): [True: 5.79k, False: 458]
  |  Branch (257:33): [True: 5.79k, False: 0]
  ------------------
  258|  6.25k|    i4_nnz_b1 = (!(row_01_b1 && row_23_b1)) << 1;
  ------------------
  |  Branch (258:20): [True: 5.79k, False: 458]
  |  Branch (258:33): [True: 5.79k, False: 0]
  ------------------
  259|  6.25k|    i4_nnz_b2 = (!(row_45_b2 && row_67_b2)) << 4;
  ------------------
  |  Branch (259:20): [True: 5.79k, False: 457]
  |  Branch (259:33): [True: 5.79k, False: 1]
  ------------------
  260|  6.25k|    i4_nnz_b3 = (!(row_45_b3 && row_67_b3)) << 5;
  ------------------
  |  Branch (260:20): [True: 5.79k, False: 457]
  |  Branch (260:33): [True: 5.79k, False: 0]
  ------------------
  261|       |
  262|  6.25k|    i4_nnz = (i4_nnz_b0 | i4_nnz_b1 | i4_nnz_b2 | i4_nnz_b3);
  263|  6.25k|    return i4_nnz;
  264|  6.25k|}
isvcd_pred_residual_recon_16x16_sse42:
  290|  53.5k|{
  291|  53.5k|    __m128i pred_16x8b_0, pred_8x16b_0, rsd_8x16b_0, out_8x16b_0, out_16x8b_0;
  292|  53.5k|    __m128i pred_16x8b_1, pred_8x16b_1, rsd_8x16b_1, out_8x16b_1, out_16x8b_1;
  293|  53.5k|    __m128i pred_16x8b_2, pred_8x16b_2, rsd_8x16b_2, out_8x16b_2, out_16x8b_2;
  294|  53.5k|    __m128i pred_16x8b_3, pred_8x16b_3, rsd_8x16b_3, out_8x16b_3, out_16x8b_3;
  295|  53.5k|    __m128i pred_16x8b_4, pred_8x16b_4, rsd_8x16b_4, out_8x16b_4, out_16x8b_4;
  296|  53.5k|    __m128i pred_16x8b_5, pred_8x16b_5, rsd_8x16b_5, out_8x16b_5, out_16x8b_5;
  297|  53.5k|    __m128i pred_16x8b_6, pred_8x16b_6, rsd_8x16b_6, out_8x16b_6, out_16x8b_6;
  298|  53.5k|    __m128i pred_16x8b_7, pred_8x16b_7, rsd_8x16b_7, out_8x16b_7, out_16x8b_7;
  299|  53.5k|    __m128i rsd_8x16b_01_b0, rsd_8x16b_23_b0, rsd_8x16b_45_b2, rsd_8x16b_67_b2;
  300|  53.5k|    __m128i rsd_8x16b_01_b1, rsd_8x16b_23_b1, rsd_8x16b_45_b3, rsd_8x16b_67_b3;
  301|       |
  302|  53.5k|    WORD32 row_01_b0, row_23_b0, row_45_b2, row_67_b2;
  303|  53.5k|    WORD32 row_01_b1, row_23_b1, row_45_b3, row_67_b3;
  304|  53.5k|    WORD32 i4_nnz, i4_nnz_b0, i4_nnz_b1, i4_nnz_b2, i4_nnz_b3;
  305|       |
  306|  53.5k|    __m128i zero_8x16b = _mm_setzero_si128();
  307|       |
  308|  53.5k|    WORD32 pred_strd2 = (pred_strd << 1);
  309|  53.5k|    WORD32 pred_strd4 = (pred_strd << 2);
  310|  53.5k|    WORD32 rsd_strd2 = (rsd_strd << 1);
  311|  53.5k|    WORD32 rsd_strd4 = (rsd_strd << 2);
  312|  53.5k|    WORD32 out_strd2 = (out_strd << 1);
  313|  53.5k|    WORD32 out_strd4 = (out_strd << 2);
  314|       |
  315|  53.5k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
  316|  53.5k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
  317|  53.5k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2));
  318|  53.5k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2 + pred_strd));
  319|  53.5k|    pred_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4));
  320|  53.5k|    pred_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd));
  321|  53.5k|    pred_16x8b_6 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2));
  322|  53.5k|    pred_16x8b_7 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2 + pred_strd));
  323|       |
  324|  53.5k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
  325|  53.5k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
  326|  53.5k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
  327|  53.5k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
  328|  53.5k|    pred_8x16b_4 = _mm_cvtepu8_epi16(pred_16x8b_4);
  329|  53.5k|    pred_8x16b_5 = _mm_cvtepu8_epi16(pred_16x8b_5);
  330|  53.5k|    pred_8x16b_6 = _mm_cvtepu8_epi16(pred_16x8b_6);
  331|  53.5k|    pred_8x16b_7 = _mm_cvtepu8_epi16(pred_16x8b_7);
  332|       |
  333|  53.5k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  334|  53.5k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
  335|  53.5k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2));
  336|  53.5k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2 + rsd_strd));
  337|  53.5k|    rsd_8x16b_4 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4));
  338|  53.5k|    rsd_8x16b_5 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd));
  339|  53.5k|    rsd_8x16b_6 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2));
  340|  53.5k|    rsd_8x16b_7 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2 + rsd_strd));
  341|       |
  342|  53.5k|    rsd_8x16b_01_b0 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
  343|  53.5k|    rsd_8x16b_23_b0 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
  344|  53.5k|    rsd_8x16b_01_b1 = _mm_unpackhi_epi64(rsd_8x16b_0, rsd_8x16b_1);
  345|  53.5k|    rsd_8x16b_23_b1 = _mm_unpackhi_epi64(rsd_8x16b_2, rsd_8x16b_3);
  346|       |
  347|  53.5k|    rsd_8x16b_45_b2 = _mm_unpacklo_epi64(rsd_8x16b_4, rsd_8x16b_5);
  348|  53.5k|    rsd_8x16b_67_b2 = _mm_unpacklo_epi64(rsd_8x16b_6, rsd_8x16b_7);
  349|  53.5k|    rsd_8x16b_45_b3 = _mm_unpackhi_epi64(rsd_8x16b_4, rsd_8x16b_5);
  350|  53.5k|    rsd_8x16b_67_b3 = _mm_unpackhi_epi64(rsd_8x16b_6, rsd_8x16b_7);
  351|       |
  352|  53.5k|    row_01_b0 = _mm_test_all_ones(
  353|  53.5k|        _mm_cmpeq_epi16(rsd_8x16b_01_b0, zero_8x16b));  // return 1 if all zeros, else 0
  354|  53.5k|    row_23_b0 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b0, zero_8x16b));
  355|  53.5k|    row_01_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_01_b1, zero_8x16b));
  356|  53.5k|    row_23_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b1, zero_8x16b));
  357|  53.5k|    row_45_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b2, zero_8x16b));
  358|  53.5k|    row_67_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b2, zero_8x16b));
  359|  53.5k|    row_45_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b3, zero_8x16b));
  360|  53.5k|    row_67_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b3, zero_8x16b));
  361|       |
  362|  53.5k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
  363|  53.5k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
  364|  53.5k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
  365|  53.5k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
  366|  53.5k|    out_8x16b_4 = _mm_add_epi16(pred_8x16b_4, rsd_8x16b_4);
  367|  53.5k|    out_8x16b_5 = _mm_add_epi16(pred_8x16b_5, rsd_8x16b_5);
  368|  53.5k|    out_8x16b_6 = _mm_add_epi16(pred_8x16b_6, rsd_8x16b_6);
  369|  53.5k|    out_8x16b_7 = _mm_add_epi16(pred_8x16b_7, rsd_8x16b_7);
  370|       |
  371|  53.5k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
  372|  53.5k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
  373|  53.5k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
  374|  53.5k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
  375|  53.5k|    out_16x8b_4 = _mm_packus_epi16(out_8x16b_4, zero_8x16b);
  376|  53.5k|    out_16x8b_5 = _mm_packus_epi16(out_8x16b_5, zero_8x16b);
  377|  53.5k|    out_16x8b_6 = _mm_packus_epi16(out_8x16b_6, zero_8x16b);
  378|  53.5k|    out_16x8b_7 = _mm_packus_epi16(out_8x16b_7, zero_8x16b);
  379|       |
  380|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
  381|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
  382|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2), out_16x8b_2);
  383|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2 + out_strd), out_16x8b_3);
  384|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4), out_16x8b_4);
  385|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd), out_16x8b_5);
  386|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2), out_16x8b_6);
  387|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2 + out_strd), out_16x8b_7);
  388|       |
  389|  53.5k|    i4_nnz_b0 = (!(row_01_b0 && row_23_b0));
  ------------------
  |  Branch (389:20): [True: 52.1k, False: 1.42k]
  |  Branch (389:33): [True: 51.5k, False: 566]
  ------------------
  390|  53.5k|    i4_nnz_b1 = (!(row_01_b1 && row_23_b1)) << 1;
  ------------------
  |  Branch (390:20): [True: 52.2k, False: 1.25k]
  |  Branch (390:33): [True: 51.5k, False: 688]
  ------------------
  391|  53.5k|    i4_nnz_b2 = (!(row_45_b2 && row_67_b2)) << 4;
  ------------------
  |  Branch (391:20): [True: 51.6k, False: 1.87k]
  |  Branch (391:33): [True: 51.5k, False: 55]
  ------------------
  392|  53.5k|    i4_nnz_b3 = (!(row_45_b3 && row_67_b3)) << 5;
  ------------------
  |  Branch (392:20): [True: 51.8k, False: 1.71k]
  |  Branch (392:33): [True: 51.6k, False: 188]
  ------------------
  393|       |
  394|  53.5k|    i4_nnz = (i4_nnz_b0 | i4_nnz_b1 | i4_nnz_b2 | i4_nnz_b3);
  395|       |
  396|  53.5k|    pu1_pred += 8;
  397|  53.5k|    pi2_rsd += 8;
  398|  53.5k|    pu1_out += 8;
  399|       |
  400|  53.5k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
  401|  53.5k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
  402|  53.5k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2));
  403|  53.5k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2 + pred_strd));
  404|  53.5k|    pred_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4));
  405|  53.5k|    pred_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd));
  406|  53.5k|    pred_16x8b_6 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2));
  407|  53.5k|    pred_16x8b_7 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2 + pred_strd));
  408|       |
  409|  53.5k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
  410|  53.5k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
  411|  53.5k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
  412|  53.5k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
  413|  53.5k|    pred_8x16b_4 = _mm_cvtepu8_epi16(pred_16x8b_4);
  414|  53.5k|    pred_8x16b_5 = _mm_cvtepu8_epi16(pred_16x8b_5);
  415|  53.5k|    pred_8x16b_6 = _mm_cvtepu8_epi16(pred_16x8b_6);
  416|  53.5k|    pred_8x16b_7 = _mm_cvtepu8_epi16(pred_16x8b_7);
  417|       |
  418|  53.5k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  419|  53.5k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
  420|  53.5k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2));
  421|  53.5k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2 + rsd_strd));
  422|  53.5k|    rsd_8x16b_4 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4));
  423|  53.5k|    rsd_8x16b_5 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd));
  424|  53.5k|    rsd_8x16b_6 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2));
  425|  53.5k|    rsd_8x16b_7 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2 + rsd_strd));
  426|       |
  427|  53.5k|    rsd_8x16b_01_b0 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
  428|  53.5k|    rsd_8x16b_23_b0 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
  429|  53.5k|    rsd_8x16b_01_b1 = _mm_unpackhi_epi64(rsd_8x16b_0, rsd_8x16b_1);
  430|  53.5k|    rsd_8x16b_23_b1 = _mm_unpackhi_epi64(rsd_8x16b_2, rsd_8x16b_3);
  431|       |
  432|  53.5k|    rsd_8x16b_45_b2 = _mm_unpacklo_epi64(rsd_8x16b_4, rsd_8x16b_5);
  433|  53.5k|    rsd_8x16b_67_b2 = _mm_unpacklo_epi64(rsd_8x16b_6, rsd_8x16b_7);
  434|  53.5k|    rsd_8x16b_45_b3 = _mm_unpackhi_epi64(rsd_8x16b_4, rsd_8x16b_5);
  435|  53.5k|    rsd_8x16b_67_b3 = _mm_unpackhi_epi64(rsd_8x16b_6, rsd_8x16b_7);
  436|       |
  437|  53.5k|    row_01_b0 = _mm_test_all_ones(
  438|  53.5k|        _mm_cmpeq_epi16(rsd_8x16b_01_b0, zero_8x16b));  // return 1 if all zeros, else 0
  439|  53.5k|    row_23_b0 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b0, zero_8x16b));
  440|  53.5k|    row_01_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_01_b1, zero_8x16b));
  441|  53.5k|    row_23_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b1, zero_8x16b));
  442|  53.5k|    row_45_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b2, zero_8x16b));
  443|  53.5k|    row_67_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b2, zero_8x16b));
  444|  53.5k|    row_45_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b3, zero_8x16b));
  445|  53.5k|    row_67_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b3, zero_8x16b));
  446|       |
  447|  53.5k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
  448|  53.5k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
  449|  53.5k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
  450|  53.5k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
  451|  53.5k|    out_8x16b_4 = _mm_add_epi16(pred_8x16b_4, rsd_8x16b_4);
  452|  53.5k|    out_8x16b_5 = _mm_add_epi16(pred_8x16b_5, rsd_8x16b_5);
  453|  53.5k|    out_8x16b_6 = _mm_add_epi16(pred_8x16b_6, rsd_8x16b_6);
  454|  53.5k|    out_8x16b_7 = _mm_add_epi16(pred_8x16b_7, rsd_8x16b_7);
  455|       |
  456|  53.5k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
  457|  53.5k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
  458|  53.5k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
  459|  53.5k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
  460|  53.5k|    out_16x8b_4 = _mm_packus_epi16(out_8x16b_4, zero_8x16b);
  461|  53.5k|    out_16x8b_5 = _mm_packus_epi16(out_8x16b_5, zero_8x16b);
  462|  53.5k|    out_16x8b_6 = _mm_packus_epi16(out_8x16b_6, zero_8x16b);
  463|  53.5k|    out_16x8b_7 = _mm_packus_epi16(out_8x16b_7, zero_8x16b);
  464|       |
  465|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
  466|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
  467|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2), out_16x8b_2);
  468|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2 + out_strd), out_16x8b_3);
  469|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4), out_16x8b_4);
  470|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd), out_16x8b_5);
  471|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2), out_16x8b_6);
  472|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2 + out_strd), out_16x8b_7);
  473|       |
  474|  53.5k|    i4_nnz_b0 = (!(row_01_b0 && row_23_b0)) << 2;
  ------------------
  |  Branch (474:20): [True: 51.9k, False: 1.54k]
  |  Branch (474:33): [True: 51.4k, False: 574]
  ------------------
  475|  53.5k|    i4_nnz_b1 = (!(row_01_b1 && row_23_b1)) << 3;
  ------------------
  |  Branch (475:20): [True: 52.1k, False: 1.40k]
  |  Branch (475:33): [True: 51.4k, False: 683]
  ------------------
  476|  53.5k|    i4_nnz_b2 = (!(row_45_b2 && row_67_b2)) << 6;
  ------------------
  |  Branch (476:20): [True: 51.6k, False: 1.87k]
  |  Branch (476:33): [True: 51.5k, False: 83]
  ------------------
  477|  53.5k|    i4_nnz_b3 = (!(row_45_b3 && row_67_b3)) << 7;
  ------------------
  |  Branch (477:20): [True: 51.7k, False: 1.73k]
  |  Branch (477:33): [True: 51.6k, False: 165]
  ------------------
  478|       |
  479|  53.5k|    i4_nnz |= (i4_nnz_b0 | i4_nnz_b1 | i4_nnz_b2 | i4_nnz_b3);
  480|       |
  481|  53.5k|    pu1_pred -= 8;
  482|  53.5k|    pi2_rsd -= 8;
  483|  53.5k|    pu1_out -= 8;
  484|       |
  485|  53.5k|    pu1_pred += (pred_strd << 3);
  486|  53.5k|    pi2_rsd += (rsd_strd << 3);
  487|  53.5k|    pu1_out += (out_strd << 3);
  488|       |
  489|  53.5k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
  490|  53.5k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
  491|  53.5k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2));
  492|  53.5k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2 + pred_strd));
  493|  53.5k|    pred_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4));
  494|  53.5k|    pred_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd));
  495|  53.5k|    pred_16x8b_6 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2));
  496|  53.5k|    pred_16x8b_7 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2 + pred_strd));
  497|       |
  498|  53.5k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
  499|  53.5k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
  500|  53.5k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
  501|  53.5k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
  502|  53.5k|    pred_8x16b_4 = _mm_cvtepu8_epi16(pred_16x8b_4);
  503|  53.5k|    pred_8x16b_5 = _mm_cvtepu8_epi16(pred_16x8b_5);
  504|  53.5k|    pred_8x16b_6 = _mm_cvtepu8_epi16(pred_16x8b_6);
  505|  53.5k|    pred_8x16b_7 = _mm_cvtepu8_epi16(pred_16x8b_7);
  506|       |
  507|  53.5k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  508|  53.5k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
  509|  53.5k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2));
  510|  53.5k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2 + rsd_strd));
  511|  53.5k|    rsd_8x16b_4 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4));
  512|  53.5k|    rsd_8x16b_5 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd));
  513|  53.5k|    rsd_8x16b_6 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2));
  514|  53.5k|    rsd_8x16b_7 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2 + rsd_strd));
  515|       |
  516|  53.5k|    rsd_8x16b_01_b0 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
  517|  53.5k|    rsd_8x16b_23_b0 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
  518|  53.5k|    rsd_8x16b_01_b1 = _mm_unpackhi_epi64(rsd_8x16b_0, rsd_8x16b_1);
  519|  53.5k|    rsd_8x16b_23_b1 = _mm_unpackhi_epi64(rsd_8x16b_2, rsd_8x16b_3);
  520|       |
  521|  53.5k|    rsd_8x16b_45_b2 = _mm_unpacklo_epi64(rsd_8x16b_4, rsd_8x16b_5);
  522|  53.5k|    rsd_8x16b_67_b2 = _mm_unpacklo_epi64(rsd_8x16b_6, rsd_8x16b_7);
  523|  53.5k|    rsd_8x16b_45_b3 = _mm_unpackhi_epi64(rsd_8x16b_4, rsd_8x16b_5);
  524|  53.5k|    rsd_8x16b_67_b3 = _mm_unpackhi_epi64(rsd_8x16b_6, rsd_8x16b_7);
  525|       |
  526|  53.5k|    row_01_b0 = _mm_test_all_ones(
  527|  53.5k|        _mm_cmpeq_epi16(rsd_8x16b_01_b0, zero_8x16b));  // return 1 if all zeros, else 0
  528|  53.5k|    row_23_b0 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b0, zero_8x16b));
  529|  53.5k|    row_01_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_01_b1, zero_8x16b));
  530|  53.5k|    row_23_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b1, zero_8x16b));
  531|  53.5k|    row_45_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b2, zero_8x16b));
  532|  53.5k|    row_67_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b2, zero_8x16b));
  533|  53.5k|    row_45_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b3, zero_8x16b));
  534|  53.5k|    row_67_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b3, zero_8x16b));
  535|       |
  536|  53.5k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
  537|  53.5k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
  538|  53.5k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
  539|  53.5k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
  540|  53.5k|    out_8x16b_4 = _mm_add_epi16(pred_8x16b_4, rsd_8x16b_4);
  541|  53.5k|    out_8x16b_5 = _mm_add_epi16(pred_8x16b_5, rsd_8x16b_5);
  542|  53.5k|    out_8x16b_6 = _mm_add_epi16(pred_8x16b_6, rsd_8x16b_6);
  543|  53.5k|    out_8x16b_7 = _mm_add_epi16(pred_8x16b_7, rsd_8x16b_7);
  544|       |
  545|  53.5k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
  546|  53.5k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
  547|  53.5k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
  548|  53.5k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
  549|  53.5k|    out_16x8b_4 = _mm_packus_epi16(out_8x16b_4, zero_8x16b);
  550|  53.5k|    out_16x8b_5 = _mm_packus_epi16(out_8x16b_5, zero_8x16b);
  551|  53.5k|    out_16x8b_6 = _mm_packus_epi16(out_8x16b_6, zero_8x16b);
  552|  53.5k|    out_16x8b_7 = _mm_packus_epi16(out_8x16b_7, zero_8x16b);
  553|       |
  554|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
  555|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
  556|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2), out_16x8b_2);
  557|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2 + out_strd), out_16x8b_3);
  558|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4), out_16x8b_4);
  559|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd), out_16x8b_5);
  560|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2), out_16x8b_6);
  561|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2 + out_strd), out_16x8b_7);
  562|       |
  563|  53.5k|    i4_nnz_b0 = (!(row_01_b0 && row_23_b0)) << 8;
  ------------------
  |  Branch (563:20): [True: 52.0k, False: 1.47k]
  |  Branch (563:33): [True: 51.4k, False: 584]
  ------------------
  564|  53.5k|    i4_nnz_b1 = (!(row_01_b1 && row_23_b1)) << 9;
  ------------------
  |  Branch (564:20): [True: 52.2k, False: 1.23k]
  |  Branch (564:33): [True: 51.5k, False: 703]
  ------------------
  565|  53.5k|    i4_nnz_b2 = (!(row_45_b2 && row_67_b2)) << 12;
  ------------------
  |  Branch (565:20): [True: 51.6k, False: 1.88k]
  |  Branch (565:33): [True: 51.5k, False: 80]
  ------------------
  566|  53.5k|    i4_nnz_b3 = (!(row_45_b3 && row_67_b3)) << 13;
  ------------------
  |  Branch (566:20): [True: 51.8k, False: 1.72k]
  |  Branch (566:33): [True: 51.6k, False: 181]
  ------------------
  567|       |
  568|  53.5k|    i4_nnz |= (i4_nnz_b0 | i4_nnz_b1 | i4_nnz_b2 | i4_nnz_b3);
  569|       |
  570|  53.5k|    pu1_pred += 8;
  571|  53.5k|    pi2_rsd += 8;
  572|  53.5k|    pu1_out += 8;
  573|       |
  574|  53.5k|    pred_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_pred));
  575|  53.5k|    pred_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd));
  576|  53.5k|    pred_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2));
  577|  53.5k|    pred_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd2 + pred_strd));
  578|  53.5k|    pred_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4));
  579|  53.5k|    pred_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd));
  580|  53.5k|    pred_16x8b_6 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2));
  581|  53.5k|    pred_16x8b_7 = _mm_loadu_si128((__m128i *) (pu1_pred + pred_strd4 + pred_strd2 + pred_strd));
  582|       |
  583|  53.5k|    pred_8x16b_0 = _mm_cvtepu8_epi16(pred_16x8b_0);
  584|  53.5k|    pred_8x16b_1 = _mm_cvtepu8_epi16(pred_16x8b_1);
  585|  53.5k|    pred_8x16b_2 = _mm_cvtepu8_epi16(pred_16x8b_2);
  586|  53.5k|    pred_8x16b_3 = _mm_cvtepu8_epi16(pred_16x8b_3);
  587|  53.5k|    pred_8x16b_4 = _mm_cvtepu8_epi16(pred_16x8b_4);
  588|  53.5k|    pred_8x16b_5 = _mm_cvtepu8_epi16(pred_16x8b_5);
  589|  53.5k|    pred_8x16b_6 = _mm_cvtepu8_epi16(pred_16x8b_6);
  590|  53.5k|    pred_8x16b_7 = _mm_cvtepu8_epi16(pred_16x8b_7);
  591|       |
  592|  53.5k|    rsd_8x16b_0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  593|  53.5k|    rsd_8x16b_1 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd));
  594|  53.5k|    rsd_8x16b_2 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2));
  595|  53.5k|    rsd_8x16b_3 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd2 + rsd_strd));
  596|  53.5k|    rsd_8x16b_4 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4));
  597|  53.5k|    rsd_8x16b_5 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd));
  598|  53.5k|    rsd_8x16b_6 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2));
  599|  53.5k|    rsd_8x16b_7 = _mm_loadu_si128((__m128i *) (pi2_rsd + rsd_strd4 + rsd_strd2 + rsd_strd));
  600|       |
  601|  53.5k|    rsd_8x16b_01_b0 = _mm_unpacklo_epi64(rsd_8x16b_0, rsd_8x16b_1);
  602|  53.5k|    rsd_8x16b_23_b0 = _mm_unpacklo_epi64(rsd_8x16b_2, rsd_8x16b_3);
  603|  53.5k|    rsd_8x16b_01_b1 = _mm_unpackhi_epi64(rsd_8x16b_0, rsd_8x16b_1);
  604|  53.5k|    rsd_8x16b_23_b1 = _mm_unpackhi_epi64(rsd_8x16b_2, rsd_8x16b_3);
  605|       |
  606|  53.5k|    rsd_8x16b_45_b2 = _mm_unpacklo_epi64(rsd_8x16b_4, rsd_8x16b_5);
  607|  53.5k|    rsd_8x16b_67_b2 = _mm_unpacklo_epi64(rsd_8x16b_6, rsd_8x16b_7);
  608|  53.5k|    rsd_8x16b_45_b3 = _mm_unpackhi_epi64(rsd_8x16b_4, rsd_8x16b_5);
  609|  53.5k|    rsd_8x16b_67_b3 = _mm_unpackhi_epi64(rsd_8x16b_6, rsd_8x16b_7);
  610|       |
  611|  53.5k|    row_01_b0 = _mm_test_all_ones(
  612|  53.5k|        _mm_cmpeq_epi16(rsd_8x16b_01_b0, zero_8x16b));  // return 1 if all zeros, else 0
  613|  53.5k|    row_23_b0 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b0, zero_8x16b));
  614|  53.5k|    row_01_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_01_b1, zero_8x16b));
  615|  53.5k|    row_23_b1 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_23_b1, zero_8x16b));
  616|  53.5k|    row_45_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b2, zero_8x16b));
  617|  53.5k|    row_67_b2 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b2, zero_8x16b));
  618|  53.5k|    row_45_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_45_b3, zero_8x16b));
  619|  53.5k|    row_67_b3 = _mm_test_all_ones(_mm_cmpeq_epi16(rsd_8x16b_67_b3, zero_8x16b));
  620|       |
  621|  53.5k|    out_8x16b_0 = _mm_add_epi16(pred_8x16b_0, rsd_8x16b_0);
  622|  53.5k|    out_8x16b_1 = _mm_add_epi16(pred_8x16b_1, rsd_8x16b_1);
  623|  53.5k|    out_8x16b_2 = _mm_add_epi16(pred_8x16b_2, rsd_8x16b_2);
  624|  53.5k|    out_8x16b_3 = _mm_add_epi16(pred_8x16b_3, rsd_8x16b_3);
  625|  53.5k|    out_8x16b_4 = _mm_add_epi16(pred_8x16b_4, rsd_8x16b_4);
  626|  53.5k|    out_8x16b_5 = _mm_add_epi16(pred_8x16b_5, rsd_8x16b_5);
  627|  53.5k|    out_8x16b_6 = _mm_add_epi16(pred_8x16b_6, rsd_8x16b_6);
  628|  53.5k|    out_8x16b_7 = _mm_add_epi16(pred_8x16b_7, rsd_8x16b_7);
  629|       |
  630|  53.5k|    out_16x8b_0 = _mm_packus_epi16(out_8x16b_0, zero_8x16b);
  631|  53.5k|    out_16x8b_1 = _mm_packus_epi16(out_8x16b_1, zero_8x16b);
  632|  53.5k|    out_16x8b_2 = _mm_packus_epi16(out_8x16b_2, zero_8x16b);
  633|  53.5k|    out_16x8b_3 = _mm_packus_epi16(out_8x16b_3, zero_8x16b);
  634|  53.5k|    out_16x8b_4 = _mm_packus_epi16(out_8x16b_4, zero_8x16b);
  635|  53.5k|    out_16x8b_5 = _mm_packus_epi16(out_8x16b_5, zero_8x16b);
  636|  53.5k|    out_16x8b_6 = _mm_packus_epi16(out_8x16b_6, zero_8x16b);
  637|  53.5k|    out_16x8b_7 = _mm_packus_epi16(out_8x16b_7, zero_8x16b);
  638|       |
  639|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out), out_16x8b_0);
  640|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd), out_16x8b_1);
  641|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2), out_16x8b_2);
  642|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd2 + out_strd), out_16x8b_3);
  643|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4), out_16x8b_4);
  644|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd), out_16x8b_5);
  645|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2), out_16x8b_6);
  646|  53.5k|    _mm_storel_epi64((__m128i *) (pu1_out + out_strd4 + out_strd2 + out_strd), out_16x8b_7);
  647|       |
  648|  53.5k|    i4_nnz_b0 = (!(row_01_b0 && row_23_b0)) << 10;
  ------------------
  |  Branch (648:20): [True: 52.0k, False: 1.52k]
  |  Branch (648:33): [True: 51.4k, False: 573]
  ------------------
  649|  53.5k|    i4_nnz_b1 = (!(row_01_b1 && row_23_b1)) << 11;
  ------------------
  |  Branch (649:20): [True: 52.1k, False: 1.37k]
  |  Branch (649:33): [True: 51.4k, False: 691]
  ------------------
  650|  53.5k|    i4_nnz_b2 = (!(row_45_b2 && row_67_b2)) << 14;
  ------------------
  |  Branch (650:20): [True: 51.5k, False: 1.99k]
  |  Branch (650:33): [True: 51.5k, False: 17]
  ------------------
  651|  53.5k|    i4_nnz_b3 = (!(row_45_b3 && row_67_b3)) << 15;
  ------------------
  |  Branch (651:20): [True: 51.6k, False: 1.85k]
  |  Branch (651:33): [True: 51.5k, False: 133]
  ------------------
  652|       |
  653|  53.5k|    i4_nnz |= (i4_nnz_b0 | i4_nnz_b1 | i4_nnz_b2 | i4_nnz_b3);
  654|  53.5k|    return i4_nnz;
  655|  53.5k|}
isvcd_pred_residual_recon_chroma_4x4_sse42:
  681|  10.8k|{
  682|  10.8k|    __m128i src_r0, src_r1, src_r2, src_r3;
  683|  10.8k|    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
  684|  10.8k|    __m128i pred0, pred1, pred2, pred3;
  685|  10.8k|    __m128i rsd_r0, rsd_r1, rsd_r2, rsd_r3;
  686|  10.8k|    __m128i zero_16x8b;  // all bits reset to zero
  687|  10.8k|    __m128i chroma_mask_even;
  688|  10.8k|    __m128i chroma_mask_odd;
  689|       |
  690|  10.8k|    zero_16x8b = _mm_setzero_si128();
  691|       |
  692|  10.8k|    rsd_r0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  693|  10.8k|    rsd_r1 = _mm_loadu_si128((__m128i *) (pi2_rsd + (1 * rsd_strd)));
  694|  10.8k|    rsd_r2 = _mm_loadu_si128((__m128i *) (pi2_rsd + (2 * rsd_strd)));
  695|  10.8k|    rsd_r3 = _mm_loadu_si128((__m128i *) (pi2_rsd + (3 * rsd_strd)));
  696|       |
  697|  10.8k|    pred_r0 = _mm_loadu_si128((__m128i *) (pu1_pred));
  698|  10.8k|    pred_r1 = _mm_loadu_si128((__m128i *) (pu1_pred + (1 * pred_strd)));
  699|  10.8k|    pred_r2 = _mm_loadu_si128((__m128i *) (pu1_pred + (2 * pred_strd)));
  700|  10.8k|    pred_r3 = _mm_loadu_si128((__m128i *) (pu1_pred + (3 * pred_strd)));
  701|       |
  702|  10.8k|    src_r0 = _mm_loadu_si128((__m128i *) (pu1_out));
  703|  10.8k|    src_r1 = _mm_loadu_si128((__m128i *) (pu1_out + (1 * out_strd)));
  704|  10.8k|    src_r2 = _mm_loadu_si128((__m128i *) (pu1_out + (2 * out_strd)));
  705|  10.8k|    src_r3 = _mm_loadu_si128((__m128i *) (pu1_out + (3 * out_strd)));
  706|       |
  707|  10.8k|    pred0 = _mm_cvtepu8_epi16(pred_r0);
  708|  10.8k|    pred1 = _mm_cvtepu8_epi16(pred_r1);
  709|  10.8k|    pred2 = _mm_cvtepu8_epi16(pred_r2);
  710|  10.8k|    pred3 = _mm_cvtepu8_epi16(pred_r3);
  711|       |
  712|  10.8k|    pred0 = _mm_add_epi16(pred0, rsd_r0);
  713|  10.8k|    pred1 = _mm_add_epi16(pred1, rsd_r1);
  714|  10.8k|    pred2 = _mm_add_epi16(pred2, rsd_r2);
  715|  10.8k|    pred3 = _mm_add_epi16(pred3, rsd_r3);
  716|       |
  717|  10.8k|    pred0 = _mm_packus_epi16(pred0, zero_16x8b);
  718|  10.8k|    pred1 = _mm_packus_epi16(pred1, zero_16x8b);
  719|  10.8k|    pred2 = _mm_packus_epi16(pred2, zero_16x8b);
  720|  10.8k|    pred3 = _mm_packus_epi16(pred3, zero_16x8b);
  721|       |
  722|  10.8k|    chroma_mask_even = _mm_set_epi8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
  723|  10.8k|                                    0x00, 0xff, 0x00, 0xff, 0x00, 0xff);
  724|  10.8k|    chroma_mask_odd = _mm_set_epi8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
  725|  10.8k|                                   0x00, 0xff, 0x00, 0xff, 0x00);
  726|       |
  727|  10.8k|    src_r0 = _mm_and_si128(src_r0, chroma_mask_odd);  // 0 src1 0 src2 0 ...
  728|  10.8k|    src_r1 = _mm_and_si128(src_r1, chroma_mask_odd);
  729|  10.8k|    src_r2 = _mm_and_si128(src_r2, chroma_mask_odd);
  730|  10.8k|    src_r3 = _mm_and_si128(src_r3, chroma_mask_odd);
  731|       |
  732|  10.8k|    pred0 = _mm_and_si128(pred0, chroma_mask_even);  // val 0 val 0 ..
  733|  10.8k|    pred1 = _mm_and_si128(pred1, chroma_mask_even);
  734|  10.8k|    pred2 = _mm_and_si128(pred2, chroma_mask_even);
  735|  10.8k|    pred3 = _mm_and_si128(pred3, chroma_mask_even);
  736|       |
  737|  10.8k|    src_r0 = _mm_add_epi8(src_r0, pred0);  // macro  src1 macro src2 macro ...
  738|  10.8k|    src_r1 = _mm_add_epi8(src_r1, pred1);
  739|  10.8k|    src_r2 = _mm_add_epi8(src_r2, pred2);
  740|  10.8k|    src_r3 = _mm_add_epi8(src_r3, pred3);
  741|       |
  742|  10.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[0]), src_r0);
  743|  10.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[out_strd]), src_r1);
  744|  10.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[2 * out_strd]), src_r2);
  745|  10.8k|    _mm_storel_epi64((__m128i *) (&pu1_out[3 * out_strd]), src_r3);
  746|  10.8k|}
isvcd_pred_residual_recon_chroma_8x8_sse42:
  772|   126k|{
  773|   126k|    __m128i src_r0, src_r1, src_r2, src_r3, src_r4, src_r5, src_r6, src_r7;
  774|   126k|    __m128i pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
  775|   126k|    __m128i rsd_r0, rsd_r1, rsd_r2, rsd_r3, rsd_r4, rsd_r5, rsd_r6, rsd_r7;
  776|   126k|    __m128i zero_16x8b;  // all bits reset to zero
  777|   126k|    __m128i chroma_mask_even;
  778|   126k|    __m128i chroma_mask_odd;
  779|       |
  780|   126k|    zero_16x8b = _mm_setzero_si128();
  781|       |
  782|   126k|    rsd_r0 = _mm_loadu_si128((__m128i *) (pi2_rsd));
  783|   126k|    rsd_r1 = _mm_loadu_si128((__m128i *) (pi2_rsd + (1 * rsd_strd)));
  784|   126k|    rsd_r2 = _mm_loadu_si128((__m128i *) (pi2_rsd + (2 * rsd_strd)));
  785|   126k|    rsd_r3 = _mm_loadu_si128((__m128i *) (pi2_rsd + (3 * rsd_strd)));
  786|   126k|    rsd_r4 = _mm_loadu_si128((__m128i *) (pi2_rsd + (4 * rsd_strd)));
  787|   126k|    rsd_r5 = _mm_loadu_si128((__m128i *) (pi2_rsd + (5 * rsd_strd)));
  788|   126k|    rsd_r6 = _mm_loadu_si128((__m128i *) (pi2_rsd + (6 * rsd_strd)));
  789|   126k|    rsd_r7 = _mm_loadu_si128((__m128i *) (pi2_rsd + (7 * rsd_strd)));
  790|       |
  791|   126k|    pred0 = _mm_loadu_si128((__m128i *) (pu1_pred));
  792|   126k|    pred1 = _mm_loadu_si128((__m128i *) (pu1_pred + (1 * pred_strd)));
  793|   126k|    pred2 = _mm_loadu_si128((__m128i *) (pu1_pred + (2 * pred_strd)));
  794|   126k|    pred3 = _mm_loadu_si128((__m128i *) (pu1_pred + (3 * pred_strd)));
  795|   126k|    pred4 = _mm_loadu_si128((__m128i *) (pu1_pred + (4 * pred_strd)));
  796|   126k|    pred5 = _mm_loadu_si128((__m128i *) (pu1_pred + (5 * pred_strd)));
  797|   126k|    pred6 = _mm_loadu_si128((__m128i *) (pu1_pred + (6 * pred_strd)));
  798|   126k|    pred7 = _mm_loadu_si128((__m128i *) (pu1_pred + (7 * pred_strd)));
  799|       |
  800|   126k|    src_r0 = _mm_loadu_si128((__m128i *) (pu1_out));
  801|   126k|    src_r1 = _mm_loadu_si128((__m128i *) (pu1_out + (1 * out_strd)));
  802|   126k|    src_r2 = _mm_loadu_si128((__m128i *) (pu1_out + (2 * out_strd)));
  803|   126k|    src_r3 = _mm_loadu_si128((__m128i *) (pu1_out + (3 * out_strd)));
  804|   126k|    src_r4 = _mm_loadu_si128((__m128i *) (pu1_out + (4 * out_strd)));
  805|   126k|    src_r5 = _mm_loadu_si128((__m128i *) (pu1_out + (5 * out_strd)));
  806|   126k|    src_r6 = _mm_loadu_si128((__m128i *) (pu1_out + (6 * out_strd)));
  807|   126k|    src_r7 = _mm_loadu_si128((__m128i *) (pu1_out + (7 * out_strd)));
  808|       |
  809|   126k|    pred0 = _mm_cvtepu8_epi16(pred0);
  810|   126k|    pred1 = _mm_cvtepu8_epi16(pred1);
  811|   126k|    pred2 = _mm_cvtepu8_epi16(pred2);
  812|   126k|    pred3 = _mm_cvtepu8_epi16(pred3);
  813|   126k|    pred4 = _mm_cvtepu8_epi16(pred4);
  814|   126k|    pred5 = _mm_cvtepu8_epi16(pred5);
  815|   126k|    pred6 = _mm_cvtepu8_epi16(pred6);
  816|   126k|    pred7 = _mm_cvtepu8_epi16(pred7);
  817|       |
  818|   126k|    pred0 = _mm_add_epi16(pred0, rsd_r0);
  819|   126k|    pred1 = _mm_add_epi16(pred1, rsd_r1);
  820|   126k|    pred2 = _mm_add_epi16(pred2, rsd_r2);
  821|   126k|    pred3 = _mm_add_epi16(pred3, rsd_r3);
  822|   126k|    pred4 = _mm_add_epi16(pred4, rsd_r4);
  823|   126k|    pred5 = _mm_add_epi16(pred5, rsd_r5);
  824|   126k|    pred6 = _mm_add_epi16(pred6, rsd_r6);
  825|   126k|    pred7 = _mm_add_epi16(pred7, rsd_r7);
  826|       |
  827|   126k|    pred0 = _mm_packus_epi16(pred0, zero_16x8b);
  828|   126k|    pred1 = _mm_packus_epi16(pred1, zero_16x8b);
  829|   126k|    pred2 = _mm_packus_epi16(pred2, zero_16x8b);
  830|   126k|    pred3 = _mm_packus_epi16(pred3, zero_16x8b);
  831|   126k|    pred4 = _mm_packus_epi16(pred4, zero_16x8b);
  832|   126k|    pred5 = _mm_packus_epi16(pred5, zero_16x8b);
  833|   126k|    pred6 = _mm_packus_epi16(pred6, zero_16x8b);
  834|   126k|    pred7 = _mm_packus_epi16(pred7, zero_16x8b);
  835|       |
  836|   126k|    chroma_mask_even = _mm_set_epi8(0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,
  837|   126k|                                    0x00, 0xff, 0x00, 0xff, 0x00, 0xff);
  838|   126k|    chroma_mask_odd = _mm_set_epi8(0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,
  839|   126k|                                   0x00, 0xff, 0x00, 0xff, 0x00);
  840|       |
  841|   126k|    src_r0 = _mm_and_si128(src_r0, chroma_mask_odd);  // 0 src1 0 src2 0 ...
  842|   126k|    src_r1 = _mm_and_si128(src_r1, chroma_mask_odd);
  843|   126k|    src_r2 = _mm_and_si128(src_r2, chroma_mask_odd);
  844|   126k|    src_r3 = _mm_and_si128(src_r3, chroma_mask_odd);
  845|   126k|    src_r4 = _mm_and_si128(src_r4, chroma_mask_odd);
  846|   126k|    src_r5 = _mm_and_si128(src_r5, chroma_mask_odd);
  847|   126k|    src_r6 = _mm_and_si128(src_r6, chroma_mask_odd);
  848|   126k|    src_r7 = _mm_and_si128(src_r7, chroma_mask_odd);
  849|       |
  850|   126k|    pred0 = _mm_and_si128(pred0, chroma_mask_even);  // val 0 val 0 ..
  851|   126k|    pred1 = _mm_and_si128(pred1, chroma_mask_even);
  852|   126k|    pred2 = _mm_and_si128(pred2, chroma_mask_even);
  853|   126k|    pred3 = _mm_and_si128(pred3, chroma_mask_even);
  854|   126k|    pred4 = _mm_and_si128(pred4, chroma_mask_even);
  855|   126k|    pred5 = _mm_and_si128(pred5, chroma_mask_even);
  856|   126k|    pred6 = _mm_and_si128(pred6, chroma_mask_even);
  857|   126k|    pred7 = _mm_and_si128(pred7, chroma_mask_even);
  858|       |
  859|   126k|    src_r0 = _mm_add_epi8(src_r0, pred0);  // macro  src1 macro src2 macro ...
  860|   126k|    src_r1 = _mm_add_epi8(src_r1, pred1);
  861|   126k|    src_r2 = _mm_add_epi8(src_r2, pred2);
  862|   126k|    src_r3 = _mm_add_epi8(src_r3, pred3);
  863|   126k|    src_r4 = _mm_add_epi8(src_r4, pred4);
  864|   126k|    src_r5 = _mm_add_epi8(src_r5, pred5);
  865|   126k|    src_r6 = _mm_add_epi8(src_r6, pred6);
  866|   126k|    src_r7 = _mm_add_epi8(src_r7, pred7);
  867|       |
  868|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[0]), src_r0);
  869|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[out_strd]), src_r1);
  870|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[2 * out_strd]), src_r2);
  871|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[3 * out_strd]), src_r3);
  872|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[4 * out_strd]), src_r4);
  873|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[5 * out_strd]), src_r5);
  874|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[6 * out_strd]), src_r6);
  875|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[7 * out_strd]), src_r7);
  876|       |
  877|       |    /* load and repeat for the last 4 elements interleaved in the row */
  878|       |
  879|   126k|    rsd_r0 = _mm_loadu_si128((__m128i *) (pi2_rsd + 8));
  880|   126k|    rsd_r1 = _mm_loadu_si128((__m128i *) (pi2_rsd + (1 * rsd_strd) + 8));
  881|   126k|    rsd_r2 = _mm_loadu_si128((__m128i *) (pi2_rsd + (2 * rsd_strd) + 8));
  882|   126k|    rsd_r3 = _mm_loadu_si128((__m128i *) (pi2_rsd + (3 * rsd_strd) + 8));
  883|   126k|    rsd_r4 = _mm_loadu_si128((__m128i *) (pi2_rsd + (4 * rsd_strd) + 8));
  884|   126k|    rsd_r5 = _mm_loadu_si128((__m128i *) (pi2_rsd + (5 * rsd_strd) + 8));
  885|   126k|    rsd_r6 = _mm_loadu_si128((__m128i *) (pi2_rsd + (6 * rsd_strd) + 8));
  886|   126k|    rsd_r7 = _mm_loadu_si128((__m128i *) (pi2_rsd + (7 * rsd_strd) + 8));
  887|       |
  888|   126k|    pred0 = _mm_loadu_si128((__m128i *) (pu1_pred + 8));
  889|   126k|    pred1 = _mm_loadu_si128((__m128i *) (pu1_pred + (1 * pred_strd) + 8));
  890|   126k|    pred2 = _mm_loadu_si128((__m128i *) (pu1_pred + (2 * pred_strd) + 8));
  891|   126k|    pred3 = _mm_loadu_si128((__m128i *) (pu1_pred + (3 * pred_strd) + 8));
  892|   126k|    pred4 = _mm_loadu_si128((__m128i *) (pu1_pred + (4 * pred_strd) + 8));
  893|   126k|    pred5 = _mm_loadu_si128((__m128i *) (pu1_pred + (5 * pred_strd) + 8));
  894|   126k|    pred6 = _mm_loadu_si128((__m128i *) (pu1_pred + (6 * pred_strd) + 8));
  895|   126k|    pred7 = _mm_loadu_si128((__m128i *) (pu1_pred + (7 * pred_strd) + 8));
  896|       |
  897|   126k|    src_r0 = _mm_loadu_si128((__m128i *) (pu1_out + 8));
  898|   126k|    src_r1 = _mm_loadu_si128((__m128i *) (pu1_out + (1 * out_strd) + 8));
  899|   126k|    src_r2 = _mm_loadu_si128((__m128i *) (pu1_out + (2 * out_strd) + 8));
  900|   126k|    src_r3 = _mm_loadu_si128((__m128i *) (pu1_out + (3 * out_strd) + 8));
  901|   126k|    src_r4 = _mm_loadu_si128((__m128i *) (pu1_out + (4 * out_strd) + 8));
  902|   126k|    src_r5 = _mm_loadu_si128((__m128i *) (pu1_out + (5 * out_strd) + 8));
  903|   126k|    src_r6 = _mm_loadu_si128((__m128i *) (pu1_out + (6 * out_strd) + 8));
  904|   126k|    src_r7 = _mm_loadu_si128((__m128i *) (pu1_out + (7 * out_strd) + 8));
  905|       |
  906|   126k|    pred0 = _mm_cvtepu8_epi16(pred0);
  907|   126k|    pred1 = _mm_cvtepu8_epi16(pred1);
  908|   126k|    pred2 = _mm_cvtepu8_epi16(pred2);
  909|   126k|    pred3 = _mm_cvtepu8_epi16(pred3);
  910|   126k|    pred4 = _mm_cvtepu8_epi16(pred4);
  911|   126k|    pred5 = _mm_cvtepu8_epi16(pred5);
  912|   126k|    pred6 = _mm_cvtepu8_epi16(pred6);
  913|   126k|    pred7 = _mm_cvtepu8_epi16(pred7);
  914|       |
  915|   126k|    pred0 = _mm_add_epi16(pred0, rsd_r0);
  916|   126k|    pred1 = _mm_add_epi16(pred1, rsd_r1);
  917|   126k|    pred2 = _mm_add_epi16(pred2, rsd_r2);
  918|   126k|    pred3 = _mm_add_epi16(pred3, rsd_r3);
  919|   126k|    pred4 = _mm_add_epi16(pred4, rsd_r4);
  920|   126k|    pred5 = _mm_add_epi16(pred5, rsd_r5);
  921|   126k|    pred6 = _mm_add_epi16(pred6, rsd_r6);
  922|   126k|    pred7 = _mm_add_epi16(pred7, rsd_r7);
  923|       |
  924|   126k|    pred0 = _mm_packus_epi16(pred0, zero_16x8b);
  925|   126k|    pred1 = _mm_packus_epi16(pred1, zero_16x8b);
  926|   126k|    pred2 = _mm_packus_epi16(pred2, zero_16x8b);
  927|   126k|    pred3 = _mm_packus_epi16(pred3, zero_16x8b);
  928|   126k|    pred4 = _mm_packus_epi16(pred4, zero_16x8b);
  929|   126k|    pred5 = _mm_packus_epi16(pred5, zero_16x8b);
  930|   126k|    pred6 = _mm_packus_epi16(pred6, zero_16x8b);
  931|   126k|    pred7 = _mm_packus_epi16(pred7, zero_16x8b);
  932|       |
  933|   126k|    src_r0 = _mm_and_si128(src_r0, chroma_mask_odd);  // 0 src1 0 src2 0 ...
  934|   126k|    src_r1 = _mm_and_si128(src_r1, chroma_mask_odd);
  935|   126k|    src_r2 = _mm_and_si128(src_r2, chroma_mask_odd);
  936|   126k|    src_r3 = _mm_and_si128(src_r3, chroma_mask_odd);
  937|   126k|    src_r4 = _mm_and_si128(src_r4, chroma_mask_odd);
  938|   126k|    src_r5 = _mm_and_si128(src_r5, chroma_mask_odd);
  939|   126k|    src_r6 = _mm_and_si128(src_r6, chroma_mask_odd);
  940|   126k|    src_r7 = _mm_and_si128(src_r7, chroma_mask_odd);
  941|       |
  942|   126k|    pred0 = _mm_and_si128(pred0, chroma_mask_even);  // val 0 val 0 ..
  943|   126k|    pred1 = _mm_and_si128(pred1, chroma_mask_even);
  944|   126k|    pred2 = _mm_and_si128(pred2, chroma_mask_even);
  945|   126k|    pred3 = _mm_and_si128(pred3, chroma_mask_even);
  946|   126k|    pred4 = _mm_and_si128(pred4, chroma_mask_even);
  947|   126k|    pred5 = _mm_and_si128(pred5, chroma_mask_even);
  948|   126k|    pred6 = _mm_and_si128(pred6, chroma_mask_even);
  949|   126k|    pred7 = _mm_and_si128(pred7, chroma_mask_even);
  950|       |
  951|   126k|    src_r0 = _mm_add_epi8(src_r0, pred0);  // macro  src1 macro src2 macro ...
  952|   126k|    src_r1 = _mm_add_epi8(src_r1, pred1);
  953|   126k|    src_r2 = _mm_add_epi8(src_r2, pred2);
  954|   126k|    src_r3 = _mm_add_epi8(src_r3, pred3);
  955|   126k|    src_r4 = _mm_add_epi8(src_r4, pred4);
  956|   126k|    src_r5 = _mm_add_epi8(src_r5, pred5);
  957|   126k|    src_r6 = _mm_add_epi8(src_r6, pred6);
  958|   126k|    src_r7 = _mm_add_epi8(src_r7, pred7);
  959|       |
  960|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[0] + 8), src_r0);
  961|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[out_strd] + 8), src_r1);
  962|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[(2 * out_strd)] + 8), src_r2);
  963|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[(3 * out_strd)] + 8), src_r3);
  964|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[(4 * out_strd)] + 8), src_r4);
  965|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[(5 * out_strd)] + 8), src_r5);
  966|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[(6 * out_strd)] + 8), src_r6);
  967|   126k|    _mm_storel_epi64((__m128i *) (&pu1_out[(7 * out_strd)] + 8), src_r7);
  968|   126k|}

isvcd_residual_luma_dyadic_sse42:
   75|  21.8k|{
   76|  21.8k|    WORD16 *pi2_refarray_buffer;
   77|  21.8k|    WORD32 i4_blk_ctr;
   78|  21.8k|    residual_sampling_ctxt_t *ps_ctxt;
   79|       |
   80|  21.8k|    UNUSED(ps_ref_mb_mode);
  ------------------
  |  |   45|  21.8k|#define UNUSED(x) ((void)(x))
  ------------------
   81|  21.8k|    UNUSED(u2_mb_x);
  ------------------
  |  |   45|  21.8k|#define UNUSED(x) ((void)(x))
  ------------------
   82|  21.8k|    UNUSED(u2_mb_y);
  ------------------
  |  |   45|  21.8k|#define UNUSED(x) ((void)(x))
  ------------------
   83|       |
   84|  21.8k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
   85|  21.8k|    pi2_refarray_buffer = ps_ctxt->pi2_refarray_buffer;
   86|       |
   87|       |    /* based on transform size the counter and interpolation width and */
   88|       |    /* height are initialised as follows                               */
   89|       |
   90|  21.8k|    if((i4_ref_tx_size) && (0 != i4_ref_nnz))
  ------------------
  |  Branch (90:8): [True: 1.11k, False: 20.7k]
  |  Branch (90:28): [True: 856, False: 254]
  ------------------
   91|    856|    {
   92|    856|        WORD16 *pi2_ref_data_byte;
   93|    856|        WORD32 i4_i, i4_j;
   94|    856|        WORD16 *pi2_refarray_buffer_tmp = pi2_refarray_buffer;
   95|       |
   96|    856|        __m128i i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1;
   97|    856|        __m128i res_8x16b_r1_0, res_8x16b_r1_1;
   98|    856|        __m128i final_res_8x16b_r1_0, final_res_8x16b_r1_1;
   99|       |
  100|    856|        __m128i coeff_add_8x16b_r1;
  101|       |
  102|    856|        __m128i coeff_add_8x16b_r2;
  103|    856|        __m128i i2_coeff_8x16b_r2_0, i2_coeff_8x16b_r2_1;
  104|    856|        __m128i res_8x16b_r2_0, res_8x16b_r2_1;
  105|    856|        __m128i final_res_8x16b_r2_0, final_res_8x16b_r2_1;
  106|       |
  107|    856|        pi2_ref_data_byte = pi2_inp_data;
  108|       |
  109|       |        /* ----------- Horizontal Interpolation ---------------- */
  110|  4.28k|        for(i4_i = 0; i4_i < BLOCK_HEIGHT; i4_i += 2)
  ------------------
  |  |   65|  4.28k|#define BLOCK_HEIGHT 8
  ------------------
  |  Branch (110:23): [True: 3.42k, False: 856]
  ------------------
  111|  3.42k|        {
  112|  3.42k|            i2_coeff_8x16b_r1_0 =
  113|  3.42k|                _mm_loadu_si128((__m128i *) pi2_ref_data_byte);         // a0 a1 a2 a3 a4 a5 a6 a7
  114|  3.42k|            i2_coeff_8x16b_r2_0 = _mm_loadu_si128(
  115|  3.42k|                (__m128i *) (pi2_ref_data_byte + i4_inp_data_stride));  // b0 b1 b2 b3 b4 b5 b6 b7
  116|       |
  117|  3.42k|            i2_coeff_8x16b_r1_1 = _mm_srli_si128(i2_coeff_8x16b_r1_0, 2);  // a1 a2 a3 a4 a5 a6 a7 0
  118|  3.42k|            i2_coeff_8x16b_r2_1 = _mm_srli_si128(i2_coeff_8x16b_r2_0, 2);  // b1 b2 b3 b4 b5 b6 b7 0
  119|       |
  120|  3.42k|            coeff_add_8x16b_r1 = _mm_add_epi16(i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1);
  121|  3.42k|            coeff_add_8x16b_r2 = _mm_add_epi16(i2_coeff_8x16b_r2_0, i2_coeff_8x16b_r2_1);
  122|       |
  123|  3.42k|            i2_coeff_8x16b_r1_0 = _mm_slli_epi16(i2_coeff_8x16b_r1_0, 1);
  124|  3.42k|            i2_coeff_8x16b_r2_0 = _mm_slli_epi16(i2_coeff_8x16b_r2_0, 1);
  125|       |
  126|  3.42k|            i2_coeff_8x16b_r1_1 = _mm_slli_epi16(i2_coeff_8x16b_r1_1, 1);
  127|  3.42k|            i2_coeff_8x16b_r2_1 = _mm_slli_epi16(i2_coeff_8x16b_r2_1, 1);
  128|       |
  129|  3.42k|            res_8x16b_r1_0 = _mm_add_epi16(i2_coeff_8x16b_r1_0, coeff_add_8x16b_r1);
  130|  3.42k|            res_8x16b_r2_0 = _mm_add_epi16(i2_coeff_8x16b_r2_0, coeff_add_8x16b_r2);
  131|       |
  132|  3.42k|            res_8x16b_r1_1 = _mm_add_epi16(i2_coeff_8x16b_r1_1, coeff_add_8x16b_r1);
  133|  3.42k|            res_8x16b_r2_1 = _mm_add_epi16(i2_coeff_8x16b_r2_1, coeff_add_8x16b_r2);
  134|       |
  135|  3.42k|            final_res_8x16b_r1_0 = _mm_unpacklo_epi16(res_8x16b_r1_0, res_8x16b_r1_1);
  136|  3.42k|            final_res_8x16b_r2_0 = _mm_unpacklo_epi16(res_8x16b_r2_0, res_8x16b_r2_1);
  137|       |
  138|  3.42k|            final_res_8x16b_r1_1 = _mm_unpackhi_epi16(res_8x16b_r1_0, res_8x16b_r1_1);
  139|  3.42k|            final_res_8x16b_r2_1 = _mm_unpackhi_epi16(res_8x16b_r2_0, res_8x16b_r2_1);
  140|       |
  141|  3.42k|            _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 1), final_res_8x16b_r1_0);
  142|  3.42k|            _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 9), final_res_8x16b_r1_1);
  143|       |
  144|  3.42k|            _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 17), final_res_8x16b_r2_0);
  145|  3.42k|            _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 25), final_res_8x16b_r2_1);
  146|       |
  147|  3.42k|            pi2_refarray_buffer[0] = (pi2_ref_data_byte[0] << 2);
  148|  3.42k|            pi2_refarray_buffer[15] = (pi2_ref_data_byte[7] << 2);
  149|  3.42k|            pi2_ref_data_byte += i4_inp_data_stride;
  150|  3.42k|            pi2_refarray_buffer[16] = (pi2_ref_data_byte[0] << 2);
  151|  3.42k|            pi2_refarray_buffer[31] = (pi2_ref_data_byte[7] << 2);
  152|       |
  153|       |            /* vertical loop updates */
  154|  3.42k|            pi2_ref_data_byte = pi2_inp_data + ((i4_i + 2) * i4_inp_data_stride);
  155|  3.42k|            pi2_refarray_buffer += 32;
  156|  3.42k|        }
  157|       |
  158|       |        /* ----------- Vertical Interpolation ---------------- */
  159|    856|        pi2_refarray_buffer = pi2_refarray_buffer_tmp;
  160|       |
  161|    856|        {
  162|    856|            __m128i i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r1_2, i4_horz_samp_4x32b_r1_3,
  163|    856|                i4_horz_samp_4x32b_r1_4;
  164|    856|            __m128i i4_horz_samp_4x32b_r2_1, i4_horz_samp_4x32b_r2_2, i4_horz_samp_4x32b_r2_3,
  165|    856|                i4_horz_samp_4x32b_r2_4;
  166|    856|            __m128i i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2, i4_res_samp_4x32b_r1_3,
  167|    856|                i4_res_samp_4x32b_r1_4;
  168|    856|            __m128i i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2, i4_res_samp_4x32b_r2_3,
  169|    856|                i4_res_samp_4x32b_r2_4;
  170|    856|            __m128i horz_add_4x32b_r2_1, horz_add_4x32b_r2_2, horz_add_4x32b_r2_3,
  171|    856|                horz_add_4x32b_r2_4;
  172|       |
  173|    856|            __m128i i4_horz_samp_8x16b_r1_1, i4_horz_samp_8x16b_r2_1;
  174|    856|            __m128i i4_horz_samp_8x16b_r1_2, i4_horz_samp_8x16b_r2_2;
  175|    856|            __m128i i4_horz_samp_8x16b_r1_3, i4_horz_samp_8x16b_r2_3;
  176|    856|            __m128i i4_horz_samp_8x16b_r1_4, i4_horz_samp_8x16b_r2_4;
  177|       |
  178|    856|            __m128i twos = _mm_set1_epi32(2);
  179|    856|            __m128i eights = _mm_set1_epi32(8);
  180|       |
  181|    856|            WORD16 *pi2_out;
  182|       |
  183|    856|            pi2_out = pi2_out_res;
  184|       |
  185|    856|            i4_horz_samp_8x16b_r1_1 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer));
  186|    856|            i4_horz_samp_8x16b_r1_2 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 4));
  187|    856|            i4_horz_samp_8x16b_r1_3 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 8));
  188|    856|            i4_horz_samp_8x16b_r1_4 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 12));
  189|       |
  190|    856|            i4_horz_samp_4x32b_r1_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_1);
  191|    856|            i4_horz_samp_4x32b_r1_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_2);
  192|    856|            i4_horz_samp_4x32b_r1_3 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_3);
  193|    856|            i4_horz_samp_4x32b_r1_4 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_4);
  194|       |
  195|       |            /* populate the first inter sample */
  196|    856|            i4_res_samp_4x32b_r1_1 =
  197|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_1, twos), 2);
  198|    856|            i4_res_samp_4x32b_r1_2 =
  199|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_2, twos), 2);
  200|    856|            i4_res_samp_4x32b_r1_3 =
  201|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_3, twos), 2);
  202|    856|            i4_res_samp_4x32b_r1_4 =
  203|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_4, twos), 2);
  204|       |
  205|    856|            _mm_storeu_si128((__m128i *) pi2_out,
  206|    856|                             _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2));
  207|    856|            _mm_storeu_si128((__m128i *) (pi2_out + 8),
  208|    856|                             _mm_packs_epi32(i4_res_samp_4x32b_r1_3, i4_res_samp_4x32b_r1_4));
  209|    856|            pi2_out += i4_out_res_stride;
  210|       |
  211|  6.84k|            for(i4_j = 0; i4_j < 14; i4_j += 2)
  ------------------
  |  Branch (211:27): [True: 5.99k, False: 856]
  ------------------
  212|  5.99k|            {
  213|  5.99k|                pi2_refarray_buffer += MB_WIDTH;
  ------------------
  |  |   67|  5.99k|#define MB_WIDTH 16
  ------------------
  214|       |
  215|  5.99k|                i4_horz_samp_8x16b_r2_1 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer));
  216|  5.99k|                i4_horz_samp_8x16b_r2_2 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 4));
  217|  5.99k|                i4_horz_samp_8x16b_r2_3 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 8));
  218|  5.99k|                i4_horz_samp_8x16b_r2_4 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 12));
  219|       |
  220|  5.99k|                i4_horz_samp_4x32b_r2_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_1);
  221|  5.99k|                i4_horz_samp_4x32b_r2_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_2);
  222|  5.99k|                i4_horz_samp_4x32b_r2_3 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_3);
  223|  5.99k|                i4_horz_samp_4x32b_r2_4 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_4);
  224|       |
  225|  5.99k|                horz_add_4x32b_r2_1 =
  226|  5.99k|                    _mm_add_epi32(i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r2_1);
  227|  5.99k|                horz_add_4x32b_r2_2 =
  228|  5.99k|                    _mm_add_epi32(i4_horz_samp_4x32b_r1_2, i4_horz_samp_4x32b_r2_2);
  229|  5.99k|                horz_add_4x32b_r2_3 =
  230|  5.99k|                    _mm_add_epi32(i4_horz_samp_4x32b_r1_3, i4_horz_samp_4x32b_r2_3);
  231|  5.99k|                horz_add_4x32b_r2_4 =
  232|  5.99k|                    _mm_add_epi32(i4_horz_samp_4x32b_r1_4, i4_horz_samp_4x32b_r2_4);
  233|       |
  234|  5.99k|                i4_res_samp_4x32b_r1_1 =
  235|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_1, 1), horz_add_4x32b_r2_1);
  236|  5.99k|                i4_res_samp_4x32b_r1_2 =
  237|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_2, 1), horz_add_4x32b_r2_2);
  238|  5.99k|                i4_res_samp_4x32b_r1_3 =
  239|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_3, 1), horz_add_4x32b_r2_3);
  240|  5.99k|                i4_res_samp_4x32b_r1_4 =
  241|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_4, 1), horz_add_4x32b_r2_4);
  242|       |
  243|  5.99k|                i4_res_samp_4x32b_r2_1 =
  244|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_1, 1), horz_add_4x32b_r2_1);
  245|  5.99k|                i4_res_samp_4x32b_r2_2 =
  246|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_2, 1), horz_add_4x32b_r2_2);
  247|  5.99k|                i4_res_samp_4x32b_r2_3 =
  248|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_3, 1), horz_add_4x32b_r2_3);
  249|  5.99k|                i4_res_samp_4x32b_r2_4 =
  250|  5.99k|                    _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_4, 1), horz_add_4x32b_r2_4);
  251|       |
  252|  5.99k|                i4_res_samp_4x32b_r1_1 =
  253|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_1, eights), 4);
  254|  5.99k|                i4_res_samp_4x32b_r1_2 =
  255|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_2, eights), 4);
  256|  5.99k|                i4_res_samp_4x32b_r1_3 =
  257|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_3, eights), 4);
  258|  5.99k|                i4_res_samp_4x32b_r1_4 =
  259|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_4, eights), 4);
  260|       |
  261|  5.99k|                i4_res_samp_4x32b_r2_1 =
  262|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_1, eights), 4);
  263|  5.99k|                i4_res_samp_4x32b_r2_2 =
  264|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_2, eights), 4);
  265|  5.99k|                i4_res_samp_4x32b_r2_3 =
  266|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_3, eights), 4);
  267|  5.99k|                i4_res_samp_4x32b_r2_4 =
  268|  5.99k|                    _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_4, eights), 4);
  269|       |
  270|       |                /* populate 2 samples based on current coeffs */
  271|  5.99k|                _mm_storeu_si128((__m128i *) pi2_out,
  272|  5.99k|                                 _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2));
  273|  5.99k|                _mm_storeu_si128((__m128i *) (pi2_out + 8),
  274|  5.99k|                                 _mm_packs_epi32(i4_res_samp_4x32b_r1_3, i4_res_samp_4x32b_r1_4));
  275|  5.99k|                pi2_out += i4_out_res_stride;
  276|       |
  277|  5.99k|                _mm_storeu_si128((__m128i *) pi2_out,
  278|  5.99k|                                 _mm_packs_epi32(i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2));
  279|  5.99k|                _mm_storeu_si128((__m128i *) (pi2_out + 8),
  280|  5.99k|                                 _mm_packs_epi32(i4_res_samp_4x32b_r2_3, i4_res_samp_4x32b_r2_4));
  281|  5.99k|                pi2_out += i4_out_res_stride;
  282|       |
  283|       |                /* store the coeff 2 to coeff 1 */
  284|       |                /* (used in next iteration)     */
  285|  5.99k|                i4_horz_samp_4x32b_r1_1 = i4_horz_samp_4x32b_r2_1;
  286|  5.99k|                i4_horz_samp_4x32b_r1_2 = i4_horz_samp_4x32b_r2_2;
  287|  5.99k|                i4_horz_samp_4x32b_r1_3 = i4_horz_samp_4x32b_r2_3;
  288|  5.99k|                i4_horz_samp_4x32b_r1_4 = i4_horz_samp_4x32b_r2_4;
  289|  5.99k|            }
  290|       |
  291|    856|            i4_res_samp_4x32b_r1_1 =
  292|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_1, twos), 2);
  293|    856|            i4_res_samp_4x32b_r1_2 =
  294|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_2, twos), 2);
  295|    856|            i4_res_samp_4x32b_r1_3 =
  296|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_3, twos), 2);
  297|    856|            i4_res_samp_4x32b_r1_4 =
  298|    856|                _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_4, twos), 2);
  299|       |
  300|    856|            _mm_storeu_si128((__m128i *) pi2_out,
  301|    856|                             _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2));
  302|    856|            _mm_storeu_si128((__m128i *) (pi2_out + 8),
  303|    856|                             _mm_packs_epi32(i4_res_samp_4x32b_r1_3, i4_res_samp_4x32b_r1_4));
  304|    856|        }
  305|    856|    }
  306|  20.9k|    else
  307|  20.9k|    {
  308|       |        /* ----------------------------------------------------------------- */
  309|       |        /* LOOP over number of blocks                                        */
  310|       |        /* ----------------------------------------------------------------- */
  311|   104k|        for(i4_blk_ctr = 0; i4_blk_ctr < 4; i4_blk_ctr++)
  ------------------
  |  Branch (311:29): [True: 83.8k, False: 20.9k]
  ------------------
  312|  83.8k|        {
  313|       |            /* if reference layer is not coded then no processing */
  314|  83.8k|            if(0 != (i4_ref_nnz & 0x1))
  ------------------
  |  Branch (314:16): [True: 4.42k, False: 79.4k]
  ------------------
  315|  4.42k|            {
  316|  4.42k|                __m128i i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1;
  317|  4.42k|                __m128i i2_coeff_8x16b_r2_0, i2_coeff_8x16b_r2_1;
  318|  4.42k|                __m128i i2_coeff_8x16b_r3_0, i2_coeff_8x16b_r3_1;
  319|  4.42k|                __m128i i2_coeff_8x16b_r4_0, i2_coeff_8x16b_r4_1;
  320|       |
  321|  4.42k|                __m128i res_8x16b_r1_0, res_8x16b_r1_1;
  322|  4.42k|                __m128i res_8x16b_r2_0, res_8x16b_r2_1;
  323|  4.42k|                __m128i res_8x16b_r3_0, res_8x16b_r3_1;
  324|  4.42k|                __m128i res_8x16b_r4_0, res_8x16b_r4_1;
  325|  4.42k|                __m128i final_res_8x16b_r1_0;
  326|  4.42k|                __m128i final_res_8x16b_r2_0;
  327|  4.42k|                __m128i final_res_8x16b_r3_0;
  328|  4.42k|                __m128i final_res_8x16b_r4_0;
  329|       |
  330|  4.42k|                __m128i coeff_add_8x16b_r1;
  331|  4.42k|                __m128i coeff_add_8x16b_r2;
  332|  4.42k|                __m128i coeff_add_8x16b_r3;
  333|  4.42k|                __m128i coeff_add_8x16b_r4;
  334|       |
  335|       |                /* ----------- Horizontal Interpolation ---------------- */
  336|       |
  337|  4.42k|                i2_coeff_8x16b_r1_0 =
  338|  4.42k|                    _mm_loadu_si128((__m128i *) pi2_inp_data);         // a0 a1 a2 a3 a4 a5 a6 a7
  339|  4.42k|                i2_coeff_8x16b_r2_0 = _mm_loadu_si128(
  340|  4.42k|                    (__m128i *) (pi2_inp_data + i4_inp_data_stride));  // b0 b1 b2 b3 b4 b5 b6 b7
  341|  4.42k|                i2_coeff_8x16b_r3_0 =
  342|  4.42k|                    _mm_loadu_si128((__m128i *) (pi2_inp_data + (i4_inp_data_stride << 1)));
  343|  4.42k|                i2_coeff_8x16b_r4_0 =
  344|  4.42k|                    _mm_loadu_si128((__m128i *) (pi2_inp_data + (i4_inp_data_stride * 3)));
  345|       |
  346|  4.42k|                i2_coeff_8x16b_r1_1 = _mm_srli_si128(i2_coeff_8x16b_r1_0,
  347|  4.42k|                                                     2);  // a1 a2 a3 a4 a5 a6 a7 0
  348|  4.42k|                i2_coeff_8x16b_r2_1 = _mm_srli_si128(i2_coeff_8x16b_r2_0,
  349|  4.42k|                                                     2);  // b1 b2 b3 b4 b5 b6 b7 0
  350|  4.42k|                i2_coeff_8x16b_r3_1 = _mm_srli_si128(i2_coeff_8x16b_r3_0, 2);
  351|  4.42k|                i2_coeff_8x16b_r4_1 = _mm_srli_si128(i2_coeff_8x16b_r4_0, 2);
  352|       |
  353|  4.42k|                coeff_add_8x16b_r1 = _mm_add_epi16(i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1);
  354|  4.42k|                coeff_add_8x16b_r2 = _mm_add_epi16(i2_coeff_8x16b_r2_0, i2_coeff_8x16b_r2_1);
  355|  4.42k|                coeff_add_8x16b_r3 = _mm_add_epi16(i2_coeff_8x16b_r3_0, i2_coeff_8x16b_r3_1);
  356|  4.42k|                coeff_add_8x16b_r4 = _mm_add_epi16(i2_coeff_8x16b_r4_0, i2_coeff_8x16b_r4_1);
  357|       |
  358|  4.42k|                i2_coeff_8x16b_r1_0 = _mm_slli_epi16(i2_coeff_8x16b_r1_0, 1);
  359|  4.42k|                i2_coeff_8x16b_r2_0 = _mm_slli_epi16(i2_coeff_8x16b_r2_0, 1);
  360|  4.42k|                i2_coeff_8x16b_r3_0 = _mm_slli_epi16(i2_coeff_8x16b_r3_0, 1);
  361|  4.42k|                i2_coeff_8x16b_r4_0 = _mm_slli_epi16(i2_coeff_8x16b_r4_0, 1);
  362|       |
  363|  4.42k|                i2_coeff_8x16b_r1_1 = _mm_slli_epi16(i2_coeff_8x16b_r1_1, 1);
  364|  4.42k|                i2_coeff_8x16b_r2_1 = _mm_slli_epi16(i2_coeff_8x16b_r2_1, 1);
  365|  4.42k|                i2_coeff_8x16b_r3_1 = _mm_slli_epi16(i2_coeff_8x16b_r3_1, 1);
  366|  4.42k|                i2_coeff_8x16b_r4_1 = _mm_slli_epi16(i2_coeff_8x16b_r4_1, 1);
  367|       |
  368|  4.42k|                res_8x16b_r1_0 = _mm_add_epi16(i2_coeff_8x16b_r1_0, coeff_add_8x16b_r1);
  369|  4.42k|                res_8x16b_r2_0 = _mm_add_epi16(i2_coeff_8x16b_r2_0, coeff_add_8x16b_r2);
  370|  4.42k|                res_8x16b_r3_0 = _mm_add_epi16(i2_coeff_8x16b_r3_0, coeff_add_8x16b_r3);
  371|  4.42k|                res_8x16b_r4_0 = _mm_add_epi16(i2_coeff_8x16b_r4_0, coeff_add_8x16b_r4);
  372|       |
  373|  4.42k|                res_8x16b_r1_1 = _mm_add_epi16(i2_coeff_8x16b_r1_1, coeff_add_8x16b_r1);
  374|  4.42k|                res_8x16b_r2_1 = _mm_add_epi16(i2_coeff_8x16b_r2_1, coeff_add_8x16b_r2);
  375|  4.42k|                res_8x16b_r3_1 = _mm_add_epi16(i2_coeff_8x16b_r3_1, coeff_add_8x16b_r3);
  376|  4.42k|                res_8x16b_r4_1 = _mm_add_epi16(i2_coeff_8x16b_r4_1, coeff_add_8x16b_r4);
  377|       |
  378|  4.42k|                final_res_8x16b_r1_0 = _mm_unpacklo_epi16(res_8x16b_r1_0, res_8x16b_r1_1);
  379|  4.42k|                final_res_8x16b_r2_0 = _mm_unpacklo_epi16(res_8x16b_r2_0, res_8x16b_r2_1);
  380|  4.42k|                final_res_8x16b_r3_0 = _mm_unpacklo_epi16(res_8x16b_r3_0, res_8x16b_r3_1);
  381|  4.42k|                final_res_8x16b_r4_0 = _mm_unpacklo_epi16(res_8x16b_r4_0, res_8x16b_r4_1);
  382|       |
  383|  4.42k|                _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 1), final_res_8x16b_r1_0);
  384|  4.42k|                _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 9), final_res_8x16b_r2_0);
  385|  4.42k|                _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 17), final_res_8x16b_r3_0);
  386|  4.42k|                _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 25), final_res_8x16b_r4_0);
  387|       |
  388|  4.42k|                pi2_refarray_buffer[0] = (pi2_inp_data[0] << 2);
  389|  4.42k|                pi2_refarray_buffer[7] = (pi2_inp_data[3] << 2);
  390|  4.42k|                pi2_refarray_buffer[8] = (pi2_inp_data[i4_inp_data_stride] << 2);
  391|  4.42k|                pi2_refarray_buffer[15] = (pi2_inp_data[i4_inp_data_stride + 3] << 2);
  392|  4.42k|                pi2_refarray_buffer[16] = (pi2_inp_data[(i4_inp_data_stride << 1)] << 2);
  393|  4.42k|                pi2_refarray_buffer[23] = (pi2_inp_data[(i4_inp_data_stride << 1) + 3] << 2);
  394|  4.42k|                pi2_refarray_buffer[24] = (pi2_inp_data[(i4_inp_data_stride * 3)] << 2);
  395|  4.42k|                pi2_refarray_buffer[31] = (pi2_inp_data[(i4_inp_data_stride * 3) + 3] << 2);
  396|       |
  397|       |                /* ----------- Vertical Interpolation ---------------- */
  398|  4.42k|                {
  399|  4.42k|                    __m128i i4_horz_samp_8x16b_r0_1, i4_horz_samp_8x16b_r0_2;
  400|  4.42k|                    __m128i i4_horz_samp_8x16b_r1_1, i4_horz_samp_8x16b_r1_2;
  401|  4.42k|                    __m128i i4_horz_samp_8x16b_r2_1, i4_horz_samp_8x16b_r2_2;
  402|  4.42k|                    __m128i i4_horz_samp_8x16b_r3_1, i4_horz_samp_8x16b_r3_2;
  403|       |
  404|  4.42k|                    __m128i i4_horz_samp_4x32b_r0_1, i4_horz_samp_4x32b_r0_2;
  405|  4.42k|                    __m128i i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r1_2;
  406|  4.42k|                    __m128i i4_horz_samp_4x32b_r2_1, i4_horz_samp_4x32b_r2_2;
  407|  4.42k|                    __m128i i4_horz_samp_4x32b_r3_1, i4_horz_samp_4x32b_r3_2;
  408|       |
  409|  4.42k|                    __m128i i4_res_samp_4x32b_r0_1, i4_res_samp_4x32b_r0_2;
  410|  4.42k|                    __m128i i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2;
  411|  4.42k|                    __m128i i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2;
  412|  4.42k|                    __m128i i4_res_samp_4x32b_r3_1, i4_res_samp_4x32b_r3_2;
  413|  4.42k|                    __m128i i4_res_samp_4x32b_r4_1, i4_res_samp_4x32b_r4_2;
  414|  4.42k|                    __m128i i4_res_samp_4x32b_r5_1, i4_res_samp_4x32b_r5_2;
  415|  4.42k|                    __m128i i4_res_samp_4x32b_r6_1, i4_res_samp_4x32b_r6_2;
  416|  4.42k|                    __m128i i4_res_samp_4x32b_r7_1, i4_res_samp_4x32b_r7_2;
  417|       |
  418|  4.42k|                    __m128i horz_add_4x32b_r1_1, horz_add_4x32b_r1_2;
  419|  4.42k|                    __m128i horz_add_4x32b_r2_1, horz_add_4x32b_r2_2;
  420|  4.42k|                    __m128i horz_add_4x32b_r3_1, horz_add_4x32b_r3_2;
  421|       |
  422|  4.42k|                    __m128i twos = _mm_set1_epi32(2);
  423|  4.42k|                    __m128i eights = _mm_set1_epi32(8);
  424|       |
  425|  4.42k|                    i4_horz_samp_8x16b_r0_1 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer));
  426|  4.42k|                    i4_horz_samp_8x16b_r0_2 =
  427|  4.42k|                        _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 4));
  428|  4.42k|                    i4_horz_samp_8x16b_r1_1 =
  429|  4.42k|                        _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + BLOCK_WIDTH));
  ------------------
  |  |   64|  4.42k|#define BLOCK_WIDTH 8
  ------------------
  430|  4.42k|                    i4_horz_samp_8x16b_r1_2 =
  431|  4.42k|                        _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + BLOCK_WIDTH + 4));
  ------------------
  |  |   64|  4.42k|#define BLOCK_WIDTH 8
  ------------------
  432|  4.42k|                    i4_horz_samp_8x16b_r2_1 =
  433|  4.42k|                        _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + (BLOCK_WIDTH << 1)));
  ------------------
  |  |   64|  4.42k|#define BLOCK_WIDTH 8
  ------------------
  434|  4.42k|                    i4_horz_samp_8x16b_r2_2 =
  435|  4.42k|                        _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + (BLOCK_WIDTH << 1) + 4));
  ------------------
  |  |   64|  4.42k|#define BLOCK_WIDTH 8
  ------------------
  436|  4.42k|                    i4_horz_samp_8x16b_r3_1 =
  437|  4.42k|                        _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + (BLOCK_WIDTH * 3)));
  ------------------
  |  |   64|  4.42k|#define BLOCK_WIDTH 8
  ------------------
  438|  4.42k|                    i4_horz_samp_8x16b_r3_2 =
  439|  4.42k|                        _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + (BLOCK_WIDTH * 3) + 4));
  ------------------
  |  |   64|  4.42k|#define BLOCK_WIDTH 8
  ------------------
  440|       |
  441|  4.42k|                    i4_horz_samp_4x32b_r0_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r0_1);
  442|  4.42k|                    i4_horz_samp_4x32b_r0_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r0_2);
  443|  4.42k|                    i4_horz_samp_4x32b_r1_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_1);
  444|  4.42k|                    i4_horz_samp_4x32b_r1_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_2);
  445|  4.42k|                    i4_horz_samp_4x32b_r2_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_1);
  446|  4.42k|                    i4_horz_samp_4x32b_r2_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_2);
  447|  4.42k|                    i4_horz_samp_4x32b_r3_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r3_1);
  448|  4.42k|                    i4_horz_samp_4x32b_r3_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r3_2);
  449|       |
  450|  4.42k|                    horz_add_4x32b_r1_1 =
  451|  4.42k|                        _mm_add_epi32(i4_horz_samp_4x32b_r0_1, i4_horz_samp_4x32b_r1_1);
  452|  4.42k|                    horz_add_4x32b_r2_1 =
  453|  4.42k|                        _mm_add_epi32(i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r2_1);
  454|  4.42k|                    horz_add_4x32b_r3_1 =
  455|  4.42k|                        _mm_add_epi32(i4_horz_samp_4x32b_r2_1, i4_horz_samp_4x32b_r3_1);
  456|       |
  457|  4.42k|                    horz_add_4x32b_r1_2 =
  458|  4.42k|                        _mm_add_epi32(i4_horz_samp_4x32b_r0_2, i4_horz_samp_4x32b_r1_2);
  459|  4.42k|                    horz_add_4x32b_r2_2 =
  460|  4.42k|                        _mm_add_epi32(i4_horz_samp_4x32b_r1_2, i4_horz_samp_4x32b_r2_2);
  461|  4.42k|                    horz_add_4x32b_r3_2 =
  462|  4.42k|                        _mm_add_epi32(i4_horz_samp_4x32b_r2_2, i4_horz_samp_4x32b_r3_2);
  463|       |
  464|  4.42k|                    i4_res_samp_4x32b_r1_1 = _mm_add_epi32(
  465|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r0_1, 1), horz_add_4x32b_r1_1);
  466|  4.42k|                    i4_res_samp_4x32b_r2_1 = _mm_add_epi32(
  467|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r1_1, 1), horz_add_4x32b_r1_1);
  468|  4.42k|                    i4_res_samp_4x32b_r3_1 = _mm_add_epi32(
  469|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r1_1, 1), horz_add_4x32b_r2_1);
  470|  4.42k|                    i4_res_samp_4x32b_r4_1 = _mm_add_epi32(
  471|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r2_1, 1), horz_add_4x32b_r2_1);
  472|  4.42k|                    i4_res_samp_4x32b_r5_1 = _mm_add_epi32(
  473|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r2_1, 1), horz_add_4x32b_r3_1);
  474|  4.42k|                    i4_res_samp_4x32b_r6_1 = _mm_add_epi32(
  475|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r3_1, 1), horz_add_4x32b_r3_1);
  476|       |
  477|  4.42k|                    i4_res_samp_4x32b_r1_2 = _mm_add_epi32(
  478|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r0_2, 1), horz_add_4x32b_r1_2);
  479|  4.42k|                    i4_res_samp_4x32b_r2_2 = _mm_add_epi32(
  480|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r1_2, 1), horz_add_4x32b_r1_2);
  481|  4.42k|                    i4_res_samp_4x32b_r3_2 = _mm_add_epi32(
  482|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r1_2, 1), horz_add_4x32b_r2_2);
  483|  4.42k|                    i4_res_samp_4x32b_r4_2 = _mm_add_epi32(
  484|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r2_2, 1), horz_add_4x32b_r2_2);
  485|  4.42k|                    i4_res_samp_4x32b_r5_2 = _mm_add_epi32(
  486|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r2_2, 1), horz_add_4x32b_r3_2);
  487|  4.42k|                    i4_res_samp_4x32b_r6_2 = _mm_add_epi32(
  488|  4.42k|                        _mm_slli_epi32(i4_horz_samp_4x32b_r3_2, 1), horz_add_4x32b_r3_2);
  489|       |
  490|  4.42k|                    i4_res_samp_4x32b_r0_1 =
  491|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r0_1, twos), 2);
  492|  4.42k|                    i4_res_samp_4x32b_r1_1 =
  493|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_1, eights), 4);
  494|  4.42k|                    i4_res_samp_4x32b_r2_1 =
  495|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_1, eights), 4);
  496|  4.42k|                    i4_res_samp_4x32b_r3_1 =
  497|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r3_1, eights), 4);
  498|  4.42k|                    i4_res_samp_4x32b_r4_1 =
  499|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r4_1, eights), 4);
  500|  4.42k|                    i4_res_samp_4x32b_r5_1 =
  501|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r5_1, eights), 4);
  502|  4.42k|                    i4_res_samp_4x32b_r6_1 =
  503|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r6_1, eights), 4);
  504|  4.42k|                    i4_res_samp_4x32b_r7_1 =
  505|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r3_1, twos), 2);
  506|       |
  507|  4.42k|                    i4_res_samp_4x32b_r0_2 =
  508|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r0_2, twos), 2);
  509|  4.42k|                    i4_res_samp_4x32b_r1_2 =
  510|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_2, eights), 4);
  511|  4.42k|                    i4_res_samp_4x32b_r2_2 =
  512|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_2, eights), 4);
  513|  4.42k|                    i4_res_samp_4x32b_r3_2 =
  514|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r3_2, eights), 4);
  515|  4.42k|                    i4_res_samp_4x32b_r4_2 =
  516|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r4_2, eights), 4);
  517|  4.42k|                    i4_res_samp_4x32b_r5_2 =
  518|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r5_2, eights), 4);
  519|  4.42k|                    i4_res_samp_4x32b_r6_2 =
  520|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r6_2, eights), 4);
  521|  4.42k|                    i4_res_samp_4x32b_r7_2 =
  522|  4.42k|                        _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r3_2, twos), 2);
  523|       |
  524|       |                    /* populate 2 samples based on current coeffs */
  525|  4.42k|                    _mm_storeu_si128(
  526|  4.42k|                        (__m128i *) pi2_out_res,
  527|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r0_1, i4_res_samp_4x32b_r0_2));
  528|  4.42k|                    _mm_storeu_si128(
  529|  4.42k|                        (__m128i *) (pi2_out_res + i4_out_res_stride),
  530|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2));
  531|  4.42k|                    _mm_storeu_si128(
  532|  4.42k|                        (__m128i *) (pi2_out_res + (i4_out_res_stride << 1)),
  533|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2));
  534|  4.42k|                    _mm_storeu_si128(
  535|  4.42k|                        (__m128i *) (pi2_out_res + (i4_out_res_stride * 3)),
  536|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r3_1, i4_res_samp_4x32b_r3_2));
  537|  4.42k|                    _mm_storeu_si128(
  538|  4.42k|                        (__m128i *) (pi2_out_res + (i4_out_res_stride << 2)),
  539|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r4_1, i4_res_samp_4x32b_r4_2));
  540|  4.42k|                    _mm_storeu_si128(
  541|  4.42k|                        (__m128i *) (pi2_out_res + (i4_out_res_stride * 5)),
  542|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r5_1, i4_res_samp_4x32b_r5_2));
  543|  4.42k|                    _mm_storeu_si128(
  544|  4.42k|                        (__m128i *) (pi2_out_res + (i4_out_res_stride * 6)),
  545|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r6_1, i4_res_samp_4x32b_r6_2));
  546|  4.42k|                    _mm_storeu_si128(
  547|  4.42k|                        (__m128i *) (pi2_out_res + (i4_out_res_stride * 7)),
  548|  4.42k|                        _mm_packs_epi32(i4_res_samp_4x32b_r7_1, i4_res_samp_4x32b_r7_2));
  549|       |
  550|  4.42k|                    pi2_out_res += BLOCK_WIDTH;
  ------------------
  |  |   64|  4.42k|#define BLOCK_WIDTH 8
  ------------------
  551|  4.42k|                }
  552|  4.42k|            }
  553|  79.4k|            else
  554|  79.4k|            {
  555|  79.4k|                pi2_out_res += BLOCK_WIDTH;
  ------------------
  |  |   64|  79.4k|#define BLOCK_WIDTH 8
  ------------------
  556|  79.4k|            }
  557|       |
  558|       |            /* Block level loop updates */
  559|  83.8k|            if(1 == i4_blk_ctr)
  ------------------
  |  Branch (559:16): [True: 20.9k, False: 62.8k]
  ------------------
  560|  20.9k|            {
  561|  20.9k|                pi2_inp_data -= SUB_BLOCK_WIDTH;
  ------------------
  |  |   61|  20.9k|#define SUB_BLOCK_WIDTH 4
  ------------------
  562|  20.9k|                pi2_inp_data += (i4_inp_data_stride * SUB_BLOCK_HEIGHT);
  ------------------
  |  |   62|  20.9k|#define SUB_BLOCK_HEIGHT 4
  ------------------
  563|  20.9k|                pi2_out_res -= MB_WIDTH;
  ------------------
  |  |   67|  20.9k|#define MB_WIDTH 16
  ------------------
  564|  20.9k|                pi2_out_res += (i4_out_res_stride * BLOCK_HEIGHT);
  ------------------
  |  |   65|  20.9k|#define BLOCK_HEIGHT 8
  ------------------
  565|  20.9k|                i4_ref_nnz >>= 2;
  566|  20.9k|            }
  567|  62.8k|            else
  568|  62.8k|            {
  569|  62.8k|                pi2_inp_data += SUB_BLOCK_WIDTH;
  ------------------
  |  |   61|  62.8k|#define SUB_BLOCK_WIDTH 4
  ------------------
  570|  62.8k|            }
  571|       |
  572|  83.8k|            i4_ref_nnz >>= 1;
  573|  83.8k|        } /* end of loop over all the blocks */
  574|  20.9k|    }
  575|  21.8k|    return;
  576|  21.8k|}
isvcd_interpolate_residual_sse42:
  603|   145k|{
  604|   145k|    residual_sampling_ctxt_t *ps_ctxt;
  605|   145k|    residual_samp_map_ctxt_t *ps_map_ctxt;
  606|   145k|    res_lyr_ctxt *ps_lyr_ctxt;
  607|   145k|    ref_pixel_map_t *ps_x_pos_phase;
  608|   145k|    ref_pixel_map_t *ps_y_pos_phase;
  609|       |
  610|   145k|    WORD32 i4_x, i4_y;
  611|   145k|    WORD32 i4_frm_mb_x, i4_frm_mb_y;
  612|   145k|    WORD32 i4_temp_array_ht;
  613|   145k|    WORD32 i4_mb_wd;
  614|   145k|    WORD32 i4_mb_ht;
  615|   145k|    WORD16 *pi2_ref_array;
  616|   145k|    UWORD8 *pu1_ref_x_ptr_incr, *pu1_ref_y_ptr_incr;
  617|       |
  618|   145k|    WORD8 arr_y_ref_pos[16] = {0};
  619|   145k|    WORD8 arr_x_ref_pos[16] = {0};
  620|   145k|    WORD8 arr_x_phase[32] = {0};
  621|   145k|    WORD8 arr_y_phase[32] = {0};
  622|   145k|    WORD8 *pi1_y_ref_pos;
  623|   145k|    WORD8 *pi1_x_ref_pos;
  624|   145k|    WORD8 *pi1_y_phase;
  625|   145k|    WORD8 *pi1_x_phase;
  626|       |
  627|   145k|    ps_ctxt = (residual_sampling_ctxt_t *) pv_residual_samp_ctxt;
  628|   145k|    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
  629|   145k|    pi2_ref_array = ps_ctxt->pi2_refarray_buffer;
  630|   145k|    pu1_ref_x_ptr_incr = ps_ctxt->pu1_ref_x_ptr_incr;
  631|   145k|    pu1_ref_y_ptr_incr = ps_ctxt->pu1_ref_y_ptr_incr;
  632|       |
  633|       |    /* --------------------------------------------------------------------- */
  634|       |    /* Extracting information from the mapping context                       */
  635|       |    /* --------------------------------------------------------------------- */
  636|   145k|    if(1 == i4_chroma_flag)
  ------------------
  |  Branch (636:8): [True: 96.9k, False: 48.4k]
  ------------------
  637|  96.9k|        ps_map_ctxt = &ps_lyr_ctxt->s_chroma_map_ctxt;
  638|  48.4k|    else
  639|  48.4k|        ps_map_ctxt = &ps_lyr_ctxt->s_luma_map_ctxt;
  640|       |
  641|   145k|    i4_mb_wd = MB_WIDTH >> i4_chroma_flag;
  ------------------
  |  |   67|   145k|#define MB_WIDTH 16
  ------------------
  642|   145k|    i4_mb_ht = MB_HEIGHT >> i4_chroma_flag;
  ------------------
  |  |   68|   145k|#define MB_HEIGHT 16
  ------------------
  643|       |
  644|   145k|    ps_x_pos_phase = ps_map_ctxt->ps_x_pos_phase;
  645|   145k|    ps_y_pos_phase = ps_map_ctxt->ps_y_pos_phase;
  646|       |
  647|   145k|    i4_temp_array_ht = i4_mb_ht;
  648|   145k|    i4_frm_mb_y = u2_mb_y * i4_mb_ht;
  649|   145k|    i4_frm_mb_x = u2_mb_x * i4_mb_wd;
  650|       |
  651|       |    /* --------------------------------------------------------------------- */
  652|       |    /* Loop for interpolation                                                */
  653|       |    /* --------------------------------------------------------------------- */
  654|       |
  655|   145k|    if(i4_chroma_flag == 0)
  ------------------
  |  Branch (655:8): [True: 48.4k, False: 96.9k]
  ------------------
  656|  48.4k|    {
  657|  48.4k|        __m128i const_16_8x16b, const_128, const_ones, const_ones_8x16b, mid_indx_16x8b;
  658|  48.4k|        __m128i ref_arr_8x16b_r0_0;
  659|  48.4k|        __m128i ref_arr_8x16b_r1_0;
  660|  48.4k|        __m128i phs_mask_8x16b_0, phs_mask_16min_8x16b_0, phs_mask_16x8b_0;
  661|  48.4k|        __m128i x_ref_pos_mask_r0, x_ref_rnd_mask_r0_0;
  662|  48.4k|        __m128i x_ref_pos_mask_temp_r0_0;
  663|  48.4k|        __m128i x_ref_pos_mask_temp_r1_0;
  664|  48.4k|        __m128i phs_mask_div8_8x16b_0;
  665|  48.4k|        __m128i u1_incr_8x16b_r0_0, ref_arr_temp0_8x16b_r0_0, res0_8x16b_r0_0,
  666|  48.4k|            u1_incr_not_8x16b_r0_0;
  667|  48.4k|        __m128i u1_incr_8x16b_r1_0, ref_arr_temp1_8x16b_r0_0, res1_8x16b_r0_0;
  668|       |
  669|  48.4k|        __m128i u1_incr_not_8x16b_r0_even, u1_incr_not_8x16b_r1_even, x_ref_pos_mask_temp_r0_even,
  670|  48.4k|            x_ref_pos_mask_temp_r1_even;
  671|  48.4k|        __m128i u1_incr_not_8x16b_r0_odd, u1_incr_not_8x16b_r1_odd, x_ref_pos_mask_temp_r0_odd,
  672|  48.4k|            x_ref_pos_mask_temp_r1_odd;
  673|       |
  674|  48.4k|        __m128i ref_arr_temp0_8x16b_r1_0, res_8x16b_r0_0, res0_8x16b_r1_0, u1_incr_not_8x16b_r1_0;
  675|  48.4k|        __m128i ref_arr_temp1_8x16b_r1_0, res_8x16b_r1_0, res1_8x16b_r1_0;
  676|  48.4k|        __m128i u1_y_incr_8x16b_r0_0, u1_y_incr_8x16b_r0_1, u1_y_incr_8x16b_r0_low,
  677|  48.4k|            u1_y_incr_8x16b_r0_high;
  678|       |
  679|  48.4k|        __m128i prev_res_8x16b_r0_0;
  680|  48.4k|        __m128i prev_res_8x16b_r1_0;
  681|  48.4k|        __m128i prev_res_8x16b_r0_1;
  682|  48.4k|        __m128i prev_res_8x16b_r1_1;
  683|       |
  684|  48.4k|        __m128i u1_prev_y_incr_8x16b_r0_0;
  685|  48.4k|        __m128i u1_prev_y_incr_8x16b_r0_1;
  686|       |
  687|  48.4k|        __m128i ref_arr_8x16b_r0_1;
  688|  48.4k|        __m128i ref_arr_8x16b_r1_1;
  689|  48.4k|        __m128i phs_mask_8x16b_1, phs_mask_div8_8x16b_1, phs_mask_16min_8x16b_1;
  690|  48.4k|        __m128i x_ref_pos_mask_temp_r0_1;
  691|  48.4k|        __m128i x_ref_pos_mask_temp_r1_1;
  692|  48.4k|        __m128i ref_arr_temp0_8x16b_r0_1, res0_8x16b_r0_1, u1_incr_not_8x16b_r0_1;
  693|  48.4k|        __m128i ref_arr_temp1_8x16b_r0_1, res1_8x16b_r0_1;
  694|       |
  695|  48.4k|        __m128i ref_arr_temp0_8x16b_r1_1, res_8x16b_r0_1, res0_8x16b_r1_1, u1_incr_not_8x16b_r1_1;
  696|  48.4k|        __m128i ref_arr_temp1_8x16b_r1_1, res_8x16b_r1_1, res1_8x16b_r1_1;
  697|       |
  698|  48.4k|        __m128i vert_res0_8x16b_r0_0, vert_res0_8x16b_r0_1, res_4x32b_l_0, res_4x32b_h_0;
  699|  48.4k|        __m128i vert_res1_8x16b_r0_0, vert_res1_8x16b_r0_1, res_4x32b_l_1, res_4x32b_h_1;
  700|  48.4k|        __m128i res_8x16b_l, res_8x16b_h;
  701|  48.4k|        __m128i phs_y_mask_16min_8x16b, phs_y_mask_8x16b, phs_y_mask_mix_8x16b;
  702|  48.4k|        __m128i zero_8x16b;
  703|  48.4k|        WORD32 zero_r0_0, zero_r1_0, zero_r0_1, zero_r1_1, zero_r0_r1 = 0;
  704|  48.4k|        WORD32 strt_indx_h;
  705|  48.4k|        WORD16 *pi2_ref_array_temp;
  706|  48.4k|        UWORD8 *pu1_ref_x_ptr_incr_temp, *pu1_ref_y_ptr_incr_temp;
  707|  48.4k|        WORD32 i4_y_phase;
  708|  48.4k|        WORD32 out_stride_temp;
  709|  48.4k|        const_128 = _mm_set1_epi32(128);
  710|  48.4k|        zero_8x16b = _mm_set1_epi16(0);
  711|  48.4k|        const_ones = _mm_set1_epi8(1);
  712|  48.4k|        const_ones_8x16b = _mm_set1_epi16(1);
  713|       |
  714|   824k|        for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (714:23): [True: 775k, False: 48.4k]
  ------------------
  715|   775k|        {
  716|   775k|            arr_y_phase[i4_y] = (WORD8) ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_phase;
  717|   775k|            arr_y_ref_pos[i4_y] = (WORD8) (ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_ref_pos);
  718|   775k|        }
  719|  48.4k|        pi1_y_ref_pos = arr_y_ref_pos;
  720|  48.4k|        pi1_y_phase = arr_y_phase;
  721|       |
  722|  48.4k|        strt_indx_h = 0;
  723|  48.4k|        strt_indx_h = (ps_x_pos_phase[8 + i4_frm_mb_x].i2_ref_pos);
  724|   824k|        for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
  ------------------
  |  Branch (724:23): [True: 775k, False: 48.4k]
  ------------------
  725|   775k|        {
  726|   775k|            arr_x_ref_pos[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_ref_pos;
  727|   775k|            arr_x_phase[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_phase;
  728|   775k|        }
  729|       |
  730|  48.4k|        pi1_x_ref_pos = arr_x_ref_pos;
  731|  48.4k|        pi1_x_phase = arr_x_phase;
  732|       |
  733|  48.4k|        x_ref_pos_mask_r0 = _mm_loadu_si128((__m128i *) (pi1_x_ref_pos));
  734|  48.4k|        phs_mask_16x8b_0 = _mm_loadu_si128((__m128i *) (pi1_x_phase));
  735|  48.4k|        phs_mask_8x16b_0 = _mm_cvtepi8_epi16(phs_mask_16x8b_0);
  736|  48.4k|        phs_mask_8x16b_1 = _mm_cvtepi8_epi16(_mm_loadu_si128((__m128i *) (pi1_x_phase + 8)));
  737|       |
  738|  48.4k|        phs_mask_div8_8x16b_0 = _mm_srli_epi16(phs_mask_8x16b_0, 3);
  739|  48.4k|        phs_mask_div8_8x16b_1 = _mm_srli_epi16(phs_mask_8x16b_1, 3);
  740|  48.4k|        phs_mask_div8_8x16b_0 = _mm_packs_epi16(phs_mask_div8_8x16b_0, phs_mask_div8_8x16b_1);
  741|  48.4k|        const_16_8x16b = _mm_set1_epi16(16);
  742|       |
  743|  48.4k|        phs_mask_16min_8x16b_0 = _mm_sub_epi16(const_16_8x16b, phs_mask_8x16b_0);
  744|  48.4k|        phs_mask_16min_8x16b_1 = _mm_sub_epi16(const_16_8x16b, phs_mask_8x16b_1);
  745|       |
  746|  48.4k|        x_ref_rnd_mask_r0_0 = _mm_add_epi8(x_ref_pos_mask_r0, phs_mask_div8_8x16b_0);
  747|  48.4k|        mid_indx_16x8b = _mm_set1_epi8((strt_indx_h << 1));
  748|   824k|        for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (748:23): [True: 775k, False: 48.4k]
  ------------------
  749|   775k|        {
  750|   775k|            if((i4_y > 0) && (pi1_y_ref_pos[i4_y] == pi1_y_ref_pos[i4_y - 1]))
  ------------------
  |  Branch (750:16): [True: 727k, False: 48.4k]
  |  Branch (750:30): [True: 242k, False: 484k]
  ------------------
  751|   242k|            {
  752|   242k|                if(zero_r0_r1)
  ------------------
  |  Branch (752:20): [True: 238k, False: 4.37k]
  ------------------
  753|   238k|                {
  754|   238k|                    res_8x16b_l = _mm_set1_epi16(0);
  755|   238k|                    res_8x16b_h = _mm_set1_epi16(0);
  756|   238k|                    out_stride_temp = (i4_y * i4_out_stride);
  757|   238k|                    _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp), res_8x16b_l);
  758|   238k|                    _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp + 8), res_8x16b_h);
  759|   238k|                    continue;
  760|   238k|                }
  761|       |
  762|  4.37k|                res_8x16b_r0_0 = prev_res_8x16b_r0_0;
  763|  4.37k|                res_8x16b_r1_0 = prev_res_8x16b_r1_0;
  764|  4.37k|                res_8x16b_r0_1 = prev_res_8x16b_r0_1;
  765|  4.37k|                res_8x16b_r1_1 = prev_res_8x16b_r1_1;
  766|       |
  767|  4.37k|                u1_y_incr_8x16b_r0_0 = u1_prev_y_incr_8x16b_r0_0;
  768|  4.37k|                u1_y_incr_8x16b_r0_1 = u1_prev_y_incr_8x16b_r0_1;
  769|  4.37k|            }
  770|   533k|            else
  771|   533k|            {
  772|   533k|                pi2_ref_array_temp = pi2_ref_array + ((pi1_y_ref_pos[i4_y]) * i4_refarray_wd);
  773|   533k|                pu1_ref_x_ptr_incr_temp =
  774|   533k|                    pu1_ref_x_ptr_incr + ((pi1_y_ref_pos[i4_y]) * i4_refarray_wd);
  775|   533k|                ref_arr_8x16b_r0_0 = _mm_loadu_si128((__m128i *) (pi2_ref_array_temp));
  776|   533k|                ref_arr_8x16b_r1_0 =
  777|   533k|                    _mm_loadu_si128((__m128i *) (pi2_ref_array_temp + i4_refarray_wd));
  778|   533k|                ref_arr_8x16b_r0_1 =
  779|   533k|                    _mm_loadu_si128((__m128i *) (pi2_ref_array_temp + strt_indx_h));
  780|   533k|                ref_arr_8x16b_r1_1 = _mm_loadu_si128(
  781|   533k|                    (__m128i *) (pi2_ref_array_temp + i4_refarray_wd + strt_indx_h));
  782|       |
  783|   533k|                zero_r0_0 = _mm_test_all_ones(_mm_cmpeq_epi16(
  784|   533k|                    ref_arr_8x16b_r0_0, zero_8x16b));  // return 1 if all zeros, else 0
  785|   533k|                zero_r1_0 = _mm_test_all_ones(_mm_cmpeq_epi16(ref_arr_8x16b_r1_0, zero_8x16b));
  786|   533k|                zero_r0_1 = _mm_test_all_ones(_mm_cmpeq_epi16(ref_arr_8x16b_r0_1, zero_8x16b));
  787|   533k|                zero_r1_1 = _mm_test_all_ones(_mm_cmpeq_epi16(ref_arr_8x16b_r1_1, zero_8x16b));
  788|       |
  789|   533k|                zero_r0_r1 = zero_r0_0 && zero_r1_0 && zero_r0_1 && zero_r1_1;
  ------------------
  |  Branch (789:30): [True: 526k, False: 6.60k]
  |  Branch (789:43): [True: 525k, False: 1.26k]
  |  Branch (789:56): [True: 524k, False: 504]
  |  Branch (789:69): [True: 524k, False: 706]
  ------------------
  790|       |
  791|   533k|                if(!zero_r0_r1)
  ------------------
  |  Branch (791:20): [True: 9.07k, False: 524k]
  ------------------
  792|  9.07k|                {
  793|  9.07k|                    u1_incr_8x16b_r0_0 = _mm_loadu_si128((__m128i *) (pu1_ref_x_ptr_incr_temp));
  794|  9.07k|                    u1_incr_8x16b_r1_0 =
  795|  9.07k|                        _mm_loadu_si128((__m128i *) (pu1_ref_x_ptr_incr_temp + i4_refarray_wd));
  796|       |
  797|  9.07k|                    u1_incr_8x16b_r0_0 = _mm_shuffle_epi8(u1_incr_8x16b_r0_0, x_ref_pos_mask_r0);
  798|  9.07k|                    u1_incr_8x16b_r1_0 = _mm_shuffle_epi8(u1_incr_8x16b_r1_0, x_ref_pos_mask_r0);
  799|       |
  800|  9.07k|                    u1_incr_not_8x16b_r0_0 =
  801|  9.07k|                        _mm_andnot_si128(u1_incr_8x16b_r0_0, phs_mask_div8_8x16b_0);
  802|  9.07k|                    u1_incr_not_8x16b_r1_0 =
  803|  9.07k|                        _mm_andnot_si128(u1_incr_8x16b_r1_0, phs_mask_div8_8x16b_0);
  804|       |
  805|  9.07k|                    u1_incr_not_8x16b_r0_0 =
  806|  9.07k|                        _mm_add_epi8(u1_incr_not_8x16b_r0_0, x_ref_pos_mask_r0);
  807|  9.07k|                    u1_incr_not_8x16b_r1_0 =
  808|  9.07k|                        _mm_add_epi8(u1_incr_not_8x16b_r1_0, x_ref_pos_mask_r0);
  809|       |
  810|  9.07k|                    x_ref_pos_mask_temp_r0_0 =
  811|  9.07k|                        _mm_add_epi8(u1_incr_not_8x16b_r0_0, u1_incr_8x16b_r0_0);
  812|  9.07k|                    x_ref_pos_mask_temp_r1_0 =
  813|  9.07k|                        _mm_add_epi8(u1_incr_not_8x16b_r1_0, u1_incr_8x16b_r1_0);
  814|       |
  815|       |                    /* _mm_slli_epi8(u1_incr_not_8x16b_r0_0, 1)*/
  816|  9.07k|                    u1_incr_not_8x16b_r0_even =
  817|  9.07k|                        _mm_add_epi8(u1_incr_not_8x16b_r0_0, u1_incr_not_8x16b_r0_0);
  818|  9.07k|                    u1_incr_not_8x16b_r1_even =
  819|  9.07k|                        _mm_add_epi8(u1_incr_not_8x16b_r1_0, u1_incr_not_8x16b_r1_0);
  820|  9.07k|                    x_ref_pos_mask_temp_r0_even =
  821|  9.07k|                        _mm_add_epi8(x_ref_pos_mask_temp_r0_0, x_ref_pos_mask_temp_r0_0);
  822|  9.07k|                    x_ref_pos_mask_temp_r1_even =
  823|  9.07k|                        _mm_add_epi8(x_ref_pos_mask_temp_r1_0, x_ref_pos_mask_temp_r1_0);
  824|       |
  825|  9.07k|                    u1_incr_not_8x16b_r0_odd = _mm_add_epi8(u1_incr_not_8x16b_r0_even, const_ones);
  826|  9.07k|                    u1_incr_not_8x16b_r1_odd = _mm_add_epi8(u1_incr_not_8x16b_r1_even, const_ones);
  827|  9.07k|                    x_ref_pos_mask_temp_r0_odd =
  828|  9.07k|                        _mm_add_epi8(x_ref_pos_mask_temp_r0_even, const_ones);
  829|  9.07k|                    x_ref_pos_mask_temp_r1_odd =
  830|  9.07k|                        _mm_add_epi8(x_ref_pos_mask_temp_r1_even, const_ones);
  831|       |
  832|  9.07k|                    u1_incr_not_8x16b_r0_0 =
  833|  9.07k|                        _mm_unpacklo_epi8(u1_incr_not_8x16b_r0_even, u1_incr_not_8x16b_r0_odd);
  834|  9.07k|                    u1_incr_not_8x16b_r1_0 =
  835|  9.07k|                        _mm_unpacklo_epi8(u1_incr_not_8x16b_r1_even, u1_incr_not_8x16b_r1_odd);
  836|  9.07k|                    x_ref_pos_mask_temp_r0_0 =
  837|  9.07k|                        _mm_unpacklo_epi8(x_ref_pos_mask_temp_r0_even, x_ref_pos_mask_temp_r0_odd);
  838|  9.07k|                    x_ref_pos_mask_temp_r1_0 =
  839|  9.07k|                        _mm_unpacklo_epi8(x_ref_pos_mask_temp_r1_even, x_ref_pos_mask_temp_r1_odd);
  840|       |
  841|  9.07k|                    u1_incr_not_8x16b_r0_1 =
  842|  9.07k|                        _mm_unpackhi_epi8(u1_incr_not_8x16b_r0_even, u1_incr_not_8x16b_r0_odd);
  843|  9.07k|                    u1_incr_not_8x16b_r1_1 =
  844|  9.07k|                        _mm_unpackhi_epi8(u1_incr_not_8x16b_r1_even, u1_incr_not_8x16b_r1_odd);
  845|  9.07k|                    x_ref_pos_mask_temp_r0_1 =
  846|  9.07k|                        _mm_unpackhi_epi8(x_ref_pos_mask_temp_r0_even, x_ref_pos_mask_temp_r0_odd);
  847|  9.07k|                    x_ref_pos_mask_temp_r1_1 =
  848|  9.07k|                        _mm_unpackhi_epi8(x_ref_pos_mask_temp_r1_even, x_ref_pos_mask_temp_r1_odd);
  849|       |
  850|  9.07k|                    u1_incr_not_8x16b_r0_1 = _mm_sub_epi8(u1_incr_not_8x16b_r0_1, mid_indx_16x8b);
  851|  9.07k|                    u1_incr_not_8x16b_r1_1 = _mm_sub_epi8(u1_incr_not_8x16b_r1_1, mid_indx_16x8b);
  852|  9.07k|                    x_ref_pos_mask_temp_r0_1 =
  853|  9.07k|                        _mm_sub_epi8(x_ref_pos_mask_temp_r0_1, mid_indx_16x8b);
  854|  9.07k|                    x_ref_pos_mask_temp_r1_1 =
  855|  9.07k|                        _mm_sub_epi8(x_ref_pos_mask_temp_r1_1, mid_indx_16x8b);
  856|       |
  857|  9.07k|                    ref_arr_temp0_8x16b_r0_0 =
  858|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r0_0, u1_incr_not_8x16b_r0_0);
  859|  9.07k|                    ref_arr_temp0_8x16b_r1_0 =
  860|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r1_0, u1_incr_not_8x16b_r1_0);
  861|  9.07k|                    ref_arr_temp1_8x16b_r0_0 =
  862|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r0_0, x_ref_pos_mask_temp_r0_0);
  863|  9.07k|                    ref_arr_temp1_8x16b_r1_0 =
  864|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r1_0, x_ref_pos_mask_temp_r1_0);
  865|  9.07k|                    ref_arr_temp0_8x16b_r0_1 =
  866|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r0_1, u1_incr_not_8x16b_r0_1);
  867|  9.07k|                    ref_arr_temp0_8x16b_r1_1 =
  868|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r1_1, u1_incr_not_8x16b_r1_1);
  869|  9.07k|                    ref_arr_temp1_8x16b_r0_1 =
  870|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r0_1, x_ref_pos_mask_temp_r0_1);
  871|  9.07k|                    ref_arr_temp1_8x16b_r1_1 =
  872|  9.07k|                        _mm_shuffle_epi8(ref_arr_8x16b_r1_1, x_ref_pos_mask_temp_r1_1);
  873|       |
  874|  9.07k|                    res0_8x16b_r0_0 =
  875|  9.07k|                        _mm_mullo_epi16(ref_arr_temp0_8x16b_r0_0, phs_mask_16min_8x16b_0);
  876|  9.07k|                    res0_8x16b_r1_0 =
  877|  9.07k|                        _mm_mullo_epi16(ref_arr_temp0_8x16b_r1_0, phs_mask_16min_8x16b_0);
  878|  9.07k|                    res1_8x16b_r0_0 = _mm_mullo_epi16(ref_arr_temp1_8x16b_r0_0, phs_mask_8x16b_0);
  879|  9.07k|                    res1_8x16b_r1_0 = _mm_mullo_epi16(ref_arr_temp1_8x16b_r1_0, phs_mask_8x16b_0);
  880|  9.07k|                    res0_8x16b_r0_1 =
  881|  9.07k|                        _mm_mullo_epi16(ref_arr_temp0_8x16b_r0_1, phs_mask_16min_8x16b_1);
  882|  9.07k|                    res0_8x16b_r1_1 =
  883|  9.07k|                        _mm_mullo_epi16(ref_arr_temp0_8x16b_r1_1, phs_mask_16min_8x16b_1);
  884|  9.07k|                    res1_8x16b_r0_1 = _mm_mullo_epi16(ref_arr_temp1_8x16b_r0_1, phs_mask_8x16b_1);
  885|  9.07k|                    res1_8x16b_r1_1 = _mm_mullo_epi16(ref_arr_temp1_8x16b_r1_1, phs_mask_8x16b_1);
  886|       |
  887|  9.07k|                    res_8x16b_r0_0 = _mm_add_epi16(res0_8x16b_r0_0, res1_8x16b_r0_0);
  888|  9.07k|                    res_8x16b_r1_0 = _mm_add_epi16(res0_8x16b_r1_0, res1_8x16b_r1_0);
  889|  9.07k|                    res_8x16b_r0_1 = _mm_add_epi16(res0_8x16b_r0_1, res1_8x16b_r0_1);
  890|  9.07k|                    res_8x16b_r1_1 = _mm_add_epi16(res0_8x16b_r1_1, res1_8x16b_r1_1);
  891|       |
  892|  9.07k|                    prev_res_8x16b_r0_0 = res_8x16b_r0_0;
  893|  9.07k|                    prev_res_8x16b_r1_0 = res_8x16b_r1_0;
  894|  9.07k|                    prev_res_8x16b_r0_1 = res_8x16b_r0_1;
  895|  9.07k|                    prev_res_8x16b_r1_1 = res_8x16b_r1_1;
  896|       |
  897|  9.07k|                    pu1_ref_y_ptr_incr_temp =
  898|  9.07k|                        pu1_ref_y_ptr_incr + (pi1_y_ref_pos[i4_y] * i4_refarray_wd);
  899|  9.07k|                    u1_y_incr_8x16b_r0_0 = _mm_loadu_si128((__m128i *) (pu1_ref_y_ptr_incr_temp));
  900|       |
  901|  9.07k|                    u1_y_incr_8x16b_r0_0 =
  902|  9.07k|                        _mm_shuffle_epi8(u1_y_incr_8x16b_r0_0, x_ref_rnd_mask_r0_0);
  903|       |
  904|  9.07k|                    u1_y_incr_8x16b_r0_low = _mm_cvtepi8_epi16(u1_y_incr_8x16b_r0_0);
  905|  9.07k|                    u1_y_incr_8x16b_r0_high =
  906|  9.07k|                        _mm_cvtepi8_epi16(_mm_unpackhi_epi64(u1_y_incr_8x16b_r0_0, const_ones));
  907|       |
  908|  9.07k|                    u1_y_incr_8x16b_r0_0 =
  909|  9.07k|                        _mm_cmpeq_epi16(u1_y_incr_8x16b_r0_low, const_ones_8x16b);
  910|  9.07k|                    u1_y_incr_8x16b_r0_1 =
  911|  9.07k|                        _mm_cmpeq_epi16(u1_y_incr_8x16b_r0_high, const_ones_8x16b);
  912|       |
  913|  9.07k|                    u1_prev_y_incr_8x16b_r0_0 = u1_y_incr_8x16b_r0_0;
  914|  9.07k|                    u1_prev_y_incr_8x16b_r0_1 = u1_y_incr_8x16b_r0_1;
  915|  9.07k|                }
  916|   533k|            }
  917|       |
  918|   537k|            if(zero_r0_r1)
  ------------------
  |  Branch (918:16): [True: 524k, False: 13.4k]
  ------------------
  919|   524k|            {
  920|   524k|                res_8x16b_l = _mm_set1_epi16(0);
  921|   524k|                res_8x16b_h = _mm_set1_epi16(0);
  922|   524k|            }
  923|  13.4k|            else
  924|  13.4k|            {
  925|  13.4k|                i4_y_phase = pi1_y_phase[i4_y];
  926|       |
  927|  13.4k|                if((i4_y_phase) >> 3)
  ------------------
  |  Branch (927:20): [True: 8.91k, False: 4.54k]
  ------------------
  928|  8.91k|                {
  929|  8.91k|                    vert_res0_8x16b_r0_0 =
  930|  8.91k|                        _mm_blendv_epi8(res_8x16b_r1_0, res_8x16b_r0_0, u1_y_incr_8x16b_r0_0);
  931|  8.91k|                    vert_res1_8x16b_r0_0 =
  932|  8.91k|                        _mm_blendv_epi8(res_8x16b_r1_0, res_8x16b_r1_0, u1_y_incr_8x16b_r0_0);
  933|  8.91k|                    vert_res0_8x16b_r0_1 =
  934|  8.91k|                        _mm_blendv_epi8(res_8x16b_r1_1, res_8x16b_r0_1, u1_y_incr_8x16b_r0_1);
  935|  8.91k|                    vert_res1_8x16b_r0_1 =
  936|  8.91k|                        _mm_blendv_epi8(res_8x16b_r1_1, res_8x16b_r1_1, u1_y_incr_8x16b_r0_1);
  937|  8.91k|                }
  938|  4.54k|                else
  939|  4.54k|                {
  940|  4.54k|                    vert_res0_8x16b_r0_0 =
  941|  4.54k|                        _mm_blendv_epi8(res_8x16b_r0_0, res_8x16b_r0_0, u1_y_incr_8x16b_r0_0);
  942|  4.54k|                    vert_res1_8x16b_r0_0 =
  943|  4.54k|                        _mm_blendv_epi8(res_8x16b_r0_0, res_8x16b_r1_0, u1_y_incr_8x16b_r0_0);
  944|  4.54k|                    vert_res0_8x16b_r0_1 =
  945|  4.54k|                        _mm_blendv_epi8(res_8x16b_r0_1, res_8x16b_r0_1, u1_y_incr_8x16b_r0_1);
  946|  4.54k|                    vert_res1_8x16b_r0_1 =
  947|  4.54k|                        _mm_blendv_epi8(res_8x16b_r0_1, res_8x16b_r1_1, u1_y_incr_8x16b_r0_1);
  948|  4.54k|                }
  949|  13.4k|                res0_8x16b_r0_0 = _mm_unpacklo_epi16(vert_res0_8x16b_r0_0, vert_res1_8x16b_r0_0);
  950|  13.4k|                res1_8x16b_r0_0 = _mm_unpackhi_epi16(vert_res0_8x16b_r0_0, vert_res1_8x16b_r0_0);
  951|  13.4k|                res0_8x16b_r0_1 = _mm_unpacklo_epi16(vert_res0_8x16b_r0_1, vert_res1_8x16b_r0_1);
  952|  13.4k|                res1_8x16b_r0_1 = _mm_unpackhi_epi16(vert_res0_8x16b_r0_1, vert_res1_8x16b_r0_1);
  953|       |
  954|  13.4k|                phs_y_mask_16min_8x16b = _mm_set1_epi16(16 - i4_y_phase);
  955|  13.4k|                phs_y_mask_8x16b = _mm_set1_epi16(i4_y_phase);
  956|  13.4k|                phs_y_mask_mix_8x16b = _mm_unpacklo_epi16(phs_y_mask_16min_8x16b, phs_y_mask_8x16b);
  957|       |
  958|  13.4k|                res_4x32b_l_0 = _mm_madd_epi16(res0_8x16b_r0_0, phs_y_mask_mix_8x16b);
  959|  13.4k|                res_4x32b_l_1 = _mm_madd_epi16(res1_8x16b_r0_0, phs_y_mask_mix_8x16b);
  960|  13.4k|                res_4x32b_h_0 = _mm_madd_epi16(res0_8x16b_r0_1, phs_y_mask_mix_8x16b);
  961|  13.4k|                res_4x32b_h_1 = _mm_madd_epi16(res1_8x16b_r0_1, phs_y_mask_mix_8x16b);
  962|       |
  963|  13.4k|                res_4x32b_l_0 = _mm_add_epi32(res_4x32b_l_0, const_128);
  964|  13.4k|                res_4x32b_l_1 = _mm_add_epi32(res_4x32b_l_1, const_128);
  965|  13.4k|                res_4x32b_h_0 = _mm_add_epi32(res_4x32b_h_0, const_128);
  966|  13.4k|                res_4x32b_h_1 = _mm_add_epi32(res_4x32b_h_1, const_128);
  967|       |
  968|  13.4k|                res_4x32b_l_0 = _mm_srai_epi32(res_4x32b_l_0, 8);
  969|  13.4k|                res_4x32b_l_1 = _mm_srai_epi32(res_4x32b_l_1, 8);
  970|  13.4k|                res_4x32b_h_0 = _mm_srai_epi32(res_4x32b_h_0, 8);
  971|  13.4k|                res_4x32b_h_1 = _mm_srai_epi32(res_4x32b_h_1, 8);
  972|  13.4k|                res_8x16b_l = _mm_packs_epi32(res_4x32b_l_0, res_4x32b_l_1);
  973|  13.4k|                res_8x16b_h = _mm_packs_epi32(res_4x32b_h_0, res_4x32b_h_1);
  974|  13.4k|            }
  975|       |
  976|   537k|            out_stride_temp = (i4_y * i4_out_stride);
  977|   537k|            _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp), res_8x16b_l);
  978|   537k|            _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp + 8), res_8x16b_h);
  979|   537k|        }
  980|  48.4k|    }
  981|  96.9k|    else
  982|  96.9k|    {
  983|  96.9k|        __m128i const_16_8x16b, const_128, const_ones, const_ones_8x16b;
  984|  96.9k|        __m128i ref_arr_8x16b_r0_0;
  985|  96.9k|        __m128i ref_arr_8x16b_r1_0;
  986|  96.9k|        __m128i phs_mask_8x16b_0, phs_mask_div8_8x16b_0, phs_mask_16min_8x16b_0;
  987|  96.9k|        __m128i x_ref_pos_mask_r0, x_ref_rnd_mask_r0_0;
  988|  96.9k|        __m128i x_ref_pos_mask_temp_r0_0;
  989|  96.9k|        __m128i x_ref_pos_mask_temp_r1_0;
  990|       |
  991|  96.9k|        __m128i u1_incr_8x16b_r0_0, ref_arr_temp0_8x16b_r0_0, res0_8x16b_r0_0,
  992|  96.9k|            u1_incr_not_8x16b_r0_0;
  993|  96.9k|        __m128i u1_incr_8x16b_r1_0, ref_arr_temp1_8x16b_r0_0, res1_8x16b_r0_0;
  994|  96.9k|        __m128i u1_y_incr_8x16b_r0_0;
  995|       |
  996|  96.9k|        __m128i u1_incr_not_8x16b_r0_odd, u1_incr_not_8x16b_r1_odd, x_ref_pos_mask_temp_r0_odd,
  997|  96.9k|            x_ref_pos_mask_temp_r1_odd;
  998|  96.9k|        __m128i u1_incr_not_8x16b_r0_even, u1_incr_not_8x16b_r1_even, x_ref_pos_mask_temp_r0_even,
  999|  96.9k|            x_ref_pos_mask_temp_r1_even;
 1000|       |
 1001|  96.9k|        __m128i ref_arr_temp0_8x16b_r1_0, res_8x16b_r0_0, res0_8x16b_r1_0, u1_incr_not_8x16b_r1_0;
 1002|  96.9k|        __m128i ref_arr_temp1_8x16b_r1_0, res_8x16b_r1_0, res1_8x16b_r1_0;
 1003|  96.9k|        __m128i u1_prev_y_incr_8x16b_r0_0;
 1004|  96.9k|        __m128i prev_res_8x16b_r0_0;
 1005|  96.9k|        __m128i prev_res_8x16b_r1_0;
 1006|       |
 1007|  96.9k|        __m128i vert_res0_8x16b_r0_0, res_4x32b_l_0, out_4x32b_l;
 1008|  96.9k|        __m128i vert_res1_8x16b_r0_0, res_4x32b_l_1, out_4x32b_h;
 1009|  96.9k|        __m128i phs_y_mask_16min_8x16b, phs_y_mask_8x16b, phs_y_mask_mix_8x16b;
 1010|  96.9k|        __m128i chroma_mask, chroma_mask2;
 1011|  96.9k|        __m128i zero_8x16b = _mm_set1_epi16(0);
 1012|  96.9k|        WORD32 zero_r0_0, zero_r1_0, zero_r0_r1 = 0;
 1013|  96.9k|        WORD16 *pi2_ref_array_temp;
 1014|  96.9k|        UWORD8 *pu1_ref_x_ptr_incr_temp, *pu1_ref_y_ptr_incr_temp;
 1015|  96.9k|        WORD32 i4_y_phase;
 1016|  96.9k|        WORD32 out_stride_temp;
 1017|  96.9k|        const_ones = _mm_set1_epi8(1);
 1018|  96.9k|        const_ones_8x16b = _mm_set1_epi16(1);
 1019|  96.9k|        const_128 = _mm_set1_epi32(128);
 1020|       |
 1021|   872k|        for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (1021:23): [True: 775k, False: 96.9k]
  ------------------
 1022|   775k|        {
 1023|   775k|            arr_y_phase[i4_y] = (WORD8) ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_phase;
 1024|   775k|            arr_y_ref_pos[i4_y] = (WORD8) (ps_y_pos_phase[i4_y + i4_frm_mb_y].i2_ref_pos);
 1025|   775k|        }
 1026|  96.9k|        pi1_y_ref_pos = arr_y_ref_pos;
 1027|  96.9k|        pi1_y_phase = arr_y_phase;
 1028|       |
 1029|   872k|        for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
  ------------------
  |  Branch (1029:23): [True: 775k, False: 96.9k]
  ------------------
 1030|   775k|        {
 1031|   775k|            arr_x_ref_pos[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_ref_pos;
 1032|   775k|            arr_x_phase[i4_x] = (WORD8) ps_x_pos_phase[i4_x + i4_frm_mb_x].i2_phase;
 1033|   775k|        }
 1034|       |
 1035|  96.9k|        pi1_x_ref_pos = arr_x_ref_pos;
 1036|  96.9k|        pi1_x_phase = arr_x_phase;
 1037|       |
 1038|  96.9k|        phs_mask_8x16b_0 = _mm_cvtepi8_epi16(_mm_loadu_si128((__m128i *) (pi1_x_phase)));
 1039|  96.9k|        x_ref_pos_mask_r0 = _mm_loadu_si128((__m128i *) (pi1_x_ref_pos));
 1040|       |
 1041|  96.9k|        const_16_8x16b = _mm_set1_epi16(16);
 1042|  96.9k|        chroma_mask = _mm_set1_epi32(0xFFFF0000);
 1043|  96.9k|        chroma_mask2 = _mm_set1_epi32(0x0000FFFF);
 1044|  96.9k|        phs_mask_div8_8x16b_0 = _mm_srli_epi16(phs_mask_8x16b_0, 3);
 1045|  96.9k|        phs_mask_div8_8x16b_0 = _mm_packs_epi16(phs_mask_div8_8x16b_0, const_ones);
 1046|       |
 1047|  96.9k|        phs_mask_16min_8x16b_0 = _mm_sub_epi16(const_16_8x16b, phs_mask_8x16b_0);
 1048|  96.9k|        x_ref_rnd_mask_r0_0 = _mm_add_epi8(x_ref_pos_mask_r0, phs_mask_div8_8x16b_0);
 1049|       |
 1050|   872k|        for(i4_y = 0; i4_y < (i4_temp_array_ht); i4_y++)
  ------------------
  |  Branch (1050:23): [True: 775k, False: 96.9k]
  ------------------
 1051|   775k|        {
 1052|   775k|            if((i4_y > 0) && (pi1_y_ref_pos[i4_y] == pi1_y_ref_pos[i4_y - 1]))
  ------------------
  |  Branch (1052:16): [True: 678k, False: 96.9k]
  |  Branch (1052:30): [True: 224k, False: 453k]
  ------------------
 1053|   224k|            {
 1054|   224k|                if(zero_r0_r1)
  ------------------
  |  Branch (1054:20): [True: 221k, False: 3.90k]
  ------------------
 1055|   221k|                {
 1056|   221k|                    res_4x32b_l_0 = _mm_set1_epi32(0);
 1057|   221k|                    res_4x32b_l_1 = _mm_set1_epi32(0);
 1058|   221k|                    out_stride_temp = (i4_y * i4_out_stride);
 1059|       |
 1060|   221k|                    out_4x32b_l = _mm_loadu_si128((__m128i *) (pi2_out + out_stride_temp));
 1061|   221k|                    out_4x32b_h = _mm_loadu_si128((__m128i *) (pi2_out + out_stride_temp + 8));
 1062|       |
 1063|   221k|                    out_4x32b_l = _mm_and_si128(out_4x32b_l, chroma_mask);
 1064|   221k|                    out_4x32b_h = _mm_and_si128(out_4x32b_h, chroma_mask);
 1065|       |
 1066|   221k|                    res_4x32b_l_0 = _mm_and_si128(res_4x32b_l_0, chroma_mask2);
 1067|   221k|                    res_4x32b_l_1 = _mm_and_si128(res_4x32b_l_1, chroma_mask2);
 1068|       |
 1069|   221k|                    out_4x32b_l = _mm_add_epi8(res_4x32b_l_0, out_4x32b_l);
 1070|   221k|                    out_4x32b_h = _mm_add_epi8(res_4x32b_l_1, out_4x32b_h);
 1071|       |
 1072|   221k|                    _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp), out_4x32b_l);
 1073|   221k|                    _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp + 8), out_4x32b_h);
 1074|   221k|                    continue;
 1075|   221k|                }
 1076|       |
 1077|  3.90k|                res_8x16b_r0_0 = prev_res_8x16b_r0_0;
 1078|  3.90k|                res_8x16b_r1_0 = prev_res_8x16b_r1_0;
 1079|       |
 1080|  3.90k|                u1_y_incr_8x16b_r0_0 = u1_prev_y_incr_8x16b_r0_0;
 1081|  3.90k|            }
 1082|   550k|            else
 1083|   550k|            {
 1084|   550k|                pi2_ref_array_temp = pi2_ref_array + ((pi1_y_ref_pos[i4_y]) * i4_refarray_wd);
 1085|   550k|                pu1_ref_x_ptr_incr_temp =
 1086|   550k|                    pu1_ref_x_ptr_incr + ((pi1_y_ref_pos[i4_y]) * i4_refarray_wd);
 1087|   550k|                ref_arr_8x16b_r0_0 = _mm_loadu_si128((__m128i *) (pi2_ref_array_temp));
 1088|   550k|                ref_arr_8x16b_r1_0 =
 1089|   550k|                    _mm_loadu_si128((__m128i *) (pi2_ref_array_temp + i4_refarray_wd));
 1090|       |
 1091|   550k|                zero_r0_0 = _mm_test_all_ones(_mm_cmpeq_epi16(
 1092|   550k|                    ref_arr_8x16b_r0_0, zero_8x16b));  // return 1 if all zeros, else 0
 1093|   550k|                zero_r1_0 = _mm_test_all_ones(_mm_cmpeq_epi16(ref_arr_8x16b_r1_0, zero_8x16b));
 1094|       |
 1095|   550k|                zero_r0_r1 = zero_r0_0 && zero_r1_0;
  ------------------
  |  Branch (1095:30): [True: 541k, False: 9.47k]
  |  Branch (1095:43): [True: 540k, False: 876]
  ------------------
 1096|       |
 1097|   550k|                if(!zero_r0_r1)
  ------------------
  |  Branch (1097:20): [True: 10.3k, False: 540k]
  ------------------
 1098|  10.3k|                {
 1099|  10.3k|                    u1_incr_8x16b_r0_0 = _mm_loadu_si128((__m128i *) (pu1_ref_x_ptr_incr_temp));
 1100|  10.3k|                    u1_incr_8x16b_r1_0 =
 1101|  10.3k|                        _mm_loadu_si128((__m128i *) (pu1_ref_x_ptr_incr_temp + i4_refarray_wd));
 1102|       |
 1103|  10.3k|                    u1_incr_8x16b_r0_0 = _mm_shuffle_epi8(u1_incr_8x16b_r0_0, x_ref_pos_mask_r0);
 1104|  10.3k|                    u1_incr_8x16b_r1_0 = _mm_shuffle_epi8(u1_incr_8x16b_r1_0, x_ref_pos_mask_r0);
 1105|       |
 1106|  10.3k|                    u1_incr_not_8x16b_r0_0 =
 1107|  10.3k|                        _mm_andnot_si128(u1_incr_8x16b_r0_0, phs_mask_div8_8x16b_0);
 1108|  10.3k|                    u1_incr_not_8x16b_r1_0 =
 1109|  10.3k|                        _mm_andnot_si128(u1_incr_8x16b_r1_0, phs_mask_div8_8x16b_0);
 1110|       |
 1111|  10.3k|                    u1_incr_not_8x16b_r0_0 =
 1112|  10.3k|                        _mm_add_epi8(u1_incr_not_8x16b_r0_0, x_ref_pos_mask_r0);
 1113|  10.3k|                    u1_incr_not_8x16b_r1_0 =
 1114|  10.3k|                        _mm_add_epi8(u1_incr_not_8x16b_r1_0, x_ref_pos_mask_r0);
 1115|       |
 1116|  10.3k|                    x_ref_pos_mask_temp_r0_0 =
 1117|  10.3k|                        _mm_add_epi8(u1_incr_not_8x16b_r0_0, u1_incr_8x16b_r0_0);
 1118|  10.3k|                    x_ref_pos_mask_temp_r1_0 =
 1119|  10.3k|                        _mm_add_epi8(u1_incr_not_8x16b_r1_0, u1_incr_8x16b_r1_0);
 1120|       |
 1121|  10.3k|                    u1_incr_not_8x16b_r0_even =
 1122|  10.3k|                        _mm_add_epi8(u1_incr_not_8x16b_r0_0, u1_incr_not_8x16b_r0_0);
 1123|  10.3k|                    u1_incr_not_8x16b_r1_even =
 1124|  10.3k|                        _mm_add_epi8(u1_incr_not_8x16b_r1_0, u1_incr_not_8x16b_r1_0);
 1125|  10.3k|                    x_ref_pos_mask_temp_r0_even =
 1126|  10.3k|                        _mm_add_epi8(x_ref_pos_mask_temp_r0_0, x_ref_pos_mask_temp_r0_0);
 1127|  10.3k|                    x_ref_pos_mask_temp_r1_even =
 1128|  10.3k|                        _mm_add_epi8(x_ref_pos_mask_temp_r1_0, x_ref_pos_mask_temp_r1_0);
 1129|       |
 1130|  10.3k|                    u1_incr_not_8x16b_r0_odd = _mm_add_epi8(u1_incr_not_8x16b_r0_even, const_ones);
 1131|  10.3k|                    u1_incr_not_8x16b_r1_odd = _mm_add_epi8(u1_incr_not_8x16b_r1_even, const_ones);
 1132|  10.3k|                    x_ref_pos_mask_temp_r0_odd =
 1133|  10.3k|                        _mm_add_epi8(x_ref_pos_mask_temp_r0_even, const_ones);
 1134|  10.3k|                    x_ref_pos_mask_temp_r1_odd =
 1135|  10.3k|                        _mm_add_epi8(x_ref_pos_mask_temp_r1_even, const_ones);
 1136|       |
 1137|  10.3k|                    u1_incr_not_8x16b_r0_0 =
 1138|  10.3k|                        _mm_unpacklo_epi8(u1_incr_not_8x16b_r0_even, u1_incr_not_8x16b_r0_odd);
 1139|  10.3k|                    u1_incr_not_8x16b_r1_0 =
 1140|  10.3k|                        _mm_unpacklo_epi8(u1_incr_not_8x16b_r1_even, u1_incr_not_8x16b_r1_odd);
 1141|  10.3k|                    x_ref_pos_mask_temp_r0_0 =
 1142|  10.3k|                        _mm_unpacklo_epi8(x_ref_pos_mask_temp_r0_even, x_ref_pos_mask_temp_r0_odd);
 1143|  10.3k|                    x_ref_pos_mask_temp_r1_0 =
 1144|  10.3k|                        _mm_unpacklo_epi8(x_ref_pos_mask_temp_r1_even, x_ref_pos_mask_temp_r1_odd);
 1145|       |
 1146|  10.3k|                    ref_arr_temp0_8x16b_r0_0 =
 1147|  10.3k|                        _mm_shuffle_epi8(ref_arr_8x16b_r0_0, u1_incr_not_8x16b_r0_0);
 1148|  10.3k|                    ref_arr_temp0_8x16b_r1_0 =
 1149|  10.3k|                        _mm_shuffle_epi8(ref_arr_8x16b_r1_0, u1_incr_not_8x16b_r1_0);
 1150|  10.3k|                    ref_arr_temp1_8x16b_r0_0 =
 1151|  10.3k|                        _mm_shuffle_epi8(ref_arr_8x16b_r0_0, x_ref_pos_mask_temp_r0_0);
 1152|  10.3k|                    ref_arr_temp1_8x16b_r1_0 =
 1153|  10.3k|                        _mm_shuffle_epi8(ref_arr_8x16b_r1_0, x_ref_pos_mask_temp_r1_0);
 1154|       |
 1155|  10.3k|                    res0_8x16b_r0_0 =
 1156|  10.3k|                        _mm_mullo_epi16(ref_arr_temp0_8x16b_r0_0, phs_mask_16min_8x16b_0);
 1157|  10.3k|                    res0_8x16b_r1_0 =
 1158|  10.3k|                        _mm_mullo_epi16(ref_arr_temp0_8x16b_r1_0, phs_mask_16min_8x16b_0);
 1159|  10.3k|                    res1_8x16b_r0_0 = _mm_mullo_epi16(ref_arr_temp1_8x16b_r0_0, phs_mask_8x16b_0);
 1160|  10.3k|                    res1_8x16b_r1_0 = _mm_mullo_epi16(ref_arr_temp1_8x16b_r1_0, phs_mask_8x16b_0);
 1161|       |
 1162|  10.3k|                    res_8x16b_r0_0 = _mm_add_epi16(res0_8x16b_r0_0, res1_8x16b_r0_0);
 1163|  10.3k|                    res_8x16b_r1_0 = _mm_add_epi16(res0_8x16b_r1_0, res1_8x16b_r1_0);
 1164|       |
 1165|  10.3k|                    pu1_ref_y_ptr_incr_temp =
 1166|  10.3k|                        pu1_ref_y_ptr_incr + (pi1_y_ref_pos[i4_y] * i4_refarray_wd);
 1167|  10.3k|                    u1_y_incr_8x16b_r0_0 = _mm_loadu_si128((__m128i *) (pu1_ref_y_ptr_incr_temp));
 1168|       |
 1169|  10.3k|                    u1_y_incr_8x16b_r0_0 =
 1170|  10.3k|                        _mm_shuffle_epi8(u1_y_incr_8x16b_r0_0, x_ref_rnd_mask_r0_0);
 1171|       |
 1172|  10.3k|                    u1_y_incr_8x16b_r0_0 = _mm_cvtepi8_epi16(u1_y_incr_8x16b_r0_0);
 1173|  10.3k|                    u1_y_incr_8x16b_r0_0 = _mm_cmpeq_epi16(u1_y_incr_8x16b_r0_0, const_ones_8x16b);
 1174|  10.3k|                    u1_prev_y_incr_8x16b_r0_0 = u1_y_incr_8x16b_r0_0;
 1175|       |
 1176|  10.3k|                    prev_res_8x16b_r0_0 = res_8x16b_r0_0;
 1177|  10.3k|                    prev_res_8x16b_r1_0 = res_8x16b_r1_0;
 1178|  10.3k|                }
 1179|   550k|            }
 1180|       |
 1181|   554k|            if(zero_r0_r1)
  ------------------
  |  Branch (1181:16): [True: 540k, False: 14.2k]
  ------------------
 1182|   540k|            {
 1183|   540k|                res_4x32b_l_0 = _mm_set1_epi32(0);
 1184|   540k|                res_4x32b_l_1 = _mm_set1_epi32(0);
 1185|   540k|            }
 1186|  14.2k|            else
 1187|  14.2k|            {
 1188|  14.2k|                i4_y_phase = pi1_y_phase[i4_y];
 1189|       |
 1190|  14.2k|                if((i4_y_phase) >> 3)
  ------------------
  |  Branch (1190:20): [True: 9.76k, False: 4.48k]
  ------------------
 1191|  9.76k|                {
 1192|  9.76k|                    vert_res0_8x16b_r0_0 =
 1193|  9.76k|                        _mm_blendv_epi8(res_8x16b_r1_0, res_8x16b_r0_0, u1_y_incr_8x16b_r0_0);
 1194|  9.76k|                    vert_res1_8x16b_r0_0 =
 1195|  9.76k|                        _mm_blendv_epi8(res_8x16b_r1_0, res_8x16b_r1_0, u1_y_incr_8x16b_r0_0);
 1196|  9.76k|                }
 1197|  4.48k|                else
 1198|  4.48k|                {
 1199|  4.48k|                    vert_res0_8x16b_r0_0 =
 1200|  4.48k|                        _mm_blendv_epi8(res_8x16b_r0_0, res_8x16b_r0_0, u1_y_incr_8x16b_r0_0);
 1201|  4.48k|                    vert_res1_8x16b_r0_0 =
 1202|  4.48k|                        _mm_blendv_epi8(res_8x16b_r0_0, res_8x16b_r1_0, u1_y_incr_8x16b_r0_0);
 1203|  4.48k|                }
 1204|       |
 1205|  14.2k|                res0_8x16b_r0_0 = _mm_unpacklo_epi16(vert_res0_8x16b_r0_0, vert_res1_8x16b_r0_0);
 1206|  14.2k|                res1_8x16b_r0_0 = _mm_unpackhi_epi16(vert_res0_8x16b_r0_0, vert_res1_8x16b_r0_0);
 1207|       |
 1208|  14.2k|                phs_y_mask_16min_8x16b = _mm_set1_epi16(16 - i4_y_phase);
 1209|  14.2k|                phs_y_mask_8x16b = _mm_set1_epi16(i4_y_phase);
 1210|  14.2k|                phs_y_mask_mix_8x16b = _mm_unpacklo_epi16(phs_y_mask_16min_8x16b, phs_y_mask_8x16b);
 1211|       |
 1212|  14.2k|                res_4x32b_l_0 = _mm_madd_epi16(res0_8x16b_r0_0, phs_y_mask_mix_8x16b);
 1213|  14.2k|                res_4x32b_l_1 = _mm_madd_epi16(res1_8x16b_r0_0, phs_y_mask_mix_8x16b);
 1214|  14.2k|                res_4x32b_l_0 = _mm_add_epi32(res_4x32b_l_0, const_128);
 1215|  14.2k|                res_4x32b_l_1 = _mm_add_epi32(res_4x32b_l_1, const_128);
 1216|       |
 1217|  14.2k|                res_4x32b_l_0 = _mm_srai_epi32(res_4x32b_l_0, 8);
 1218|  14.2k|                res_4x32b_l_1 = _mm_srai_epi32(res_4x32b_l_1, 8);
 1219|  14.2k|            }
 1220|   554k|            out_stride_temp = (i4_y * i4_out_stride);
 1221|       |
 1222|   554k|            out_4x32b_l = _mm_loadu_si128((__m128i *) (pi2_out + out_stride_temp));
 1223|   554k|            out_4x32b_h = _mm_loadu_si128((__m128i *) (pi2_out + out_stride_temp + 8));
 1224|       |
 1225|   554k|            out_4x32b_l = _mm_and_si128(out_4x32b_l, chroma_mask);
 1226|   554k|            out_4x32b_h = _mm_and_si128(out_4x32b_h, chroma_mask);
 1227|       |
 1228|   554k|            res_4x32b_l_0 = _mm_and_si128(res_4x32b_l_0, chroma_mask2);
 1229|   554k|            res_4x32b_l_1 = _mm_and_si128(res_4x32b_l_1, chroma_mask2);
 1230|       |
 1231|   554k|            out_4x32b_l = _mm_add_epi8(res_4x32b_l_0, out_4x32b_l);
 1232|   554k|            out_4x32b_h = _mm_add_epi8(res_4x32b_l_1, out_4x32b_h);
 1233|       |
 1234|   554k|            _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp), out_4x32b_l);
 1235|   554k|            _mm_storeu_si128((__m128i *) (pi2_out + out_stride_temp + 8), out_4x32b_h);
 1236|   554k|        }
 1237|  96.9k|    }
 1238|   145k|    return;
 1239|   145k|} /* End of Interpolation Function */
isvcd_residual_reflayer_const_non_boundary_mb_sse42:
 1268|  39.2k|{
 1269|  39.2k|    WORD32 i4_y;
 1270|       |
 1271|  39.2k|    WORD16 *pi2_ref_data_byte;
 1272|  39.2k|    WORD16 *pi2_ref_array_temp;
 1273|  39.2k|    if(i4_chroma_flag == 0)
  ------------------
  |  Branch (1273:8): [True: 13.0k, False: 26.1k]
  ------------------
 1274|  13.0k|    {
 1275|  13.0k|        WORD8 index_0[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
 1276|  13.0k|        __m128i ref_mb_type_8x16_q0, ref_mb_type_8x16_q1, ref_mb_type_8x16_q2, ref_mb_type_8x16_q3,
 1277|  13.0k|            mb_quard1_part_x_8x16;
 1278|  13.0k|        __m128i ref_mb_type_8x16_0, ref_mb_type_8x16_1;
 1279|  13.0k|        __m128i ref_mb_type_8x16_low_0, ref_mb_type_8x16_low_1;
 1280|  13.0k|        __m128i mb_type_mask_8x16_0 = _mm_set1_epi8(-1);
 1281|  13.0k|        __m128i mb_type_mask_8x16_1 = _mm_set1_epi8(-1);
 1282|  13.0k|        __m128i mb_type_mask_8x16_low_0, mb_type_mask_8x16_low_1;
 1283|  13.0k|        __m128i mask_8x16_0;
 1284|  13.0k|        __m128i index_arr_0;
 1285|  13.0k|        __m128i inp_data_16x8_0, inp_data_16x8_1;
 1286|  13.0k|        __m128i res_16x8_0, res_16x8_1;
 1287|  13.0k|        __m128i one_8x16 = _mm_set1_epi8(1);
 1288|  13.0k|        __m128i zero_8x16 = _mm_set1_epi8(0);
 1289|       |
 1290|  13.0k|        index_arr_0 = _mm_loadu_si128((__m128i *) index_0);
 1291|  13.0k|        ref_mb_type_8x16_q0 = _mm_set1_epi8(i4_ref_mb_type_q0);
 1292|  13.0k|        ref_mb_type_8x16_q1 = _mm_set1_epi8(i4_ref_mb_type_q1);
 1293|  13.0k|        ref_mb_type_8x16_q2 = _mm_set1_epi8(i4_ref_mb_type_q2);
 1294|  13.0k|        ref_mb_type_8x16_q3 = _mm_set1_epi8(i4_ref_mb_type_q3);
 1295|  13.0k|        if((i4_mb_quard1_part_x >= i4_refarray_wd) && (i4_mb_quard1_part_y >= i4_refarray_ht))
  ------------------
  |  Branch (1295:12): [True: 0, False: 13.0k]
  |  Branch (1295:55): [True: 0, False: 0]
  ------------------
 1296|      0|        {
 1297|       |            // Quard 0
 1298|      0|            ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1299|      0|            ref_mb_type_8x16_1 = ref_mb_type_8x16_q0;
 1300|      0|            mb_type_mask_8x16_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_0, one_8x16);
 1301|      0|            mb_type_mask_8x16_1 = mb_type_mask_8x16_0;
 1302|      0|        }
 1303|  13.0k|        else if((i4_mb_quard1_part_y >= (i4_refarray_ht - 1)) &&
  ------------------
  |  Branch (1303:17): [True: 4.22k, False: 8.85k]
  ------------------
 1304|  4.22k|                (i4_mb_quard1_part_x < i4_refarray_wd))
  ------------------
  |  Branch (1304:17): [True: 4.22k, False: 0]
  ------------------
 1305|  4.22k|        {
 1306|       |            // Quard 0 & 1
 1307|  4.22k|            if(i4_mb_quard1_part_x == 8)
  ------------------
  |  Branch (1307:16): [True: 0, False: 4.22k]
  ------------------
 1308|      0|            {
 1309|      0|                ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1310|      0|                ref_mb_type_8x16_1 = ref_mb_type_8x16_q1;
 1311|      0|            }
 1312|  4.22k|            else if(i4_mb_quard1_part_x < 8)
  ------------------
  |  Branch (1312:21): [True: 4.22k, False: 0]
  ------------------
 1313|  4.22k|            {
 1314|  4.22k|                mb_quard1_part_x_8x16 = _mm_set1_epi8((i4_mb_quard1_part_x << 1));
 1315|  4.22k|                mask_8x16_0 =
 1316|  4.22k|                    _mm_cmplt_epi8(index_arr_0, mb_quard1_part_x_8x16);  // return 1 if a<b, else 0
 1317|       |
 1318|  4.22k|                ref_mb_type_8x16_0 =
 1319|  4.22k|                    _mm_blendv_epi8(ref_mb_type_8x16_q1, ref_mb_type_8x16_q0, mask_8x16_0);
 1320|  4.22k|                ref_mb_type_8x16_1 = ref_mb_type_8x16_q1;
 1321|  4.22k|            }
 1322|      0|            else
 1323|      0|            {
 1324|      0|                mb_quard1_part_x_8x16 = _mm_set1_epi8((i4_mb_quard1_part_x - 8) << 1);
 1325|      0|                mask_8x16_0 =
 1326|      0|                    _mm_cmplt_epi8(index_arr_0, mb_quard1_part_x_8x16);  // return 1 if a<b, else 0
 1327|       |
 1328|      0|                ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1329|      0|                ref_mb_type_8x16_1 =
 1330|      0|                    _mm_blendv_epi8(ref_mb_type_8x16_q1, ref_mb_type_8x16_q0, mask_8x16_0);
 1331|      0|            }
 1332|       |
 1333|  4.22k|            mb_type_mask_8x16_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_0, one_8x16);
 1334|  4.22k|            mb_type_mask_8x16_1 = _mm_cmpeq_epi8(ref_mb_type_8x16_1, one_8x16);
 1335|  4.22k|        }
 1336|  8.85k|        else
 1337|  8.85k|        {
 1338|  8.85k|            if(i4_mb_quard1_part_x >= i4_refarray_wd)
  ------------------
  |  Branch (1338:16): [True: 0, False: 8.85k]
  ------------------
 1339|      0|            {
 1340|      0|                ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1341|      0|                ref_mb_type_8x16_1 = ref_mb_type_8x16_q0;
 1342|       |
 1343|      0|                ref_mb_type_8x16_low_0 = ref_mb_type_8x16_q2;
 1344|      0|                ref_mb_type_8x16_low_1 = ref_mb_type_8x16_q2;
 1345|      0|            }
 1346|  8.85k|            else
 1347|  8.85k|            {
 1348|       |                // Quard 0, 1, 2, 3
 1349|  8.85k|                if(i4_mb_quard1_part_x == 8)
  ------------------
  |  Branch (1349:20): [True: 0, False: 8.85k]
  ------------------
 1350|      0|                {
 1351|      0|                    ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1352|      0|                    ref_mb_type_8x16_1 = ref_mb_type_8x16_q1;
 1353|       |
 1354|      0|                    ref_mb_type_8x16_low_0 = ref_mb_type_8x16_q2;
 1355|      0|                    ref_mb_type_8x16_low_1 = ref_mb_type_8x16_q3;
 1356|      0|                }
 1357|  8.85k|                else if(i4_mb_quard1_part_x < 8)
  ------------------
  |  Branch (1357:25): [True: 8.85k, False: 0]
  ------------------
 1358|  8.85k|                {
 1359|  8.85k|                    mb_quard1_part_x_8x16 = _mm_set1_epi8((i4_mb_quard1_part_x << 1));
 1360|  8.85k|                    mask_8x16_0 = _mm_cmplt_epi8(index_arr_0,
 1361|  8.85k|                                                 mb_quard1_part_x_8x16);  // return 1 if a<b, else 0
 1362|       |
 1363|  8.85k|                    ref_mb_type_8x16_0 =
 1364|  8.85k|                        _mm_blendv_epi8(ref_mb_type_8x16_q1, ref_mb_type_8x16_q0, mask_8x16_0);
 1365|  8.85k|                    ref_mb_type_8x16_1 = ref_mb_type_8x16_q1;
 1366|       |
 1367|  8.85k|                    ref_mb_type_8x16_low_0 =
 1368|  8.85k|                        _mm_blendv_epi8(ref_mb_type_8x16_q3, ref_mb_type_8x16_q2, mask_8x16_0);
 1369|  8.85k|                    ref_mb_type_8x16_low_1 = ref_mb_type_8x16_q3;
 1370|  8.85k|                }
 1371|      0|                else
 1372|      0|                {
 1373|      0|                    mb_quard1_part_x_8x16 = _mm_set1_epi8((i4_mb_quard1_part_x - 8) << 1);
 1374|      0|                    mask_8x16_0 = _mm_cmplt_epi8(index_arr_0,
 1375|      0|                                                 mb_quard1_part_x_8x16);  // return 1 if a<b, else 0
 1376|       |
 1377|      0|                    ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1378|      0|                    ref_mb_type_8x16_1 =
 1379|      0|                        _mm_blendv_epi8(ref_mb_type_8x16_q1, ref_mb_type_8x16_q0, mask_8x16_0);
 1380|       |
 1381|      0|                    ref_mb_type_8x16_low_0 = ref_mb_type_8x16_q2;
 1382|      0|                    ref_mb_type_8x16_low_1 =
 1383|      0|                        _mm_blendv_epi8(ref_mb_type_8x16_q3, ref_mb_type_8x16_q2, mask_8x16_0);
 1384|      0|                }
 1385|  8.85k|                mb_type_mask_8x16_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_0, one_8x16);
 1386|  8.85k|                mb_type_mask_8x16_1 = _mm_cmpeq_epi8(ref_mb_type_8x16_1, one_8x16);
 1387|       |
 1388|  8.85k|                mb_type_mask_8x16_low_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_low_0, one_8x16);
 1389|  8.85k|                mb_type_mask_8x16_low_1 = _mm_cmpeq_epi8(ref_mb_type_8x16_low_1, one_8x16);
 1390|  8.85k|            }
 1391|  8.85k|        }
 1392|       |
 1393|  13.0k|        if(i4_mb_quard1_part_y < i4_refarray_ht - 1)
  ------------------
  |  Branch (1393:12): [True: 8.85k, False: 4.22k]
  ------------------
 1394|  8.85k|        {
 1395|   115k|            for(i4_y = 0; i4_y < i4_refarray_ht; i4_y++)
  ------------------
  |  Branch (1395:27): [True: 106k, False: 8.85k]
  ------------------
 1396|   106k|            {
 1397|   106k|                pi2_ref_data_byte = pi2_inp_data + (i4_y * i4_inp_data_stride);
 1398|   106k|                inp_data_16x8_0 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte));
 1399|   106k|                inp_data_16x8_1 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte + 8));
 1400|       |
 1401|   106k|                if(i4_y < i4_mb_quard1_part_y)
  ------------------
  |  Branch (1401:20): [True: 35.1k, False: 71.1k]
  ------------------
 1402|  35.1k|                {
 1403|  35.1k|                    res_16x8_0 = _mm_blendv_epi8(zero_8x16, inp_data_16x8_0, mb_type_mask_8x16_0);
 1404|  35.1k|                    res_16x8_1 = _mm_blendv_epi8(zero_8x16, inp_data_16x8_1, mb_type_mask_8x16_1);
 1405|  35.1k|                }
 1406|  71.1k|                else
 1407|  71.1k|                {
 1408|  71.1k|                    res_16x8_0 =
 1409|  71.1k|                        _mm_blendv_epi8(zero_8x16, inp_data_16x8_0, mb_type_mask_8x16_low_0);
 1410|  71.1k|                    res_16x8_1 =
 1411|  71.1k|                        _mm_blendv_epi8(zero_8x16, inp_data_16x8_1, mb_type_mask_8x16_low_1);
 1412|  71.1k|                }
 1413|       |
 1414|   106k|                pi2_ref_array_temp = pi2_ref_array + (i4_y * i4_refarray_wd);
 1415|   106k|                _mm_storeu_si128((__m128i *) (pi2_ref_array_temp), res_16x8_0);
 1416|   106k|                _mm_storeu_si128((__m128i *) (pi2_ref_array_temp + 8), res_16x8_1);
 1417|   106k|            }
 1418|  8.85k|        }
 1419|  4.22k|        else
 1420|  4.22k|        {
 1421|  54.9k|            for(i4_y = 0; i4_y < i4_refarray_ht; i4_y++)
  ------------------
  |  Branch (1421:27): [True: 50.7k, False: 4.22k]
  ------------------
 1422|  50.7k|            {
 1423|  50.7k|                pi2_ref_data_byte = pi2_inp_data + (i4_y * i4_inp_data_stride);
 1424|  50.7k|                inp_data_16x8_0 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte));
 1425|  50.7k|                inp_data_16x8_1 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte + 8));
 1426|       |
 1427|  50.7k|                res_16x8_0 = _mm_blendv_epi8(zero_8x16, inp_data_16x8_0, mb_type_mask_8x16_0);
 1428|  50.7k|                res_16x8_1 = _mm_blendv_epi8(zero_8x16, inp_data_16x8_1, mb_type_mask_8x16_1);
 1429|       |
 1430|  50.7k|                pi2_ref_array_temp = pi2_ref_array + (i4_y * i4_refarray_wd);
 1431|  50.7k|                _mm_storeu_si128((__m128i *) (pi2_ref_array_temp), res_16x8_0);
 1432|  50.7k|                _mm_storeu_si128((__m128i *) (pi2_ref_array_temp + 8), res_16x8_1);
 1433|  50.7k|            }
 1434|  4.22k|        }
 1435|  13.0k|    }
 1436|  26.1k|    else
 1437|  26.1k|    {
 1438|  26.1k|        WORD8 index_0[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
 1439|  26.1k|        WORD8 even_mask_arr[16] = {0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15};
 1440|  26.1k|        __m128i ref_mb_type_8x16_q0, ref_mb_type_8x16_q1, ref_mb_type_8x16_q2, ref_mb_type_8x16_q3,
 1441|  26.1k|            mb_quard1_part_x_8x16;
 1442|  26.1k|        __m128i ref_mb_type_8x16_0;
 1443|  26.1k|        __m128i ref_mb_type_8x16_low_0;
 1444|  26.1k|        __m128i mb_type_mask_8x16_0 = _mm_set1_epi8(-1);
 1445|  26.1k|        __m128i mb_type_mask_8x16_low_0;
 1446|  26.1k|        __m128i mask_8x16_0;
 1447|  26.1k|        __m128i index_arr_0, even_mask;
 1448|  26.1k|        __m128i inp_data_16x8_0, inp_data_16x8_1, inp_data_16x8;
 1449|  26.1k|        __m128i res_16x8_0;
 1450|  26.1k|        __m128i one_8x16 = _mm_set1_epi8(1);
 1451|  26.1k|        __m128i zero_8x16 = _mm_set1_epi8(0);
 1452|       |
 1453|  26.1k|        index_arr_0 = _mm_loadu_si128((__m128i *) index_0);
 1454|  26.1k|        even_mask = _mm_loadu_si128((__m128i *) even_mask_arr);
 1455|       |
 1456|  26.1k|        ref_mb_type_8x16_q0 = _mm_set1_epi8(i4_ref_mb_type_q0);
 1457|  26.1k|        ref_mb_type_8x16_q1 = _mm_set1_epi8(i4_ref_mb_type_q1);
 1458|  26.1k|        ref_mb_type_8x16_q2 = _mm_set1_epi8(i4_ref_mb_type_q2);
 1459|  26.1k|        ref_mb_type_8x16_q3 = _mm_set1_epi8(i4_ref_mb_type_q3);
 1460|  26.1k|        if((i4_mb_quard1_part_x >= i4_refarray_wd) && (i4_mb_quard1_part_y >= i4_refarray_ht))
  ------------------
  |  Branch (1460:12): [True: 0, False: 26.1k]
  |  Branch (1460:55): [True: 0, False: 0]
  ------------------
 1461|      0|        {
 1462|       |            // Quard 0
 1463|      0|            ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1464|      0|            mb_type_mask_8x16_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_0, one_8x16);
 1465|      0|        }
 1466|  26.1k|        else if((i4_mb_quard1_part_y >= (i4_refarray_ht - 1)) &&
  ------------------
  |  Branch (1466:17): [True: 8.45k, False: 17.7k]
  ------------------
 1467|  8.45k|                (i4_mb_quard1_part_x < i4_refarray_wd))
  ------------------
  |  Branch (1467:17): [True: 8.45k, False: 0]
  ------------------
 1468|  8.45k|        {
 1469|       |            // Quard 0 & 1
 1470|  8.45k|            mb_quard1_part_x_8x16 = _mm_set1_epi8((i4_mb_quard1_part_x << 1));
 1471|  8.45k|            mask_8x16_0 =
 1472|  8.45k|                _mm_cmplt_epi8(index_arr_0, mb_quard1_part_x_8x16);  // return 1 if a<b, else 0
 1473|       |
 1474|  8.45k|            ref_mb_type_8x16_0 =
 1475|  8.45k|                _mm_blendv_epi8(ref_mb_type_8x16_q1, ref_mb_type_8x16_q0, mask_8x16_0);
 1476|  8.45k|            mb_type_mask_8x16_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_0, one_8x16);
 1477|  8.45k|        }
 1478|  17.7k|        else
 1479|  17.7k|        {
 1480|  17.7k|            if(i4_mb_quard1_part_x >= i4_refarray_wd)
  ------------------
  |  Branch (1480:16): [True: 0, False: 17.7k]
  ------------------
 1481|      0|            {
 1482|       |                // Quard 0 & 2
 1483|      0|                ref_mb_type_8x16_0 = ref_mb_type_8x16_q0;
 1484|      0|                ref_mb_type_8x16_low_0 = ref_mb_type_8x16_q2;
 1485|      0|            }
 1486|  17.7k|            else
 1487|  17.7k|            {
 1488|       |                // Quard 0, 1, 2, 3
 1489|  17.7k|                mb_quard1_part_x_8x16 = _mm_set1_epi8((i4_mb_quard1_part_x << 1));
 1490|  17.7k|                mask_8x16_0 =
 1491|  17.7k|                    _mm_cmplt_epi8(index_arr_0, mb_quard1_part_x_8x16);  // return 1 if a<b, else 0
 1492|       |
 1493|  17.7k|                ref_mb_type_8x16_0 =
 1494|  17.7k|                    _mm_blendv_epi8(ref_mb_type_8x16_q1, ref_mb_type_8x16_q0, mask_8x16_0);
 1495|  17.7k|                ref_mb_type_8x16_low_0 =
 1496|  17.7k|                    _mm_blendv_epi8(ref_mb_type_8x16_q3, ref_mb_type_8x16_q2, mask_8x16_0);
 1497|       |
 1498|  17.7k|                mb_type_mask_8x16_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_0, one_8x16);
 1499|  17.7k|                mb_type_mask_8x16_low_0 = _mm_cmpeq_epi8(ref_mb_type_8x16_low_0, one_8x16);
 1500|  17.7k|            }
 1501|  17.7k|        }
 1502|       |
 1503|  26.1k|        if(i4_mb_quard1_part_y < i4_refarray_ht - 1)
  ------------------
  |  Branch (1503:12): [True: 17.7k, False: 8.45k]
  ------------------
 1504|  17.7k|        {
 1505|   131k|            for(i4_y = 0; i4_y < i4_refarray_ht; i4_y++)
  ------------------
  |  Branch (1505:27): [True: 113k, False: 17.7k]
  ------------------
 1506|   113k|            {
 1507|   113k|                pi2_ref_data_byte = pi2_inp_data + (i4_y * i4_inp_data_stride);
 1508|   113k|                inp_data_16x8_0 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte));
 1509|   113k|                inp_data_16x8_1 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte + 8));
 1510|       |
 1511|   113k|                inp_data_16x8_0 = _mm_shuffle_epi8(inp_data_16x8_0, even_mask);
 1512|   113k|                inp_data_16x8_1 = _mm_shuffle_epi8(inp_data_16x8_1, even_mask);
 1513|       |
 1514|   113k|                inp_data_16x8 = _mm_unpacklo_epi64(inp_data_16x8_0, inp_data_16x8_1);
 1515|   113k|                if(i4_y < i4_mb_quard1_part_y)
  ------------------
  |  Branch (1515:20): [True: 38.7k, False: 74.7k]
  ------------------
 1516|  38.7k|                {
 1517|  38.7k|                    res_16x8_0 = _mm_blendv_epi8(zero_8x16, inp_data_16x8, mb_type_mask_8x16_0);
 1518|  38.7k|                }
 1519|  74.7k|                else
 1520|  74.7k|                {
 1521|  74.7k|                    res_16x8_0 = _mm_blendv_epi8(zero_8x16, inp_data_16x8, mb_type_mask_8x16_low_0);
 1522|  74.7k|                }
 1523|       |
 1524|   113k|                pi2_ref_array_temp = pi2_ref_array + (i4_y * i4_refarray_wd);
 1525|   113k|                _mm_storeu_si128((__m128i *) (pi2_ref_array_temp), res_16x8_0);
 1526|   113k|            }
 1527|  17.7k|        }
 1528|  8.45k|        else
 1529|  8.45k|        {
 1530|  67.6k|            for(i4_y = 0; i4_y < i4_refarray_ht; i4_y++)
  ------------------
  |  Branch (1530:27): [True: 59.1k, False: 8.45k]
  ------------------
 1531|  59.1k|            {
 1532|  59.1k|                pi2_ref_data_byte = pi2_inp_data + (i4_y * i4_inp_data_stride);
 1533|  59.1k|                inp_data_16x8_0 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte));
 1534|  59.1k|                inp_data_16x8_1 = _mm_loadu_si128((__m128i *) (pi2_ref_data_byte + 8));
 1535|       |
 1536|  59.1k|                inp_data_16x8_0 = _mm_shuffle_epi8(inp_data_16x8_0, even_mask);
 1537|  59.1k|                inp_data_16x8_1 = _mm_shuffle_epi8(inp_data_16x8_1, even_mask);
 1538|  59.1k|                inp_data_16x8 = _mm_unpacklo_epi64(inp_data_16x8_0, inp_data_16x8_1);
 1539|       |
 1540|  59.1k|                res_16x8_0 = _mm_blendv_epi8(zero_8x16, inp_data_16x8, mb_type_mask_8x16_0);
 1541|  59.1k|                pi2_ref_array_temp = pi2_ref_array + (i4_y * i4_refarray_wd);
 1542|  59.1k|                _mm_storeu_si128((__m128i *) (pi2_ref_array_temp), res_16x8_0);
 1543|  59.1k|            }
 1544|  8.45k|        }
 1545|  26.1k|    }
 1546|  39.2k|}

_Z17iv_aligned_mallocPvii:
   82|  3.77M|{
   83|  3.77M|    void *buf = NULL;
   84|  3.77M|    (void) ctxt;
   85|  3.77M|    if(0 != posix_memalign(&buf, alignment, size))
  ------------------
  |  Branch (85:8): [True: 0, False: 3.77M]
  ------------------
   86|      0|    {
   87|      0|        return NULL;
   88|      0|    }
   89|  3.77M|    return buf;
   90|  3.77M|}
_Z15iv_aligned_freePvS_:
   93|  3.77M|{
   94|  3.77M|    (void) ctxt;
   95|  3.77M|    free(buf);
   96|  3.77M|}
_ZN5CodecC2E17IV_COLOR_FORMAT_Tm:
  126|  22.6k|{
  127|  22.6k|    mColorFormat = colorFormat;
  128|  22.6k|    mNumCores = numCores;
  129|  22.6k|    mCodec = nullptr;
  130|  22.6k|    mWidth = 0;
  131|  22.6k|    mHeight = 0;
  132|       |
  133|  22.6k|    memset(&mOutBufHandle, 0, sizeof(mOutBufHandle));
  134|  22.6k|}
_ZN5CodecD2Ev:
  136|  22.6k|Codec::~Codec() {}
_ZN5Codec11createCodecEv:
  138|  22.6k|{
  139|  22.6k|    IV_API_CALL_STATUS_T ret;
  140|  22.6k|    ih264d_create_ip_t create_ip{};
  141|  22.6k|    ih264d_create_op_t create_op{};
  142|  22.6k|    void *fxns = (void *) &ivd_api_function;
  ------------------
  |  |   55|  22.6k|#define ivd_api_function isvcd_api_function
  ------------------
  143|       |
  144|  22.6k|    create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE;
  145|  22.6k|    create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 0;
  146|  22.6k|    create_ip.s_ivd_create_ip_t.e_output_format = mColorFormat;
  147|  22.6k|    create_ip.s_ivd_create_ip_t.pf_aligned_alloc = iv_aligned_malloc;
  148|  22.6k|    create_ip.s_ivd_create_ip_t.pf_aligned_free = iv_aligned_free;
  149|  22.6k|    create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL;
  150|  22.6k|    create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t);
  151|  22.6k|    create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t);
  152|       |
  153|  22.6k|    ret = ivd_api_function(NULL, (void *) &create_ip, (void *) &create_op);
  ------------------
  |  |   55|  22.6k|#define ivd_api_function isvcd_api_function
  ------------------
  154|  22.6k|    if(ret != IV_SUCCESS)
  ------------------
  |  Branch (154:8): [True: 13, False: 22.5k]
  ------------------
  155|     13|    {
  156|     13|        return;
  157|     13|    }
  158|  22.5k|    mCodec = (iv_obj_t *) create_op.s_ivd_create_op_t.pv_handle;
  159|  22.5k|    mCodec->pv_fxns = fxns;
  160|  22.5k|    mCodec->u4_size = sizeof(iv_obj_t);
  161|  22.5k|}
_ZN5Codec11deleteCodecEv:
  164|  22.6k|{
  165|  22.6k|    ivd_delete_ip_t delete_ip{};
  166|  22.6k|    ivd_delete_op_t delete_op{};
  167|       |
  168|  22.6k|    delete_ip.e_cmd = IVD_CMD_DELETE;
  169|  22.6k|    delete_ip.u4_size = sizeof(ivd_delete_ip_t);
  170|  22.6k|    delete_op.u4_size = sizeof(ivd_delete_op_t);
  171|       |
  172|  22.6k|    ivd_api_function(mCodec, (void *) &delete_ip, (void *) &delete_op);
  ------------------
  |  |   55|  22.6k|#define ivd_api_function isvcd_api_function
  ------------------
  173|  22.6k|}
_ZN5Codec10resetCodecEv:
  176|  4.14k|{
  177|  4.14k|    ivd_ctl_reset_ip_t s_ctl_ip{};
  178|  4.14k|    ivd_ctl_reset_op_t s_ctl_op{};
  179|       |
  180|  4.14k|    s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
  181|  4.14k|    s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET;
  182|  4.14k|    s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t);
  183|  4.14k|    s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t);
  184|       |
  185|  4.14k|    ivd_api_function(mCodec, (void *) &s_ctl_ip, (void *) &s_ctl_op);
  ------------------
  |  |   55|  4.14k|#define ivd_api_function isvcd_api_function
  ------------------
  186|  4.14k|}
_ZN5Codec8setCoresEv:
  189|  22.6k|{
  190|  22.6k|    ih264d_ctl_set_num_cores_ip_t s_ctl_ip{};
  191|  22.6k|    ih264d_ctl_set_num_cores_op_t s_ctl_op{};
  192|       |
  193|  22.6k|    s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
  194|  22.6k|    s_ctl_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_NUM_CORES;
  195|  22.6k|    s_ctl_ip.u4_num_cores = mNumCores;
  196|  22.6k|    s_ctl_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
  197|  22.6k|    s_ctl_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
  198|       |
  199|  22.6k|    ivd_api_function(mCodec, (void *) &s_ctl_ip, (void *) &s_ctl_op);
  ------------------
  |  |   55|  22.6k|#define ivd_api_function isvcd_api_function
  ------------------
  200|  22.6k|}
_ZN5Codec11setTgtLayerEm:
  203|  22.6k|{
  204|  22.6k|    isvcd_set_target_layer_ip_t s_ctl_set_target_layer_ip{};
  205|  22.6k|    isvcd_set_target_layer_op_t s_ctl_set_target_layer_op{};
  206|       |
  207|  22.6k|    s_ctl_set_target_layer_ip.e_cmd = IVD_CMD_VIDEO_CTL;
  208|  22.6k|    s_ctl_set_target_layer_ip.e_sub_cmd =
  209|  22.6k|        (IVD_CONTROL_API_COMMAND_TYPE_T) ISVCD_CMD_CTL_SET_TGT_LAYER;
  210|  22.6k|    s_ctl_set_target_layer_ip.u1_tgt_priority_id = 63;
  211|  22.6k|    s_ctl_set_target_layer_ip.u1_tgt_temp_id = 7;
  212|  22.6k|    s_ctl_set_target_layer_ip.u1_tgt_quality_id = 0;
  213|  22.6k|    s_ctl_set_target_layer_ip.u1_tgt_dep_id = TgtLayer;
  214|  22.6k|    s_ctl_set_target_layer_ip.u4_size = sizeof(isvcd_set_target_layer_ip_t);
  215|  22.6k|    s_ctl_set_target_layer_op.u4_size = sizeof(isvcd_set_target_layer_op_t);
  216|       |
  217|  22.6k|    ivd_api_function(mCodec, (void *) &s_ctl_set_target_layer_ip,
  ------------------
  |  |   55|  22.6k|#define ivd_api_function isvcd_api_function
  ------------------
  218|  22.6k|                     (void *) &s_ctl_set_target_layer_op);
  219|  22.6k|}
_ZN5Codec9setParamsE23IVD_VIDEO_DECODE_MODE_T:
  222|  45.2k|{
  223|  45.2k|    ivd_ctl_set_config_ip_t s_ctl_ip{};
  224|  45.2k|    ivd_ctl_set_config_op_t s_ctl_op{};
  225|       |
  226|  45.2k|    s_ctl_ip.u4_disp_wd = 0;
  227|  45.2k|    s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE;
  228|  45.2k|    s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
  229|  45.2k|    s_ctl_ip.e_vid_dec_mode = mode;
  230|  45.2k|    s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
  231|  45.2k|    s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
  232|  45.2k|    s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
  233|  45.2k|    s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
  234|       |
  235|  45.2k|    ivd_api_function(mCodec, (void *) &s_ctl_ip, (void *) &s_ctl_op);
  ------------------
  |  |   55|  45.2k|#define ivd_api_function isvcd_api_function
  ------------------
  236|  45.2k|}
_ZN5Codec15setArchitectureE10IVD_ARCH_T:
  239|  22.6k|{
  240|  22.6k|    ih264d_ctl_set_processor_ip_t s_ctl_ip{};
  241|  22.6k|    ih264d_ctl_set_processor_op_t s_ctl_op{};
  242|       |
  243|  22.6k|    s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
  244|  22.6k|    s_ctl_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_PROCESSOR;
  245|  22.6k|    s_ctl_ip.u4_arch = arch;
  246|  22.6k|    s_ctl_ip.u4_soc = SOC_GENERIC;
  247|  22.6k|    s_ctl_ip.u4_size = sizeof(ih264d_ctl_set_processor_ip_t);
  248|  22.6k|    s_ctl_op.u4_size = sizeof(ih264d_ctl_set_processor_op_t);
  249|       |
  250|  22.6k|    ivd_api_function(mCodec, (void *) &s_ctl_ip, (void *) &s_ctl_op);
  ------------------
  |  |   55|  22.6k|#define ivd_api_function isvcd_api_function
  ------------------
  251|  22.6k|}
_ZN5Codec9freeFrameEv:
  254|  63.9k|{
  255|   163k|    for(unsigned int i = 0; i < mOutBufHandle.u4_num_bufs; i++)
  ------------------
  |  Branch (255:29): [True: 99.9k, False: 63.9k]
  ------------------
  256|  99.9k|    {
  257|  99.9k|        if(mOutBufHandle.pu1_bufs[i])
  ------------------
  |  Branch (257:12): [True: 99.9k, False: 0]
  ------------------
  258|  99.9k|        {
  259|       |            iv_aligned_free(NULL, mOutBufHandle.pu1_bufs[i]);
  260|  99.9k|            mOutBufHandle.pu1_bufs[i] = nullptr;
  261|  99.9k|        }
  262|  99.9k|    }
  263|  63.9k|}
_ZN5Codec10allocFrameEv:
  266|  41.3k|{
  267|  41.3k|    size_t sizes[4] = {0};
  268|  41.3k|    UWORD32 num_bufs = 0;
  269|       |
  270|  41.3k|    freeFrame();
  271|       |
  272|  41.3k|    memset(&mOutBufHandle, 0, sizeof(mOutBufHandle));
  273|       |
  274|  41.3k|    switch(mColorFormat)
  275|  41.3k|    {
  276|  12.6k|        case IV_YUV_420SP_UV:
  ------------------
  |  Branch (276:9): [True: 12.6k, False: 28.7k]
  ------------------
  277|  12.6k|            [[fallthrough]];
  278|  24.1k|        case IV_YUV_420SP_VU:
  ------------------
  |  Branch (278:9): [True: 11.4k, False: 29.8k]
  ------------------
  279|  24.1k|            sizes[0] = mWidth * mHeight;
  280|  24.1k|            sizes[1] = mWidth * mHeight >> 1;
  281|  24.1k|            num_bufs = 2;
  282|  24.1k|            break;
  283|      6|        case IV_YUV_422ILE:
  ------------------
  |  Branch (283:9): [True: 6, False: 41.3k]
  ------------------
  284|      6|            sizes[0] = mWidth * mHeight * 2;
  285|      6|            num_bufs = 1;
  286|      6|            break;
  287|      3|        case IV_RGB_565:
  ------------------
  |  Branch (287:9): [True: 3, False: 41.3k]
  ------------------
  288|      3|            sizes[0] = mWidth * mHeight * 2;
  289|      3|            num_bufs = 1;
  290|      3|            break;
  291|      4|        case IV_RGBA_8888:
  ------------------
  |  Branch (291:9): [True: 4, False: 41.3k]
  ------------------
  292|      4|            sizes[0] = mWidth * mHeight * 4;
  293|      4|            num_bufs = 1;
  294|      4|            break;
  295|  17.2k|        case IV_YUV_420P:
  ------------------
  |  Branch (295:9): [True: 17.2k, False: 24.1k]
  ------------------
  296|  17.2k|            [[fallthrough]];
  297|  17.2k|        default:
  ------------------
  |  Branch (297:9): [True: 0, False: 41.3k]
  ------------------
  298|  17.2k|            sizes[0] = mWidth * mHeight;
  299|  17.2k|            sizes[1] = mWidth * mHeight >> 2;
  300|  17.2k|            sizes[2] = mWidth * mHeight >> 2;
  301|  17.2k|            num_bufs = 3;
  302|  17.2k|            break;
  303|  41.3k|    }
  304|  41.3k|    mOutBufHandle.u4_num_bufs = num_bufs;
  305|   141k|    for(UWORD32 i = 0; i < num_bufs; i++)
  ------------------
  |  Branch (305:24): [True: 99.9k, False: 41.3k]
  ------------------
  306|  99.9k|    {
  307|  99.9k|        mOutBufHandle.u4_min_out_buf_size[i] = sizes[i];
  308|       |        mOutBufHandle.pu1_bufs[i] = (UWORD8 *) iv_aligned_malloc(NULL, 16, sizes[i]);
  309|  99.9k|    }
  310|  41.3k|}
_ZN5Codec12decodeHeaderEPKhm:
  313|  22.6k|{
  314|  22.6k|    setParams(IVD_DECODE_HEADER);
  315|  22.6k|    size_t numDecodeCalls = 0;
  316|   528k|    while(size > 0 && numDecodeCalls < kMaxNumDecodeCalls)
  ------------------
  |  Branch (316:11): [True: 508k, False: 20.5k]
  |  Branch (316:23): [True: 506k, False: 2.01k]
  ------------------
  317|   506k|    {
  318|   506k|        IV_API_CALL_STATUS_T ret;
  319|   506k|        isvcd_video_decode_ip_t s_video_decode_ip;
  320|   506k|        isvcd_video_decode_op_t s_video_decode_op;
  321|   506k|        size_t bytes_consumed;
  322|   506k|        memset(&s_video_decode_ip, 0, sizeof(s_video_decode_ip));
  323|   506k|        memset(&s_video_decode_op, 0, sizeof(s_video_decode_op));
  324|       |
  325|   506k|        s_video_decode_ip.s_ivd_video_decode_ip_t.e_cmd = IVD_CMD_VIDEO_DECODE;
  326|   506k|        s_video_decode_ip.s_ivd_video_decode_ip_t.u4_ts = 0;
  327|   506k|        s_video_decode_ip.s_ivd_video_decode_ip_t.pv_stream_buffer = (void *) data;
  328|   506k|        s_video_decode_ip.s_ivd_video_decode_ip_t.u4_num_Bytes = size;
  329|   506k|        s_video_decode_ip.s_ivd_video_decode_ip_t.u4_size = sizeof(s_video_decode_ip);
  330|   506k|        s_video_decode_op.s_ivd_video_decode_op_t.u4_size = sizeof(s_video_decode_op);
  331|       |
  332|   506k|        ret = ivd_api_function(mCodec, (void *) &s_video_decode_ip, (void *) &s_video_decode_op);
  ------------------
  |  |   55|   506k|#define ivd_api_function isvcd_api_function
  ------------------
  333|   506k|        (void(ret));
  334|   506k|        bytes_consumed = s_video_decode_op.s_ivd_video_decode_op_t.u4_num_bytes_consumed;
  335|       |        /* If no bytes are consumed, then consume 4 bytes to ensure fuzzer proceeds
  336|       |         * to feed next data */
  337|   506k|        if(!bytes_consumed) bytes_consumed = 4;
  ------------------
  |  Branch (337:12): [True: 457k, False: 48.1k]
  ------------------
  338|       |
  339|   506k|        bytes_consumed = std::min(size, bytes_consumed);
  340|       |
  341|   506k|        data += bytes_consumed;
  342|   506k|        size -= bytes_consumed;
  343|   506k|        numDecodeCalls++;
  344|       |
  345|   506k|        mWidth = std::min(s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_wd, (UWORD32) 10240);
  346|   506k|        mHeight = std::min(s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_ht, (UWORD32) 10240);
  347|       |
  348|       |        /* Break after successful header decode */
  349|   506k|        if(mWidth && mHeight)
  ------------------
  |  Branch (349:12): [True: 0, False: 506k]
  |  Branch (349:22): [True: 0, False: 0]
  ------------------
  350|      0|        {
  351|      0|            break;
  352|      0|        }
  353|   506k|    }
  354|       |    /* if width / height are invalid, set them to defaults */
  355|  22.6k|    if(!mWidth) mWidth = 1920;
  ------------------
  |  Branch (355:8): [True: 22.6k, False: 0]
  ------------------
  356|  22.6k|    if(!mHeight) mHeight = 1088;
  ------------------
  |  Branch (356:8): [True: 22.6k, False: 0]
  ------------------
  357|  22.6k|}
_ZN5Codec11decodeFrameEPKhmPm:
  360|   213k|{
  361|   213k|    IV_API_CALL_STATUS_T ret;
  362|   213k|    isvcd_video_decode_ip_t s_video_decode_ip{};
  363|   213k|    isvcd_video_decode_op_t s_video_decode_op{};
  364|       |
  365|   213k|    s_video_decode_ip.s_ivd_video_decode_ip_t.e_cmd = IVD_CMD_VIDEO_DECODE;
  366|   213k|    s_video_decode_ip.s_ivd_video_decode_ip_t.u4_ts = 0;
  367|   213k|    s_video_decode_ip.s_ivd_video_decode_ip_t.pv_stream_buffer = (void *) data;
  368|   213k|    s_video_decode_ip.s_ivd_video_decode_ip_t.u4_num_Bytes = size;
  369|   213k|    s_video_decode_ip.s_ivd_video_decode_ip_t.u4_size = sizeof(s_video_decode_ip);
  370|   213k|    s_video_decode_ip.s_ivd_video_decode_ip_t.s_out_buffer = mOutBufHandle;
  371|       |
  372|   213k|    s_video_decode_op.s_ivd_video_decode_op_t.u4_size = sizeof(s_video_decode_op);
  373|   213k|    s_video_decode_op.s_ivd_video_decode_op_t.u4_num_bytes_consumed = 0;
  374|   213k|    s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_wd = 0;
  375|   213k|    s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_ht = 0;
  376|       |
  377|   213k|    ret = ivd_api_function(mCodec, (void *) &s_video_decode_ip, (void *) &s_video_decode_op);
  ------------------
  |  |   55|   213k|#define ivd_api_function isvcd_api_function
  ------------------
  378|       |
  379|       |    /* In case of change in resolution, reset codec and feed the same data again
  380|       |     */
  381|   213k|    if(IVD_RES_CHANGED == (s_video_decode_op.s_ivd_video_decode_op_t.u4_error_code & 0xFF))
  ------------------
  |  Branch (381:8): [True: 4.14k, False: 209k]
  ------------------
  382|  4.14k|    {
  383|  4.14k|        resetCodec();
  384|  4.14k|        ret = ivd_api_function(mCodec, (void *) &s_video_decode_ip, (void *) &s_video_decode_op);
  ------------------
  |  |   55|  4.14k|#define ivd_api_function isvcd_api_function
  ------------------
  385|  4.14k|    }
  386|   213k|    *bytesConsumed = s_video_decode_op.s_ivd_video_decode_op_t.u4_num_bytes_consumed;
  387|       |
  388|       |    /* If no bytes are consumed, then consume 4 bytes to ensure fuzzer proceeds
  389|       |     * to feed next data */
  390|   213k|    if(!*bytesConsumed)
  ------------------
  |  Branch (390:8): [True: 46.8k, False: 166k]
  ------------------
  391|  46.8k|    {
  392|  46.8k|        *bytesConsumed = 4;
  393|  46.8k|    }
  394|   213k|    if(s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_wd &&
  ------------------
  |  Branch (394:8): [True: 97.1k, False: 116k]
  ------------------
  395|  97.1k|       s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_ht &&
  ------------------
  |  Branch (395:8): [True: 97.1k, False: 0]
  ------------------
  396|  97.1k|       (mWidth != s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_wd ||
  ------------------
  |  Branch (396:9): [True: 18.5k, False: 78.5k]
  ------------------
  397|  78.5k|        mHeight != s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_ht))
  ------------------
  |  Branch (397:9): [True: 190, False: 78.3k]
  ------------------
  398|  18.7k|    {
  399|  18.7k|        mWidth = std::min(s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_wd, (UWORD32) 10240);
  400|  18.7k|        mHeight = std::min(s_video_decode_op.s_ivd_video_decode_op_t.u4_pic_ht, (UWORD32) 10240);
  401|  18.7k|        allocFrame();
  402|  18.7k|    }
  403|       |
  404|   213k|    return ret;
  405|   213k|}
LLVMFuzzerTestOneInput:
  408|  22.6k|{
  409|  22.6k|    if(size < 1)
  ------------------
  |  Branch (409:8): [True: 0, False: 22.6k]
  ------------------
  410|      0|    {
  411|      0|        return 0;
  412|      0|    }
  413|  22.6k|    size_t colorFormatOfst = std::min((size_t) OFFSET_COLOR_FORMAT, size - 1);
  414|  22.6k|    size_t numCoresOfst = std::min((size_t) OFFSET_NUM_CORES, size - 1);
  415|  22.6k|    size_t architectureOfst = std::min((size_t) OFFSET_ARCH, size - 1);
  416|  22.6k|    size_t architectureIdx = data[architectureOfst] % kSupportedArchitectures;
  417|  22.6k|    IVD_ARCH_T arch = (IVD_ARCH_T) supportedArchitectures[architectureIdx];
  418|  22.6k|    size_t colorFormatIdx = data[colorFormatOfst] % kSupportedColorFormats;
  419|  22.6k|    IV_COLOR_FORMAT_T colorFormat = (IV_COLOR_FORMAT_T) (supportedColorFormats[colorFormatIdx]);
  420|  22.6k|    uint32_t numCores = (data[numCoresOfst] % kMaxCores) + 1;
  421|       |
  422|  22.6k|    size_t numTgtLayerOfst = std::min((size_t) OFFSET_TGT_LAYER, size - 1);
  423|  22.6k|    uint32_t tgtLayer = (data[numTgtLayerOfst] % kMaxTgtLayer);
  424|       |
  425|  22.6k|    size_t numDecodeCalls = 0;
  426|  22.6k|    Codec *codec = new Codec(colorFormat, numCores);
  427|  22.6k|    codec->createCodec();
  428|  22.6k|    codec->setArchitecture(arch);
  429|  22.6k|    codec->setCores();
  430|  22.6k|    codec->setTgtLayer(tgtLayer);
  431|  22.6k|    codec->decodeHeader(data, size);
  432|  22.6k|    codec->setParams(IVD_DECODE_FRAME);
  433|  22.6k|    codec->allocFrame();
  434|       |
  435|   236k|    while(size > 0 && numDecodeCalls < kMaxNumDecodeCalls)
  ------------------
  |  Branch (435:11): [True: 213k, False: 22.4k]
  |  Branch (435:23): [True: 213k, False: 155]
  ------------------
  436|   213k|    {
  437|   213k|        IV_API_CALL_STATUS_T ret;
  438|   213k|        size_t bytesConsumed;
  439|   213k|        ret = codec->decodeFrame(data, size, &bytesConsumed);
  440|   213k|        (void(ret));
  441|   213k|        bytesConsumed = std::min(size, bytesConsumed);
  442|   213k|        data += bytesConsumed;
  443|   213k|        size -= bytesConsumed;
  444|   213k|        numDecodeCalls++;
  445|   213k|    }
  446|       |
  447|  22.6k|    codec->freeFrame();
  448|  22.6k|    codec->deleteCodec();
  449|  22.6k|    delete codec;
  450|  22.6k|    return 0;
  451|  22.6k|}

