/src/libavc/encoder/svc/isvce_downscaler.c

Source (jump to first uncovered line)
/******************************************************************************
 *
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */

/**
*******************************************************************************
* @file
*  isvce_downscaler.c
*
* @brief
*  Contains downscaler functions required by the SVC encoder
*
* @author
*  ittiam
*
* @par List of Functions:
*  - isvce_get_downscaler_data_size()
*  - isvce_get_downscaler_padding_dims()
*  - isvce_get_downscaler_normalized_filtered_pixel()
*  - isvce_horizontal_downscale_and_transpose()
*  - isvce_process_downscaler()
*  - isvce_initialize_downscaler()
*
* @remarks
*  None
*
*******************************************************************************
*/

/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/

/* system include files */
#include <stdio.h>
#include <stdlib.h>

#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "isvc_macros.h"
#include "ih264_platform_macros.h"
#include "iv2.h"
#include "isvc_defs.h"
#include "isvce_defs.h"
#include "isvc_structs.h"
#include "isvc_structs.h"
#include "isvce_downscaler.h"
#include "isvce_downscaler_private_defs.h"

/**
******************************************************************************
* @brief  lanczos filter coefficients for 2x downscaling
* @remarks One row per filter phase. The filter itself is 8 taps long; every
* row stores those 8 taps twice (entries 8..15 repeat entries 0..7) so that
* 2 rows can be processed at one go in SIMD. The C filter in this file reads
* only the first NUM_SCALER_FILTER_TAPS entries of a row.
* Each 8-tap set in this table sums to 128.
* This table is selected by isvce_initialize_downscaler() when the scaling
* factor is exactly 2.0.
******************************************************************************
*/
static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = {
    {-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0},
    {-6, 0, 33, 62, 41, 4, -6, 0, -6, 0, 33, 62, 41, 4, -6, 0},
    {-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1},
    {-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2},
    {-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3},
    {-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4},
    {-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5},
    {0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}};

/**
******************************************************************************
* @brief  lanczos filter coefficients for 1.5x downscaling
* @remarks One row per filter phase. Though the length of the filter is 8,
* the same coefficients are replicated (entries 8..15 repeat entries 0..7)
* so that 2 rows can be processed at one go in SIMD.
* This table is selected by isvce_initialize_downscaler() for every scaling
* factor other than exactly 2.0.
* NOTE(review): unlike the 2x table, the 8-tap sets here do not all sum to
* 128; per phase the sums are 128, 129, 129, 127, 128, 127, 129, 129.
* Confirm that this small gain variation between phases is intended.
******************************************************************************
*/
static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] =
    {{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0},
     {0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0},
     {0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0},
     {0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0},
     {0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0},
     {0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0},
     {0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0},
     {0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}};

/**
*******************************************************************************
*
* @brief
*   returns the number of bytes the downscaler needs
*
* @par Description:
*   When more than one spatial layer is configured, the requirement is the
*   size of downscaler_state_t plus a scratch area of
*   (u4_height + 2 * NUM_SCALER_FILTER_TAPS) * (UWORD32) (u4_width /
*   d_scaling_factor) bytes. With a single spatial layer nothing is needed.
*
* @param[in] u1_num_spatial_layers
*  number of spatial layers; values of 1 or less yield a size of 0
*
* @param[in] d_scaling_factor
*  ratio by which the width is divided to get the scaled width
*
* @param[in] u4_width
*  width used to size the scratch area
*
* @param[in] u4_height
*  height used to size the scratch area
*
* @returns
*  size in bytes (computed in 32-bit unsigned arithmetic)
*
* @remarks
*  d_scaling_factor is not validated here.
*
*******************************************************************************
*/

UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
                                       UWORD32 u4_width, UWORD32 u4_height)
{
    UWORD32 u4_scaled_width;
    UWORD32 u4_scratch_rows;
    UWORD32 u4_total;

    /* A single layer is never downscaled, so no memory is requested */
    if(u1_num_spatial_layers <= 1)
    {
        return 0;
    }

    u4_scaled_width = (UWORD32) (u4_width / d_scaling_factor);
    u4_scratch_rows = u4_height + NUM_SCALER_FILTER_TAPS * 2;

    /* state struct first, scratch area right behind it */
    u4_total = sizeof(downscaler_state_t);
    u4_total += u4_scratch_rows * u4_scaled_width;

    return u4_total;
}

/**
*******************************************************************************
*
* @brief
*   reports the padding the downscaler filter asks for
*
* @par Description:
*   Fills the four pad sizes of the given struct. Top and bottom are set to
*   half the filter length (NUM_SCALER_FILTER_TAPS / 2); left and right are
*   set to ALIGN8() of that same value.
*
* @param[out] ps_pad_dims
*  struct that receives the left, right, top and bottom pad sizes
*
* @returns
*  none
*
* @remarks
*  ALIGN8 is defined outside this file; presumably it rounds up to a
*  multiple of 8.
*
*******************************************************************************
*/

void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims)
{
    const WORD32 i4_vert_pad = NUM_SCALER_FILTER_TAPS / 2;
    const WORD32 i4_horz_pad = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);

    /* horizontal */
    ps_pad_dims->u1_left_pad_size = i4_horz_pad;
    ps_pad_dims->u1_right_pad_size = i4_horz_pad;

    /* vertical */
    ps_pad_dims->u1_top_pad_size = i4_vert_pad;
    ps_pad_dims->u1_bottom_pad_size = i4_vert_pad;
}

/**
*******************************************************************************
*
* @brief
*   downscales one block of a 420SP picture
*
* @par Description:
*   For each plane the selected kernel (pf_downscaler) is invoked twice. The
*   first call filters the source block and writes into the scratch buffer of
*   the downscaler state; the second call filters the scratch buffer and
*   writes into the destination plane. Luma is handled first, then the
*   interleaved chroma plane; the same scratch buffer is used for both.
*   Before the first call of each plane the source pointer is moved back by
*   NUM_SCALER_FILTER_TAPS / 2 rows and by NUM_SCALER_FILTER_TAPS / 2 bytes
*   (luma) or NUM_SCALER_FILTER_TAPS bytes (chroma), so that memory in front
*   of the block must be readable.
*
* @param[in] ps_scaler
*  pointer to downscaler context
*
* @param[in] ps_src_buf_props
*  pointer to source buffer props struct; colour format is asserted to be
*  IV_YUV_420SP_UV
*
* @param[out] ps_dst_buf_props
*  pointer to destination buffer props struct; receives the scaled planes
*
* @param[in] u4_blk_wd
*  width of the block to be processed
*
* @param[in] u4_blk_ht
*  height of the block to be processed
*
* @returns
*  none
*
* @remarks
*  NOTE(review): the first chroma call requests u4_scaled_block_size_x
*  outputs per row, the same count as the luma call. Confirm against the
*  kernels that this is the intended width for the interleaved UV plane.
*
*******************************************************************************
*/

void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props,
                              yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
                              UWORD32 u4_blk_ht)
{
    buffer_container_t s_in;
    buffer_container_t s_out;

    UWORD8 *pu1_start;
    UWORD32 u4_out_wd, u4_out_ht;
    UWORD32 u4_padded_ht;

    downscaler_state_t *ps_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;

    ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV);

    u4_out_wd = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor);
    u4_out_ht = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor);

    /* ------------------------------ luma ------------------------------ */
    u4_padded_ht = u4_blk_ht + NUM_SCALER_FILTER_TAPS;

    /* step back half a filter length horizontally and vertically */
    s_in = ps_src_buf_props->as_component_bufs[Y];
    pu1_start = (UWORD8 *) s_in.pv_data;
    pu1_start -= (NUM_SCALER_FILTER_TAPS / 2);
    pu1_start -= (NUM_SCALER_FILTER_TAPS / 2) * s_in.i4_data_stride;
    s_in.pv_data = pu1_start;

    /* first call: source -> scratch */
    s_out.pv_data = ps_state->pv_scratch_buf;
    s_out.i4_data_stride = u4_padded_ht;

    ps_state->pf_downscaler(ps_scaler, &s_in, &s_out, ps_state->pai1_filters, u4_out_wd,
                            u4_padded_ht, 0);

    /* second call: scratch -> destination */
    s_in = s_out;
    s_out = ps_dst_buf_props->as_component_bufs[Y];

    ps_state->pf_downscaler(ps_scaler, &s_in, &s_out, ps_state->pai1_filters, u4_out_ht,
                            u4_out_wd, 0);

    /* ----------------------------- chroma ----------------------------- */
    u4_blk_ht /= 2;
    u4_out_ht /= 2;
    u4_padded_ht = u4_blk_ht + NUM_SCALER_FILTER_TAPS;

    /* step back a full filter length in bytes, half a filter length in rows */
    s_in = ps_src_buf_props->as_component_bufs[U];
    pu1_start = (UWORD8 *) s_in.pv_data;
    pu1_start -= NUM_SCALER_FILTER_TAPS;
    pu1_start -= (NUM_SCALER_FILTER_TAPS / 2) * s_in.i4_data_stride;
    s_in.pv_data = pu1_start;

    /* first call: source -> scratch, with the chroma flag set */
    s_out.pv_data = ps_state->pv_scratch_buf;
    s_out.i4_data_stride = u4_padded_ht;

    ps_state->pf_downscaler(ps_scaler, &s_in, &s_out, ps_state->pai1_filters, u4_out_wd,
                            u4_padded_ht, 1);

    /* second call: scratch -> destination, chroma flag cleared */
    s_in = s_out;
    s_out = ps_dst_buf_props->as_component_bufs[U];

    ps_state->pf_downscaler(ps_scaler, &s_in, &s_out, ps_state->pai1_filters, u4_out_ht,
                            u4_out_wd, 0);
}

/**
*******************************************************************************
*
* @brief
*   computes one filtered, normalized output pixel
*
* @par Description:
*   Multiplies NUM_SCALER_FILTER_TAPS consecutive source bytes with the same
*   number of filter coefficients, sums the products, adds a rounding offset
*   of 1 << (FILTER_COEFF_Q - 1), divides by 1 << FILTER_COEFF_Q and passes
*   the quotient through CLIP_U8.
*
* @param[in] pu1_src
*  pointer to the first of NUM_SCALER_FILTER_TAPS source bytes
*
* @param[in] pi1_filter
*  pointer to the first of NUM_SCALER_FILTER_TAPS filter coefficients
*
* @returns
*  the filtered pixel as UWORD8
*
* @remarks
*  The normalization is an integer division, so it truncates toward zero.
*
*******************************************************************************
*/

static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter)
{
    WORD32 i4_tap = 0;
    WORD32 i4_scaled;

    /* start the accumulator at the rounding offset */
    WORD32 i4_acc = 1 << (FILTER_COEFF_Q - 1);

    while(i4_tap < NUM_SCALER_FILTER_TAPS)
    {
        i4_acc += (pu1_src[i4_tap] * pi1_filter[i4_tap]);
        i4_tap++;
    }

    i4_scaled = i4_acc / (1 << FILTER_COEFF_Q);

    return (UWORD8) CLIP_U8(i4_scaled);
}

/**
*******************************************************************************
*
* @brief
*   horizontal scaler function (C implementation)
*
* @par Description:
*   Filters every row of the source along its length and writes the results
*   transposed: the output for source row j and output index i is stored at
*   column j of destination row i. The sampling position is kept as a
*   fixed-point number; position >> DOWNSCALER_Q is the source pixel index
*   and get_filter_phase(position) selects the coefficient row. The position
*   starts at i4_init_offset for each row and advances by u4_horz_increment
*   per output.
*   For chroma the source is read as interleaved pairs: the even bytes and
*   the odd bytes are filtered separately and stored in two consecutive
*   destination rows.
*
* @param[in] ps_scaler
*  pointer to downscaler context
*
* @param[in] ps_src
*  pointer to source buffer container
*
* @param[out] ps_dst
*  pointer to destination buffer container
*
* @param[in] pai1_filters
*  pointer to array of downscaler filters, indexed by phase
*
* @param[in] u4_blk_wd
*  number of outputs produced per source row (output block width)
*
* @param[in] u4_blk_ht
*  number of source rows processed (input block height)
*
* @param[in] u1_is_chroma
*  zero for a single-component buffer, non-zero for interleaved chroma
*
* @returns
*  none
*
* @remarks
*  The same function is used for vertical scaling too, as the horizontally
*  scaled input is stored in transposed fashion.
*
*******************************************************************************
*/

static void isvce_horizontal_downscale_and_transpose(
    downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
    FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
{
    WORD32 i4_row, i4_col, i4_tap;
    UWORD8 au1_u_taps[NUM_SCALER_FILTER_TAPS];
    UWORD8 au1_v_taps[NUM_SCALER_FILTER_TAPS];

    downscaler_state_t *ps_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;

    const UWORD32 u4_first_pos = ps_state->i4_init_offset;
    const UWORD32 u4_pos_step = ps_state->u4_horz_increment;
    const UWORD32 u4_src_stride = ps_src->i4_data_stride;
    const UWORD32 u4_dst_stride = ps_dst->i4_data_stride;
    UWORD8 *pu1_dst_base = ps_dst->pv_data;

    /* Shift the input so that the pixel being processed lines up with the
    centre of the filter (4th coefficient) */
    UWORD8 *pu1_src_base = ((UWORD8 *) ps_src->pv_data) + (1 + u1_is_chroma);

    ASSERT((1 << DOWNSCALER_Q) == ps_state->u4_vert_increment);

    if(!u1_is_chroma)
    {
        for(i4_row = 0; i4_row < (WORD32) u4_blk_ht; i4_row++)
        {
            UWORD8 *pu1_src_row = pu1_src_base + (i4_row * u4_src_stride);
            UWORD8 *pu1_dst_col = pu1_dst_base + i4_row;
            UWORD32 u4_pos = u4_first_pos;

            for(i4_col = 0; i4_col < (WORD32) u4_blk_wd; i4_col++)
            {
                /* fractional part picks the filter, integer part the pixel */
                UWORD8 u1_phase = get_filter_phase(u4_pos);
                UWORD16 u2_int_pos = u4_pos >> DOWNSCALER_Q;

                /* u1_is_chroma is 0 here, so no extra shift is applied */
                pu1_dst_col[i4_col * u4_dst_stride] =
                    isvce_get_downscaler_normalized_filtered_pixel(pu1_src_row + u2_int_pos,
                                                                   pai1_filters[u1_phase]);

                u4_pos += u4_pos_step;
            }
        }
    }
    else
    {
        for(i4_row = 0; i4_row < (WORD32) u4_blk_ht; i4_row++)
        {
            UWORD8 *pu1_src_row = pu1_src_base + (i4_row * u4_src_stride);
            UWORD8 *pu1_dst_col = pu1_dst_base + i4_row;
            UWORD32 u4_pos = u4_first_pos;

            for(i4_col = 0; i4_col < (WORD32) u4_blk_wd; i4_col++)
            {
                UWORD8 u1_u_pixel, u1_v_pixel;
                UWORD8 u1_phase = get_filter_phase(u4_pos);
                UWORD16 u2_int_pos = u4_pos >> DOWNSCALER_Q;
                WORD8 *pi1_coeffs = pai1_filters[u1_phase];

                UWORD8 *pu1_in = pu1_src_row + (u2_int_pos << u1_is_chroma);
                UWORD8 *pu1_out = pu1_dst_col + ((i4_col << u1_is_chroma) * u4_dst_stride);

                /* split the interleaved bytes into two contiguous tap sets */
                for(i4_tap = 0; i4_tap < NUM_SCALER_FILTER_TAPS; i4_tap++)
                {
                    au1_u_taps[i4_tap] = pu1_in[2 * i4_tap];
                    au1_v_taps[i4_tap] = pu1_in[(2 * i4_tap) + 1];
                }

                u1_u_pixel = isvce_get_downscaler_normalized_filtered_pixel(au1_u_taps, pi1_coeffs);
                u1_v_pixel = isvce_get_downscaler_normalized_filtered_pixel(au1_v_taps, pi1_coeffs);

                /* even bytes go to one output row, odd bytes to the next */
                pu1_out[0] = u1_u_pixel;
                pu1_out[u4_dst_stride] = u1_v_pixel;

                u4_pos += u4_pos_step;
            }
        }
    }
}

/**
*******************************************************************************
*
* @brief
*   selects the downscaler kernel for the given architecture
*
* @par Description:
*   Stores a kernel in ps_scaler_state->pf_downscaler. The C implementation
*   is used unless the build defines X86, ARMV8, or ARM without DISABLE_NEON
*   and e_arch is one of the values listed for that build, in which case the
*   SSE4.2 or NEON kernel is stored instead.
*
* @param[in,out] ps_scaler_state
*  downscaler state whose pf_downscaler member is written
*
* @param[in] e_arch
*  architecture type
*
* @returns
*  none
*
*******************************************************************************
*/
void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch)
{
    /* C kernel is the fallback; a matching case below replaces it */
    ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose;

    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_sse42;
            break;
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
            break;
#elif defined(ARM) && !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
            break;
#endif
        default:
            break;
    }
}

/**
*******************************************************************************
*
* @brief
*   initializes the downscaler context
*
* @par Description:
*   Does nothing unless more than one spatial layer is configured. Otherwise
*   the downscaler state struct is placed at the start of the supplied memory
*   and the scratch buffer directly behind it. The input dimensions, scaling
*   factor and layer count are recorded, the kernel is chosen through
*   isvce_downscaler_function_selector(), the horizontal increment is set to
*   d_scaling_factor * (1 << DOWNSCALER_Q), the vertical increment to
*   1 << DOWNSCALER_Q, the initial offset to 0, and the filter table is
*   chosen (2x table for a factor of exactly 2.0, 1.5x table otherwise).
*
* @param[out] ps_scaler
*   pointer to downscaler context
*
* @param[in] ps_mem_rec
*   pointer to memory record whose pv_base holds the downscaler memory
*
* @param[in] d_scaling_factor
*   scaling ratio of width / height between two consecutive SVC layers
*
* @param[in] u1_num_spatial_layers
*   number of spatial layers
*
* @param[in] u4_in_width
*   width of the input
*
* @param[in] u4_in_height
*   height of the input
*
* @param[in] e_arch
*   architecture type
*
* @returns
*   none
*
* @remarks
*   The memory is expected to be at least as large as the value returned by
*   isvce_get_downscaler_data_size(); this function does not check it.
*
*******************************************************************************
*/

void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
                                 DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
                                 UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch)
{
    downscaler_state_t *ps_state;
    UWORD8 *pu1_mem;

    /* nothing to set up when there is only one spatial layer */
    if(u1_num_spatial_layers <= 1)
    {
        return;
    }

    /* carve the memory: state struct first, scratch buffer behind it */
    pu1_mem = (UWORD8 *) ps_mem_rec->pv_base;
    ps_state = (downscaler_state_t *) pu1_mem;
    pu1_mem += sizeof(ps_state[0]);

    ps_state->pv_scratch_buf = pu1_mem;
    ps_state->u4_in_wd = u4_in_width;
    ps_state->u4_in_ht = u4_in_height;

    ps_scaler->pv_scaler_state = ps_state;
    ps_scaler->d_scaling_factor = d_scaling_factor;
    ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers;

    isvce_downscaler_function_selector(ps_state, e_arch);

    /* fixed-point step sizes and start position */
    ps_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q));
    ps_state->u4_vert_increment = (1 << DOWNSCALER_Q);
    ps_state->i4_init_offset = 0;

    /* coefficient table: 2x only for a factor of exactly 2.0 */
    if(d_scaling_factor == 2.0)
    {
        ps_state->pai1_filters = gai1_lanczos_coefficients_2x;
    }
    else
    {
        ps_state->pai1_filters = gai1_lanczos_coefficients_3by2x;
    }
}

Coverage Report

Created: 2025-07-09 06:41

Line	Count	Source (jump to first uncovered line)
1		/******************************************************************************
2		*
3		* Copyright (C) 2022 The Android Open Source Project
4		*
5		* Licensed under the Apache License, Version 2.0 (the "License");
6		* you may not use this file except in compliance with the License.
7		* You may obtain a copy of the License at:
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*
17		*****************************************************************************
18		* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19		*/
20
21		/**
22		*******************************************************************************
23		* @file
24		* isvce_downscaler.c
25		*
26		* @brief
27		* Contains downscaler functions required by the SVC encoder
28		*
29		* @author
30		* ittiam
31		*
32		* @par List of Functions:
33		* - isvce_get_downscaler_data_size()
34		* - isvce_get_downscaler_padding_dims()
35		* - isvce_get_downscaler_normalized_filtered_pixel()
36		* - isvce_horizontal_downscale_and_transpose()
37		* - isvce_process_downscaler()
38		* - isvce_initialize_downscaler()
39		*
40		* @remarks
41		* None
42		*
43		*******************************************************************************
44		*/
45
46		/*****************************************************************************/
47		/* File Includes */
48		/*****************************************************************************/
49
50		/* system include files */
51		#include <stdio.h>
52		#include <stdlib.h>
53
54		#include "ih264_typedefs.h"
55		#include "ih264_macros.h"
56		#include "isvc_macros.h"
57		#include "ih264_platform_macros.h"
58		#include "iv2.h"
59		#include "isvc_defs.h"
60		#include "isvce_defs.h"
61		#include "isvc_structs.h"
62		#include "isvc_structs.h"
63		#include "isvce_downscaler.h"
64		#include "isvce_downscaler_private_defs.h"
65
66		/**
67		******************************************************************************
68		* @brief lanczos filter coefficients for 2x downscaling
69		* @remarks Though the length of the filter is 8, the
70		* same coefficients
71		* are replicated so that 2 rows can be processed at one
72		* go in SIMD
73		******************************************************************************
74		*/
75		static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = {
76		{-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0},
77		{-6, 0, 33, 62, 41, 4, -6, 0, -6, 0, 33, 62, 41, 4, -6, 0},
78		{-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1},
79		{-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2},
80		{-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3},
81		{-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4},
82		{-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5},
83		{0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}};
84
85		/**
86		******************************************************************************
87		* @brief lanczos filter coefficients for 1.5x downscaling
88		* @remarks Though the length of the filter is 8, the same coefficients
89		* are replicated so that 2 rows can be processed at one go in SIMD.
90		******************************************************************************
91		*/
92		static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] =
93		{{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0},
94		{0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0},
95		{0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0},
96		{0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0},
97		{0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0},
98		{0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0},
99		{0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0},
100		{0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}};
101
102		/**
103		*******************************************************************************
104		*
105		* @brief
106		* gets the memory size required for downscaler
107		*
108		* @par Description:
109		* returns the memory required by the downscaler context and state structs
110		* for allocation.
111		*
112		* @returns
113		*
114		* @remarks
115		*
116		*
117		*******************************************************************************
118		*/
119
120		UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
121		UWORD32 u4_width, UWORD32 u4_height)
122	9.87k	{
123	9.87k	UWORD32 u4_size = 0;
124
125	9.87k	if(u1_num_spatial_layers > 1)
126	8.08k	{
127	8.08k	u4_size += sizeof(downscaler_state_t);
128
129	8.08k	u4_size +=
130	8.08k	(u4_height + NUM_SCALER_FILTER_TAPS * 2) * ((UWORD32) (u4_width / d_scaling_factor));
131	8.08k	}
132
133	9.87k	return u4_size;
134	9.87k	}
135
136		/**
137		*******************************************************************************
138		*
139		* @brief
140		* gets the padding size required for filtering
141		*
142		* @par Description:
143		* gets the padding size required for filtering
144		*
145		* @returns
146		*
147		* @remarks
148		*
149		*
150		*******************************************************************************
151		*/
152
153		void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims)
154	115k	{
155	115k	ps_pad_dims->u1_left_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
156	115k	ps_pad_dims->u1_right_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
157	115k	ps_pad_dims->u1_top_pad_size = NUM_SCALER_FILTER_TAPS / 2;
158	115k	ps_pad_dims->u1_bottom_pad_size = NUM_SCALER_FILTER_TAPS / 2;
159	115k	}
160
161		/**
162		*******************************************************************************
163		*
164		* @brief
165		* processes downscaler
166		*
167		* @par Description:
168		* calls the function for padding and scaling
169		*
170		* @param[in] ps_scaler
171		* pointer to downdownscaler context
172		*
173		* @param[in] ps_src_buf_props
174		* pointer to source buffer props struct
175		*
176		* @param[in] u4_blk_wd
177		* width of the block to be processed
178		*
179		* @param[in] u4_blk_ht
180		* height of the block to be processed
181		*
182		* @returns
183		*
184		* @remarks
185		*
186		*
187		*******************************************************************************
188		*/
189
190		void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props,
191		yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
192		UWORD32 u4_blk_ht)
193	507k	{
194	507k	buffer_container_t s_src_buf;
195	507k	buffer_container_t s_dst_buf;
196
197	507k	UWORD32 u4_scaled_block_size_x, u4_scaled_block_size_y;
198
199	507k	downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
200
201	507k	ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV);
202
203	507k	u4_scaled_block_size_x = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor);
204	507k	u4_scaled_block_size_y = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor);
205
206		/* luma */
207	507k	s_src_buf = ps_src_buf_props->as_component_bufs[Y];
208	507k	s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - (NUM_SCALER_FILTER_TAPS / 2) -
209	507k	(NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
210
211	507k	s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
212	507k	s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
213
214	507k	ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
215	507k	u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 0);
216
217	507k	s_src_buf = s_dst_buf;
218	507k	s_dst_buf = ps_dst_buf_props->as_component_bufs[Y];
219
220	507k	ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
221	507k	u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
222
223		/* chroma */
224	507k	u4_blk_ht /= 2;
225	507k	u4_scaled_block_size_y /= 2;
226
227	507k	s_src_buf = ps_src_buf_props->as_component_bufs[U];
228	507k	s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - NUM_SCALER_FILTER_TAPS -
229	507k	(NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
230
231	507k	s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
232	507k	s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
233
234	507k	ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
235	507k	u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 1);
236
237	507k	s_src_buf = s_dst_buf;
238	507k	s_dst_buf = ps_dst_buf_props->as_component_bufs[U];
239
240	507k	ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
241	507k	u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
242	507k	}
243
244		/**
245		*******************************************************************************
246		*
247		* @brief
248		* normalized dot product computer for downscaler
249		*
250		* @par Description:
251		* Given the downscaler filter coefficients, source buffer, the function
252		* calculates the dot product between them, adds an offset and normalizes it
253		*
254		* @param[in] ps_scaler
255		* pointer to src buf
256		*
257		* @param[in] pi1_filter
258		* pointer to filter coefficients
259		*
260		* @returns
261		*
262		* @remarks
263		*
264		*******************************************************************************
265		*/
266
267		static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter)
268	3.70G	{
269	3.70G	WORD32 i;
270	3.70G	WORD32 i4_norm_dot_product;
271	3.70G	UWORD8 u1_out_pixel;
272	3.70G	WORD32 i4_dot_product_sum = 0;
273	3.70G	WORD32 i4_rounding_offset = 1 << (FILTER_COEFF_Q - 1);
274	3.70G	WORD32 i4_normalizing_factor = 1 << FILTER_COEFF_Q;
275
276	33.3G	for(i = 0; i < NUM_SCALER_FILTER_TAPS; i++)
277	29.6G	{
278	29.6G	i4_dot_product_sum += (pu1_src[i] * pi1_filter[i]);
279	29.6G	}
280
281	3.70G	i4_norm_dot_product = ((i4_dot_product_sum + i4_rounding_offset) / i4_normalizing_factor);
282	3.70G	u1_out_pixel = (UWORD8) CLIP_U8(i4_norm_dot_product);
283
284	3.70G	return u1_out_pixel;
285	3.70G	}
286
287		/**
288		*******************************************************************************
289		*
290		* @brief
291		* horizontal scaler function
292		*
293		* @par Description:
294		* Does horizontal scaling for the given block
295		*
296		* @param[in] ps_scaler
297		* pointer to downscaler context
298		*
299		* @param[in] ps_src
300		* pointer to source buffer container
301		*
302		* @param[in] ps_dst
303		* pointer to destination buffer container
304		*
305		* @param[in] pai1_filters
306		* pointer to array of downscaler filters
307		*
308		* @param[in] u4_blk_wd
309		* width of the block after horizontal scaling (output block width)
310		*
311		* @param[in] u4_blk_ht
312		* height of the current block (input block height)
313		*
314		* @param[in] u1_is_chroma
315		* flag suggesting whether the buffer is luma or chroma
316		*
317		*
318		* @returns
319		*
320		* @remarks
321		* The same function is used for vertical scaling too as
322		* the horizontally scaled input in stored in transpose fashion.
323		*
324		*******************************************************************************
325		*/
326
327		static void isvce_horizontal_downscale_and_transpose(
328		downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
329		FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
330	1.27M	{
331	1.27M	WORD32 i, j, k;
332	1.27M	UWORD8 u1_phase;
333	1.27M	UWORD8 u1_filtered_out_pixel;
334	1.27M	UWORD8 *pu1_src_j, *pu1_dst_j;
335	1.27M	UWORD8 u1_filtered_out_u_pixel, u1_filtered_out_v_pixel;
336	1.27M	UWORD8 *pu1_in_pixel;
337	1.27M	UWORD8 *pu1_out_pixel;
338	1.27M	WORD8 *pi1_filter_grid;
339	1.27M	UWORD16 u2_full_pixel_inc;
340	1.27M	UWORD8 au1_temp_u_buff[NUM_SCALER_FILTER_TAPS];
341	1.27M	UWORD8 au1_temp_v_buff[NUM_SCALER_FILTER_TAPS];
342
343	1.27M	downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
344
345	1.27M	UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset;
346	1.27M	UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment;
347	1.27M	UWORD8 *pu1_src = ps_src->pv_data;
348	1.27M	UWORD32 u4_in_stride = ps_src->i4_data_stride;
349	1.27M	UWORD8 *pu1_dst = ps_dst->pv_data;
350	1.27M	UWORD32 u4_out_stride = ps_dst->i4_data_stride;
351	1.27M	UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos;
352
353		/* Offset the input so that the input pixel to be processed
354		co-incides with the centre of filter (4th coefficient)*/
355	1.27M	pu1_src += (1 + u1_is_chroma);
356
357	1.27M	ASSERT((1 << DOWNSCALER_Q) == ps_scaler_state->u4_vert_increment);
358
359	1.27M	if(!u1_is_chroma)
360	955k	{
361	58.1M	for(j = 0; j < (WORD32) u4_blk_ht; j++)
362	57.2M	{
363	57.2M	pu1_src_j = pu1_src + (j * u4_in_stride);
364	57.2M	pu1_dst_j = pu1_dst + j;
365
366	57.2M	u4_center_pixel_pos = u4_center_pixel_pos_src;
367
368	2.32G	for(i = 0; i < (WORD32) u4_blk_wd; i++)
369	2.26G	{
370	2.26G	u1_phase = get_filter_phase(u4_center_pixel_pos);
371	2.26G	pi1_filter_grid = pai1_filters[u1_phase];
372
373		/* Doing the Calculation for current Loop Count */
374	2.26G	u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
375	2.26G	pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
376	2.26G	pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
377
378	2.26G	u1_filtered_out_pixel =
379	2.26G	isvce_get_downscaler_normalized_filtered_pixel(pu1_in_pixel, pi1_filter_grid);
380	2.26G	*pu1_out_pixel = u1_filtered_out_pixel;
381
382		/* Update the context for next Loop Count */
383	2.26G	u4_center_pixel_pos += u4_src_horz_increments;
384	2.26G	}
385	57.2M	}
386	955k	}
387	318k	else
388	318k	{
389	15.7M	for(j = 0; j < (WORD32) u4_blk_ht; j++)
390	15.3M	{
391	15.3M	pu1_src_j = pu1_src + (j * u4_in_stride);
392	15.3M	pu1_dst_j = pu1_dst + j;
393
394	15.3M	u4_center_pixel_pos = u4_center_pixel_pos_src;
395
396	731M	for(i = 0; i < (WORD32) u4_blk_wd; i++)
397	716M	{
398	716M	u1_phase = get_filter_phase(u4_center_pixel_pos);
399	716M	pi1_filter_grid = pai1_filters[u1_phase];
400
401		/* Doing the Calculation for current Loop Count */
402	716M	u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
403	716M	pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
404	716M	pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
405
406	6.44G	for(k = 0; k < NUM_SCALER_FILTER_TAPS; k++)
407	5.72G	{
408	5.72G	au1_temp_u_buff[k] = (pu1_in_pixel + (2 k));
409	5.72G	au1_temp_v_buff[k] = (pu1_in_pixel + ((2 k) + 1));
410	5.72G	}
411
412	716M	u1_filtered_out_u_pixel = isvce_get_downscaler_normalized_filtered_pixel(
413	716M	au1_temp_u_buff, pi1_filter_grid);
414	716M	u1_filtered_out_v_pixel = isvce_get_downscaler_normalized_filtered_pixel(
415	716M	au1_temp_v_buff, pi1_filter_grid);
416	716M	*pu1_out_pixel = u1_filtered_out_u_pixel;
417	716M	*(pu1_out_pixel + u4_out_stride) = u1_filtered_out_v_pixel;
418
419		/* Update the context for next Loop Count */
420	716M	u4_center_pixel_pos += u4_src_horz_increments;
421	716M	}
422	15.3M	}
423	318k	}
424	1.27M	}
425
426		void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch)
427	4.04k	{
428	4.04k	switch(e_arch)
429	4.04k	{
430	0	#if defined(X86)
431	1.55k	case ARCH_X86_SSE42:
432	1.55k	{
433	1.55k	ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_sse42;
434
435	1.55k	break;
436	0	}
437		#elif defined(ARMV8)
438		case ARCH_ARM_A53:
439		case ARCH_ARM_A57:
440		case ARCH_ARM_V8_NEON:
441		{
442		ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
443
444		break;
445		}
446		#elif defined(ARM) && !defined(DISABLE_NEON)
447		case ARCH_ARM_A9Q:
448		case ARCH_ARM_A9A:
449		case ARCH_ARM_A9:
450		case ARCH_ARM_A7:
451		case ARCH_ARM_A5:
452		case ARCH_ARM_A15:
453		{
454		ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
455
456		break;
457		}
458		#endif
459	2.49k	default:
460	2.49k	{
461	2.49k	ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose;
462
463	2.49k	break;
464	0	}
465	4.04k	}
466	4.04k	}
467
468		/**
469		*******************************************************************************
470		*
471		* @brief
472		* initializes the downscaler context
473		*
474		* @par Description:
475		* initializes the downscaler context for the given scaling factor
476		* with padding size, filter size, etc.
477		*
478		* @param[in] ps_scaler
479		* pointer downscaler context
480		*
481		* @param[in] ps_mem_rec
482		* pointer to memory allocated to downscaler process
483		*
484		* @param[in] d_scaling_factor
485		* scaling reatio of width/ height between two consecutive SVC layers
486		*
487		* @param[in] u1_num_spatial_layers
488		* scaling reatio of width/ height between two consecutive SVC layers
489		*
490		* @param[in] u4_wd
491		* width of the input
492		*
493		* @param[in] u4_ht
494		* height of the input
495		*
496		* @param[in] e_arch
497		* architecure type
498		*
499		* @returns
500		*
501		* @remarks
502		* when ARM intrinsics are added, update should be done here
503		*
504		*******************************************************************************
505		*/
506
507		void isvce_initialize_downscaler(downscaler_ctxt_t ps_scaler, iv_mem_rec_t ps_mem_rec,
508		DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
509		UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch)
510	4.93k	{
511	4.93k	if(u1_num_spatial_layers > 1)
512	4.04k	{
513	4.04k	downscaler_state_t *ps_scaler_state;
514
515	4.04k	UWORD8 pu1_buf = (UWORD8 ) ps_mem_rec->pv_base;
516
517	4.04k	ps_scaler_state = (downscaler_state_t *) pu1_buf;
518	4.04k	pu1_buf += sizeof(ps_scaler_state[0]);
519
520	4.04k	ps_scaler_state->pv_scratch_buf = pu1_buf;
521	4.04k	ps_scaler_state->u4_in_wd = u4_in_width;
522	4.04k	ps_scaler_state->u4_in_ht = u4_in_height;
523
524	4.04k	ps_scaler->pv_scaler_state = ps_scaler_state;
525	4.04k	ps_scaler->d_scaling_factor = d_scaling_factor;
526	4.04k	ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers;
527
528	4.04k	isvce_downscaler_function_selector(ps_scaler_state, e_arch);
529
530	4.04k	ps_scaler_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q));
531
532	4.04k	ps_scaler_state->u4_vert_increment = (1 << DOWNSCALER_Q);
533	4.04k	ps_scaler_state->i4_init_offset = 0;
534	4.04k	ps_scaler_state->pai1_filters = (d_scaling_factor == 2.0) ? gai1_lanczos_coefficients_2x
535	4.04k	: gai1_lanczos_coefficients_3by2x;
536	4.04k	}
537	4.93k	}