Coverage Report

Created: 2025-07-12 06:58

/src/libmpeg2/common/x86/icv_variance_ssse3.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  icv_variance_ssse3.c
24
*
25
* @brief
26
*  This file contains the functions to compute variance
27
*
28
* @author
29
*  Ittiam
30
*
31
* @par List of Functions:
32
*  icv_variance_8x4_ssse3()
33
*
34
* @remarks
35
*  None
36
*
37
*******************************************************************************
38
*/
39
/*****************************************************************************/
40
/* File Includes                                                             */
41
/*****************************************************************************/
42
/* System include files */
43
#include <stdio.h>
44
#include <stdint.h>
45
#include <string.h>
46
#include <stdlib.h>
47
#include <assert.h>
48
#include <immintrin.h>
49
50
/* User include files */
51
#include "icv_datatypes.h"
52
#include "icv_macros.h"
53
#include "icv_platform_macros.h"
54
#include "icv.h"
55
56
/**
57
*******************************************************************************
58
*
59
* @brief
60
*  Computes variance of a given 8x4 block
61
*
62
* @par   Description
63
*  Compute variance of a given 8x4 block
64
*
65
* @param[in] pu1_src
66
*  Source
67
*
68
* @param[in] src_strd
69
*  Source stride
70
*
71
* @param[in] wd
72
*  Assumed to be 8
73
*
74
* @param[in] ht
75
*  Assumed to be 4
76
*
77
* @returns
78
*  Variance
79
*
80
* @remarks
81
*
82
*******************************************************************************
83
*/
84
WORD32 icv_variance_8x4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 ht)
85
42.8M
{
86
42.8M
    WORD32 sum;
87
42.8M
    WORD32 sum_sqr;
88
42.8M
    WORD32 blk_sz;
89
42.8M
    WORD32 vrnc;
90
42.8M
    __m128  src_r0, src_r1;
91
42.8M
    __m128i ssrc_r0, ssrc_r1, ssrc_r2, ssrc_r3;
92
42.8M
    __m128i sum_r0, sum_r1;
93
42.8M
    __m128i sqr_r0, sqr_r1, sqr_r2, sqr_r3;
94
42.8M
    __m128i vsum, vsum_sqr;
95
42.8M
    __m128i zero;
96
42.8M
    UNUSED(wd);
97
42.8M
    UNUSED(ht);
98
99
42.8M
    ASSERT(wd == 8);
100
42.8M
    ASSERT(ht == 4);
101
102
42.9M
    sum     = 0;
103
42.9M
    sum_sqr = 0;
104
105
42.9M
    blk_sz = 8 * 4;
106
107
42.9M
    zero = _mm_setzero_si128();
108
109
    /* Load source */
110
42.9M
    src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
111
42.9M
    pu1_src += src_strd;
112
113
42.9M
    src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
114
42.9M
    pu1_src += src_strd;
115
116
42.9M
    src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src));
117
42.9M
    pu1_src += src_strd;
118
119
42.9M
    src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src));
120
42.9M
    pu1_src += src_strd;
121
122
    /* Compute sum of all elements */
123
    /* Use SAD with 0, since there is no pairwise addition */
124
42.9M
    sum_r0  = _mm_sad_epu8((__m128i)src_r0, zero);
125
42.9M
    sum_r1  = _mm_sad_epu8((__m128i)src_r1, zero);
126
127
    /* Accumulate SAD */
128
42.9M
    vsum    = _mm_add_epi64(sum_r0, sum_r1);
129
42.9M
    vsum    = _mm_add_epi64(vsum, _mm_srli_si128(vsum, 8));
130
131
42.9M
    sum = _mm_cvtsi128_si32(vsum);
132
133
    /* Unpack to 16 bits */
134
42.9M
    ssrc_r0 = _mm_unpacklo_epi8((__m128i)src_r0, zero);
135
42.9M
    ssrc_r1 = _mm_unpacklo_epi8((__m128i)src_r1, zero);
136
42.9M
    ssrc_r2 = _mm_unpackhi_epi8((__m128i)src_r0, zero);
137
42.9M
    ssrc_r3 = _mm_unpackhi_epi8((__m128i)src_r1, zero);
138
139
    /* Compute sum of squares */
140
42.9M
    sqr_r0 = _mm_madd_epi16(ssrc_r0,  ssrc_r0);
141
42.9M
    sqr_r1 = _mm_madd_epi16(ssrc_r1,  ssrc_r1);
142
42.9M
    sqr_r2 = _mm_madd_epi16(ssrc_r2,  ssrc_r2);
143
42.9M
    sqr_r3 = _mm_madd_epi16(ssrc_r3,  ssrc_r3);
144
145
42.9M
    vsum_sqr = _mm_add_epi32(sqr_r0,   sqr_r1);
146
42.9M
    vsum_sqr = _mm_add_epi32(vsum_sqr, sqr_r2);
147
42.9M
    vsum_sqr = _mm_add_epi32(vsum_sqr, sqr_r3);
148
149
42.9M
    vsum_sqr = _mm_add_epi32(vsum_sqr, _mm_srli_si128(vsum_sqr, 8));
150
42.9M
    vsum_sqr = _mm_add_epi32(vsum_sqr, _mm_srli_si128(vsum_sqr, 4));
151
42.9M
    sum_sqr  = _mm_cvtsi128_si32(vsum_sqr);
152
153
    /* Compute variance */
154
42.9M
    vrnc = ((sum_sqr * blk_sz) - (sum * sum)) / (blk_sz * blk_sz);
155
156
42.9M
    return vrnc;
157
42.8M
}
158