Coverage Report

Created: 2023-06-07 07:17

/src/libmpeg2/common/x86/ideint_cac_ssse3.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  ideint_cac_ssse3.c
24
*
25
* @brief
26
*  This file includes the definitions of the combing artifact check function
27
* of the de-interlacer and some variants of that.
28
*
29
* @author
30
*  Ittiam
31
*
32
* @par List of Functions:
33
*  cac_4x8()
34
*  ideint_cac()
35
*
36
* @remarks
37
*  In the de-interlacer workspace, cac is not a separate assembly module as
38
* it comes along with the  de_int_decision() function. But in C-Model, to
39
* keep  the things cleaner, it was made to be a separate  function during
40
* cac experiments long after the  assembly was written by Mudit.
41
*
42
*******************************************************************************
43
*/
44
/*****************************************************************************/
45
/* File Includes                                                             */
46
/*****************************************************************************/
47
/* System include files */
48
#include <stdio.h>
49
#include <stdint.h>
50
#include <string.h>
51
#include <stdlib.h>
52
#include <immintrin.h>
53
54
/* User include files */
55
#include "icv_datatypes.h"
56
#include "icv_macros.h"
57
#include "icv.h"
58
#include "icv_variance.h"
59
#include "icv_sad.h"
60
#include "ideint.h"
61
#include "ideint_defs.h"
62
#include "ideint_structs.h"
63
#include "ideint_cac.h"
64
65
/**
66
*******************************************************************************
67
*
68
* @brief
69
* Combing artifact check function for 8x8 block
70
*
71
* @par   Description
72
* Determines CAC for 8x8 block by calling 8x4 CAC function
73
*
74
* @param[in] pu1_top
75
*  Top field
76
*
77
* @param[in] pu1_bot
78
*  Bottom field
79
*
80
* @param[in] top_strd
81
*  Top field Stride
82
*
83
* @param[in] bot_strd
84
*  Bottom field stride
85
*
86
* @returns
87
* combing artifact flag (1 = detected, 0 = not detected)
88
*
89
* @remarks
90
*
91
*******************************************************************************
92
*/
93
WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top,
94
                            UWORD8 *pu1_bot,
95
                            WORD32 top_strd,
96
                            WORD32 bot_strd)
97
30.2M
{
98
30.2M
    WORD32 ca;        /* combing artifact result                          */
99
30.2M
    WORD32 i;
100
30.2M
    WORD32 adj[2] = {0};
101
30.2M
    WORD32 alt[2] = {0};
102
30.2M
    WORD32 sum_1, sum_2, sum_3, sum_4;
103
30.2M
    WORD32 sum_diff, diff_sum;
104
105
30.2M
    __m128i top[4];
106
30.2M
    __m128i bot[4];
107
30.2M
    __m128i sum_t[4];
108
30.2M
    __m128i sum_b[4];
109
30.2M
    __m128i zero;
110
111
112
30.2M
    zero = _mm_setzero_si128();
113
114
132M
    for(i = 0; i < 4; i++)
115
102M
    {
116
        /* Load top */
117
102M
        top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top));
118
102M
        pu1_top += top_strd;
119
120
        /* Load bottom */
121
102M
        bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot));
122
102M
        pu1_bot += bot_strd;
123
124
        /* Unpack */
125
102M
        top[i] = _mm_unpacklo_epi8(top[i], zero);
126
102M
        bot[i] = _mm_unpacklo_epi8(bot[i], zero);
127
128
        /* Compute row sums */
129
102M
        sum_t[i]  = _mm_sad_epu8(top[i], zero);
130
102M
        sum_b[i]  = _mm_sad_epu8(bot[i], zero);
131
102M
    }
132
133
    /* Compute row based alt and adj */
134
81.4M
    for(i = 0; i < 4; i += 2)
135
51.1M
    {
136
51.1M
        sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]);
137
51.1M
        sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]);
138
51.1M
        sum_diff = ABS_DIF(sum_1, sum_2);
139
51.1M
        if(sum_diff >= RSUM_CSUM_THRESH)
140
52.3k
            adj[0] += sum_diff;
141
142
51.1M
        sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]);
143
51.1M
        sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]);
144
51.1M
        sum_diff = ABS_DIF(sum_3, sum_4);
145
51.1M
        if(sum_diff >= RSUM_CSUM_THRESH)
146
59.6k
            adj[0] += sum_diff;
147
148
51.1M
        alt[0] += ABS_DIF(sum_1, sum_3);
149
51.1M
        alt[0] += ABS_DIF(sum_2, sum_4);
150
151
51.1M
        sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8));
152
51.1M
        sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8));
153
51.1M
        sum_diff = ABS_DIF(sum_1, sum_2);
154
51.1M
        if(sum_diff >= RSUM_CSUM_THRESH)
155
51.2k
            adj[1] += sum_diff;
156
157
51.1M
        sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8));
158
51.1M
        sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8));
159
51.1M
        sum_diff = ABS_DIF(sum_3, sum_4);
160
51.1M
        if(sum_diff >= RSUM_CSUM_THRESH)
161
57.2k
            adj[1] += sum_diff;
162
163
51.1M
        alt[1] += ABS_DIF(sum_1, sum_3);
164
51.1M
        alt[1] += ABS_DIF(sum_2, sum_4);
165
51.1M
    }
166
167
    /* Compute column based adj */
168
30.2M
    {
169
30.2M
        __m128i avg1, avg2;
170
30.2M
        __m128i top_avg, bot_avg;
171
30.2M
        __m128i min, max, diff, thresh;
172
30.2M
        __m128i mask;
173
30.2M
        avg1 = _mm_avg_epu8(top[0], top[1]);
174
30.2M
        avg2 = _mm_avg_epu8(top[2], top[3]);
175
30.2M
        top_avg = _mm_avg_epu8(avg1, avg2);
176
177
30.2M
        avg1 = _mm_avg_epu8(bot[0], bot[1]);
178
30.2M
        avg2 = _mm_avg_epu8(bot[2], bot[3]);
179
30.2M
        bot_avg = _mm_avg_epu8(avg1, avg2);
180
181
30.2M
        min = _mm_min_epu8(top_avg, bot_avg);
182
30.2M
        max = _mm_max_epu8(top_avg, bot_avg);
183
184
30.2M
        diff = _mm_sub_epi16(max, min);
185
30.2M
        thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1);
186
187
30.2M
        mask = _mm_cmpgt_epi16(diff, thresh);
188
30.2M
        diff = _mm_and_si128(diff, mask);
189
190
30.2M
        diff_sum = _mm_extract_epi16(diff, 0);
191
30.2M
        diff_sum += _mm_extract_epi16(diff, 1);
192
30.2M
        diff_sum += _mm_extract_epi16(diff, 2);
193
30.2M
        diff_sum += _mm_extract_epi16(diff, 3);
194
195
30.2M
        adj[0] += diff_sum << 2;
196
197
30.2M
        diff_sum = _mm_extract_epi16(diff, 4);
198
30.2M
        diff_sum += _mm_extract_epi16(diff, 5);
199
30.2M
        diff_sum += _mm_extract_epi16(diff, 6);
200
30.2M
        diff_sum += _mm_extract_epi16(diff, 7);
201
202
30.2M
        adj[1] += diff_sum << 2;
203
204
30.2M
    }
205
206
    /* Compute column based alt */
207
30.2M
    {
208
30.2M
        __m128i avg1, avg2;
209
30.2M
        __m128i even_avg, odd_avg, diff;
210
30.2M
        avg1 = _mm_avg_epu8(top[0], bot[0]);
211
30.2M
        avg2 = _mm_avg_epu8(top[2], bot[2]);
212
30.2M
        even_avg = _mm_avg_epu8(avg1, avg2);
213
214
30.2M
        avg1 = _mm_avg_epu8(top[1], bot[1]);
215
30.2M
        avg2 = _mm_avg_epu8(top[3], bot[3]);
216
30.2M
        odd_avg = _mm_avg_epu8(avg1, avg2);
217
218
30.2M
        diff = _mm_sad_epu8(even_avg, odd_avg);
219
220
221
30.2M
        diff_sum = _mm_cvtsi128_si32(diff);
222
30.2M
        alt[0] += diff_sum << 2;
223
224
30.2M
        diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8));
225
30.2M
        alt[1] += diff_sum << 2;
226
227
30.2M
    }
228
30.2M
    alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
229
30.2M
    alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
230
231
30.2M
    ca    = (alt[0] < adj[0]);
232
30.2M
    ca   |= (alt[1] < adj[1]);
233
234
30.2M
    return ca;
235
30.2M
}
236