Coverage Report

Created: 2026-02-26 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmpeg2/common/x86/ideint_cac_ssse3.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  ideint_cac_ssse3.c
24
*
25
* @brief
26
*  This file includes the definitions of the combing artifact check function
27
* of the de-interlacer and some  variant of that.
28
*
29
* @author
30
*  Ittiam
31
*
32
* @par List of Functions:
33
*  cac_4x8()
34
*  ideint_cac()
35
*
36
* @remarks
37
*  In the de-interlacer workspace, cac is not a separate assembly module as
38
* it comes along with the  de_int_decision() function. But in C-Model, to
39
* keep  the things cleaner, it was made to be a separate  function during
40
* cac experiments long after the  assembly was written by Mudit.
41
*
42
*******************************************************************************
43
*/
44
/*****************************************************************************/
45
/* File Includes                                                             */
46
/*****************************************************************************/
47
/* System include files */
48
#include <stdio.h>
49
#include <stdint.h>
50
#include <string.h>
51
#include <stdlib.h>
52
#include <immintrin.h>
53
54
/* User include files */
55
#include "icv_datatypes.h"
56
#include "icv_macros.h"
57
#include "icv.h"
58
#include "icv_variance.h"
59
#include "icv_sad.h"
60
#include "ideint.h"
61
#include "ideint_defs.h"
62
#include "ideint_structs.h"
63
#include "ideint_cac.h"
64
65
/**
66
*******************************************************************************
67
*
68
* @brief
69
* Combing artifact check function for 8x8 block
70
*
71
* @par   Description
72
* Determines CAC for 8x8 block by calling 8x4 CAC function
73
*
74
* @param[in] pu1_top
75
*  Top field
76
*
77
* @param[in] pu1_bot
78
*  Bottom field
79
*
80
* @param[in] top_strd
81
*  Top field Stride
82
*
83
* @param[in] bot_strd
84
*  Bottom field stride
85
*
86
* @returns
87
* combing artifact flag (1 = detected, 0 = not detected)
88
*
89
* @remarks
90
*
91
*******************************************************************************
92
*/
93
WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top,
94
                            UWORD8 *pu1_bot,
95
                            WORD32 top_strd,
96
                            WORD32 bot_strd)
97
26.9M
{
98
26.9M
    WORD32 ca;        /* combing artifact result                          */
99
26.9M
    WORD32 i;
100
26.9M
    WORD32 adj[2] = {0};
101
26.9M
    WORD32 alt[2] = {0};
102
26.9M
    WORD32 sum_1, sum_2, sum_3, sum_4;
103
26.9M
    WORD32 sum_diff, diff_sum;
104
105
26.9M
    __m128i top[4];
106
26.9M
    __m128i bot[4];
107
26.9M
    __m128i sum_t[4];
108
26.9M
    __m128i sum_b[4];
109
26.9M
    __m128i zero;
110
111
112
26.9M
    zero = _mm_setzero_si128();
113
114
134M
    for(i = 0; i < 4; i++)
115
107M
    {
116
        /* Load top */
117
107M
        top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top));
118
107M
        pu1_top += top_strd;
119
120
        /* Load bottom */
121
107M
        bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot));
122
107M
        pu1_bot += bot_strd;
123
124
        /* Unpack */
125
107M
        top[i] = _mm_unpacklo_epi8(top[i], zero);
126
107M
        bot[i] = _mm_unpacklo_epi8(bot[i], zero);
127
128
        /* Compute row sums */
129
107M
        sum_t[i]  = _mm_sad_epu8(top[i], zero);
130
107M
        sum_b[i]  = _mm_sad_epu8(bot[i], zero);
131
107M
    }
132
133
    /* Compute row based alt and adj */
134
78.6M
    for(i = 0; i < 4; i += 2)
135
51.6M
    {
136
51.6M
        sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]);
137
51.6M
        sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]);
138
51.6M
        sum_diff = ABS_DIF(sum_1, sum_2);
139
51.6M
        if(sum_diff >= RSUM_CSUM_THRESH)
140
948k
            adj[0] += sum_diff;
141
142
51.6M
        sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]);
143
51.6M
        sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]);
144
51.6M
        sum_diff = ABS_DIF(sum_3, sum_4);
145
51.6M
        if(sum_diff >= RSUM_CSUM_THRESH)
146
946k
            adj[0] += sum_diff;
147
148
51.6M
        alt[0] += ABS_DIF(sum_1, sum_3);
149
51.6M
        alt[0] += ABS_DIF(sum_2, sum_4);
150
151
51.6M
        sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8));
152
51.6M
        sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8));
153
51.6M
        sum_diff = ABS_DIF(sum_1, sum_2);
154
51.6M
        if(sum_diff >= RSUM_CSUM_THRESH)
155
944k
            adj[1] += sum_diff;
156
157
51.6M
        sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8));
158
51.6M
        sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8));
159
51.6M
        sum_diff = ABS_DIF(sum_3, sum_4);
160
51.6M
        if(sum_diff >= RSUM_CSUM_THRESH)
161
946k
            adj[1] += sum_diff;
162
163
51.6M
        alt[1] += ABS_DIF(sum_1, sum_3);
164
51.6M
        alt[1] += ABS_DIF(sum_2, sum_4);
165
51.6M
    }
166
167
    /* Compute column based adj */
168
26.9M
    {
169
26.9M
        __m128i avg1, avg2;
170
26.9M
        __m128i top_avg, bot_avg;
171
26.9M
        __m128i min, max, diff, thresh;
172
26.9M
        __m128i mask;
173
26.9M
        avg1 = _mm_avg_epu8(top[0], top[1]);
174
26.9M
        avg2 = _mm_avg_epu8(top[2], top[3]);
175
26.9M
        top_avg = _mm_avg_epu8(avg1, avg2);
176
177
26.9M
        avg1 = _mm_avg_epu8(bot[0], bot[1]);
178
26.9M
        avg2 = _mm_avg_epu8(bot[2], bot[3]);
179
26.9M
        bot_avg = _mm_avg_epu8(avg1, avg2);
180
181
26.9M
        min = _mm_min_epu8(top_avg, bot_avg);
182
26.9M
        max = _mm_max_epu8(top_avg, bot_avg);
183
184
26.9M
        diff = _mm_sub_epi16(max, min);
185
26.9M
        thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1);
186
187
26.9M
        mask = _mm_cmpgt_epi16(diff, thresh);
188
26.9M
        diff = _mm_and_si128(diff, mask);
189
190
26.9M
        diff_sum = _mm_extract_epi16(diff, 0);
191
26.9M
        diff_sum += _mm_extract_epi16(diff, 1);
192
26.9M
        diff_sum += _mm_extract_epi16(diff, 2);
193
26.9M
        diff_sum += _mm_extract_epi16(diff, 3);
194
195
26.9M
        adj[0] += diff_sum << 2;
196
197
26.9M
        diff_sum = _mm_extract_epi16(diff, 4);
198
26.9M
        diff_sum += _mm_extract_epi16(diff, 5);
199
26.9M
        diff_sum += _mm_extract_epi16(diff, 6);
200
26.9M
        diff_sum += _mm_extract_epi16(diff, 7);
201
202
26.9M
        adj[1] += diff_sum << 2;
203
204
26.9M
    }
205
206
    /* Compute column based alt */
207
26.9M
    {
208
26.9M
        __m128i avg1, avg2;
209
26.9M
        __m128i even_avg, odd_avg, diff;
210
26.9M
        avg1 = _mm_avg_epu8(top[0], bot[0]);
211
26.9M
        avg2 = _mm_avg_epu8(top[2], bot[2]);
212
26.9M
        even_avg = _mm_avg_epu8(avg1, avg2);
213
214
26.9M
        avg1 = _mm_avg_epu8(top[1], bot[1]);
215
26.9M
        avg2 = _mm_avg_epu8(top[3], bot[3]);
216
26.9M
        odd_avg = _mm_avg_epu8(avg1, avg2);
217
218
26.9M
        diff = _mm_sad_epu8(even_avg, odd_avg);
219
220
221
26.9M
        diff_sum = _mm_cvtsi128_si32(diff);
222
26.9M
        alt[0] += diff_sum << 2;
223
224
26.9M
        diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8));
225
26.9M
        alt[1] += diff_sum << 2;
226
227
26.9M
    }
228
26.9M
    alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
229
26.9M
    alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
230
231
26.9M
    ca    = (alt[0] < adj[0]);
232
26.9M
    ca   |= (alt[1] < adj[1]);
233
234
26.9M
    return ca;
235
26.9M
}
236