Coverage Report

Created: 2025-11-11 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmpeg2/common/x86/icv_sad_ssse3.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  icv_sad_ssse3.c
24
*
25
* @brief
26
*  This file contains the functions to compute SAD
27
*
28
* @author
29
*  Ittiam
30
*
31
* @par List of Functions:
32
*  icv_sad_8x4_ssse3()
33
*
34
* @remarks
35
*  None
36
*
37
*******************************************************************************
38
*/
39
/*****************************************************************************/
40
/* File Includes                                                             */
41
/*****************************************************************************/
42
/* System include files */
43
#include <stdio.h>
44
#include <stdint.h>
45
#include <string.h>
46
#include <stdlib.h>
47
#include <assert.h>
48
#include <immintrin.h>
49
50
/* User include files */
51
#include "icv_datatypes.h"
52
#include "icv_macros.h"
53
#include "icv_platform_macros.h"
54
#include "icv.h"
55
56
/**
57
*******************************************************************************
58
*
59
* @brief
60
*  Compute 8x4 SAD
61
*
62
* @par   Description
63
*  Compute 8x4 sum of absolute differences between source and reference block
64
*
65
* @param[in] pu1_src
66
*  Source buffer
67
*
68
* @param[in] pu1_ref
69
*  Reference buffer
70
*
71
* @param[in] src_strd
72
*  Source stride
73
*
74
* @param[in] ref_strd
75
*  Reference stride
76
*
77
* @param[in] wd
78
*  Assumed to be 8
79
*
80
* @param[in] ht
81
*  Assumed to be 4
82
83
* @returns
84
*  SAD
85
*
86
* @remarks
87
*
88
*******************************************************************************
89
*/
90
WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src,
91
                         UWORD8 *pu1_ref,
92
                         WORD32 src_strd,
93
                         WORD32 ref_strd,
94
                         WORD32 wd,
95
                         WORD32 ht)
96
93.3M
{
97
93.3M
    WORD32 sad;
98
93.3M
    __m128 src_r0, src_r1;
99
93.3M
    __m128 ref_r0, ref_r1;
100
93.3M
    __m128i res_r0, res_r1;
101
102
93.3M
    UNUSED(wd);
103
93.3M
    UNUSED(ht);
104
93.3M
    ASSERT(wd == 8);
105
93.3M
    ASSERT(ht == 4);
106
107
    /* Load source */
108
93.4M
    src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
109
93.4M
    pu1_src += src_strd;
110
111
93.4M
    src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
112
93.4M
    pu1_src += src_strd;
113
114
93.4M
    src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src));
115
93.4M
    pu1_src += src_strd;
116
117
93.4M
    src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src));
118
93.4M
    pu1_src += src_strd;
119
120
121
    /* Load reference */
122
93.4M
    ref_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref));
123
93.4M
    pu1_ref += ref_strd;
124
125
93.4M
    ref_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref));
126
93.4M
    pu1_ref += ref_strd;
127
128
93.4M
    ref_r0 = _mm_loadh_pi (ref_r0, (__m64 *) (pu1_ref));
129
93.4M
    pu1_ref += ref_strd;
130
131
93.4M
    ref_r1 = _mm_loadh_pi (ref_r1, (__m64 *) (pu1_ref));
132
93.4M
    pu1_ref += ref_strd;
133
134
    /* Compute SAD for each row */
135
93.4M
    res_r0 = _mm_sad_epu8((__m128i)src_r0, (__m128i)ref_r0);
136
93.4M
    res_r1 = _mm_sad_epu8((__m128i)src_r1, (__m128i)ref_r1);
137
138
    /* Accumulate SAD */
139
93.4M
    res_r0 = _mm_add_epi64(res_r0,  res_r1);
140
93.4M
    res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8));
141
142
93.4M
    sad  = _mm_cvtsi128_si32(res_r0);
143
144
93.4M
    return sad;
145
92.8M
}