Coverage Report

Created: 2024-07-27 06:35

/src/libavc/common/x86/ih264_padding_ssse3.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  ih264_padding_ssse3.c
24
*
25
* @brief
26
*  Contains function definitions for Padding
27
*
28
* @author
29
*  Srinivas T
30
*
31
* @par List of Functions:
32
*   - ih264_pad_left_luma_ssse3()
33
*   - ih264_pad_left_chroma_ssse3()
34
*   - ih264_pad_right_luma_ssse3()
35
*   - ih264_pad_right_chroma_ssse3()
36
*
37
* @remarks
38
*  None
39
*
40
*******************************************************************************
41
*/
42
43
#include <string.h>
44
#include <assert.h>
45
#include "ih264_typedefs.h"
46
#include "ih264_platform_macros.h"
47
#include "ih264_mem_fns.h"
48
#include "ih264_debug.h"
49
50
#include <immintrin.h>
51
52
53
/**
54
*******************************************************************************
55
*
56
* @brief
57
*   Padding (luma block) at the left of a 2d array
58
*
59
* @par Description:
60
*   The left column of a 2d array is replicated for pad_size times at the left
61
*
62
*
63
* @param[in] pu1_src
64
*  UWORD8 pointer to the source
65
*
66
* @param[in] src_strd
67
*  integer source stride
68
*
69
* @param[in] ht
70
*  integer height of the array
71
*
72
* @param[in] wd
73
*  integer width of the array
74
*
75
* @param[in] pad_size
76
*  integer -padding size of the array
77
*
78
* @param[in] ht
79
*  integer height of the array
80
*
81
* @param[in] wd
82
*  integer width of the array
83
*
84
* @returns
85
*
86
* @remarks
87
*  None
88
*
89
*******************************************************************************
90
*/
91
92
void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src,
93
                               WORD32 src_strd,
94
                               WORD32 ht,
95
                               WORD32 pad_size)
96
531k
{
97
531k
    WORD32 row;
98
531k
    WORD32 i;
99
531k
    UWORD8 *pu1_dst;
100
101
531k
    ASSERT(pad_size % 8 == 0);
102
103
41.8M
    for(row = 0; row < ht; row++)
104
41.3M
    {
105
41.3M
        __m128i src_temp0_16x8b;
106
107
41.3M
        pu1_dst = pu1_src - pad_size;
108
41.3M
        src_temp0_16x8b = _mm_set1_epi8(*pu1_src);
109
199M
        for(i = 0; i < pad_size; i += 8)
110
158M
        {
111
158M
            _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
112
158M
        }
113
41.3M
        pu1_src += src_strd;
114
41.3M
    }
115
116
531k
}
117
118
119
120
/**
121
*******************************************************************************
122
*
123
* @brief
124
*   Padding (chroma block) at the left of a 2d array
125
*
126
* @par Description:
127
*   The left column of a 2d array is replicated for pad_size times at the left
128
*
129
*
130
* @param[in] pu1_src
131
*  UWORD8 pointer to the source
132
*
133
* @param[in] src_strd
134
*  integer source stride
135
*
136
* @param[in] ht
137
*  integer height of the array
138
*
139
* @param[in] wd
140
*  integer width of the array (each colour component)
141
*
142
* @param[in] pad_size
143
*  integer -padding size of the array
144
*
145
* @param[in] ht
146
*  integer height of the array
147
*
148
* @param[in] wd
149
*  integer width of the array
150
*
151
* @returns
152
*
153
* @remarks
154
*  None
155
*
156
*******************************************************************************
157
*/
158
159
void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src,
160
                                 WORD32 src_strd,
161
                                 WORD32 ht,
162
                                 WORD32 pad_size)
163
531k
{
164
531k
    WORD32 row;
165
531k
    WORD32 col;
166
531k
    UWORD8 *pu1_dst;
167
168
531k
    ASSERT(pad_size % 8 == 0);
169
21.1M
    for(row = 0; row < ht; row++)
170
20.6M
    {
171
20.6M
        __m128i src_temp0_16x8b;
172
173
20.6M
        pu1_dst = pu1_src - pad_size;
174
20.6M
        src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)pu1_src));
175
101M
        for(col = 0; col < pad_size; col += 8)
176
81.0M
        {
177
81.0M
            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
178
81.0M
        }
179
20.6M
        pu1_src += src_strd;
180
20.6M
    }
181
182
531k
}
183
184
185
186
/**
187
*******************************************************************************
188
*
189
* @brief
190
* Padding (luma block) at the right of a 2d array
191
*
192
* @par Description:
193
* The right column of a 2d array is replicated for pad_size times at the right
194
*
195
*
196
* @param[in] pu1_src
197
*  UWORD8 pointer to the source
198
*
199
* @param[in] src_strd
200
*  integer source stride
201
*
202
* @param[in] ht
203
*  integer height of the array
204
*
205
* @param[in] wd
206
*  integer width of the array
207
*
208
* @param[in] pad_size
209
*  integer -padding size of the array
210
*
211
* @param[in] ht
212
*  integer height of the array
213
*
214
* @param[in] wd
215
*  integer width of the array
216
*
217
* @returns
218
*
219
* @remarks
220
*  None
221
*
222
*******************************************************************************
223
*/
224
225
void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src,
226
                                WORD32 src_strd,
227
                                WORD32 ht,
228
                                WORD32 pad_size)
229
531k
{
230
531k
    WORD32 row;
231
531k
    WORD32 col;
232
531k
    UWORD8 *pu1_dst;
233
234
531k
    ASSERT(pad_size % 8 == 0);
235
236
41.8M
    for(row = 0; row < ht; row++)
237
41.3M
    {
238
41.3M
        __m128i src_temp0_16x8b;
239
240
41.3M
        pu1_dst = pu1_src;
241
41.3M
        src_temp0_16x8b = _mm_set1_epi8(*(pu1_src - 1));
242
199M
        for(col = 0; col < pad_size; col += 8)
243
158M
        {
244
158M
            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
245
158M
        }
246
41.3M
        pu1_src += src_strd;
247
41.3M
    }
248
249
531k
}
250
251
252
253
/**
254
*******************************************************************************
255
*
256
* @brief
257
* Padding (chroma block) at the right of a 2d array
258
*
259
* @par Description:
260
* The right column of a 2d array is replicated for pad_size times at the right
261
*
262
*
263
* @param[in] pu1_src
264
*  UWORD8 pointer to the source
265
*
266
* @param[in] src_strd
267
*  integer source stride
268
*
269
* @param[in] ht
270
*  integer height of the array
271
*
272
* @param[in] wd
273
*  integer width of the array (each colour component)
274
*
275
* @param[in] pad_size
276
*  integer -padding size of the array
277
*
278
* @param[in] ht
279
*  integer height of the array
280
*
281
* @param[in] wd
282
*  integer width of the array
283
*
284
* @returns
285
*
286
* @remarks
287
*  None
288
*
289
*******************************************************************************
290
*/
291
292
void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src,
293
                                  WORD32 src_strd,
294
                                  WORD32 ht,
295
                                  WORD32 pad_size)
296
531k
{
297
531k
    WORD32 row;
298
531k
    WORD32 col;
299
531k
    UWORD8 *pu1_dst;
300
301
531k
    ASSERT(pad_size % 8 == 0);
302
303
21.1M
    for(row = 0; row < ht; row++)
304
20.6M
    {
305
20.6M
        __m128i src_temp0_16x8b;
306
307
20.6M
        pu1_dst = pu1_src;
308
20.6M
        src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)(pu1_src - 2)));
309
101M
        for(col = 0; col < pad_size; col += 8)
310
81.0M
        {
311
81.0M
            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
312
81.0M
        }
313
314
20.6M
        pu1_src += src_strd;
315
20.6M
    }
316
531k
}
317