/src/libavc/common/x86/ih264_padding_ssse3.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * ih264_padding_ssse3.c |
24 | | * |
25 | | * @brief |
26 | | * Contains function definitions for Padding |
27 | | * |
28 | | * @author |
29 | | * Srinivas T |
30 | | * |
31 | | * @par List of Functions: |
32 | | * - ih264_pad_left_luma_ssse3() |
33 | | * - ih264_pad_left_chroma_ssse3() |
34 | | * - ih264_pad_right_luma_ssse3() |
35 | | * - ih264_pad_right_chroma_ssse3() |
36 | | * |
37 | | * @remarks |
38 | | * None |
39 | | * |
40 | | ******************************************************************************* |
41 | | */ |
42 | | |
43 | | #include <string.h> |
44 | | #include <assert.h> |
45 | | #include "ih264_typedefs.h" |
46 | | #include "ih264_platform_macros.h" |
47 | | #include "ih264_mem_fns.h" |
48 | | #include "ih264_debug.h" |
49 | | |
50 | | #include <immintrin.h> |
51 | | |
52 | | |
53 | | /** |
54 | | ******************************************************************************* |
55 | | * |
56 | | * @brief |
57 | | * Padding (luma block) at the left of a 2d array |
58 | | * |
59 | | * @par Description: |
60 | | * The left column of a 2d array is replicated for pad_size times at the left |
61 | | * |
62 | | * |
63 | | * @param[in] pu1_src |
64 | | * UWORD8 pointer to the source |
65 | | * |
66 | | * @param[in] src_strd |
67 | | * integer source stride |
68 | | * |
69 | | * @param[in] ht |
70 | | * integer height of the array |
71 | | * |
72 | | * @param[in] wd |
73 | | * integer width of the array |
74 | | * |
75 | | * @param[in] pad_size |
76 | | * integer -padding size of the array |
77 | | * |
78 | | * @param[in] ht |
79 | | * integer height of the array |
80 | | * |
81 | | * @param[in] wd |
82 | | * integer width of the array |
83 | | * |
84 | | * @returns |
85 | | * |
86 | | * @remarks |
87 | | * None |
88 | | * |
89 | | ******************************************************************************* |
90 | | */ |
91 | | |
92 | | void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src, |
93 | | WORD32 src_strd, |
94 | | WORD32 ht, |
95 | | WORD32 pad_size) |
96 | 531k | { |
97 | 531k | WORD32 row; |
98 | 531k | WORD32 i; |
99 | 531k | UWORD8 *pu1_dst; |
100 | | |
101 | 531k | ASSERT(pad_size % 8 == 0); |
102 | | |
103 | 41.8M | for(row = 0; row < ht; row++) |
104 | 41.3M | { |
105 | 41.3M | __m128i src_temp0_16x8b; |
106 | | |
107 | 41.3M | pu1_dst = pu1_src - pad_size; |
108 | 41.3M | src_temp0_16x8b = _mm_set1_epi8(*pu1_src); |
109 | 199M | for(i = 0; i < pad_size; i += 8) |
110 | 158M | { |
111 | 158M | _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b); |
112 | 158M | } |
113 | 41.3M | pu1_src += src_strd; |
114 | 41.3M | } |
115 | | |
116 | 531k | } |
117 | | |
118 | | |
119 | | |
120 | | /** |
121 | | ******************************************************************************* |
122 | | * |
123 | | * @brief |
124 | | * Padding (chroma block) at the left of a 2d array |
125 | | * |
126 | | * @par Description: |
127 | | * The left column of a 2d array is replicated for pad_size times at the left |
128 | | * |
129 | | * |
130 | | * @param[in] pu1_src |
131 | | * UWORD8 pointer to the source |
132 | | * |
133 | | * @param[in] src_strd |
134 | | * integer source stride |
135 | | * |
136 | | * @param[in] ht |
137 | | * integer height of the array |
138 | | * |
139 | | * @param[in] wd |
140 | | * integer width of the array (each colour component) |
141 | | * |
142 | | * @param[in] pad_size |
143 | | * integer -padding size of the array |
144 | | * |
145 | | * @param[in] ht |
146 | | * integer height of the array |
147 | | * |
148 | | * @param[in] wd |
149 | | * integer width of the array |
150 | | * |
151 | | * @returns |
152 | | * |
153 | | * @remarks |
154 | | * None |
155 | | * |
156 | | ******************************************************************************* |
157 | | */ |
158 | | |
159 | | void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src, |
160 | | WORD32 src_strd, |
161 | | WORD32 ht, |
162 | | WORD32 pad_size) |
163 | 531k | { |
164 | 531k | WORD32 row; |
165 | 531k | WORD32 col; |
166 | 531k | UWORD8 *pu1_dst; |
167 | | |
168 | 531k | ASSERT(pad_size % 8 == 0); |
169 | 21.1M | for(row = 0; row < ht; row++) |
170 | 20.6M | { |
171 | 20.6M | __m128i src_temp0_16x8b; |
172 | | |
173 | 20.6M | pu1_dst = pu1_src - pad_size; |
174 | 20.6M | src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)pu1_src)); |
175 | 101M | for(col = 0; col < pad_size; col += 8) |
176 | 81.0M | { |
177 | 81.0M | _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); |
178 | 81.0M | } |
179 | 20.6M | pu1_src += src_strd; |
180 | 20.6M | } |
181 | | |
182 | 531k | } |
183 | | |
184 | | |
185 | | |
186 | | /** |
187 | | ******************************************************************************* |
188 | | * |
189 | | * @brief |
190 | | * Padding (luma block) at the right of a 2d array |
191 | | * |
192 | | * @par Description: |
193 | | * The right column of a 2d array is replicated for pad_size times at the right |
194 | | * |
195 | | * |
196 | | * @param[in] pu1_src |
197 | | * UWORD8 pointer to the source |
198 | | * |
199 | | * @param[in] src_strd |
200 | | * integer source stride |
201 | | * |
202 | | * @param[in] ht |
203 | | * integer height of the array |
204 | | * |
205 | | * @param[in] wd |
206 | | * integer width of the array |
207 | | * |
208 | | * @param[in] pad_size |
209 | | * integer -padding size of the array |
210 | | * |
211 | | * @param[in] ht |
212 | | * integer height of the array |
213 | | * |
214 | | * @param[in] wd |
215 | | * integer width of the array |
216 | | * |
217 | | * @returns |
218 | | * |
219 | | * @remarks |
220 | | * None |
221 | | * |
222 | | ******************************************************************************* |
223 | | */ |
224 | | |
225 | | void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src, |
226 | | WORD32 src_strd, |
227 | | WORD32 ht, |
228 | | WORD32 pad_size) |
229 | 531k | { |
230 | 531k | WORD32 row; |
231 | 531k | WORD32 col; |
232 | 531k | UWORD8 *pu1_dst; |
233 | | |
234 | 531k | ASSERT(pad_size % 8 == 0); |
235 | | |
236 | 41.8M | for(row = 0; row < ht; row++) |
237 | 41.3M | { |
238 | 41.3M | __m128i src_temp0_16x8b; |
239 | | |
240 | 41.3M | pu1_dst = pu1_src; |
241 | 41.3M | src_temp0_16x8b = _mm_set1_epi8(*(pu1_src - 1)); |
242 | 199M | for(col = 0; col < pad_size; col += 8) |
243 | 158M | { |
244 | 158M | _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); |
245 | 158M | } |
246 | 41.3M | pu1_src += src_strd; |
247 | 41.3M | } |
248 | | |
249 | 531k | } |
250 | | |
251 | | |
252 | | |
253 | | /** |
254 | | ******************************************************************************* |
255 | | * |
256 | | * @brief |
257 | | * Padding (chroma block) at the right of a 2d array |
258 | | * |
259 | | * @par Description: |
260 | | * The right column of a 2d array is replicated for pad_size times at the right |
261 | | * |
262 | | * |
263 | | * @param[in] pu1_src |
264 | | * UWORD8 pointer to the source |
265 | | * |
266 | | * @param[in] src_strd |
267 | | * integer source stride |
268 | | * |
269 | | * @param[in] ht |
270 | | * integer height of the array |
271 | | * |
272 | | * @param[in] wd |
273 | | * integer width of the array (each colour component) |
274 | | * |
275 | | * @param[in] pad_size |
276 | | * integer -padding size of the array |
277 | | * |
278 | | * @param[in] ht |
279 | | * integer height of the array |
280 | | * |
281 | | * @param[in] wd |
282 | | * integer width of the array |
283 | | * |
284 | | * @returns |
285 | | * |
286 | | * @remarks |
287 | | * None |
288 | | * |
289 | | ******************************************************************************* |
290 | | */ |
291 | | |
292 | | void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src, |
293 | | WORD32 src_strd, |
294 | | WORD32 ht, |
295 | | WORD32 pad_size) |
296 | 531k | { |
297 | 531k | WORD32 row; |
298 | 531k | WORD32 col; |
299 | 531k | UWORD8 *pu1_dst; |
300 | | |
301 | 531k | ASSERT(pad_size % 8 == 0); |
302 | | |
303 | 21.1M | for(row = 0; row < ht; row++) |
304 | 20.6M | { |
305 | 20.6M | __m128i src_temp0_16x8b; |
306 | | |
307 | 20.6M | pu1_dst = pu1_src; |
308 | 20.6M | src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)(pu1_src - 2))); |
309 | 101M | for(col = 0; col < pad_size; col += 8) |
310 | 81.0M | { |
311 | 81.0M | _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); |
312 | 81.0M | } |
313 | | |
314 | 20.6M | pu1_src += src_strd; |
315 | 20.6M | } |
316 | 531k | } |
317 | | |