/src/libavc/common/x86/ih264_mem_fns_ssse3.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * ih264_mem_fns_ssse3.c |
24 | | * |
25 | | * @brief |
26 | | * Functions used for memory operations |
27 | | * |
28 | | * @author |
29 | | * Ittiam |
30 | | * |
31 | | * @par List of Functions: |
32 | | * |
33 | | * @remarks |
34 | | * None |
35 | | * |
36 | | ******************************************************************************* |
37 | | */ |
38 | | |
39 | | /*****************************************************************************/ |
40 | | /* File Includes */ |
41 | | /*****************************************************************************/ |
42 | | #include <stdio.h> |
43 | | #include <stddef.h> |
44 | | #include <stdlib.h> |
45 | | #include <string.h> |
46 | | #include <assert.h> |
47 | | |
48 | | #include "ih264_typedefs.h" |
49 | | #include "ih264_mem_fns.h" |
50 | | |
51 | | #include <immintrin.h> |
52 | | |
53 | | /** |
54 | | ******************************************************************************* |
55 | | * |
56 | | * @brief |
57 | | * memcpy of a 8,16 or 32 bytes |
58 | | * |
59 | | * @par Description: |
60 | | * Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes |
61 | | * |
62 | | * @param[in] pu1_dst |
63 | | * UWORD8 pointer to the destination |
64 | | * |
65 | | * @param[in] pu1_src |
66 | | * UWORD8 pointer to the source |
67 | | * |
68 | | * @param[in] num_bytes |
69 | | * number of bytes to copy |
70 | | * @returns |
71 | | * |
72 | | * @remarks |
73 | | * None |
74 | | * |
75 | | ******************************************************************************* |
76 | | */ |
77 | | |
78 | | |
79 | | |
80 | | |
81 | | void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) |
82 | 23.3M | { |
83 | 23.3M | int col; |
84 | 70.1M | for(col = num_bytes; col >= 8; col -= 8) |
85 | 46.7M | { |
86 | 46.7M | __m128i src_temp16x8b; |
87 | 46.7M | src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src)); |
88 | 46.7M | pu1_src += 8; |
89 | 46.7M | _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); |
90 | 46.7M | pu1_dst += 8; |
91 | 46.7M | } |
92 | 23.3M | } |
93 | | |
94 | | /** |
95 | | ******************************************************************************* |
96 | | * |
97 | | * @brief |
98 | | * memset of a 8,16 or 32 bytes |
99 | | * |
100 | | * @par Description: |
101 | | * Does memset of 8bit data for 8,16 or 32 number of bytes |
102 | | * |
103 | | * @param[in] pu1_dst |
104 | | * UWORD8 pointer to the destination |
105 | | * |
106 | | * @param[in] value |
107 | | * UWORD8 value used for memset |
108 | | * |
109 | | * @param[in] num_bytes |
110 | | * number of bytes to set |
111 | | * @returns |
112 | | * |
113 | | * @remarks |
114 | | * None |
115 | | * |
116 | | ******************************************************************************* |
117 | | */ |
118 | | |
119 | | |
120 | | void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) |
121 | 1.76M | { |
122 | 1.76M | int col; |
123 | 1.76M | __m128i src_temp16x8b; |
124 | 1.76M | src_temp16x8b = _mm_set1_epi8(value); |
125 | 5.28M | for(col = num_bytes; col >= 8; col -= 8) |
126 | 3.52M | { |
127 | 3.52M | _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); |
128 | 3.52M | pu1_dst += 8; |
129 | 3.52M | } |
130 | 1.76M | } |
131 | | |
132 | | /** |
133 | | ******************************************************************************* |
134 | | * |
135 | | * @brief |
136 | | * memset of 16bit data of a 8,16 or 32 bytes |
137 | | * |
138 | | * @par Description: |
139 | | * Does memset of 16bit data for 8,16 or 32 number of bytes |
140 | | * |
141 | | * @param[in] pu2_dst |
142 | | * UWORD8 pointer to the destination |
143 | | * |
144 | | * @param[in] value |
145 | | * UWORD16 value used for memset |
146 | | * |
147 | | * @param[in] num_words |
148 | | * number of words to set |
149 | | * @returns |
150 | | * |
151 | | * @remarks |
152 | | * None |
153 | | * |
154 | | ******************************************************************************* |
155 | | */ |
156 | | |
157 | | |
158 | | void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words) |
159 | 0 | { |
160 | 0 | int col; |
161 | 0 | __m128i src_temp16x8b; |
162 | 0 | src_temp16x8b = _mm_set1_epi16(value); |
163 | 0 | for(col = num_words; col >= 8; col -= 8) |
164 | 0 | { |
165 | 0 | _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b); |
166 | 0 | pu2_dst += 8; |
167 | 0 | } |
168 | 0 | } |
169 | | |