/src/ffmpeg/libavcodec/tpeldsp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * thirdpel DSP functions |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | /** |
22 | | * @file |
23 | | * thirdpel DSP functions |
24 | | */ |
25 | | |
26 | | #include <stdint.h> |
27 | | |
28 | | #include "libavutil/attributes.h" |
29 | | #include "tpeldsp.h" |
30 | | |
31 | | #define BIT_DEPTH 8 |
32 | | #include "pel_template.c" |
33 | | |
/**
 * Full-pel "put": forward the copy to the fixed-width put_pixels helper
 * that matches @p width.
 *
 * Only widths 2, 4, 8 and 16 are dispatched; any other width leaves
 * @p dst untouched.
 *
 * @param dst    destination pixels
 * @param src    source pixels
 * @param stride value passed through unchanged to the helper
 * @param width  block width selecting the helper
 * @param height value passed through unchanged to the helper
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
52 | | |
/**
 * Thirdpel "put" using the pixel and its right-hand neighbour, weighted 2:1.
 *
 * Each output is ((2 * src[x] + src[x + 1] + 1) * 683) >> 11; the
 * multiply-and-shift is a fixed-point stand-in for dividing by 3.
 * One column beyond @p width is read from every source row.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + 1] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
66 | | |
/**
 * Thirdpel "put" using the pixel and its right-hand neighbour, weighted 1:2.
 *
 * Each output is ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11; the
 * multiply-and-shift is a fixed-point stand-in for dividing by 3.
 * One column beyond @p width is read from every source row.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + 1] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
80 | | |
/**
 * Thirdpel "put" using the pixel and the one a row below, weighted 2:1.
 *
 * Each output is ((2 * src[x] + src[x + stride] + 1) * 683) >> 11; the
 * multiply-and-shift is a fixed-point stand-in for dividing by 3.
 * One source row beyond @p height is read.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + stride] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
94 | | |
/**
 * Thirdpel "put" over a 2x2 neighbourhood with weights
 * 4 (current), 3 (right), 3 (below), 2 (below-right).
 *
 * The weighted sum plus a rounding term of 6 is multiplied by 2731 and
 * shifted right by 15, a fixed-point stand-in for dividing by 12 (the
 * sum of the weights). One extra column and one extra row of @p src
 * are read.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];

            dst[col] = ((4 * cur + 3 * right +
                         3 * below + 2 * below_right + 6) * 2731) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
109 | | |
/**
 * Thirdpel "put" over a 2x2 neighbourhood with weights
 * 3 (current), 2 (right), 4 (below), 3 (below-right).
 *
 * The weighted sum plus a rounding term of 6 is multiplied by 2731 and
 * shifted right by 15, a fixed-point stand-in for dividing by 12 (the
 * sum of the weights). One extra column and one extra row of @p src
 * are read.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];

            dst[col] = ((3 * cur + 2 * right +
                         4 * below + 3 * below_right + 6) * 2731) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
124 | | |
/**
 * Thirdpel "put" using the pixel and the one a row below, weighted 1:2.
 *
 * Each output is ((src[x] + 2 * src[x + stride] + 1) * 683) >> 11; the
 * multiply-and-shift is a fixed-point stand-in for dividing by 3.
 * One source row beyond @p height is read.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + stride] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
138 | | |
/**
 * Thirdpel "put" over a 2x2 neighbourhood with weights
 * 3 (current), 4 (right), 2 (below), 3 (below-right).
 *
 * The weighted sum plus a rounding term of 6 is multiplied by 2731 and
 * shifted right by 15, a fixed-point stand-in for dividing by 12 (the
 * sum of the weights). One extra column and one extra row of @p src
 * are read.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];

            dst[col] = ((3 * cur + 4 * right +
                         2 * below + 3 * below_right + 6) * 2731) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
153 | | |
/**
 * Thirdpel "put" over a 2x2 neighbourhood with weights
 * 2 (current), 3 (right), 3 (below), 4 (below-right).
 *
 * The weighted sum plus a rounding term of 6 is multiplied by 2731 and
 * shifted right by 15, a fixed-point stand-in for dividing by 12 (the
 * sum of the weights). One extra column and one extra row of @p src
 * are read.
 *
 * @param dst    destination pixels, advanced by @p stride per row
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels written per row
 * @param height number of rows written
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];

            dst[col] = ((2 * cur + 3 * right +
                         3 * below + 4 * below_right + 6) * 2731) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
168 | | |
/**
 * Full-pel "avg": forward to the fixed-width avg_pixels helper that
 * matches @p width.
 *
 * Only widths 2, 4, 8 and 16 are dispatched; any other width leaves
 * @p dst untouched.
 *
 * @param dst    destination pixels
 * @param src    source pixels
 * @param stride value passed through unchanged to the helper
 * @param width  block width selecting the helper
 * @param height value passed through unchanged to the helper
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
187 | | |
/**
 * Thirdpel "avg" using the pixel and its right-hand neighbour, weighted 2:1.
 *
 * The interpolated sample is ((2 * src[x] + src[x + 1] + 1) * 683) >> 11
 * (fixed-point stand-in for dividing by 3); it is then averaged with the
 * value already in @p dst, rounding up: (dst[x] + sample + 1) >> 1.
 * One column beyond @p width is read from every source row.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int sample = ((2 * src[col] + src[col + 1] + 1) * 683) >> 11;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
202 | | |
/**
 * Thirdpel "avg" using the pixel and its right-hand neighbour, weighted 1:2.
 *
 * The interpolated sample is ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11
 * (fixed-point stand-in for dividing by 3); it is then averaged with the
 * value already in @p dst, rounding up: (dst[x] + sample + 1) >> 1.
 * One column beyond @p width is read from every source row.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int sample = ((src[col] + 2 * src[col + 1] + 1) * 683) >> 11;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
217 | | |
/**
 * Thirdpel "avg" using the pixel and the one a row below, weighted 2:1.
 *
 * The interpolated sample is
 * ((2 * src[x] + src[x + stride] + 1) * 683) >> 11 (fixed-point stand-in
 * for dividing by 3); it is then averaged with the value already in
 * @p dst, rounding up: (dst[x] + sample + 1) >> 1.
 * One source row beyond @p height is read.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int sample =
                ((2 * src[col] + src[col + stride] + 1) * 683) >> 11;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
232 | | |
/**
 * Thirdpel "avg" over a 2x2 neighbourhood with weights
 * 4 (current), 3 (right), 3 (below), 2 (below-right).
 *
 * The weighted sum plus 6 is multiplied by 2731 and shifted right by 15
 * (fixed-point stand-in for dividing by 12); the sample is then averaged
 * with the value already in @p dst, rounding up:
 * (dst[x] + sample + 1) >> 1. One extra column and one extra row of
 * @p src are read.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];
            const int sample      = ((4 * cur + 3 * right +
                                      3 * below + 2 * below_right + 6) *
                                     2731) >> 15;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
248 | | |
/**
 * Thirdpel "avg" over a 2x2 neighbourhood with weights
 * 3 (current), 2 (right), 4 (below), 3 (below-right).
 *
 * The weighted sum plus 6 is multiplied by 2731 and shifted right by 15
 * (fixed-point stand-in for dividing by 12); the sample is then averaged
 * with the value already in @p dst, rounding up:
 * (dst[x] + sample + 1) >> 1. One extra column and one extra row of
 * @p src are read.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];
            const int sample      = ((3 * cur + 2 * right +
                                      4 * below + 3 * below_right + 6) *
                                     2731) >> 15;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
264 | | |
/**
 * Thirdpel "avg" using the pixel and the one a row below, weighted 1:2.
 *
 * The interpolated sample is
 * ((src[x] + 2 * src[x + stride] + 1) * 683) >> 11 (fixed-point stand-in
 * for dividing by 3); it is then averaged with the value already in
 * @p dst, rounding up: (dst[x] + sample + 1) >> 1.
 * One source row beyond @p height is read.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int sample =
                ((src[col] + 2 * src[col + stride] + 1) * 683) >> 11;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
279 | | |
/**
 * Thirdpel "avg" over a 2x2 neighbourhood with weights
 * 3 (current), 4 (right), 2 (below), 3 (below-right).
 *
 * The weighted sum plus 6 is multiplied by 2731 and shifted right by 15
 * (fixed-point stand-in for dividing by 12); the sample is then averaged
 * with the value already in @p dst, rounding up:
 * (dst[x] + sample + 1) >> 1. One extra column and one extra row of
 * @p src are read.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];
            const int sample      = ((3 * cur + 4 * right +
                                      2 * below + 3 * below_right + 6) *
                                     2731) >> 15;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
295 | | |
/**
 * Thirdpel "avg" over a 2x2 neighbourhood with weights
 * 2 (current), 3 (right), 3 (below), 4 (below-right).
 *
 * The weighted sum plus 6 is multiplied by 2731 and shifted right by 15
 * (fixed-point stand-in for dividing by 12); the sample is then averaged
 * with the value already in @p dst, rounding up:
 * (dst[x] + sample + 1) >> 1. One extra column and one extra row of
 * @p src are read.
 *
 * @param dst    destination pixels, read and rewritten in place
 * @param src    source pixels, advanced by @p stride per row
 * @param stride distance between consecutive rows, in bytes
 * @param width  number of pixels updated per row
 * @param height number of rows updated
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            const int cur         = src[col];
            const int right       = src[col + 1];
            const int below       = src[col + stride];
            const int below_right = src[col + stride + 1];
            const int sample      = ((2 * cur + 3 * right +
                                      3 * below + 4 * below_right + 6) *
                                     2731) >> 15;

            dst[col] = (dst[col] + sample + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
311 | | |
312 | | av_cold void ff_tpeldsp_init(TpelDSPContext *c) |
313 | 2.59k | { |
314 | 2.59k | c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; |
315 | 2.59k | c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; |
316 | 2.59k | c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; |
317 | 2.59k | c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; |
318 | 2.59k | c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; |
319 | 2.59k | c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; |
320 | 2.59k | c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; |
321 | 2.59k | c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; |
322 | 2.59k | c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; |
323 | | |
324 | 2.59k | c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; |
325 | 2.59k | c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; |
326 | 2.59k | c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; |
327 | 2.59k | c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; |
328 | 2.59k | c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; |
329 | 2.59k | c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; |
330 | 2.59k | c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; |
331 | 2.59k | c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; |
332 | 2.59k | c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; |
333 | 2.59k | } |