/src/ffmpeg/libswscale/x86/yuv2rgb.c
Line | Count | Source |
1 | | /* |
2 | | * software YUV to RGB converter |
3 | | * |
4 | | * Copyright (C) 2001-2007 Michael Niedermayer |
5 | | * Copyright (C) 2009-2010 Konstantin Shishkov |
6 | | * |
7 | | * MMX/MMXEXT template stuff (needed for fast movntq support), |
8 | | * 1,4,8bpp support and context / deglobalize stuff |
9 | | * by Michael Niedermayer (michaelni@gmx.at) |
10 | | * |
11 | | * This file is part of FFmpeg. |
12 | | * |
13 | | * FFmpeg is free software; you can redistribute it and/or |
14 | | * modify it under the terms of the GNU Lesser General Public |
15 | | * License as published by the Free Software Foundation; either |
16 | | * version 2.1 of the License, or (at your option) any later version. |
17 | | * |
18 | | * FFmpeg is distributed in the hope that it will be useful, |
19 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | | * Lesser General Public License for more details. |
22 | | * |
23 | | * You should have received a copy of the GNU Lesser General Public |
24 | | * License along with FFmpeg; if not, write to the Free Software |
25 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
26 | | */ |
27 | | |
28 | | #include <stdio.h> |
29 | | #include <stdlib.h> |
30 | | #include <inttypes.h> |
31 | | |
32 | | #include "config.h" |
33 | | #include "libswscale/rgb2rgb.h" |
34 | | #include "libswscale/swscale.h" |
35 | | #include "libswscale/swscale_internal.h" |
36 | | #include "libavutil/attributes.h" |
37 | | #include "libavutil/x86/asm.h" |
38 | | #include "libavutil/x86/cpu.h" |
39 | | #include "libavutil/cpu.h" |
40 | | |
41 | | #if HAVE_X86ASM |
42 | | |
/**
 * Shared prologue of the packed-output SSSE3 wrappers in this file.
 *
 * The macro is a code fragment, not an expression. It relies on these names
 * being visible at the expansion site: c, src, srcStride, srcSliceY,
 * srcSliceH, dst, dstStride, plus the caller-declared `int y, h_size, vshift;`.
 *
 * What it does:
 *  - rounds c->opts.dst_w up to a multiple of 8 and stores it in h_size; if
 *    h_size * depth exceeds |dstStride[0]|, h_size is reduced by 8;
 *  - sets vshift to 0 when the source format is AV_PIX_FMT_YUV422P and to 1
 *    otherwise, so chroma rows are addressed as (y >> vshift);
 *  - OPENS a `for` loop over the srcSliceH rows of the slice and declares, for
 *    each row, image (destination row), py/pu/pv (source rows) and
 *    index (-h_size / 2).
 *
 * The loop is left open on purpose: the user appends the per-row statements
 * and the closing '}'.
 *
 * @param depth multiplier applied to h_size for the stride check; the users in
 *              this file pass 2, 3 or 4. It is parenthesized in the expansion
 *              so that an expression argument keeps its intended precedence.
 */
#define YUV2RGB_LOOP(depth)                                          \
    h_size = (c->opts.dst_w + 7) & ~7;                               \
    if (h_size * (depth) > FFABS(dstStride[0]))                      \
        h_size -= 8;                                                 \
                                                                     \
    vshift = c->opts.src_format != AV_PIX_FMT_YUV422P;               \
                                                                     \
    for (y = 0; y < srcSliceH; y++) {                                \
        uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \
        const uint8_t *py = src[0] +               y * srcStride[0]; \
        const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \
        const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
        x86_reg index = -h_size / 2;
56 | | |
57 | | extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
58 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
59 | | const uint8_t *py_2index); |
60 | | extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
61 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
62 | | const uint8_t *py_2index); |
63 | | |
64 | | extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
65 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
66 | | const uint8_t *py_2index); |
67 | | extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
68 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
69 | | const uint8_t *py_2index); |
70 | | extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
71 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
72 | | const uint8_t *py_2index); |
73 | | extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
74 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
75 | | const uint8_t *py_2index); |
76 | | extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
77 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
78 | | const uint8_t *py_2index, const uint8_t *pa_2index); |
79 | | extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, |
80 | | const uint8_t *pv_index, const uint64_t *pointer_c_dither, |
81 | | const uint8_t *py_2index, const uint8_t *pa_2index); |
82 | | #if ARCH_X86_64 |
83 | | extern void ff_yuv_420_gbrp24_ssse3(x86_reg index, uint8_t *image, uint8_t *dst_b, uint8_t *dst_r, |
84 | | const uint8_t *pu_index, const uint8_t *pv_index, |
85 | | const uint64_t *pointer_c_dither, |
86 | | const uint8_t *py_2index); |
87 | | #endif |
88 | | |
89 | | static inline int yuv420_rgb15_ssse3(SwsInternal *c, const uint8_t *const src[], |
90 | | const int srcStride[], |
91 | | int srcSliceY, int srcSliceH, |
92 | | uint8_t *const dst[], const int dstStride[]) |
93 | 0 | { |
94 | 0 | int y, h_size, vshift; |
95 | |
|
96 | 0 | YUV2RGB_LOOP(2) |
97 | |
|
98 | 0 | c->blueDither = ff_dither8[y & 1]; |
99 | 0 | c->greenDither = ff_dither8[y & 1]; |
100 | 0 | c->redDither = ff_dither8[(y + 1) & 1]; |
101 | |
|
102 | 0 | ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); |
103 | 0 | } |
104 | 0 | return srcSliceH; |
105 | 0 | } |
106 | | |
107 | | static inline int yuv420_rgb16_ssse3(SwsInternal *c, const uint8_t *const src[], |
108 | | const int srcStride[], |
109 | | int srcSliceY, int srcSliceH, |
110 | | uint8_t *const dst[], const int dstStride[]) |
111 | 0 | { |
112 | 0 | int y, h_size, vshift; |
113 | |
|
114 | 0 | YUV2RGB_LOOP(2) |
115 | |
|
116 | 0 | c->blueDither = ff_dither8[y & 1]; |
117 | 0 | c->greenDither = ff_dither4[y & 1]; |
118 | 0 | c->redDither = ff_dither8[(y + 1) & 1]; |
119 | |
|
120 | 0 | ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); |
121 | 0 | } |
122 | 0 | return srcSliceH; |
123 | 0 | } |
124 | | |
125 | | static inline int yuv420_rgb32_ssse3(SwsInternal *c, const uint8_t *const src[], |
126 | | const int srcStride[], |
127 | | int srcSliceY, int srcSliceH, |
128 | | uint8_t *const dst[], const int dstStride[]) |
129 | 0 | { |
130 | 0 | int y, h_size, vshift; |
131 | |
|
132 | 0 | YUV2RGB_LOOP(4) |
133 | |
|
134 | 0 | ff_yuv_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); |
135 | 0 | } |
136 | 0 | return srcSliceH; |
137 | 0 | } |
138 | | |
139 | | static inline int yuv420_bgr32_ssse3(SwsInternal *c, const uint8_t *const src[], |
140 | | const int srcStride[], |
141 | | int srcSliceY, int srcSliceH, |
142 | | uint8_t *const dst[], const int dstStride[]) |
143 | 0 | { |
144 | 0 | int y, h_size, vshift; |
145 | |
|
146 | 0 | YUV2RGB_LOOP(4) |
147 | |
|
148 | 0 | ff_yuv_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); |
149 | 0 | } |
150 | 0 | return srcSliceH; |
151 | 0 | } |
152 | | |
153 | | static inline int yuva420_rgb32_ssse3(SwsInternal *c, const uint8_t *const src[], |
154 | | const int srcStride[], |
155 | | int srcSliceY, int srcSliceH, |
156 | | uint8_t *const dst[], const int dstStride[]) |
157 | 0 | { |
158 | 0 | int y, h_size, vshift; |
159 | 0 | YUV2RGB_LOOP(4) |
160 | |
|
161 | 0 | const uint8_t *pa = src[3] + y * srcStride[3]; |
162 | 0 | ff_yuva_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); |
163 | 0 | } |
164 | 0 | return srcSliceH; |
165 | 0 | } |
166 | | |
167 | | static inline int yuva420_bgr32_ssse3(SwsInternal *c, const uint8_t *const src[], |
168 | | const int srcStride[], |
169 | | int srcSliceY, int srcSliceH, |
170 | | uint8_t *const dst[], const int dstStride[]) |
171 | 0 | { |
172 | 0 | int y, h_size, vshift; |
173 | |
|
174 | 0 | YUV2RGB_LOOP(4) |
175 | |
|
176 | 0 | const uint8_t *pa = src[3] + y * srcStride[3]; |
177 | 0 | ff_yuva_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); |
178 | 0 | } |
179 | 0 | return srcSliceH; |
180 | 0 | } |
181 | | |
182 | | static inline int yuv420_rgb24_ssse3(SwsInternal *c, const uint8_t *const src[], |
183 | | const int srcStride[], |
184 | | int srcSliceY, int srcSliceH, |
185 | | uint8_t *const dst[], const int dstStride[]) |
186 | 0 | { |
187 | 0 | int y, h_size, vshift; |
188 | |
|
189 | 0 | YUV2RGB_LOOP(3) |
190 | |
|
191 | 0 | ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); |
192 | 0 | } |
193 | 0 | return srcSliceH; |
194 | 0 | } |
195 | | |
196 | | static inline int yuv420_bgr24_ssse3(SwsInternal *c, const uint8_t *const src[], |
197 | | const int srcStride[], |
198 | | int srcSliceY, int srcSliceH, |
199 | | uint8_t *const dst[], const int dstStride[]) |
200 | 0 | { |
201 | 0 | int y, h_size, vshift; |
202 | |
|
203 | 0 | YUV2RGB_LOOP(3) |
204 | |
|
205 | 0 | ff_yuv_420_bgr24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); |
206 | 0 | } |
207 | 0 | return srcSliceH; |
208 | 0 | } |
209 | | |
210 | | #if ARCH_X86_64 |
211 | | static inline int yuv420_gbrp_ssse3(SwsInternal *c, const uint8_t *const src[], |
212 | | const int srcStride[], |
213 | | int srcSliceY, int srcSliceH, |
214 | | uint8_t *const dst[], const int dstStride[]) |
215 | 0 | { |
216 | 0 | int y, h_size, vshift; |
217 | |
|
218 | 0 | h_size = (c->opts.dst_w + 7) & ~7; |
219 | 0 | if (h_size * 3 > FFABS(dstStride[0])) |
220 | 0 | h_size -= 8; |
221 | |
|
222 | 0 | vshift = c->opts.src_format != AV_PIX_FMT_YUV422P; |
223 | |
|
224 | 0 | for (y = 0; y < srcSliceH; y++) { |
225 | 0 | uint8_t *dst_g = dst[0] + (y + srcSliceY) * dstStride[0]; |
226 | 0 | uint8_t *dst_b = dst[1] + (y + srcSliceY) * dstStride[1]; |
227 | 0 | uint8_t *dst_r = dst[2] + (y + srcSliceY) * dstStride[2]; |
228 | 0 | const uint8_t *py = src[0] + y * srcStride[0]; |
229 | 0 | const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; |
230 | 0 | const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; |
231 | 0 | x86_reg index = -h_size / 2; |
232 | |
|
233 | 0 | ff_yuv_420_gbrp24_ssse3(index, dst_g, dst_b, dst_r, pu - index, pv - index, &(c->redDither), py - 2 * index); |
234 | 0 | } |
235 | 0 | return srcSliceH; |
236 | 0 | } |
237 | | #endif |
238 | | |
239 | | #endif /* HAVE_X86ASM */ |
240 | | |
241 | | av_cold SwsFunc ff_yuv2rgb_init_x86(SwsInternal *c) |
242 | 0 | { |
243 | 0 | #if HAVE_X86ASM |
244 | 0 | int cpu_flags = av_get_cpu_flags(); |
245 | |
|
246 | 0 | if (EXTERNAL_SSSE3(cpu_flags)) { |
247 | 0 | switch (c->opts.dst_format) { |
248 | 0 | case AV_PIX_FMT_RGB32: |
249 | 0 | if (c->opts.src_format == AV_PIX_FMT_YUVA420P) { |
250 | 0 | #if CONFIG_SWSCALE_ALPHA |
251 | 0 | return yuva420_rgb32_ssse3; |
252 | 0 | #endif |
253 | 0 | break; |
254 | 0 | } else |
255 | 0 | return yuv420_rgb32_ssse3; |
256 | 0 | case AV_PIX_FMT_BGR32: |
257 | 0 | if (c->opts.src_format == AV_PIX_FMT_YUVA420P) { |
258 | 0 | #if CONFIG_SWSCALE_ALPHA |
259 | 0 | return yuva420_bgr32_ssse3; |
260 | 0 | #endif |
261 | 0 | break; |
262 | 0 | } else |
263 | 0 | return yuv420_bgr32_ssse3; |
264 | 0 | case AV_PIX_FMT_RGB24: |
265 | 0 | return yuv420_rgb24_ssse3; |
266 | 0 | case AV_PIX_FMT_BGR24: |
267 | 0 | return yuv420_bgr24_ssse3; |
268 | 0 | case AV_PIX_FMT_RGB565: |
269 | 0 | return yuv420_rgb16_ssse3; |
270 | 0 | case AV_PIX_FMT_RGB555: |
271 | 0 | return yuv420_rgb15_ssse3; |
272 | 0 | #if ARCH_X86_64 |
273 | 0 | case AV_PIX_FMT_GBRP: |
274 | 0 | return yuv420_gbrp_ssse3; |
275 | 0 | #endif |
276 | 0 | } |
277 | 0 | } |
278 | | |
279 | 0 | #endif /* HAVE_X86ASM */ |
280 | 0 | return NULL; |
281 | 0 | } |