/src/FreeRDP/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c
Line | Count | Source |
1 | | /* FreeRDP: A Remote Desktop Protocol Client |
2 | | * Optimized YCoCg<->RGB conversion operations. |
3 | | * vi:ts=4 sw=4: |
4 | | * |
5 | | * (c) Copyright 2014 Hewlett-Packard Development Company, L.P. |
6 | | * |
7 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | * you may not use this file except in compliance with the License. |
9 | | * You may obtain a copy of the License at |
10 | | * |
11 | | * http://www.apache.org/licenses/LICENSE-2.0 |
12 | | * |
13 | | * Unless required by applicable law or agreed to in writing, software |
14 | | * distributed under the License is distributed on an "AS IS" BASIS, |
15 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | * See the License for the specific language governing permissions and |
17 | | * limitations under the License. |
18 | | */ |
19 | | |
20 | | #include <freerdp/config.h> |
21 | | |
22 | | #include <freerdp/types.h> |
23 | | #include <freerdp/primitives.h> |
24 | | #include <winpr/sysinfo.h> |
25 | | |
26 | | #include "prim_YCoCg.h" |
27 | | |
28 | | #include "prim_internal.h" |
29 | | #include "prim_templates.h" |
30 | | |
31 | | #if defined(SSE2_ENABLED) |
32 | | #include <emmintrin.h> |
33 | | #include <tmmintrin.h> |
34 | | |
35 | | static primitives_t* generic = NULL; |
36 | | |
37 | | /* ------------------------------------------------------------------------- */ |
38 | | static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep, |
39 | | BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, |
40 | | UINT32 dstStep, UINT32 width, UINT32 height, |
41 | | UINT8 shift, BOOL withAlpha) |
42 | | { |
43 | | const BYTE* sptr = pSrc; |
44 | | BYTE* dptr = pDst; |
45 | | int sRowBump = srcStep - width * sizeof(UINT32); |
46 | | int dRowBump = dstStep - width * sizeof(UINT32); |
47 | | /* Shift left by "shift" and divide by two is the same as shift |
48 | | * left by "shift-1". |
49 | | */ |
50 | | int dataShift = shift - 1; |
51 | | BYTE mask = (BYTE)(0xFFU << dataShift); |
52 | | |
53 | | /* Let's say the data is of the form: |
54 | | * a0y0o0g0 a1y1o1g1 a2y2o2g2... |
55 | | * Apply: |
56 | | * |R| | 1 1/2 -1/2 | |y| |
57 | | * |G| = | 1 0 1/2 | * |o| |
58 | | * |B| | 1 -1/2 -1/2 | |g| |
59 | | * where Y is 8-bit unsigned and o & g are 8-bit signed. |
60 | | */ |
61 | | |
62 | | if ((width < 8) || (ULONG_PTR)dptr & 0x03) |
63 | | { |
64 | | /* Too small, or we'll never hit a 16-byte boundary. Punt. */ |
65 | | return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
66 | | shift, withAlpha); |
67 | | } |
68 | | |
69 | | for (UINT32 h = 0; h < height; h++) |
70 | | { |
71 | | UINT32 w = width; |
72 | | BOOL onStride = 0; |
73 | | |
74 | | /* Get to a 16-byte destination boundary. */ |
75 | | if ((ULONG_PTR)dptr & 0x0f) |
76 | | { |
77 | | pstatus_t status = 0; |
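| | /* Number of leading pixels to convert with the generic code so that dptr reaches the next 16-byte boundary. */ |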
78 | | UINT32 startup = (16 - ((ULONG_PTR)dptr & 0x0f)) / 4; |
79 | | |
80 | | if (startup > width) |
81 | | startup = width; |
82 | | |
83 | | status = generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, DstFormat, dstStep, startup, |
84 | | 1, shift, withAlpha); |
85 | | |
86 | | if (status != PRIMITIVES_SUCCESS) |
87 | | return status; |
88 | | |
89 | | sptr += startup * sizeof(UINT32); |
90 | | dptr += startup * sizeof(UINT32); |
91 | | w -= startup; |
92 | | } |
93 | | |
94 | | /* Each loop handles eight pixels at a time. */ |
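| | /* Aligned loads below are only safe when sptr itself is 16-byte aligned. */ |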
95 | | onStride = (((ULONG_PTR)sptr & 0x0f) == 0) ? TRUE : FALSE; |
96 | | |
97 | | while (w >= 8) |
98 | | { |
99 | | __m128i R0; |
100 | | __m128i R1; |
101 | | __m128i R2; |
102 | | __m128i R3; |
103 | | __m128i R4; |
104 | | __m128i R5; |
105 | | __m128i R6; |
106 | | __m128i R7; |
107 | | |
108 | | if (onStride) |
109 | | { |
110 | | /* The faster path, 16-byte aligned load. */ |
111 | | R0 = _mm_load_si128((const __m128i*)sptr); |
112 | | sptr += (128 / 8); |
113 | | R1 = _mm_load_si128((const __m128i*)sptr); |
114 | | sptr += (128 / 8); |
115 | | } |
116 | | else |
117 | | { |
118 | | /* Off-stride, slower LDDQU load. */ |
119 | | R0 = _mm_lddqu_si128((const __m128i*)sptr); |
120 | | sptr += (128 / 8); |
121 | | R1 = _mm_lddqu_si128((const __m128i*)sptr); |
122 | | sptr += (128 / 8); |
123 | | } |
124 | | |
125 | | /* R0 = a3y3o3g3 a2y2o2g2 a1y1o1g1 a0y0o0g0 */ |
126 | | /* R1 = a7y7o7g7 a6y6o6g6 a5y5o5g5 a4y4o4g4 */ |
127 | | /* Shuffle to pack all the like types together. */ |
128 | | R2 = _mm_set_epi32(0x0f0b0703, 0x0e0a0602, 0x0d090501, 0x0c080400); |
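| | /* The shuffle mask gathers every fourth byte, so each 32-bit lane of the result holds one component (g, o, y or a) from four consecutive pixels. */ |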
129 | | R3 = _mm_shuffle_epi8(R0, R2); |
130 | | R4 = _mm_shuffle_epi8(R1, R2); |
131 | | /* R3 = a3a2a1a0 y3y2y1y0 o3o2o1o0 g3g2g1g0 */ |
132 | | /* R4 = a7a6a5a4 y7y6y5y4 o7o6o5o4 g7g6g5g4 */ |
133 | | R5 = _mm_unpackhi_epi32(R3, R4); |
134 | | R6 = _mm_unpacklo_epi32(R3, R4); |
135 | | |
136 | | /* R5 = a7a6a5a4 a3a2a1a0 y7y6y5y4 y3y2y1y0 */ |
137 | | /* R6 = o7o6o5o4 o3o2o1o0 g7g6g5g4 g3g2g1g0 */ |
138 | | /* Save alphas aside */ |
139 | | if (withAlpha) |
140 | | R7 = _mm_unpackhi_epi64(R5, R5); |
141 | | else |
142 | | R7 = _mm_set1_epi32(0xFFFFFFFFU); |
143 | | |
144 | | /* R7 = a7a6a5a4 a3a2a1a0 a7a6a5a4 a3a2a1a0 */ |
145 | | /* Expand Y's from 8-bit unsigned to 16-bit signed. */ |
146 | | R1 = _mm_set1_epi32(0); |
147 | | R0 = _mm_unpacklo_epi8(R5, R1); |
148 | | /* R0 = 00y700y6 00y500y4 00y300y2 00y100y0 */ |
149 | | /* Shift Co's and Cg's by (shift-1). -1 covers division by two. |
150 | | * Note: this must be done before sign-conversion. |
151 | | * Note also there is no slli_epi8, so we have to use a 16-bit |
152 | | * version and then mask. |
153 | | */ |
154 | | R6 = _mm_slli_epi16(R6, dataShift); |
155 | | R1 = _mm_set1_epi8(mask); |
156 | | R6 = _mm_and_si128(R6, R1); |
157 | | /* R6 = shifted o7o6o5o4 o3o2o1o0 g7g6g5g4 g3g2g1g0 */ |
158 | | /* Expand Co's from 8-bit signed to 16-bit signed */ |
159 | | R1 = _mm_unpackhi_epi8(R6, R6); |
160 | | R1 = _mm_srai_epi16(R1, 8); |
161 | | /* R1 = xxo7xxo6 xxo5xxo4 xxo3xxo2 xxo1xxo0 */ |
162 | | /* Expand Cg's from 8-bit signed to 16-bit signed */ |
163 | | R2 = _mm_unpacklo_epi8(R6, R6); |
164 | | R2 = _mm_srai_epi16(R2, 8); |
165 | | /* R2 = xxg7xxg6 xxg5xxg4 xxg3xxg2 xxg1xxg0 */ |
166 | | /* Get Y - halfCg and save */ |
167 | | R6 = _mm_subs_epi16(R0, R2); |
168 | | /* R = (Y-halfCg) + halfCo */ |
169 | | R3 = _mm_adds_epi16(R6, R1); |
170 | | /* R3 = xxR7xxR6 xxR5xxR4 xxR3xxR2 xxR1xxR0 */ |
171 | | /* G = Y + Cg(/2) */ |
172 | | R4 = _mm_adds_epi16(R0, R2); |
173 | | /* R4 = xxG7xxG6 xxG5xxG4 xxG3xxG2 xxG1xxG0 */ |
174 | | /* B = (Y-halfCg) - Co(/2) */ |
175 | | R5 = _mm_subs_epi16(R6, R1); |
176 | | /* R5 = xxB7xxB6 xxB5xxB4 xxB3xxB2 xxB1xxB0 */ |
177 | | /* Repack R's & B's. */ |
178 | | R0 = _mm_packus_epi16(R3, R5); |
179 | | /* R0 = R7R6R5R4 R3R2R1R0 B7B6B5B4 B3B2B1B0 */ |
180 | | /* Repack G's. */ |
181 | | R1 = _mm_packus_epi16(R4, R4); |
182 | | /* R1 = G7G6G5G4 G3G2G1G0 G7G6G5G4 G3G2G1G0 */ |
183 | | /* And add the A's. */ |
184 | | R1 = _mm_unpackhi_epi64(R1, R7); |
185 | | /* R1 = A7A6A5A4 A3A2A1A0 G7G6G5G4 G3G2G1G0 */ |
186 | | /* Now do interleaving again. */ |
187 | | R2 = _mm_unpacklo_epi8(R0, R1); |
188 | | /* R2 = G7B7G6B6 G5B5G4B4 G3B3G2B2 G1B1G0B0 */ |
189 | | R3 = _mm_unpackhi_epi8(R0, R1); |
190 | | /* R3 = A7R7A6R6 A5R5A4R4 A3R3A2R2 A1R1A0R0 */ |
191 | | R4 = _mm_unpacklo_epi16(R2, R3); |
192 | | /* R4 = A3R3G3B3 A2R2G2B2 A1R1G1B1 A0R0G0B0 */ |
193 | | R5 = _mm_unpackhi_epi16(R2, R3); |
194 | | /* R5 = A7R7G7B7 A6R6G6B6 A5R5G5B5 A4R4G4B4 */ |
195 | | _mm_store_si128((__m128i*)dptr, R4); |
196 | | dptr += (128 / 8); |
197 | | _mm_store_si128((__m128i*)dptr, R5); |
198 | | dptr += (128 / 8); |
199 | | w -= 8; |
200 | | } |
201 | | |
202 | | /* Handle any remainder pixels. */ |
203 | | if (w > 0) |
204 | | { |
205 | | pstatus_t status = 0; |
206 | | status = generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, DstFormat, dstStep, w, 1, |
207 | | shift, withAlpha); |
208 | | |
209 | | if (status != PRIMITIVES_SUCCESS) |
210 | | return status; |
211 | | |
212 | | sptr += w * sizeof(UINT32); |
213 | | dptr += w * sizeof(UINT32); |
214 | | } |
215 | | |
216 | | sptr += sRowBump; |
217 | | dptr += dRowBump; |
218 | | } |
219 | | |
220 | | return PRIMITIVES_SUCCESS; |
221 | | } |
222 | | |
223 | | /* ------------------------------------------------------------------------- */ |
224 | | static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(const BYTE* WINPR_RESTRICT pSrc, |
225 | | UINT32 srcStep, BYTE* WINPR_RESTRICT pDst, |
226 | | UINT32 DstFormat, UINT32 dstStep, UINT32 width, |
227 | | UINT32 height, UINT8 shift, BOOL withAlpha) |
228 | | { |
229 | | const BYTE* sptr = pSrc; |
230 | | BYTE* dptr = pDst; |
231 | | int sRowBump = srcStep - width * sizeof(UINT32); |
232 | | int dRowBump = dstStep - width * sizeof(UINT32); |
233 | | /* Shift left by "shift" and divide by two is the same as shift |
234 | | * left by "shift-1". |
235 | | */ |
236 | | int dataShift = shift - 1; |
237 | | BYTE mask = (BYTE)(0xFFU << dataShift); |
238 | | |
239 | | /* Let's say the data is of the form: |
240 | | * a0y0o0g0 a1y1o1g1 a2y2o2g2... |
241 | | * Apply: |
242 | | * |R| | 1 1/2 -1/2 | |y| |
243 | | * |G| = | 1 0 1/2 | * |o| |
244 | | * |B| | 1 -1/2 -1/2 | |g| |
245 | | * where Y is 8-bit unsigned and o & g are 8-bit signed. |
246 | | */ |
247 | | |
248 | | if ((width < 8) || (ULONG_PTR)dptr & 0x03) |
249 | | { |
250 | | /* Too small, or we'll never hit a 16-byte boundary. Punt. */ |
251 | | return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
252 | | shift, withAlpha); |
253 | | } |
254 | | |
255 | | for (UINT32 h = 0; h < height; h++) |
256 | | { |
257 | | UINT32 w = width; |
258 | | BOOL onStride = 0; |
259 | | |
260 | | /* Get to a 16-byte destination boundary. */ |
261 | | if ((ULONG_PTR)dptr & 0x0f) |
262 | | { |
263 | | pstatus_t status = 0; |
264 | | UINT32 startup = (16 - ((ULONG_PTR)dptr & 0x0f)) / 4; |
265 | | |
266 | | if (startup > width) |
267 | | startup = width; |
268 | | |
269 | | status = generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, DstFormat, dstStep, startup, |
270 | | 1, shift, withAlpha); |
271 | | |
272 | | if (status != PRIMITIVES_SUCCESS) |
273 | | return status; |
274 | | |
275 | | sptr += startup * sizeof(UINT32); |
276 | | dptr += startup * sizeof(UINT32); |
277 | | w -= startup; |
278 | | } |
279 | | |
280 | | /* Each loop handles eight pixels at a time. */ |
281 | | onStride = (((ULONG_PTR)sptr & 0x0f) == 0) ? TRUE : FALSE; |
282 | | |
283 | | while (w >= 8) |
284 | | { |
285 | | __m128i R0; |
286 | | __m128i R1; |
287 | | __m128i R2; |
288 | | __m128i R3; |
289 | | __m128i R4; |
290 | | __m128i R5; |
291 | | __m128i R6; |
292 | | __m128i R7; |
293 | | |
294 | | if (onStride) |
295 | | { |
296 | | /* The faster path, 16-byte aligned load. */ |
297 | | R0 = _mm_load_si128((const __m128i*)sptr); |
298 | | sptr += (128 / 8); |
299 | | R1 = _mm_load_si128((const __m128i*)sptr); |
300 | | sptr += (128 / 8); |
301 | | } |
302 | | else |
303 | | { |
304 | | /* Off-stride, slower LDDQU load. */ |
305 | | R0 = _mm_lddqu_si128((const __m128i*)sptr); |
306 | | sptr += (128 / 8); |
307 | | R1 = _mm_lddqu_si128((const __m128i*)sptr); |
308 | | sptr += (128 / 8); |
309 | | } |
310 | | |
311 | | /* R0 = a3y3o3g3 a2y2o2g2 a1y1o1g1 a0y0o0g0 */ |
312 | | /* R1 = a7y7o7g7 a6y6o6g6 a5y5o5g5 a4y4o4g4 */ |
313 | | /* Shuffle to pack all the like types together. */ |
314 | | R2 = _mm_set_epi32(0x0f0b0703, 0x0e0a0602, 0x0d090501, 0x0c080400); |
315 | | R3 = _mm_shuffle_epi8(R0, R2); |
316 | | R4 = _mm_shuffle_epi8(R1, R2); |
317 | | /* R3 = a3a2a1a0 y3y2y1y0 o3o2o1o0 g3g2g1g0 */ |
318 | | /* R4 = a7a6a5a4 y7y6y5y4 o7o6o5o4 g7g6g5g4 */ |
319 | | R5 = _mm_unpackhi_epi32(R3, R4); |
320 | | R6 = _mm_unpacklo_epi32(R3, R4); |
321 | | |
322 | | /* R5 = a7a6a5a4 a3a2a1a0 y7y6y5y4 y3y2y1y0 */ |
323 | | /* R6 = o7o6o5o4 o3o2o1o0 g7g6g5g4 g3g2g1g0 */ |
324 | | /* Save alphas aside */ |
325 | | if (withAlpha) |
326 | | R7 = _mm_unpackhi_epi64(R5, R5); |
327 | | else |
328 | | R7 = _mm_set1_epi32(0xFFFFFFFFU); |
329 | | |
330 | | /* R7 = a7a6a5a4 a3a2a1a0 a7a6a5a4 a3a2a1a0 */ |
331 | | /* Expand Y's from 8-bit unsigned to 16-bit signed. */ |
332 | | R1 = _mm_set1_epi32(0); |
333 | | R0 = _mm_unpacklo_epi8(R5, R1); |
334 | | /* R0 = 00y700y6 00y500y4 00y300y2 00y100y0 */ |
335 | | /* Shift Co's and Cg's by (shift-1). -1 covers division by two. |
336 | | * Note: this must be done before sign-conversion. |
337 | | * Note also there is no slli_epi8, so we have to use a 16-bit |
338 | | * version and then mask. |
339 | | */ |
340 | | R6 = _mm_slli_epi16(R6, dataShift); |
341 | | R1 = _mm_set1_epi8(mask); |
342 | | R6 = _mm_and_si128(R6, R1); |
343 | | /* R6 = shifted o7o6o5o4 o3o2o1o0 g7g6g5g4 g3g2g1g0 */ |
344 | | /* Expand Co's from 8-bit signed to 16-bit signed */ |
345 | | R1 = _mm_unpackhi_epi8(R6, R6); |
346 | | R1 = _mm_srai_epi16(R1, 8); |
347 | | /* R1 = xxo7xxo6 xxo5xxo4 xxo3xxo2 xxo1xxo0 */ |
348 | | /* Expand Cg's from 8-bit signed to 16-bit signed */ |
349 | | R2 = _mm_unpacklo_epi8(R6, R6); |
350 | | R2 = _mm_srai_epi16(R2, 8); |
351 | | /* R2 = xxg7xxg6 xxg5xxg4 xxg3xxg2 xxg1xxg0 */ |
352 | | /* Get Y - halfCg and save */ |
353 | | R6 = _mm_subs_epi16(R0, R2); |
354 | | /* R = (Y-halfCg) + halfCo */ |
355 | | R3 = _mm_adds_epi16(R6, R1); |
356 | | /* R3 = xxR7xxR6 xxR5xxR4 xxR3xxR2 xxR1xxR0 */ |
357 | | /* G = Y + Cg(/2) */ |
358 | | R4 = _mm_adds_epi16(R0, R2); |
359 | | /* R4 = xxG7xxG6 xxG5xxG4 xxG3xxG2 xxG1xxG0 */ |
360 | | /* B = (Y-halfCg) - Co(/2) */ |
361 | | R5 = _mm_subs_epi16(R6, R1); |
362 | | /* R5 = xxB7xxB6 xxB5xxB4 xxB3xxB2 xxB1xxB0 */ |
363 | | /* Repack R's & B's. */ |
364 | | /* This line is the only diff between inverted and non-inverted. |
365 | | * Unfortunately, it would be expensive to check "inverted" |
366 | | * every time through this loop. |
367 | | */ |
368 | | R0 = _mm_packus_epi16(R5, R3); |
369 | | /* R0 = B7B6B5B4 B3B2B1B0 R7R6R5R4 R3R2R1R0 */ |
370 | | /* Repack G's. */ |
371 | | R1 = _mm_packus_epi16(R4, R4); |
372 | | /* R1 = G7G6G5G4 G3G2G1G0 G7G6G5G4 G3G2G1G0 */ |
373 | | /* And add the A's. */ |
374 | | R1 = _mm_unpackhi_epi64(R1, R7); |
375 | | /* R1 = A7A6A5A4 A3A2A1A0 G7G6G5G4 G3G2G1G0 */ |
376 | | /* Now do interleaving again. */ |
377 | | R2 = _mm_unpacklo_epi8(R0, R1); |
378 | | /* R2 = G7B7G6B6 G5B5G4B4 G3B3G2B2 G1B1G0B0 */ |
379 | | R3 = _mm_unpackhi_epi8(R0, R1); |
380 | | /* R3 = A7R7A6R6 A5R5A4R4 A3R3A2R2 A1R1A0R0 */ |
381 | | R4 = _mm_unpacklo_epi16(R2, R3); |
382 | | /* R4 = A3R3G3B3 A2R2G2B2 A1R1G1B1 A0R0G0B0 */ |
383 | | R5 = _mm_unpackhi_epi16(R2, R3); |
384 | | /* R5 = A7R7G7B7 A6R6G6B6 A5R5G5B5 A4R4G4B4 */ |
385 | | _mm_store_si128((__m128i*)dptr, R4); |
386 | | dptr += (128 / 8); |
387 | | _mm_store_si128((__m128i*)dptr, R5); |
388 | | dptr += (128 / 8); |
389 | | w -= 8; |
390 | | } |
391 | | |
392 | | /* Handle any remainder pixels. */ |
393 | | if (w > 0) |
394 | | { |
395 | | pstatus_t status = 0; |
396 | | status = generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, DstFormat, dstStep, w, 1, |
397 | | shift, withAlpha); |
398 | | |
399 | | if (status != PRIMITIVES_SUCCESS) |
400 | | return status; |
401 | | |
402 | | sptr += w * sizeof(UINT32); |
403 | | dptr += w * sizeof(UINT32); |
404 | | } |
405 | | |
406 | | sptr += sRowBump; |
407 | | dptr += dRowBump; |
408 | | } |
409 | | |
410 | | return PRIMITIVES_SUCCESS; |
411 | | } |
412 | | |
413 | | /* ------------------------------------------------------------------------- */ |
414 | | static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, |
415 | | BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, |
416 | | INT32 dstStep, UINT32 width, UINT32 height, UINT8 shift, |
417 | | BOOL withAlpha) |
418 | | { |
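| | /* The two SSSE3 variants differ only in the R/B order used when repacking the result; pick the one matching the destination byte order and leave all other formats to the generic C path. */ |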
419 | | switch (DstFormat) |
420 | | { |
421 | | case PIXEL_FORMAT_BGRX32: |
422 | | case PIXEL_FORMAT_BGRA32: |
423 | | return ssse3_YCoCgRToRGB_8u_AC4R_invert(pSrc, srcStep, pDst, DstFormat, dstStep, width, |
424 | | height, shift, withAlpha); |
425 | | |
426 | | case PIXEL_FORMAT_RGBX32: |
427 | | case PIXEL_FORMAT_RGBA32: |
428 | | return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(pSrc, srcStep, pDst, DstFormat, dstStep, |
429 | | width, height, shift, withAlpha); |
430 | | |
431 | | default: |
432 | | return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width, |
433 | | height, shift, withAlpha); |
434 | | } |
435 | | } |
436 | | |
437 | | #endif |
438 | | |
439 | | /* ------------------------------------------------------------------------- */ |
440 | | void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims) |
441 | 0 | { |
442 | | #if defined(SSE2_ENABLED) |
443 | | generic = primitives_get_generic(); |
444 | | primitives_init_YCoCg(prims); |
445 | | |
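| | /* Install the SSSE3 path only if the CPU reports both SSE3 and SSSE3 support. */ |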
446 | | if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) && |
447 | | IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) |
448 | | { |
449 | | WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); |
450 | | prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R; |
451 | | } |
452 | | #else |
453 | 0 | WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); |
454 | 0 | WINPR_UNUSED(prims); |
455 | 0 | #endif |
456 | 0 | } |