/src/FreeRDP/libfreerdp/primitives/neon/prim_YCoCg_neon.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* FreeRDP: A Remote Desktop Protocol Client  | 
2  |  |  * Optimized YCoCg<->RGB conversion operations.  | 
3  |  |  * vi:ts=4 sw=4:  | 
4  |  |  *  | 
5  |  |  * (c) Copyright 2014 Hewlett-Packard Development Company, L.P.  | 
6  |  |  *  | 
7  |  |  * Licensed under the Apache License, Version 2.0 (the "License");  | 
8  |  |  * you may not use this file except in compliance with the License.  | 
9  |  |  * You may obtain a copy of the License at  | 
10  |  |  *  | 
11  |  |  *     http://www.apache.org/licenses/LICENSE-2.0  | 
12  |  |  *  | 
13  |  |  * Unless required by applicable law or agreed to in writing, software  | 
14  |  |  * distributed under the License is distributed on an "AS IS" BASIS,  | 
15  |  |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
16  |  |  * See the License for the specific language governing permissions and  | 
17  |  |  * limitations under the License.  | 
18  |  |  */  | 
19  |  |  | 
20  |  | #include <freerdp/config.h>  | 
21  |  |  | 
22  |  | #include <freerdp/types.h>  | 
23  |  | #include <freerdp/primitives.h>  | 
24  |  | #include <winpr/sysinfo.h>  | 
25  |  |  | 
26  |  | #include "prim_internal.h"  | 
27  |  | #include "prim_templates.h"  | 
28  |  | #include "prim_YCoCg.h"  | 
29  |  |  | 
30  |  | #if defined(NEON_ENABLED)  | 
31  |  | #include <arm_neon.h>  | 
32  |  |  | 
33  |  | static primitives_t* generic = NULL;  | 
34  |  |  | 
35  |  | static pstatus_t neon_YCoCgToRGB_8u_X(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,  | 
36  |  |                                       BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep,  | 
37  |  |                                       UINT32 width, UINT32 height, UINT8 shift, BYTE bPos,  | 
38  |  |                                       BYTE gPos, BYTE rPos, BYTE aPos, BOOL alpha)  | 
39  |  | { | 
40  |  |   BYTE* dptr = pDst;  | 
41  |  |   const BYTE* sptr = pSrc;  | 
42  |  |   const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);  | 
43  |  |   const int8_t cll = shift - 1; /* -1 builds in the /2's */  | 
44  |  |   const UINT32 srcPad = srcStep - (width * 4);  | 
45  |  |   const UINT32 dstPad = dstStep - (width * formatSize);  | 
46  |  |   const UINT32 pad = width % 8;  | 
47  |  |   const uint8x8_t aVal = vdup_n_u8(0xFF);  | 
48  |  |   const int8x8_t cllv = vdup_n_s8(cll);  | 
49  |  |  | 
50  |  |   for (UINT32 y = 0; y < height; y++)  | 
51  |  |   { | 
52  |  |     for (UINT32 x = 0; x < width - pad; x += 8)  | 
53  |  |     { | 
54  |  |       /* Note: shifts must be done before sign-conversion. */  | 
55  |  |       const uint8x8x4_t raw = vld4_u8(sptr);  | 
56  |  |       const int8x8_t CgRaw = vreinterpret_s8_u8(vshl_u8(raw.val[0], cllv));  | 
57  |  |       const int8x8_t CoRaw = vreinterpret_s8_u8(vshl_u8(raw.val[1], cllv));  | 
58  |  |       const int16x8_t Cg = vmovl_s8(CgRaw);  | 
59  |  |       const int16x8_t Co = vmovl_s8(CoRaw);  | 
60  |  |       const int16x8_t Y = vreinterpretq_s16_u16(vmovl_u8(raw.val[2])); /* UINT8 -> INT16 */  | 
61  |  |       const int16x8_t T = vsubq_s16(Y, Cg);  | 
62  |  |       const int16x8_t R = vaddq_s16(T, Co);  | 
63  |  |       const int16x8_t G = vaddq_s16(Y, Cg);  | 
64  |  |       const int16x8_t B = vsubq_s16(T, Co);  | 
65  |  |       uint8x8x4_t bgrx;  | 
66  |  |       bgrx.val[bPos] = vqmovun_s16(B);  | 
67  |  |       bgrx.val[gPos] = vqmovun_s16(G);  | 
68  |  |       bgrx.val[rPos] = vqmovun_s16(R);  | 
69  |  |  | 
70  |  |       if (alpha)  | 
71  |  |         bgrx.val[aPos] = raw.val[3];  | 
72  |  |       else  | 
73  |  |         bgrx.val[aPos] = aVal;  | 
74  |  |  | 
75  |  |       vst4_u8(dptr, bgrx);  | 
76  |  |       sptr += sizeof(raw);  | 
77  |  |       dptr += sizeof(bgrx);  | 
78  |  |     }  | 
79  |  |  | 
80  |  |     for (UINT32 x = 0; x < pad; x++)  | 
81  |  |     { | 
82  |  |       /* Note: shifts must be done before sign-conversion. */  | 
83  |  |       const INT16 Cg = (INT16)((INT8)((*sptr++) << cll));  | 
84  |  |       const INT16 Co = (INT16)((INT8)((*sptr++) << cll));  | 
85  |  |       const INT16 Y = (INT16)(*sptr++); /* UINT8->INT16 */  | 
86  |  |       const INT16 T = Y - Cg;  | 
87  |  |       const INT16 R = T + Co;  | 
88  |  |       const INT16 G = Y + Cg;  | 
89  |  |       const INT16 B = T - Co;  | 
90  |  |       BYTE bgra[4];  | 
91  |  |       bgra[bPos] = CLIP(B);  | 
92  |  |       bgra[gPos] = CLIP(G);  | 
93  |  |       bgra[rPos] = CLIP(R);  | 
94  |  |       bgra[aPos] = *sptr++;  | 
95  |  |  | 
96  |  |       if (!alpha)  | 
97  |  |         bgra[aPos] = 0xFF;  | 
98  |  |  | 
99  |  |       *dptr++ = bgra[0];  | 
100  |  |       *dptr++ = bgra[1];  | 
101  |  |       *dptr++ = bgra[2];  | 
102  |  |       *dptr++ = bgra[3];  | 
103  |  |     }  | 
104  |  |  | 
105  |  |     sptr += srcPad;  | 
106  |  |     dptr += dstPad;  | 
107  |  |   }  | 
108  |  |  | 
109  |  |   return PRIMITIVES_SUCCESS;  | 
110  |  | }  | 
111  |  |  | 
112  |  | static pstatus_t neon_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,  | 
113  |  |                                          BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep,  | 
114  |  |                                          UINT32 width, UINT32 height, UINT8 shift, BOOL withAlpha)  | 
115  |  | { | 
116  |  |   switch (DstFormat)  | 
117  |  |   { | 
118  |  |     case PIXEL_FORMAT_BGRA32:  | 
119  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
120  |  |                                   shift, 2, 1, 0, 3, withAlpha);  | 
121  |  |  | 
122  |  |     case PIXEL_FORMAT_BGRX32:  | 
123  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
124  |  |                                   shift, 2, 1, 0, 3, withAlpha);  | 
125  |  |  | 
126  |  |     case PIXEL_FORMAT_RGBA32:  | 
127  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
128  |  |                                   shift, 0, 1, 2, 3, withAlpha);  | 
129  |  |  | 
130  |  |     case PIXEL_FORMAT_RGBX32:  | 
131  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
132  |  |                                   shift, 0, 1, 2, 3, withAlpha);  | 
133  |  |  | 
134  |  |     case PIXEL_FORMAT_ARGB32:  | 
135  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
136  |  |                                   shift, 1, 2, 3, 0, withAlpha);  | 
137  |  |  | 
138  |  |     case PIXEL_FORMAT_XRGB32:  | 
139  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
140  |  |                                   shift, 1, 2, 3, 0, withAlpha);  | 
141  |  |  | 
142  |  |     case PIXEL_FORMAT_ABGR32:  | 
143  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
144  |  |                                   shift, 3, 2, 1, 0, withAlpha);  | 
145  |  |  | 
146  |  |     case PIXEL_FORMAT_XBGR32:  | 
147  |  |       return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,  | 
148  |  |                                   shift, 3, 2, 1, 0, withAlpha);  | 
149  |  |  | 
150  |  |     default:  | 
151  |  |       return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width,  | 
152  |  |                                          height, shift, withAlpha);  | 
153  |  |   }  | 
154  |  | }  | 
155  |  | #endif  | 
156  |  |  | 
157  |  | /* ------------------------------------------------------------------------- */  | 
158  |  | void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims)  | 
159  | 0  | { | 
160  |  | #if defined(NEON_ENABLED)  | 
161  |  |   generic = primitives_get_generic();  | 
162  |  |   primitives_init_YCoCg(prims);  | 
163  |  |  | 
164  |  |   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))  | 
165  |  |   { | 
166  |  |     WLog_VRB(PRIM_TAG, "NEON optimizations");  | 
167  |  |     prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;  | 
168  |  |   }  | 
169  |  | #else  | 
170  | 0  |   WLog_VRB(PRIM_TAG, "undefined WITH_NEON");  | 
171  | 0  |   WINPR_UNUSED(prims);  | 
172  | 0  | #endif  | 
173  | 0  | }  |