/src/FreeRDP/libfreerdp/primitives/neon/prim_YCoCg_neon.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* FreeRDP: A Remote Desktop Protocol Client |
2 | | * Optimized YCoCg<->RGB conversion operations. |
3 | | * vi:ts=4 sw=4: |
4 | | * |
5 | | * (c) Copyright 2014 Hewlett-Packard Development Company, L.P. |
6 | | * |
7 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | * you may not use this file except in compliance with the License. |
9 | | * You may obtain a copy of the License at |
10 | | * |
11 | | * http://www.apache.org/licenses/LICENSE-2.0 |
12 | | * |
13 | | * Unless required by applicable law or agreed to in writing, software |
14 | | * distributed under the License is distributed on an "AS IS" BASIS, |
15 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | * See the License for the specific language governing permissions and |
17 | | * limitations under the License. |
18 | | */ |
19 | | |
20 | | #include <freerdp/config.h> |
21 | | |
22 | | #include <freerdp/types.h> |
23 | | #include <freerdp/primitives.h> |
24 | | #include <winpr/sysinfo.h> |
25 | | |
26 | | #include "prim_internal.h" |
27 | | #include "prim_templates.h" |
28 | | #include "prim_YCoCg.h" |
29 | | |
30 | | #if defined(NEON_ENABLED) |
31 | | #include <arm_neon.h> |
32 | | |
/* Generic primitives table: assigned from primitives_get_generic() in primitives_init_YCoCg_neon() and used by neon_YCoCgToRGB_8u_AC4R() as the fallback for destination formats it does not handle itself. */
33 | | static primitives_t* generic = NULL; |
34 | | |
35 | | static pstatus_t neon_YCoCgToRGB_8u_X(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, |
36 | | BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep, |
37 | | UINT32 width, UINT32 height, UINT8 shift, BYTE bPos, |
38 | | BYTE gPos, BYTE rPos, BYTE aPos, BOOL alpha) |
39 | | { |
40 | | BYTE* dptr = pDst; |
41 | | const BYTE* sptr = pSrc; |
42 | | const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat); |
43 | | const int8_t cll = shift - 1; /* -1 builds in the /2's */ |
44 | | const UINT32 srcPad = srcStep - (width * 4); |
45 | | const UINT32 dstPad = dstStep - (width * formatSize); |
46 | | const UINT32 pad = width % 8; |
47 | | const uint8x8_t aVal = vdup_n_u8(0xFF); |
48 | | const int8x8_t cllv = vdup_n_s8(cll); |
49 | | |
50 | | for (UINT32 y = 0; y < height; y++) |
51 | | { |
52 | | for (UINT32 x = 0; x < width - pad; x += 8) |
53 | | { |
54 | | /* Note: shifts must be done before sign-conversion. */ |
55 | | const uint8x8x4_t raw = vld4_u8(sptr); |
56 | | const int8x8_t CgRaw = vreinterpret_s8_u8(vshl_u8(raw.val[0], cllv)); |
57 | | const int8x8_t CoRaw = vreinterpret_s8_u8(vshl_u8(raw.val[1], cllv)); |
58 | | const int16x8_t Cg = vmovl_s8(CgRaw); |
59 | | const int16x8_t Co = vmovl_s8(CoRaw); |
60 | | const int16x8_t Y = vreinterpretq_s16_u16(vmovl_u8(raw.val[2])); /* UINT8 -> INT16 */ |
61 | | const int16x8_t T = vsubq_s16(Y, Cg); |
62 | | const int16x8_t R = vaddq_s16(T, Co); |
63 | | const int16x8_t G = vaddq_s16(Y, Cg); |
64 | | const int16x8_t B = vsubq_s16(T, Co); |
65 | | uint8x8x4_t bgrx; |
66 | | bgrx.val[bPos] = vqmovun_s16(B); |
67 | | bgrx.val[gPos] = vqmovun_s16(G); |
68 | | bgrx.val[rPos] = vqmovun_s16(R); |
69 | | |
70 | | if (alpha) |
71 | | bgrx.val[aPos] = raw.val[3]; |
72 | | else |
73 | | bgrx.val[aPos] = aVal; |
74 | | |
75 | | vst4_u8(dptr, bgrx); |
76 | | sptr += sizeof(raw); |
77 | | dptr += sizeof(bgrx); |
78 | | } |
79 | | |
80 | | for (UINT32 x = 0; x < pad; x++) |
81 | | { |
82 | | /* Note: shifts must be done before sign-conversion. */ |
83 | | const INT16 Cg = (INT16)((INT8)((*sptr++) << cll)); |
84 | | const INT16 Co = (INT16)((INT8)((*sptr++) << cll)); |
85 | | const INT16 Y = (INT16)(*sptr++); /* UINT8->INT16 */ |
86 | | const INT16 T = Y - Cg; |
87 | | const INT16 R = T + Co; |
88 | | const INT16 G = Y + Cg; |
89 | | const INT16 B = T - Co; |
90 | | BYTE bgra[4]; |
91 | | bgra[bPos] = CLIP(B); |
92 | | bgra[gPos] = CLIP(G); |
93 | | bgra[rPos] = CLIP(R); |
94 | | bgra[aPos] = *sptr++; |
95 | | |
96 | | if (!alpha) |
97 | | bgra[aPos] = 0xFF; |
98 | | |
99 | | *dptr++ = bgra[0]; |
100 | | *dptr++ = bgra[1]; |
101 | | *dptr++ = bgra[2]; |
102 | | *dptr++ = bgra[3]; |
103 | | } |
104 | | |
105 | | sptr += srcPad; |
106 | | dptr += dstPad; |
107 | | } |
108 | | |
109 | | return PRIMITIVES_SUCCESS; |
110 | | } |
111 | | |
112 | | static pstatus_t neon_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, |
113 | | BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep, |
114 | | UINT32 width, UINT32 height, UINT8 shift, BOOL withAlpha) |
115 | | { |
116 | | switch (DstFormat) |
117 | | { |
118 | | case PIXEL_FORMAT_BGRA32: |
119 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
120 | | shift, 2, 1, 0, 3, withAlpha); |
121 | | |
122 | | case PIXEL_FORMAT_BGRX32: |
123 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
124 | | shift, 2, 1, 0, 3, withAlpha); |
125 | | |
126 | | case PIXEL_FORMAT_RGBA32: |
127 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
128 | | shift, 0, 1, 2, 3, withAlpha); |
129 | | |
130 | | case PIXEL_FORMAT_RGBX32: |
131 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
132 | | shift, 0, 1, 2, 3, withAlpha); |
133 | | |
134 | | case PIXEL_FORMAT_ARGB32: |
135 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
136 | | shift, 1, 2, 3, 0, withAlpha); |
137 | | |
138 | | case PIXEL_FORMAT_XRGB32: |
139 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
140 | | shift, 1, 2, 3, 0, withAlpha); |
141 | | |
142 | | case PIXEL_FORMAT_ABGR32: |
143 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
144 | | shift, 3, 2, 1, 0, withAlpha); |
145 | | |
146 | | case PIXEL_FORMAT_XBGR32: |
147 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
148 | | shift, 3, 2, 1, 0, withAlpha); |
149 | | |
150 | | default: |
151 | | return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width, |
152 | | height, shift, withAlpha); |
153 | | } |
154 | | } |
155 | | #endif |
156 | | |
157 | | /* ------------------------------------------------------------------------- */ |
158 | | void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims) |
159 | 0 | { |
160 | | #if defined(NEON_ENABLED) |
161 | | generic = primitives_get_generic(); |
162 | | primitives_init_YCoCg(prims); |
163 | | |
164 | | if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) |
165 | | { |
166 | | WLog_VRB(PRIM_TAG, "NEON optimizations"); |
167 | | prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R; |
168 | | } |
169 | | #else |
170 | 0 | WLog_VRB(PRIM_TAG, "undefined WITH_NEON"); |
171 | 0 | WINPR_UNUSED(prims); |
172 | 0 | #endif |
173 | 0 | } |