/src/FreeRDP/libfreerdp/primitives/neon/prim_YCoCg_neon.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* FreeRDP: A Remote Desktop Protocol Client |
2 | | * Optimized YCoCg<->RGB conversion operations. |
3 | | * vi:ts=4 sw=4: |
4 | | * |
5 | | * (c) Copyright 2014 Hewlett-Packard Development Company, L.P. |
6 | | * |
7 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | * you may not use this file except in compliance with the License. |
9 | | * You may obtain a copy of the License at |
10 | | * |
11 | | * http://www.apache.org/licenses/LICENSE-2.0 |
12 | | * |
13 | | * Unless required by applicable law or agreed to in writing, software |
14 | | * distributed under the License is distributed on an "AS IS" BASIS, |
15 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | * See the License for the specific language governing permissions and |
17 | | * limitations under the License. |
18 | | */ |
19 | | |
20 | | #include <freerdp/config.h> |
21 | | |
22 | | #include <freerdp/types.h> |
23 | | #include <freerdp/primitives.h> |
24 | | #include <winpr/sysinfo.h> |
25 | | |
26 | | #include "prim_internal.h" |
27 | | #include "prim_YCoCg.h" |
28 | | |
29 | | #if defined(NEON_INTRINSICS_ENABLED) |
30 | | #include <arm_neon.h> |
31 | | |
32 | | static primitives_t* generic = NULL; |
33 | | |
34 | | static pstatus_t neon_YCoCgToRGB_8u_X(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, |
35 | | BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep, |
36 | | UINT32 width, UINT32 height, UINT8 shift, BYTE bPos, |
37 | | BYTE gPos, BYTE rPos, BYTE aPos, BOOL alpha) |
38 | | { |
39 | | BYTE* dptr = pDst; |
40 | | const BYTE* sptr = pSrc; |
41 | | const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat); |
42 | | const int8_t cll = shift - 1; /* -1 builds in the /2's */ |
43 | | const UINT32 srcPad = srcStep - (width * 4); |
44 | | const UINT32 dstPad = dstStep - (width * formatSize); |
45 | | const UINT32 pad = width % 8; |
46 | | const uint8x8_t aVal = vdup_n_u8(0xFF); |
47 | | const int8x8_t cllv = vdup_n_s8(cll); |
48 | | |
49 | | for (UINT32 y = 0; y < height; y++) |
50 | | { |
51 | | for (UINT32 x = 0; x < width - pad; x += 8) |
52 | | { |
53 | | /* Note: shifts must be done before sign-conversion. */ |
54 | | const uint8x8x4_t raw = vld4_u8(sptr); |
55 | | const int8x8_t CgRaw = vreinterpret_s8_u8(vshl_u8(raw.val[0], cllv)); |
56 | | const int8x8_t CoRaw = vreinterpret_s8_u8(vshl_u8(raw.val[1], cllv)); |
57 | | const int16x8_t Cg = vmovl_s8(CgRaw); |
58 | | const int16x8_t Co = vmovl_s8(CoRaw); |
59 | | const int16x8_t Y = vreinterpretq_s16_u16(vmovl_u8(raw.val[2])); /* UINT8 -> INT16 */ |
60 | | const int16x8_t T = vsubq_s16(Y, Cg); |
61 | | const int16x8_t R = vaddq_s16(T, Co); |
62 | | const int16x8_t G = vaddq_s16(Y, Cg); |
63 | | const int16x8_t B = vsubq_s16(T, Co); |
64 | | uint8x8x4_t bgrx; |
65 | | bgrx.val[bPos] = vqmovun_s16(B); |
66 | | bgrx.val[gPos] = vqmovun_s16(G); |
67 | | bgrx.val[rPos] = vqmovun_s16(R); |
68 | | |
69 | | if (alpha) |
70 | | bgrx.val[aPos] = raw.val[3]; |
71 | | else |
72 | | bgrx.val[aPos] = aVal; |
73 | | |
74 | | vst4_u8(dptr, bgrx); |
75 | | sptr += sizeof(raw); |
76 | | dptr += sizeof(bgrx); |
77 | | } |
78 | | |
79 | | for (UINT32 x = 0; x < pad; x++) |
80 | | { |
81 | | /* Note: shifts must be done before sign-conversion. */ |
82 | | const INT16 Cg = (INT16)((INT8)((*sptr++) << cll)); |
83 | | const INT16 Co = (INT16)((INT8)((*sptr++) << cll)); |
84 | | const INT16 Y = (INT16)(*sptr++); /* UINT8->INT16 */ |
85 | | const INT16 T = Y - Cg; |
86 | | const INT16 R = T + Co; |
87 | | const INT16 G = Y + Cg; |
88 | | const INT16 B = T - Co; |
89 | | BYTE bgra[4]; |
90 | | bgra[bPos] = CLIP(B); |
91 | | bgra[gPos] = CLIP(G); |
92 | | bgra[rPos] = CLIP(R); |
93 | | bgra[aPos] = *sptr++; |
94 | | |
95 | | if (!alpha) |
96 | | bgra[aPos] = 0xFF; |
97 | | |
98 | | *dptr++ = bgra[0]; |
99 | | *dptr++ = bgra[1]; |
100 | | *dptr++ = bgra[2]; |
101 | | *dptr++ = bgra[3]; |
102 | | } |
103 | | |
104 | | sptr += srcPad; |
105 | | dptr += dstPad; |
106 | | } |
107 | | |
108 | | return PRIMITIVES_SUCCESS; |
109 | | } |
110 | | |
111 | | static pstatus_t neon_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, |
112 | | BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep, |
113 | | UINT32 width, UINT32 height, UINT8 shift, BOOL withAlpha) |
114 | | { |
115 | | switch (DstFormat) |
116 | | { |
117 | | case PIXEL_FORMAT_BGRA32: |
118 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
119 | | shift, 2, 1, 0, 3, withAlpha); |
120 | | |
121 | | case PIXEL_FORMAT_BGRX32: |
122 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
123 | | shift, 2, 1, 0, 3, withAlpha); |
124 | | |
125 | | case PIXEL_FORMAT_RGBA32: |
126 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
127 | | shift, 0, 1, 2, 3, withAlpha); |
128 | | |
129 | | case PIXEL_FORMAT_RGBX32: |
130 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
131 | | shift, 0, 1, 2, 3, withAlpha); |
132 | | |
133 | | case PIXEL_FORMAT_ARGB32: |
134 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
135 | | shift, 1, 2, 3, 0, withAlpha); |
136 | | |
137 | | case PIXEL_FORMAT_XRGB32: |
138 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
139 | | shift, 1, 2, 3, 0, withAlpha); |
140 | | |
141 | | case PIXEL_FORMAT_ABGR32: |
142 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
143 | | shift, 3, 2, 1, 0, withAlpha); |
144 | | |
145 | | case PIXEL_FORMAT_XBGR32: |
146 | | return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height, |
147 | | shift, 3, 2, 1, 0, withAlpha); |
148 | | |
149 | | default: |
150 | | return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width, |
151 | | height, shift, withAlpha); |
152 | | } |
153 | | } |
154 | | #endif |
155 | | |
156 | | /* ------------------------------------------------------------------------- */ |
157 | | void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims) |
158 | 0 | { |
159 | | #if defined(NEON_INTRINSICS_ENABLED) |
160 | | generic = primitives_get_generic(); |
161 | | |
162 | | WLog_VRB(PRIM_TAG, "NEON optimizations"); |
163 | | prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R; |
164 | | #else |
165 | 0 | WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or neon intrinsics not available"); |
166 | 0 | WINPR_UNUSED(prims); |
167 | 0 | #endif |
168 | 0 | } |