/src/FreeRDP/libfreerdp/primitives/prim_copy.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* FreeRDP: A Remote Desktop Protocol Client |
2 | | * Copy operations. |
3 | | * vi:ts=4 sw=4: |
4 | | * |
5 | | * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. |
6 | | * Licensed under the Apache License, Version 2.0 (the "License"); you may |
7 | | * not use this file except in compliance with the License. You may obtain |
8 | | * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. |
9 | | * Unless required by applicable law or agreed to in writing, software |
10 | | * distributed under the License is distributed on an "AS IS" BASIS, |
11 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
12 | | * or implied. See the License for the specific language governing |
13 | | * permissions and limitations under the License. |
14 | | */ |
15 | | |
16 | | #include <freerdp/config.h> |
17 | | |
18 | | #include <string.h> |
19 | | #include <freerdp/types.h> |
20 | | #include <freerdp/primitives.h> |
21 | | #ifdef WITH_IPP |
22 | | #include <ipps.h> |
23 | | #include <ippi.h> |
24 | | #endif /* WITH_IPP */ |
25 | | #include "prim_internal.h" |
26 | | |
/* Primitives table returned by primitives_get_generic().
 * It is assigned only inside primitives_init_copy_opt(); until that function
 * has run (or in builds where it is not compiled) this pointer stays NULL.
 */
static primitives_t* generic = NULL;
28 | | |
29 | | /* ------------------------------------------------------------------------- */ |
30 | | /*static inline BOOL memory_regions_overlap_1d(*/ |
31 | | static BOOL memory_regions_overlap_1d(const BYTE* p1, const BYTE* p2, size_t bytes) |
32 | 0 | { |
33 | 0 | const ULONG_PTR p1m = (const ULONG_PTR)p1; |
34 | 0 | const ULONG_PTR p2m = (const ULONG_PTR)p2; |
35 | |
|
36 | 0 | if (p1m <= p2m) |
37 | 0 | { |
38 | 0 | if (p1m + bytes > p2m) |
39 | 0 | return TRUE; |
40 | 0 | } |
41 | 0 | else |
42 | 0 | { |
43 | 0 | if (p2m + bytes > p1m) |
44 | 0 | return TRUE; |
45 | 0 | } |
46 | | |
47 | | /* else */ |
48 | 0 | return FALSE; |
49 | 0 | } |
50 | | |
51 | | /* ------------------------------------------------------------------------- */ |
52 | | /*static inline BOOL memory_regions_overlap_2d( */ |
53 | | static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, const BYTE* p2, |
54 | | int p2Step, int p2Size, int width, int height) |
55 | 0 | { |
56 | 0 | ULONG_PTR p1m = (ULONG_PTR)p1; |
57 | 0 | ULONG_PTR p2m = (ULONG_PTR)p2; |
58 | |
|
59 | 0 | if (p1m <= p2m) |
60 | 0 | { |
61 | 0 | ULONG_PTR p1mEnd = p1m + 1ull * (height - 1) * p1Step + 1ull * width * p1Size; |
62 | |
|
63 | 0 | if (p1mEnd > p2m) |
64 | 0 | return TRUE; |
65 | 0 | } |
66 | 0 | else |
67 | 0 | { |
68 | 0 | ULONG_PTR p2mEnd = p2m + 1ull * (height - 1) * p2Step + 1ull * width * p2Size; |
69 | |
|
70 | 0 | if (p2mEnd > p1m) |
71 | 0 | return TRUE; |
72 | 0 | } |
73 | | |
74 | | /* else */ |
75 | 0 | return FALSE; |
76 | 0 | } |
77 | | |
78 | | /* ------------------------------------------------------------------------- */ |
79 | | static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len) |
80 | 0 | { |
81 | 0 | if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len)) |
82 | 0 | { |
83 | 0 | memmove((void*)pDst, (const void*)pSrc, (size_t)len); |
84 | 0 | } |
85 | 0 | else |
86 | 0 | { |
87 | 0 | memcpy((void*)pDst, (const void*)pSrc, (size_t)len); |
88 | 0 | } |
89 | |
|
90 | 0 | return PRIMITIVES_SUCCESS; |
91 | 0 | } |
92 | | |
93 | | /* ------------------------------------------------------------------------- */ |
94 | | /* Copy a block of pixels from one buffer to another. |
95 | | * The addresses are assumed to have been already offset to the upper-left |
96 | | * corners of the source and destination region of interest. |
97 | | */ |
98 | | static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep, |
99 | | INT32 width, INT32 height) |
100 | 0 | { |
101 | 0 | const BYTE* src = (const BYTE*)pSrc; |
102 | 0 | BYTE* dst = (BYTE*)pDst; |
103 | 0 | int rowbytes = width * sizeof(UINT32); |
104 | |
|
105 | 0 | if ((width == 0) || (height == 0)) |
106 | 0 | return PRIMITIVES_SUCCESS; |
107 | | |
108 | 0 | if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32), pDst, dstStep, sizeof(UINT32), |
109 | 0 | width, height)) |
110 | 0 | { |
111 | 0 | do |
112 | 0 | { |
113 | 0 | generic->copy(src, dst, rowbytes); |
114 | 0 | src += srcStep; |
115 | 0 | dst += dstStep; |
116 | 0 | } while (--height); |
117 | 0 | } |
118 | 0 | else |
119 | 0 | { |
120 | | /* TODO: do it in one operation when the rowdata is adjacent. */ |
121 | 0 | do |
122 | 0 | { |
123 | | /* If we find a replacement for memcpy that is consistently |
124 | | * faster, this could be replaced with that. |
125 | | */ |
126 | 0 | memcpy(dst, src, rowbytes); |
127 | 0 | src += srcStep; |
128 | 0 | dst += dstStep; |
129 | 0 | } while (--height); |
130 | 0 | } |
131 | |
|
132 | 0 | return PRIMITIVES_SUCCESS; |
133 | 0 | } |
134 | | |
135 | | #ifdef WITH_IPP |
136 | | /* ------------------------------------------------------------------------- */ |
137 | | /* This is just ippiCopy_8u_AC4R without the IppiSize structure parameter. */ |
138 | | static pstatus_t ippiCopy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep, |
139 | | INT32 width, INT32 height) |
140 | | { |
141 | | IppiSize roi; |
142 | | roi.width = width; |
143 | | roi.height = height; |
144 | | return (pstatus_t)ippiCopy_8u_AC4R(pSrc, srcStep, pDst, dstStep, roi); |
145 | | } |
146 | | #endif /* WITH_IPP */ |
147 | | |
148 | | /* ------------------------------------------------------------------------- */ |
149 | | void primitives_init_copy(primitives_t* prims) |
150 | 0 | { |
151 | | /* Start with the default. */ |
152 | 0 | prims->copy_8u = general_copy_8u; |
153 | 0 | prims->copy_8u_AC4r = general_copy_8u_AC4r; |
154 | | /* This is just an alias with void* parameters */ |
155 | 0 | prims->copy = (__copy_t)(prims->copy_8u); |
156 | 0 | } |
157 | | |
158 | | #if defined(WITH_SSE2) || defined(WITH_NEON) |
159 | | void primitives_init_copy_opt(primitives_t* prims) |
160 | 0 | { |
161 | 0 | generic = primitives_get_generic(); |
162 | 0 | primitives_init_copy(prims); |
163 | | /* Pick tuned versions if possible. */ |
164 | | #ifdef WITH_IPP |
165 | | prims->copy_8u = (__copy_8u_t)ippsCopy_8u; |
166 | | prims->copy_8u_AC4r = (__copy_8u_AC4r_t)ippiCopy_8u_AC4r; |
167 | | #endif |
168 | | /* Performance with an SSE2 version with no prefetch seemed to be |
169 | | * all over the map vs. memcpy. |
170 | | * Sometimes it was significantly faster, sometimes dreadfully slower, |
171 | | * and it seemed to vary a lot depending on block size and processor. |
172 | | * Hence, no SSE version is used here unless once can be written that |
173 | | * is consistently faster than memcpy. |
174 | | */ |
175 | | /* This is just an alias with void* parameters */ |
176 | 0 | prims->copy = (__copy_t)(prims->copy_8u); |
177 | 0 | } |
178 | | #endif |