/src/FreeRDP/libfreerdp/primitives/primitives.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* primitives.c |
2 | | * This code queries processor features and calls the init/deinit routines. |
3 | | * vi:ts=4 sw=4 |
4 | | * |
5 | | * Copyright 2011 Martin Fleisz <martin.fleisz@thincast.com> |
6 | | * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. |
7 | | * Copyright 2019 David Fort <contact@hardening-consulting.com> |
8 | | * |
9 | | * Licensed under the Apache License, Version 2.0 (the "License"); you may |
10 | | * not use this file except in compliance with the License. You may obtain |
11 | | * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. |
12 | | * Unless required by applicable law or agreed to in writing, software |
13 | | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
15 | | * or implied. See the License for the specific language governing |
16 | | * permissions and limitations under the License. |
17 | | */ |
18 | | |
19 | | #include <freerdp/config.h> |
20 | | |
21 | | #include <string.h> |
22 | | #include <stdlib.h> |
23 | | |
24 | | #include <winpr/synch.h> |
25 | | #include <winpr/sysinfo.h> |
26 | | #include <winpr/crypto.h> |
27 | | #include <freerdp/primitives.h> |
28 | | |
29 | | #include "prim_internal.h" |
30 | | |
31 | | #include <freerdp/log.h> |
32 | | #define TAG FREERDP_TAG("primitives") |
33 | | |
34 | | /* hints to know which kind of primitives to use */ |
35 | | static primitive_hints primitivesHints = PRIMITIVES_AUTODETECT; |
36 | | static BOOL primitives_init_optimized(primitives_t* prims); |
37 | | |
38 | | void primitives_set_hints(primitive_hints hints) |
39 | 0 | { |
40 | 0 | primitivesHints = hints; |
41 | 0 | } |
42 | | |
43 | | primitive_hints primitives_get_hints(void) |
44 | 0 | { |
45 | 0 | return primitivesHints; |
46 | 0 | } |
47 | | |
48 | | /* Singleton pointer used throughout the program when requested. */ |
49 | | static primitives_t pPrimitivesGeneric = { 0 }; |
50 | | static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT; |
51 | | |
52 | | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
53 | | static primitives_t pPrimitivesCpu = { 0 }; |
54 | | static INIT_ONCE cpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT; |
55 | | |
56 | | #endif |
57 | | #if defined(WITH_OPENCL) |
58 | | static primitives_t pPrimitivesGpu = { 0 }; |
59 | | static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT; |
60 | | |
61 | | #endif |
62 | | |
63 | | static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT; |
64 | | |
65 | | static primitives_t pPrimitives = { 0 }; |
66 | | |
67 | | /* ------------------------------------------------------------------------- */ |
68 | | static BOOL primitives_init_generic(primitives_t* prims) |
69 | 1 | { |
70 | 1 | primitives_init_add(prims); |
71 | 1 | primitives_init_andor(prims); |
72 | 1 | primitives_init_alphaComp(prims); |
73 | 1 | primitives_init_copy(prims); |
74 | 1 | primitives_init_set(prims); |
75 | 1 | primitives_init_shift(prims); |
76 | 1 | primitives_init_sign(prims); |
77 | 1 | primitives_init_colors(prims); |
78 | 1 | primitives_init_YCoCg(prims); |
79 | 1 | primitives_init_YUV(prims); |
80 | 1 | prims->uninit = NULL; |
81 | 1 | return TRUE; |
82 | 1 | } |
83 | | |
84 | | static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PVOID* context) |
85 | 1 | { |
86 | 1 | WINPR_UNUSED(once); |
87 | 1 | WINPR_UNUSED(param); |
88 | 1 | WINPR_UNUSED(context); |
89 | 1 | return primitives_init_generic(&pPrimitivesGeneric); |
90 | 1 | } |
91 | | |
92 | | static BOOL primitives_init_optimized(primitives_t* prims) |
93 | 0 | { |
94 | 0 | primitives_init_generic(prims); |
95 | 0 |
|
96 | 0 | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
97 | 0 | primitives_init_add_opt(prims); |
98 | 0 | primitives_init_andor_opt(prims); |
99 | 0 | primitives_init_alphaComp_opt(prims); |
100 | 0 | primitives_init_copy_opt(prims); |
101 | 0 | primitives_init_set_opt(prims); |
102 | 0 | primitives_init_shift_opt(prims); |
103 | 0 | primitives_init_sign_opt(prims); |
104 | 0 | primitives_init_colors_opt(prims); |
105 | 0 | primitives_init_YCoCg_opt(prims); |
106 | 0 | primitives_init_YUV_opt(prims); |
107 | 0 | prims->flags |= PRIM_FLAGS_HAVE_EXTCPU; |
108 | 0 | #endif |
109 | 0 | return TRUE; |
110 | 0 | } |
111 | | |
112 | | typedef struct |
113 | | { |
114 | | BYTE* channels[3]; |
115 | | UINT32 steps[3]; |
116 | | prim_size_t roi; |
117 | | BYTE* outputBuffer; |
118 | | UINT32 outputStride; |
119 | | UINT32 testedFormat; |
120 | | } primitives_YUV_benchmark; |
121 | | |
122 | | static void primitives_YUV_benchmark_free(primitives_YUV_benchmark* bench) |
123 | 0 | { |
124 | 0 | if (!bench) |
125 | 0 | return; |
126 | 0 |
|
127 | 0 | free(bench->outputBuffer); |
128 | 0 |
|
129 | 0 | for (int i = 0; i < 3; i++) |
130 | 0 | free(bench->channels[i]); |
131 | 0 | memset(bench, 0, sizeof(primitives_YUV_benchmark)); |
132 | 0 | } |
133 | | |
134 | | static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_benchmark* ret) |
135 | 0 | { |
136 | 0 | prim_size_t* roi = NULL; |
137 | 0 | if (!ret) |
138 | 0 | return NULL; |
139 | 0 |
|
140 | 0 | memset(ret, 0, sizeof(primitives_YUV_benchmark)); |
141 | 0 | roi = &ret->roi; |
142 | 0 | roi->width = 1024; |
143 | 0 | roi->height = 768; |
144 | 0 | ret->outputStride = roi->width * 4; |
145 | 0 | ret->testedFormat = PIXEL_FORMAT_BGRA32; |
146 | 0 |
|
147 | 0 | ret->outputBuffer = calloc(ret->outputStride, roi->height); |
148 | 0 | if (!ret->outputBuffer) |
149 | 0 | goto fail; |
150 | 0 |
|
151 | 0 | for (int i = 0; i < 3; i++) |
152 | 0 | { |
153 | 0 | BYTE* buf = ret->channels[i] = calloc(roi->width, roi->height); |
154 | 0 | if (!buf) |
155 | 0 | goto fail; |
156 | 0 |
|
157 | 0 | winpr_RAND(buf, 1ull * roi->width * roi->height); |
158 | 0 | ret->steps[i] = roi->width; |
159 | 0 | } |
160 | 0 |
|
161 | 0 | return ret; |
162 | 0 |
|
163 | 0 | fail: |
164 | 0 | primitives_YUV_benchmark_free(ret); |
165 | 0 | return ret; |
166 | 0 | } |
167 | | |
168 | | static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims, |
169 | | UINT64 runTime, UINT32* computations) |
170 | 0 | { |
171 | 0 | ULONGLONG dueDate = 0; |
172 | 0 | const BYTE* channels[3] = { 0 }; |
173 | 0 | pstatus_t status = 0; |
174 | 0 |
|
175 | 0 | *computations = 0; |
176 | 0 |
|
177 | 0 | for (size_t i = 0; i < 3; i++) |
178 | 0 | channels[i] = bench->channels[i]; |
179 | 0 |
|
180 | 0 | /* do a first dry run to initialize cache and such */ |
181 | 0 | status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer, |
182 | 0 | bench->outputStride, bench->testedFormat, &bench->roi); |
183 | 0 | if (status != PRIMITIVES_SUCCESS) |
184 | 0 | return FALSE; |
185 | 0 |
|
186 | 0 | /* let's run the benchmark */ |
187 | 0 | dueDate = GetTickCount64() + runTime; |
188 | 0 | while (GetTickCount64() < dueDate) |
189 | 0 | { |
190 | 0 | pstatus_t cstatus = |
191 | 0 | prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer, |
192 | 0 | bench->outputStride, bench->testedFormat, &bench->roi); |
193 | 0 | if (cstatus != PRIMITIVES_SUCCESS) |
194 | 0 | return FALSE; |
195 | 0 | *computations = *computations + 1; |
196 | 0 | } |
197 | 0 | return TRUE; |
198 | 0 | } |
199 | | |
200 | | static BOOL primitives_autodetect_best(primitives_t* prims) |
201 | 1 | { |
202 | 1 | BOOL ret = FALSE; |
203 | 1 | struct prim_benchmark |
204 | 1 | { |
205 | 1 | const char* name; |
206 | 1 | primitives_t* prims; |
207 | 1 | UINT32 flags; |
208 | 1 | UINT32 count; |
209 | 1 | }; |
210 | | |
211 | 1 | struct prim_benchmark testcases[] = |
212 | 1 | { |
213 | 1 | { "generic", NULL, PRIMITIVES_PURE_SOFT, 0 }, |
214 | | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
215 | | { "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 }, |
216 | | #endif |
217 | | #if defined(WITH_OPENCL) |
218 | | { "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 }, |
219 | | #endif |
220 | 1 | }; |
221 | 1 | const struct prim_benchmark* best = NULL; |
222 | | |
223 | 1 | #if !defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) && !defined(WITH_OPENCL) |
224 | 1 | { |
225 | 1 | struct prim_benchmark* cur = &testcases[0]; |
226 | 1 | cur->prims = primitives_get_by_type(cur->flags); |
227 | 1 | if (!cur->prims) |
228 | 0 | { |
229 | 0 | WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name); |
230 | 0 | return FALSE; |
231 | 0 | } |
232 | 1 | WLog_DBG(TAG, "primitives benchmark: only one backend, skipping..."); |
233 | 1 | best = cur; |
234 | 1 | } |
235 | | #else |
236 | | { |
237 | | UINT64 benchDuration = 150; /* 150 ms */ |
238 | | primitives_YUV_benchmark bench = { 0 }; |
239 | | primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench); |
240 | | if (!yuvBench) |
241 | | return FALSE; |
242 | | |
243 | | WLog_DBG(TAG, "primitives benchmark result:"); |
244 | | for (size_t x = 0; x < ARRAYSIZE(testcases); x++) |
245 | | { |
246 | | struct prim_benchmark* cur = &testcases[x]; |
247 | | cur->prims = primitives_get_by_type(cur->flags); |
248 | | if (!cur->prims) |
249 | | { |
250 | | WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name); |
251 | | continue; |
252 | | } |
253 | | if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count)) |
254 | | { |
255 | | WLog_WARN(TAG, "error running %s YUV bench", cur->name); |
256 | | continue; |
257 | | } |
258 | | |
259 | | WLog_DBG(TAG, " * %s= %" PRIu32, cur->name, cur->count); |
260 | | if (!best || (best->count < cur->count)) |
261 | | best = cur; |
262 | | } |
263 | | primitives_YUV_benchmark_free(yuvBench); |
264 | | } |
265 | | #endif |
266 | | |
267 | 1 | if (!best) |
268 | 0 | { |
269 | 0 | WLog_ERR(TAG, "No primitives to test, aborting."); |
270 | 0 | goto out; |
271 | 0 | } |
272 | | /* finally compute the results */ |
273 | 1 | *prims = *best->prims; |
274 | | |
275 | 1 | WLog_DBG(TAG, "primitives autodetect, using %s", best->name); |
276 | 1 | ret = TRUE; |
277 | 1 | out: |
278 | 1 | if (!ret) |
279 | 0 | *prims = pPrimitivesGeneric; |
280 | | |
281 | 1 | return ret; |
282 | 1 | } |
283 | | |
284 | | #if defined(WITH_OPENCL) |
285 | | static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID* context) |
286 | | { |
287 | | WINPR_UNUSED(once); |
288 | | WINPR_UNUSED(param); |
289 | | WINPR_UNUSED(context); |
290 | | |
291 | | if (!primitives_init_opencl(&pPrimitivesGpu)) |
292 | | return FALSE; |
293 | | |
294 | | return TRUE; |
295 | | } |
296 | | #endif |
297 | | |
298 | | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
299 | | static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID* context) |
300 | | { |
301 | | WINPR_UNUSED(once); |
302 | | WINPR_UNUSED(param); |
303 | | WINPR_UNUSED(context); |
304 | | |
305 | | if (!primitives_init_optimized(&pPrimitivesCpu)) |
306 | | return FALSE; |
307 | | |
308 | | return TRUE; |
309 | | } |
310 | | #endif |
311 | | |
312 | | static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID* context) |
313 | 1 | { |
314 | 1 | WINPR_UNUSED(once); |
315 | 1 | WINPR_UNUSED(param); |
316 | 1 | WINPR_UNUSED(context); |
317 | | |
318 | 1 | return primitives_init(&pPrimitives, primitivesHints); |
319 | 1 | } |
320 | | |
321 | | BOOL primitives_init(primitives_t* p, primitive_hints hints) |
322 | 1 | { |
323 | 1 | switch (hints) |
324 | 1 | { |
325 | 1 | case PRIMITIVES_AUTODETECT: |
326 | 1 | return primitives_autodetect_best(p); |
327 | 0 | case PRIMITIVES_PURE_SOFT: |
328 | 0 | *p = pPrimitivesGeneric; |
329 | 0 | return TRUE; |
330 | 0 | case PRIMITIVES_ONLY_CPU: |
331 | | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
332 | | *p = pPrimitivesCpu; |
333 | | return TRUE; |
334 | | #endif |
335 | 0 | case PRIMITIVES_ONLY_GPU: |
336 | | #if defined(WITH_OPENCL) |
337 | | *p = pPrimitivesGpu; |
338 | | return TRUE; |
339 | | #endif |
340 | 0 | default: |
341 | 0 | WLog_ERR(TAG, "unknown hint %d", hints); |
342 | 0 | return FALSE; |
343 | 1 | } |
344 | 1 | } |
345 | | |
346 | | void primitives_uninit(void) |
347 | 0 | { |
348 | | #if defined(WITH_OPENCL) |
349 | | if (pPrimitivesGpu.uninit) |
350 | | pPrimitivesGpu.uninit(); |
351 | | #endif |
352 | | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
353 | | if (pPrimitivesCpu.uninit) |
354 | | pPrimitivesCpu.uninit(); |
355 | | #endif |
356 | 0 | if (pPrimitivesGeneric.uninit) |
357 | 0 | pPrimitivesGeneric.uninit(); |
358 | 0 | } |
359 | | |
360 | | /* ------------------------------------------------------------------------- */ |
361 | | static void setup(void) |
362 | 22.8k | { |
363 | 22.8k | InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL); |
364 | | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
365 | | InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL); |
366 | | #endif |
367 | | #if defined(WITH_OPENCL) |
368 | | InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL); |
369 | | #endif |
370 | 22.8k | InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL); |
371 | 22.8k | } |
372 | | |
373 | | primitives_t* primitives_get(void) |
374 | 22.8k | { |
375 | 22.8k | setup(); |
376 | 22.8k | return &pPrimitives; |
377 | 22.8k | } |
378 | | |
379 | | primitives_t* primitives_get_generic(void) |
380 | 0 | { |
381 | 0 | InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL); |
382 | 0 | return &pPrimitivesGeneric; |
383 | 0 | } |
384 | | |
385 | | primitives_t* primitives_get_by_type(DWORD type) |
386 | 1 | { |
387 | 1 | InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL); |
388 | | |
389 | 1 | switch (type) |
390 | 1 | { |
391 | 0 | case PRIMITIVES_ONLY_GPU: |
392 | | #if defined(WITH_OPENCL) |
393 | | if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL)) |
394 | | return NULL; |
395 | | return &pPrimitivesGpu; |
396 | | #endif |
397 | 0 | case PRIMITIVES_ONLY_CPU: |
398 | | #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) |
399 | | if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL)) |
400 | | return NULL; |
401 | | return &pPrimitivesCpu; |
402 | | #endif |
403 | 1 | case PRIMITIVES_PURE_SOFT: |
404 | 1 | default: |
405 | 1 | return &pPrimitivesGeneric; |
406 | 1 | } |
407 | 1 | } |
408 | | |
409 | | DWORD primitives_flags(primitives_t* p) |
410 | 0 | { |
411 | 0 | return p->flags; |
412 | 0 | } |