/src/FreeRDP/libfreerdp/codec/yuv.c
Line | Count | Source |
1 | | #include <winpr/sysinfo.h> |
2 | | #include <winpr/assert.h> |
3 | | #include <winpr/cast.h> |
4 | | #include <winpr/pool.h> |
5 | | |
6 | | #include <freerdp/settings.h> |
7 | | #include <freerdp/codec/region.h> |
8 | | #include <freerdp/primitives.h> |
9 | | #include <freerdp/log.h> |
10 | | #include <freerdp/codec/yuv.h> |
11 | | |
12 | | #define TAG FREERDP_TAG("codec") |
13 | | |
/* Edge length, in pixels, of the square tiles the threaded decoder cuts region rectangles
 * into. The same value is the padding unit when yuv_context_reset() sizes the worker arrays. */
#define TILE_SIZE 64
15 | | |
16 | | typedef struct |
17 | | { |
18 | | YUV_CONTEXT* context; |
19 | | const BYTE* pYUVData[3]; |
20 | | UINT32 iStride[3]; |
21 | | DWORD DstFormat; |
22 | | BYTE* dest; |
23 | | UINT32 nDstStep; |
24 | | RECTANGLE_16 rect; |
25 | | } YUV_PROCESS_WORK_PARAM; |
26 | | |
27 | | typedef struct |
28 | | { |
29 | | YUV_CONTEXT* context; |
30 | | const BYTE* pYUVData[3]; |
31 | | UINT32 iStride[3]; |
32 | | BYTE* pYUVDstData[3]; |
33 | | UINT32 iDstStride[3]; |
34 | | RECTANGLE_16 rect; |
35 | | BYTE type; |
36 | | } YUV_COMBINE_WORK_PARAM; |
37 | | |
38 | | typedef struct |
39 | | { |
40 | | YUV_CONTEXT* context; |
41 | | const BYTE* pSrcData; |
42 | | |
43 | | DWORD SrcFormat; |
44 | | UINT32 nSrcStep; |
45 | | RECTANGLE_16 rect; |
46 | | BYTE version; |
47 | | |
48 | | BYTE* pYUVLumaData[3]; |
49 | | BYTE* pYUVChromaData[3]; |
50 | | UINT32 iStride[3]; |
51 | | } YUV_ENCODE_WORK_PARAM; |
52 | | |
53 | | struct S_YUV_CONTEXT |
54 | | { |
55 | | UINT32 width, height; |
56 | | BOOL useThreads; |
57 | | BOOL encoder; |
58 | | UINT32 heightStep; |
59 | | |
60 | | UINT32 work_object_count; |
61 | | PTP_WORK* work_objects; |
62 | | YUV_ENCODE_WORK_PARAM* work_enc_params; |
63 | | YUV_PROCESS_WORK_PARAM* work_dec_params; |
64 | | YUV_COMBINE_WORK_PARAM* work_combined_params; |
65 | | }; |
66 | | |
67 | | static inline BOOL avc420_yuv_to_rgb(const BYTE* WINPR_RESTRICT pYUVData[3], |
68 | | const UINT32 iStride[3], |
69 | | const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstStep, |
70 | | BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat) |
71 | 0 | { |
72 | 0 | primitives_t* prims = primitives_get(); |
73 | 0 | prim_size_t roi; |
74 | 0 | const BYTE* pYUVPoint[3]; |
75 | |
|
76 | 0 | WINPR_ASSERT(pYUVData); |
77 | 0 | WINPR_ASSERT(iStride); |
78 | 0 | WINPR_ASSERT(rect); |
79 | 0 | WINPR_ASSERT(pDstData); |
80 | | |
81 | 0 | const INT32 width = rect->right - rect->left; |
82 | 0 | const INT32 height = rect->bottom - rect->top; |
83 | 0 | BYTE* pDstPoint = pDstData + 1ULL * rect->top * nDstStep + |
84 | 0 | 1ULL * rect->left * FreeRDPGetBytesPerPixel(DstFormat); |
85 | |
|
86 | 0 | pYUVPoint[0] = pYUVData[0] + 1ULL * rect->top * iStride[0] + rect->left; |
87 | 0 | pYUVPoint[1] = pYUVData[1] + 1ULL * rect->top / 2 * iStride[1] + rect->left / 2; |
88 | 0 | pYUVPoint[2] = pYUVData[2] + 1ULL * rect->top / 2 * iStride[2] + rect->left / 2; |
89 | |
|
90 | 0 | roi.width = WINPR_ASSERTING_INT_CAST(uint32_t, width); |
91 | 0 | roi.height = WINPR_ASSERTING_INT_CAST(uint32_t, height); |
92 | |
|
93 | 0 | if (prims->YUV420ToRGB_8u_P3AC4R(pYUVPoint, iStride, pDstPoint, nDstStep, DstFormat, &roi) != |
94 | 0 | PRIMITIVES_SUCCESS) |
95 | 0 | return FALSE; |
96 | | |
97 | 0 | return TRUE; |
98 | 0 | } |
99 | | |
100 | | static inline BOOL avc444_yuv_to_rgb(const BYTE* WINPR_RESTRICT pYUVData[3], |
101 | | const UINT32 iStride[3], |
102 | | const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstStep, |
103 | | BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat) |
104 | 0 | { |
105 | 0 | primitives_t* prims = primitives_get(); |
106 | 0 | prim_size_t roi; |
107 | 0 | const BYTE* pYUVPoint[3]; |
108 | |
|
109 | 0 | WINPR_ASSERT(pYUVData); |
110 | 0 | WINPR_ASSERT(iStride); |
111 | 0 | WINPR_ASSERT(rect); |
112 | 0 | WINPR_ASSERT(pDstData); |
113 | | |
114 | 0 | const INT32 width = rect->right - rect->left; |
115 | 0 | const INT32 height = rect->bottom - rect->top; |
116 | 0 | BYTE* pDstPoint = pDstData + 1ULL * rect->top * nDstStep + |
117 | 0 | 1ULL * rect->left * FreeRDPGetBytesPerPixel(DstFormat); |
118 | |
|
119 | 0 | pYUVPoint[0] = pYUVData[0] + 1ULL * rect->top * iStride[0] + rect->left; |
120 | 0 | pYUVPoint[1] = pYUVData[1] + 1ULL * rect->top * iStride[1] + rect->left; |
121 | 0 | pYUVPoint[2] = pYUVData[2] + 1ULL * rect->top * iStride[2] + rect->left; |
122 | |
|
123 | 0 | roi.width = WINPR_ASSERTING_INT_CAST(uint32_t, width); |
124 | 0 | roi.height = WINPR_ASSERTING_INT_CAST(uint32_t, height); |
125 | |
|
126 | 0 | if (prims->YUV444ToRGB_8u_P3AC4R(pYUVPoint, iStride, pDstPoint, nDstStep, DstFormat, &roi) != |
127 | 0 | PRIMITIVES_SUCCESS) |
128 | 0 | return FALSE; |
129 | | |
130 | 0 | return TRUE; |
131 | 0 | } |
132 | | |
133 | | static void CALLBACK yuv420_process_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, |
134 | | PTP_WORK work) |
135 | 0 | { |
136 | 0 | YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context; |
137 | 0 | WINPR_UNUSED(instance); |
138 | 0 | WINPR_UNUSED(work); |
139 | 0 | WINPR_ASSERT(param); |
140 | | |
141 | 0 | if (!avc420_yuv_to_rgb(param->pYUVData, param->iStride, ¶m->rect, param->nDstStep, |
142 | 0 | param->dest, param->DstFormat)) |
143 | 0 | WLog_WARN(TAG, "avc420_yuv_to_rgb failed"); |
144 | 0 | } |
145 | | |
146 | | static void CALLBACK yuv444_process_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, |
147 | | PTP_WORK work) |
148 | 0 | { |
149 | 0 | YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context; |
150 | 0 | WINPR_UNUSED(instance); |
151 | 0 | WINPR_UNUSED(work); |
152 | 0 | WINPR_ASSERT(param); |
153 | | |
154 | 0 | if (!avc444_yuv_to_rgb(param->pYUVData, param->iStride, ¶m->rect, param->nDstStep, |
155 | 0 | param->dest, param->DstFormat)) |
156 | 0 | WLog_WARN(TAG, "avc444_yuv_to_rgb failed"); |
157 | 0 | } |
158 | | |
159 | | BOOL yuv_context_reset(YUV_CONTEXT* WINPR_RESTRICT context, UINT32 width, UINT32 height) |
160 | 0 | { |
161 | 0 | BOOL rc = FALSE; |
162 | 0 | WINPR_ASSERT(context); |
163 | | |
164 | 0 | context->width = width; |
165 | 0 | context->height = height; |
166 | |
|
167 | 0 | context->heightStep = height; |
168 | |
|
169 | 0 | if (context->useThreads) |
170 | 0 | { |
171 | 0 | context->heightStep = 16; |
172 | | /* Preallocate workers for 16x16 tiles. |
173 | | * this is overallocation for most cases. |
174 | | * |
175 | | * ~2MB total for a 4k resolution, so negligible. |
176 | | */ |
177 | 0 | const size_t pw = (width + TILE_SIZE - width % TILE_SIZE) / 16; |
178 | 0 | const size_t ph = (height + TILE_SIZE - height % TILE_SIZE) / 16; |
179 | |
|
180 | 0 | const size_t count = pw * ph; |
181 | |
|
182 | 0 | context->work_object_count = 0; |
183 | 0 | if (context->encoder) |
184 | 0 | { |
185 | 0 | void* tmp = winpr_aligned_recalloc(context->work_enc_params, count, |
186 | 0 | sizeof(YUV_ENCODE_WORK_PARAM), 32); |
187 | 0 | if (!tmp) |
188 | 0 | goto fail; |
189 | 0 | memset(tmp, 0, count * sizeof(YUV_ENCODE_WORK_PARAM)); |
190 | |
|
191 | 0 | context->work_enc_params = tmp; |
192 | 0 | } |
193 | 0 | else |
194 | 0 | { |
195 | 0 | void* tmp = winpr_aligned_recalloc(context->work_dec_params, count, |
196 | 0 | sizeof(YUV_PROCESS_WORK_PARAM), 32); |
197 | 0 | if (!tmp) |
198 | 0 | goto fail; |
199 | 0 | memset(tmp, 0, count * sizeof(YUV_PROCESS_WORK_PARAM)); |
200 | |
|
201 | 0 | context->work_dec_params = tmp; |
202 | |
|
203 | 0 | void* ctmp = winpr_aligned_recalloc(context->work_combined_params, count, |
204 | 0 | sizeof(YUV_COMBINE_WORK_PARAM), 32); |
205 | 0 | if (!ctmp) |
206 | 0 | goto fail; |
207 | 0 | memset(ctmp, 0, count * sizeof(YUV_COMBINE_WORK_PARAM)); |
208 | |
|
209 | 0 | context->work_combined_params = ctmp; |
210 | 0 | } |
211 | | |
212 | 0 | void* wtmp = |
213 | 0 | winpr_aligned_recalloc((void*)context->work_objects, count, sizeof(PTP_WORK), 32); |
214 | 0 | if (!wtmp) |
215 | 0 | goto fail; |
216 | 0 | memset(wtmp, 0, count * sizeof(PTP_WORK)); |
217 | |
|
218 | 0 | context->work_objects = (PTP_WORK*)wtmp; |
219 | 0 | context->work_object_count = WINPR_ASSERTING_INT_CAST(uint32_t, count); |
220 | 0 | } |
221 | 0 | rc = TRUE; |
222 | 0 | fail: |
223 | 0 | return rc; |
224 | 0 | } |
225 | | |
226 | | YUV_CONTEXT* yuv_context_new(BOOL encoder, UINT32 ThreadingFlags) |
227 | 0 | { |
228 | 0 | SYSTEM_INFO sysInfos; |
229 | 0 | YUV_CONTEXT* ret = winpr_aligned_calloc(1, sizeof(*ret), 32); |
230 | 0 | if (!ret) |
231 | 0 | return NULL; |
232 | | |
233 | | /** do it here to avoid a race condition between threads */ |
234 | 0 | primitives_get(); |
235 | |
|
236 | 0 | ret->encoder = encoder; |
237 | 0 | if (!(ThreadingFlags & THREADING_FLAGS_DISABLE_THREADS)) |
238 | 0 | { |
239 | 0 | GetNativeSystemInfo(&sysInfos); |
240 | 0 | ret->useThreads = (sysInfos.dwNumberOfProcessors > 1); |
241 | 0 | } |
242 | |
|
243 | 0 | return ret; |
244 | 0 | } |
245 | | |
246 | | void yuv_context_free(YUV_CONTEXT* context) |
247 | 0 | { |
248 | 0 | if (!context) |
249 | 0 | return; |
250 | 0 | if (context->useThreads) |
251 | 0 | { |
252 | 0 | winpr_aligned_free((void*)context->work_objects); |
253 | 0 | winpr_aligned_free(context->work_combined_params); |
254 | 0 | winpr_aligned_free(context->work_enc_params); |
255 | 0 | winpr_aligned_free(context->work_dec_params); |
256 | 0 | } |
257 | 0 | winpr_aligned_free(context); |
258 | 0 | } |
259 | | |
260 | | static inline YUV_PROCESS_WORK_PARAM pool_decode_param(const RECTANGLE_16* WINPR_RESTRICT rect, |
261 | | YUV_CONTEXT* WINPR_RESTRICT context, |
262 | | const BYTE* WINPR_RESTRICT pYUVData[3], |
263 | | const UINT32 iStride[3], UINT32 DstFormat, |
264 | | BYTE* WINPR_RESTRICT dest, UINT32 nDstStep) |
265 | 0 | { |
266 | 0 | YUV_PROCESS_WORK_PARAM current = { 0 }; |
267 | |
|
268 | 0 | WINPR_ASSERT(rect); |
269 | 0 | WINPR_ASSERT(context); |
270 | 0 | WINPR_ASSERT(pYUVData); |
271 | 0 | WINPR_ASSERT(iStride); |
272 | 0 | WINPR_ASSERT(dest); |
273 | | |
274 | 0 | current.context = context; |
275 | 0 | current.DstFormat = DstFormat; |
276 | 0 | current.pYUVData[0] = pYUVData[0]; |
277 | 0 | current.pYUVData[1] = pYUVData[1]; |
278 | 0 | current.pYUVData[2] = pYUVData[2]; |
279 | 0 | current.iStride[0] = iStride[0]; |
280 | 0 | current.iStride[1] = iStride[1]; |
281 | 0 | current.iStride[2] = iStride[2]; |
282 | 0 | current.nDstStep = nDstStep; |
283 | 0 | current.dest = dest; |
284 | 0 | current.rect = *rect; |
285 | 0 | return current; |
286 | 0 | } |
287 | | |
288 | | static BOOL submit_object(PTP_WORK* WINPR_RESTRICT work_object, PTP_WORK_CALLBACK cb, |
289 | | const void* WINPR_RESTRICT param, YUV_CONTEXT* WINPR_RESTRICT context) |
290 | 0 | { |
291 | 0 | union |
292 | 0 | { |
293 | 0 | const void* cpv; |
294 | 0 | void* pv; |
295 | 0 | } cnv; |
296 | |
|
297 | 0 | cnv.cpv = param; |
298 | |
|
299 | 0 | if (!work_object) |
300 | 0 | return FALSE; |
301 | | |
302 | 0 | *work_object = NULL; |
303 | |
|
304 | 0 | if (!param || !context) |
305 | 0 | return FALSE; |
306 | | |
307 | 0 | *work_object = CreateThreadpoolWork(cb, cnv.pv, NULL); |
308 | 0 | if (!*work_object) |
309 | 0 | return FALSE; |
310 | | |
311 | 0 | SubmitThreadpoolWork(*work_object); |
312 | 0 | return TRUE; |
313 | 0 | } |
314 | | |
315 | | static void free_objects(PTP_WORK* work_objects, UINT32 waitCount) |
316 | 0 | { |
317 | 0 | WINPR_ASSERT(work_objects || (waitCount == 0)); |
318 | | |
319 | 0 | for (UINT32 i = 0; i < waitCount; i++) |
320 | 0 | { |
321 | 0 | PTP_WORK cur = work_objects[i]; |
322 | 0 | work_objects[i] = NULL; |
323 | |
|
324 | 0 | if (!cur) |
325 | 0 | continue; |
326 | | |
327 | 0 | WaitForThreadpoolWorkCallbacks(cur, FALSE); |
328 | 0 | CloseThreadpoolWork(cur); |
329 | 0 | } |
330 | 0 | } |
331 | | |
332 | | static BOOL intersects(UINT32 pos, const RECTANGLE_16* WINPR_RESTRICT regionRects, |
333 | | UINT32 numRegionRects) |
334 | 0 | { |
335 | 0 | WINPR_ASSERT(regionRects || (numRegionRects == 0)); |
336 | | |
337 | 0 | for (UINT32 x = pos + 1; x < numRegionRects; x++) |
338 | 0 | { |
339 | 0 | const RECTANGLE_16* what = ®ionRects[pos]; |
340 | 0 | const RECTANGLE_16* rect = ®ionRects[x]; |
341 | |
|
342 | 0 | if (rectangles_intersects(what, rect)) |
343 | 0 | { |
344 | 0 | WLog_WARN(TAG, "YUV decoder: intersecting rectangles, aborting"); |
345 | 0 | return TRUE; |
346 | 0 | } |
347 | 0 | } |
348 | | |
349 | 0 | return FALSE; |
350 | 0 | } |
351 | | |
352 | | static RECTANGLE_16 clamp(YUV_CONTEXT* WINPR_RESTRICT context, |
353 | | const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 srcHeight) |
354 | 0 | { |
355 | 0 | WINPR_ASSERT(context); |
356 | 0 | WINPR_ASSERT(rect); |
357 | | |
358 | 0 | RECTANGLE_16 c = *rect; |
359 | 0 | const UINT32 height = MIN(context->height, srcHeight); |
360 | 0 | if (c.top > height) |
361 | 0 | c.top = WINPR_ASSERTING_INT_CAST(UINT16, height); |
362 | 0 | if (c.bottom > height) |
363 | 0 | c.bottom = WINPR_ASSERTING_INT_CAST(UINT16, height); |
364 | 0 | return c; |
365 | 0 | } |
366 | | |
367 | | static BOOL pool_decode(YUV_CONTEXT* WINPR_RESTRICT context, PTP_WORK_CALLBACK cb, |
368 | | const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3], |
369 | | UINT32 yuvHeight, UINT32 DstFormat, BYTE* WINPR_RESTRICT dest, |
370 | | UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects, |
371 | | UINT32 numRegionRects) |
372 | 0 | { |
373 | 0 | BOOL rc = FALSE; |
374 | 0 | UINT32 waitCount = 0; |
375 | 0 | primitives_t* prims = primitives_get(); |
376 | |
|
377 | 0 | WINPR_ASSERT(context); |
378 | 0 | WINPR_ASSERT(cb); |
379 | 0 | WINPR_ASSERT(pYUVData); |
380 | 0 | WINPR_ASSERT(iStride); |
381 | 0 | WINPR_ASSERT(dest); |
382 | 0 | WINPR_ASSERT(regionRects || (numRegionRects == 0)); |
383 | | |
384 | 0 | if (context->encoder) |
385 | 0 | { |
386 | 0 | WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting"); |
387 | 0 | return FALSE; |
388 | 0 | } |
389 | | |
390 | 0 | if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU)) |
391 | 0 | { |
392 | 0 | for (UINT32 y = 0; y < numRegionRects; y++) |
393 | 0 | { |
394 | 0 | const RECTANGLE_16 rect = clamp(context, ®ionRects[y], yuvHeight); |
395 | 0 | YUV_PROCESS_WORK_PARAM current = |
396 | 0 | pool_decode_param(&rect, context, pYUVData, iStride, DstFormat, dest, nDstStep); |
397 | 0 | cb(NULL, ¤t, NULL); |
398 | 0 | } |
399 | 0 | return TRUE; |
400 | 0 | } |
401 | | |
402 | | /* case where we use threads */ |
403 | 0 | for (UINT32 x = 0; x < numRegionRects; x++) |
404 | 0 | { |
405 | 0 | RECTANGLE_16 r = clamp(context, ®ionRects[x], yuvHeight); |
406 | |
|
407 | 0 | if (intersects(x, regionRects, numRegionRects)) |
408 | 0 | continue; |
409 | | |
410 | 0 | while (r.left < r.right) |
411 | 0 | { |
412 | 0 | RECTANGLE_16 y = r; |
413 | 0 | y.right = MIN(r.right, r.left + TILE_SIZE); |
414 | |
|
415 | 0 | while (y.top < y.bottom) |
416 | 0 | { |
417 | 0 | RECTANGLE_16 z = y; |
418 | |
|
419 | 0 | if (context->work_object_count <= waitCount) |
420 | 0 | { |
421 | 0 | free_objects(context->work_objects, context->work_object_count); |
422 | 0 | waitCount = 0; |
423 | 0 | } |
424 | |
|
425 | 0 | YUV_PROCESS_WORK_PARAM* cur = &context->work_dec_params[waitCount]; |
426 | 0 | z.bottom = MIN(z.bottom, z.top + TILE_SIZE); |
427 | 0 | if (rectangle_is_empty(&z)) |
428 | 0 | continue; |
429 | 0 | *cur = pool_decode_param(&z, context, pYUVData, iStride, DstFormat, dest, nDstStep); |
430 | 0 | if (!submit_object(&context->work_objects[waitCount], cb, cur, context)) |
431 | 0 | goto fail; |
432 | 0 | waitCount++; |
433 | 0 | y.top += TILE_SIZE; |
434 | 0 | } |
435 | | |
436 | 0 | r.left += TILE_SIZE; |
437 | 0 | } |
438 | 0 | } |
439 | 0 | rc = TRUE; |
440 | 0 | fail: |
441 | 0 | free_objects(context->work_objects, context->work_object_count); |
442 | 0 | return rc; |
443 | 0 | } |
444 | | |
445 | | static inline BOOL check_rect(const YUV_CONTEXT* WINPR_RESTRICT yuv, |
446 | | const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstWidth, |
447 | | UINT32 nDstHeight) |
448 | 0 | { |
449 | 0 | WINPR_ASSERT(yuv); |
450 | 0 | WINPR_ASSERT(rect); |
451 | | |
452 | | /* Check, if the output rectangle is valid in decoded h264 frame. */ |
453 | 0 | if ((rect->right > yuv->width) || (rect->left > yuv->width)) |
454 | 0 | return FALSE; |
455 | | |
456 | 0 | if ((rect->top > yuv->height) || (rect->bottom > yuv->height)) |
457 | 0 | return FALSE; |
458 | | |
459 | | /* Check, if the output rectangle is valid in destination buffer. */ |
460 | 0 | if ((rect->right > nDstWidth) || (rect->left > nDstWidth)) |
461 | 0 | return FALSE; |
462 | | |
463 | 0 | if ((rect->bottom > nDstHeight) || (rect->top > nDstHeight)) |
464 | 0 | return FALSE; |
465 | | |
466 | 0 | return TRUE; |
467 | 0 | } |
468 | | |
469 | | static void CALLBACK yuv444_combine_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, |
470 | | PTP_WORK work) |
471 | 0 | { |
472 | 0 | YUV_COMBINE_WORK_PARAM* param = (YUV_COMBINE_WORK_PARAM*)context; |
473 | 0 | primitives_t* prims = primitives_get(); |
474 | |
|
475 | 0 | WINPR_ASSERT(param); |
476 | 0 | YUV_CONTEXT* yuv = param->context; |
477 | 0 | WINPR_ASSERT(yuv); |
478 | | |
479 | 0 | const RECTANGLE_16* rect = ¶m->rect; |
480 | 0 | WINPR_ASSERT(rect); |
481 | | |
482 | 0 | const UINT32 alignedWidth = yuv->width + ((yuv->width % 16 != 0) ? 16 - yuv->width % 16 : 0); |
483 | 0 | const UINT32 alignedHeight = |
484 | 0 | yuv->height + ((yuv->height % 16 != 0) ? 16 - yuv->height % 16 : 0); |
485 | |
|
486 | 0 | WINPR_UNUSED(instance); |
487 | 0 | WINPR_UNUSED(work); |
488 | |
|
489 | 0 | if (!check_rect(param->context, rect, yuv->width, yuv->height)) |
490 | 0 | return; |
491 | | |
492 | 0 | if (prims->YUV420CombineToYUV444(param->type, param->pYUVData, param->iStride, alignedWidth, |
493 | 0 | alignedHeight, param->pYUVDstData, param->iDstStride, |
494 | 0 | rect) != PRIMITIVES_SUCCESS) |
495 | 0 | WLog_WARN(TAG, "YUV420CombineToYUV444 failed"); |
496 | 0 | } |
497 | | |
498 | | static inline YUV_COMBINE_WORK_PARAM |
499 | | pool_decode_rect_param(const RECTANGLE_16* WINPR_RESTRICT rect, YUV_CONTEXT* WINPR_RESTRICT context, |
500 | | BYTE type, const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3], |
501 | | BYTE* WINPR_RESTRICT pYUVDstData[3], const UINT32 iDstStride[3]) |
502 | 0 | { |
503 | 0 | YUV_COMBINE_WORK_PARAM current = { 0 }; |
504 | |
|
505 | 0 | WINPR_ASSERT(rect); |
506 | 0 | WINPR_ASSERT(context); |
507 | 0 | WINPR_ASSERT(pYUVData); |
508 | 0 | WINPR_ASSERT(iStride); |
509 | 0 | WINPR_ASSERT(pYUVDstData); |
510 | 0 | WINPR_ASSERT(iDstStride); |
511 | | |
512 | 0 | current.context = context; |
513 | 0 | current.pYUVData[0] = pYUVData[0]; |
514 | 0 | current.pYUVData[1] = pYUVData[1]; |
515 | 0 | current.pYUVData[2] = pYUVData[2]; |
516 | 0 | current.pYUVDstData[0] = pYUVDstData[0]; |
517 | 0 | current.pYUVDstData[1] = pYUVDstData[1]; |
518 | 0 | current.pYUVDstData[2] = pYUVDstData[2]; |
519 | 0 | current.iStride[0] = iStride[0]; |
520 | 0 | current.iStride[1] = iStride[1]; |
521 | 0 | current.iStride[2] = iStride[2]; |
522 | 0 | current.iDstStride[0] = iDstStride[0]; |
523 | 0 | current.iDstStride[1] = iDstStride[1]; |
524 | 0 | current.iDstStride[2] = iDstStride[2]; |
525 | 0 | current.type = type; |
526 | 0 | current.rect = *rect; |
527 | 0 | return current; |
528 | 0 | } |
529 | | |
530 | | static BOOL pool_decode_rect(YUV_CONTEXT* WINPR_RESTRICT context, BYTE type, |
531 | | const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3], |
532 | | BYTE* WINPR_RESTRICT pYUVDstData[3], const UINT32 iDstStride[3], |
533 | | const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects) |
534 | 0 | { |
535 | 0 | BOOL rc = FALSE; |
536 | 0 | UINT32 waitCount = 0; |
537 | 0 | PTP_WORK_CALLBACK cb = yuv444_combine_work_callback; |
538 | 0 | primitives_t* prims = primitives_get(); |
539 | |
|
540 | 0 | WINPR_ASSERT(context); |
541 | 0 | WINPR_ASSERT(pYUVData); |
542 | 0 | WINPR_ASSERT(iStride); |
543 | 0 | WINPR_ASSERT(pYUVDstData); |
544 | 0 | WINPR_ASSERT(iDstStride); |
545 | 0 | WINPR_ASSERT(regionRects || (numRegionRects == 0)); |
546 | | |
547 | 0 | if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU)) |
548 | 0 | { |
549 | 0 | for (UINT32 y = 0; y < numRegionRects; y++) |
550 | 0 | { |
551 | 0 | YUV_COMBINE_WORK_PARAM current = pool_decode_rect_param( |
552 | 0 | ®ionRects[y], context, type, pYUVData, iStride, pYUVDstData, iDstStride); |
553 | 0 | cb(NULL, ¤t, NULL); |
554 | 0 | } |
555 | 0 | return TRUE; |
556 | 0 | } |
557 | | |
558 | | /* case where we use threads */ |
559 | 0 | for (waitCount = 0; waitCount < numRegionRects; waitCount++) |
560 | 0 | { |
561 | 0 | YUV_COMBINE_WORK_PARAM* current = NULL; |
562 | |
|
563 | 0 | if (context->work_object_count <= waitCount) |
564 | 0 | { |
565 | 0 | free_objects(context->work_objects, context->work_object_count); |
566 | 0 | waitCount = 0; |
567 | 0 | } |
568 | 0 | current = &context->work_combined_params[waitCount]; |
569 | 0 | *current = pool_decode_rect_param(®ionRects[waitCount], context, type, pYUVData, iStride, |
570 | 0 | pYUVDstData, iDstStride); |
571 | |
|
572 | 0 | if (!submit_object(&context->work_objects[waitCount], cb, current, context)) |
573 | 0 | goto fail; |
574 | 0 | } |
575 | | |
576 | 0 | rc = TRUE; |
577 | 0 | fail: |
578 | 0 | free_objects(context->work_objects, context->work_object_count); |
579 | 0 | return rc; |
580 | 0 | } |
581 | | |
582 | | BOOL yuv444_context_decode(YUV_CONTEXT* WINPR_RESTRICT context, BYTE type, |
583 | | const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3], |
584 | | UINT32 srcYuvHeight, BYTE* WINPR_RESTRICT pYUVDstData[3], |
585 | | const UINT32 iDstStride[3], DWORD DstFormat, BYTE* WINPR_RESTRICT dest, |
586 | | UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects, |
587 | | UINT32 numRegionRects) |
588 | 0 | { |
589 | 0 | const BYTE* pYUVCDstData[3]; |
590 | |
|
591 | 0 | WINPR_ASSERT(context); |
592 | 0 | WINPR_ASSERT(pYUVData); |
593 | 0 | WINPR_ASSERT(iStride); |
594 | 0 | WINPR_ASSERT(pYUVDstData); |
595 | 0 | WINPR_ASSERT(iDstStride); |
596 | 0 | WINPR_ASSERT(dest); |
597 | 0 | WINPR_ASSERT(regionRects || (numRegionRects == 0)); |
598 | | |
599 | 0 | if (context->encoder) |
600 | 0 | { |
601 | 0 | WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting"); |
602 | 0 | return FALSE; |
603 | 0 | } |
604 | 0 | if (!pool_decode_rect(context, type, pYUVData, iStride, pYUVDstData, iDstStride, regionRects, |
605 | 0 | numRegionRects)) |
606 | 0 | return FALSE; |
607 | | |
608 | 0 | pYUVCDstData[0] = pYUVDstData[0]; |
609 | 0 | pYUVCDstData[1] = pYUVDstData[1]; |
610 | 0 | pYUVCDstData[2] = pYUVDstData[2]; |
611 | 0 | return pool_decode(context, yuv444_process_work_callback, pYUVCDstData, iDstStride, |
612 | 0 | srcYuvHeight, DstFormat, dest, nDstStep, regionRects, numRegionRects); |
613 | 0 | } |
614 | | |
615 | | BOOL yuv420_context_decode(YUV_CONTEXT* WINPR_RESTRICT context, |
616 | | const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3], |
617 | | UINT32 yuvHeight, DWORD DstFormat, BYTE* WINPR_RESTRICT dest, |
618 | | UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects, |
619 | | UINT32 numRegionRects) |
620 | 0 | { |
621 | 0 | return pool_decode(context, yuv420_process_work_callback, pYUVData, iStride, yuvHeight, |
622 | 0 | DstFormat, dest, nDstStep, regionRects, numRegionRects); |
623 | 0 | } |
624 | | |
625 | | static void CALLBACK yuv420_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, |
626 | | PTP_WORK work) |
627 | 0 | { |
628 | 0 | prim_size_t roi; |
629 | 0 | YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context; |
630 | 0 | primitives_t* prims = primitives_get(); |
631 | 0 | BYTE* pYUVData[3]; |
632 | 0 | const BYTE* src = NULL; |
633 | |
|
634 | 0 | WINPR_UNUSED(instance); |
635 | 0 | WINPR_UNUSED(work); |
636 | 0 | WINPR_ASSERT(param); |
637 | | |
638 | 0 | roi.width = param->rect.right - param->rect.left; |
639 | 0 | roi.height = param->rect.bottom - param->rect.top; |
640 | 0 | src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top + |
641 | 0 | 1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat); |
642 | 0 | pYUVData[0] = |
643 | 0 | param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left; |
644 | 0 | pYUVData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] + |
645 | 0 | param->rect.left / 2; |
646 | 0 | pYUVData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] + |
647 | 0 | param->rect.left / 2; |
648 | |
|
649 | 0 | if (prims->RGBToYUV420_8u_P3AC4R(src, param->SrcFormat, param->nSrcStep, pYUVData, |
650 | 0 | param->iStride, &roi) != PRIMITIVES_SUCCESS) |
651 | 0 | { |
652 | 0 | WLog_ERR(TAG, "error when decoding lines"); |
653 | 0 | } |
654 | 0 | } |
655 | | |
656 | | static void CALLBACK yuv444v1_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, |
657 | | PTP_WORK work) |
658 | 0 | { |
659 | 0 | prim_size_t roi; |
660 | 0 | YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context; |
661 | 0 | primitives_t* prims = primitives_get(); |
662 | 0 | BYTE* pYUVLumaData[3]; |
663 | 0 | BYTE* pYUVChromaData[3]; |
664 | 0 | const BYTE* src = NULL; |
665 | |
|
666 | 0 | WINPR_UNUSED(instance); |
667 | 0 | WINPR_UNUSED(work); |
668 | 0 | WINPR_ASSERT(param); |
669 | | |
670 | 0 | roi.width = param->rect.right - param->rect.left; |
671 | 0 | roi.height = param->rect.bottom - param->rect.top; |
672 | 0 | src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top + |
673 | 0 | 1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat); |
674 | 0 | pYUVLumaData[0] = |
675 | 0 | param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left; |
676 | 0 | pYUVLumaData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] + |
677 | 0 | param->rect.left / 2; |
678 | 0 | pYUVLumaData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] + |
679 | 0 | param->rect.left / 2; |
680 | 0 | pYUVChromaData[0] = |
681 | 0 | param->pYUVChromaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left; |
682 | 0 | pYUVChromaData[1] = param->pYUVChromaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] + |
683 | 0 | param->rect.left / 2; |
684 | 0 | pYUVChromaData[2] = param->pYUVChromaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] + |
685 | 0 | param->rect.left / 2; |
686 | 0 | if (prims->RGBToAVC444YUV(src, param->SrcFormat, param->nSrcStep, pYUVLumaData, param->iStride, |
687 | 0 | pYUVChromaData, param->iStride, &roi) != PRIMITIVES_SUCCESS) |
688 | 0 | { |
689 | 0 | WLog_ERR(TAG, "error when decoding lines"); |
690 | 0 | } |
691 | 0 | } |
692 | | |
693 | | static void CALLBACK yuv444v2_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context, |
694 | | PTP_WORK work) |
695 | 0 | { |
696 | 0 | prim_size_t roi; |
697 | 0 | YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context; |
698 | 0 | primitives_t* prims = primitives_get(); |
699 | 0 | BYTE* pYUVLumaData[3]; |
700 | 0 | BYTE* pYUVChromaData[3]; |
701 | 0 | const BYTE* src = NULL; |
702 | |
|
703 | 0 | WINPR_UNUSED(instance); |
704 | 0 | WINPR_UNUSED(work); |
705 | 0 | WINPR_ASSERT(param); |
706 | | |
707 | 0 | roi.width = param->rect.right - param->rect.left; |
708 | 0 | roi.height = param->rect.bottom - param->rect.top; |
709 | 0 | src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top + |
710 | 0 | 1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat); |
711 | 0 | pYUVLumaData[0] = |
712 | 0 | param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left; |
713 | 0 | pYUVLumaData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] + |
714 | 0 | param->rect.left / 2; |
715 | 0 | pYUVLumaData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] + |
716 | 0 | param->rect.left / 2; |
717 | 0 | pYUVChromaData[0] = |
718 | 0 | param->pYUVChromaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left; |
719 | 0 | pYUVChromaData[1] = param->pYUVChromaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] + |
720 | 0 | param->rect.left / 2; |
721 | 0 | pYUVChromaData[2] = param->pYUVChromaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] + |
722 | 0 | param->rect.left / 2; |
723 | 0 | if (prims->RGBToAVC444YUVv2(src, param->SrcFormat, param->nSrcStep, pYUVLumaData, |
724 | 0 | param->iStride, pYUVChromaData, param->iStride, |
725 | 0 | &roi) != PRIMITIVES_SUCCESS) |
726 | 0 | { |
727 | 0 | WLog_ERR(TAG, "error when decoding lines"); |
728 | 0 | } |
729 | 0 | } |
730 | | |
731 | | static inline YUV_ENCODE_WORK_PARAM |
732 | | pool_encode_fill(const RECTANGLE_16* WINPR_RESTRICT rect, YUV_CONTEXT* WINPR_RESTRICT context, |
733 | | const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat, |
734 | | const UINT32 iStride[], BYTE* WINPR_RESTRICT pYUVLumaData[], |
735 | | BYTE* WINPR_RESTRICT pYUVChromaData[]) |
736 | 0 | { |
737 | 0 | YUV_ENCODE_WORK_PARAM current = { 0 }; |
738 | |
|
739 | 0 | WINPR_ASSERT(rect); |
740 | 0 | WINPR_ASSERT(context); |
741 | 0 | WINPR_ASSERT(pSrcData); |
742 | 0 | WINPR_ASSERT(iStride); |
743 | 0 | WINPR_ASSERT(pYUVLumaData); |
744 | | |
745 | 0 | current.context = context; |
746 | 0 | current.pSrcData = pSrcData; |
747 | 0 | current.SrcFormat = SrcFormat; |
748 | 0 | current.nSrcStep = nSrcStep; |
749 | 0 | current.pYUVLumaData[0] = pYUVLumaData[0]; |
750 | 0 | current.pYUVLumaData[1] = pYUVLumaData[1]; |
751 | 0 | current.pYUVLumaData[2] = pYUVLumaData[2]; |
752 | 0 | if (pYUVChromaData) |
753 | 0 | { |
754 | 0 | current.pYUVChromaData[0] = pYUVChromaData[0]; |
755 | 0 | current.pYUVChromaData[1] = pYUVChromaData[1]; |
756 | 0 | current.pYUVChromaData[2] = pYUVChromaData[2]; |
757 | 0 | } |
758 | 0 | current.iStride[0] = iStride[0]; |
759 | 0 | current.iStride[1] = iStride[1]; |
760 | 0 | current.iStride[2] = iStride[2]; |
761 | |
|
762 | 0 | current.rect = *rect; |
763 | |
|
764 | 0 | return current; |
765 | 0 | } |
766 | | |
/**
 * Number of strips of (about) step rows a region of height rows is split into.
 *
 * Computed as (height + step / 2 + 1) / step, but never less than 1.
 *
 * @param height number of rows of the region
 * @param step   strip height; 0 is treated as "do not split" and yields 1
 *
 * @return the number of strips, always >= 1.
 */
static uint32_t getSteps(uint32_t height, uint32_t step)
{
    /* a zero step would otherwise divide by zero */
    if (step == 0)
        return 1;

    const uint32_t steps = (height + step / 2 + 1) / step;
    return (steps < 1) ? 1 : steps;
}
774 | | |
775 | | static BOOL pool_encode(YUV_CONTEXT* WINPR_RESTRICT context, PTP_WORK_CALLBACK cb, |
776 | | const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat, |
777 | | const UINT32 iStride[], BYTE* WINPR_RESTRICT pYUVLumaData[], |
778 | | BYTE* WINPR_RESTRICT pYUVChromaData[], |
779 | | const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects) |
780 | 0 | { |
781 | 0 | BOOL rc = FALSE; |
782 | 0 | primitives_t* prims = primitives_get(); |
783 | 0 | UINT32 waitCount = 0; |
784 | |
|
785 | 0 | WINPR_ASSERT(context); |
786 | 0 | WINPR_ASSERT(cb); |
787 | 0 | WINPR_ASSERT(pSrcData); |
788 | 0 | WINPR_ASSERT(iStride); |
789 | 0 | WINPR_ASSERT(regionRects || (numRegionRects == 0)); |
790 | | |
791 | 0 | if (!context->encoder) |
792 | 0 | { |
793 | |
|
794 | 0 | WLog_ERR(TAG, "YUV context set up for decoding, can not encode with it, aborting"); |
795 | 0 | return FALSE; |
796 | 0 | } |
797 | | |
798 | 0 | if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU)) |
799 | 0 | { |
800 | 0 | for (UINT32 x = 0; x < numRegionRects; x++) |
801 | 0 | { |
802 | 0 | YUV_ENCODE_WORK_PARAM current = |
803 | 0 | pool_encode_fill(®ionRects[x], context, pSrcData, nSrcStep, SrcFormat, iStride, |
804 | 0 | pYUVLumaData, pYUVChromaData); |
805 | 0 | cb(NULL, ¤t, NULL); |
806 | 0 | } |
807 | 0 | return TRUE; |
808 | 0 | } |
809 | | |
810 | | /* case where we use threads */ |
811 | 0 | for (UINT32 x = 0; x < numRegionRects; x++) |
812 | 0 | { |
813 | 0 | const RECTANGLE_16* rect = ®ionRects[x]; |
814 | 0 | const UINT32 height = rect->bottom - rect->top; |
815 | 0 | const UINT32 steps = getSteps(height, context->heightStep); |
816 | |
|
817 | 0 | waitCount += steps; |
818 | 0 | } |
819 | |
|
820 | 0 | for (UINT32 x = 0; x < numRegionRects; x++) |
821 | 0 | { |
822 | 0 | const RECTANGLE_16* rect = ®ionRects[x]; |
823 | 0 | const UINT32 height = rect->bottom - rect->top; |
824 | 0 | const UINT32 steps = getSteps(height, context->heightStep); |
825 | |
|
826 | 0 | for (UINT32 y = 0; y < steps; y++) |
827 | 0 | { |
828 | 0 | RECTANGLE_16 r = *rect; |
829 | 0 | YUV_ENCODE_WORK_PARAM* current = NULL; |
830 | |
|
831 | 0 | if (context->work_object_count <= waitCount) |
832 | 0 | { |
833 | 0 | free_objects(context->work_objects, context->work_object_count); |
834 | 0 | waitCount = 0; |
835 | 0 | } |
836 | |
|
837 | 0 | current = &context->work_enc_params[waitCount]; |
838 | 0 | r.top += y * context->heightStep; |
839 | 0 | *current = pool_encode_fill(&r, context, pSrcData, nSrcStep, SrcFormat, iStride, |
840 | 0 | pYUVLumaData, pYUVChromaData); |
841 | 0 | if (!submit_object(&context->work_objects[waitCount], cb, current, context)) |
842 | 0 | goto fail; |
843 | 0 | waitCount++; |
844 | 0 | } |
845 | 0 | } |
846 | | |
847 | 0 | rc = TRUE; |
848 | 0 | fail: |
849 | 0 | free_objects(context->work_objects, context->work_object_count); |
850 | 0 | return rc; |
851 | 0 | } |
852 | | |
853 | | BOOL yuv420_context_encode(YUV_CONTEXT* WINPR_RESTRICT context, const BYTE* WINPR_RESTRICT pSrcData, |
854 | | UINT32 nSrcStep, UINT32 SrcFormat, const UINT32 iStride[3], |
855 | | BYTE* WINPR_RESTRICT pYUVData[3], |
856 | | const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects) |
857 | 0 | { |
858 | 0 | if (!context || !pSrcData || !iStride || !pYUVData || !regionRects) |
859 | 0 | return FALSE; |
860 | | |
861 | 0 | return pool_encode(context, yuv420_encode_work_callback, pSrcData, nSrcStep, SrcFormat, iStride, |
862 | 0 | pYUVData, NULL, regionRects, numRegionRects); |
863 | 0 | } |
864 | | |
865 | | BOOL yuv444_context_encode(YUV_CONTEXT* WINPR_RESTRICT context, BYTE version, |
866 | | const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat, |
867 | | const UINT32 iStride[3], BYTE* WINPR_RESTRICT pYUVLumaData[3], |
868 | | BYTE* WINPR_RESTRICT pYUVChromaData[3], |
869 | | const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects) |
870 | 0 | { |
871 | 0 | PTP_WORK_CALLBACK cb = NULL; |
872 | 0 | switch (version) |
873 | 0 | { |
874 | 0 | case 1: |
875 | 0 | cb = yuv444v1_encode_work_callback; |
876 | 0 | break; |
877 | 0 | case 2: |
878 | 0 | cb = yuv444v2_encode_work_callback; |
879 | 0 | break; |
880 | 0 | default: |
881 | 0 | return FALSE; |
882 | 0 | } |
883 | | |
884 | 0 | return pool_encode(context, cb, pSrcData, nSrcStep, SrcFormat, iStride, pYUVLumaData, |
885 | 0 | pYUVChromaData, regionRects, numRegionRects); |
886 | 0 | } |