Coverage Report

Created: 2024-09-08 06:20

/src/FreeRDP/libfreerdp/primitives/prim_YUV.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * FreeRDP: A Remote Desktop Protocol Implementation
3
 * Generic YUV/RGB conversion operations
4
 *
5
 * Copyright 2014 Marc-Andre Moreau <marcandre.moreau@gmail.com>
6
 * Copyright 2015-2017 Armin Novak <armin.novak@thincast.com>
7
 * Copyright 2015-2017 Norbert Federa <norbert.federa@thincast.com>
8
 * Copyright 2015-2017 Vic Lee
9
 * Copyright 2015-2017 Thincast Technologies GmbH
10
 *
11
 * Licensed under the Apache License, Version 2.0 (the "License");
12
 * you may not use this file except in compliance with the License.
13
 * You may obtain a copy of the License at
14
 *
15
 *     http://www.apache.org/licenses/LICENSE-2.0
16
 *
17
 * Unless required by applicable law or agreed to in writing, software
18
 * distributed under the License is distributed on an "AS IS" BASIS,
19
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
 * See the License for the specific language governing permissions and
21
 * limitations under the License.
22
 */
23
24
#include <winpr/wtypes.h>
25
26
#include <freerdp/config.h>
27
28
#include <freerdp/types.h>
29
#include <freerdp/primitives.h>
30
#include <freerdp/codec/color.h>
31
#include "prim_internal.h"
32
#include "prim_YUV.h"
33
34
/**
 * Expand the main (luma) frame, stored with half resolution chroma planes, into the
 * full resolution destination planes.
 *
 * The Y rows of the region are copied unchanged (B1). Every U/V sample of the source is
 * replicated into the matching 2x2 cell of the destination U/V planes (B2 and B3).
 *
 * NOTE: for an odd region width/height the last 2x2 cell reaches one column/row past the
 * region, so the destination planes must provide that room.
 *
 * @param pSrcRaw source planes (Y, U, V); the region offset is applied by this function
 * @param srcStep line stride in bytes of each source plane
 * @param pDstRaw destination planes (Y, U, V); the region offset is applied by this function
 * @param dstStep line stride in bytes of each destination plane
 * @param roi     region to process
 *
 * @return PRIMITIVES_SUCCESS
 */
static pstatus_t general_LumaToYUV444(const BYTE* const WINPR_RESTRICT pSrcRaw[3],
                                      const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDstRaw[3],
                                      const UINT32 dstStep[3],
                                      const RECTANGLE_16* WINPR_RESTRICT roi)
{
  const UINT32 width = roi->right - roi->left;
  const UINT32 height = roi->bottom - roi->top;
  const UINT32 chromaCols = (width + 1) / 2;
  const UINT32 chromaRows = (height + 1) / 2;

  /* Source chroma is subsampled, so its region offset is halved in both directions. */
  const BYTE* srcY = pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left;
  const BYTE* srcU = pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2;
  const BYTE* srcV = pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2;
  BYTE* dstY = pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left;
  BYTE* dstU = pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left;
  BYTE* dstV = pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left;

  /* B1: the Y data can be taken over line by line. */
  for (size_t row = 0; row < height; row++)
    memcpy(dstY + dstStep[0] * row, srcY + row * srcStep[0], width);

  /* B2 and B3: spread each chroma sample over its 2x2 destination cell. */
  for (size_t row = 0; row < chromaRows; row++)
  {
    const UINT32 upperRow = 2ULL * row;
    const UINT32 lowerRow = upperRow + 1;
    const BYTE* inU = srcU + row * srcStep[1];
    const BYTE* inV = srcV + row * srcStep[2];
    BYTE* outUpperU = dstU + 1ULL * dstStep[1] * upperRow;
    BYTE* outUpperV = dstV + 1ULL * dstStep[2] * upperRow;
    BYTE* outLowerU = dstU + 1ULL * dstStep[1] * lowerRow;
    BYTE* outLowerV = dstV + 1ULL * dstStep[2] * lowerRow;

    for (size_t col = 0; col < chromaCols; col++)
    {
      const UINT32 leftCol = 2UL * col;
      const UINT32 rightCol = leftCol + 1;
      const BYTE u = inU[col];
      const BYTE v = inV[col];

      outUpperU[leftCol] = u;
      outUpperU[rightCol] = u;
      outLowerU[leftCol] = u;
      outLowerU[rightCol] = u;

      outUpperV[leftCol] = v;
      outUpperV[rightCol] = v;
      outLowerV[leftCol] = v;
      outLowerV[rightCol] = v;
    }
  }

  return PRIMITIVES_SUCCESS;
}
93
94
static pstatus_t general_ChromaFilter(BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3],
95
                                      const RECTANGLE_16* WINPR_RESTRICT roi)
96
0
{
97
0
  const UINT32 oddY = 1;
98
0
  const UINT32 evenY = 0;
99
0
  const UINT32 nWidth = roi->right - roi->left;
100
0
  const UINT32 nHeight = roi->bottom - roi->top;
101
0
  const UINT32 halfHeight = (nHeight + 1) / 2;
102
0
  const UINT32 halfWidth = (nWidth + 1) / 2;
103
104
  /* Filter */
105
0
  for (UINT32 y = roi->top; y < halfHeight + roi->top; y++)
106
0
  {
107
0
    const UINT32 val2y = (y * 2 + evenY);
108
0
    const UINT32 val2y1 = val2y + oddY;
109
0
    BYTE* pU1 = pDst[1] + 1ULL * dstStep[1] * val2y1;
110
0
    BYTE* pV1 = pDst[2] + 1ULL * dstStep[2] * val2y1;
111
0
    BYTE* pU = pDst[1] + 1ULL * dstStep[1] * val2y;
112
0
    BYTE* pV = pDst[2] + 1ULL * dstStep[2] * val2y;
113
114
0
    if (val2y1 > nHeight)
115
0
      continue;
116
117
0
    for (UINT32 x = roi->left; x < halfWidth + roi->left; x++)
118
0
    {
119
0
      const UINT32 val2x = (x * 2);
120
0
      const UINT32 val2x1 = val2x + 1;
121
0
      const BYTE inU = pU[val2x];
122
0
      const BYTE inV = pV[val2x];
123
0
      const INT32 up = inU * 4;
124
0
      const INT32 vp = inV * 4;
125
0
      INT32 u2020 = 0;
126
0
      INT32 v2020 = 0;
127
128
0
      if (val2x1 > nWidth)
129
0
        continue;
130
131
0
      u2020 = up - pU[val2x1] - pU1[val2x] - pU1[val2x1];
132
0
      v2020 = vp - pV[val2x1] - pV1[val2x] - pV1[val2x1];
133
134
0
      pU[val2x] = CONDITIONAL_CLIP(u2020, inU);
135
0
      pV[val2x] = CONDITIONAL_CLIP(v2020, inV);
136
0
    }
137
0
  }
138
139
0
  return PRIMITIVES_SUCCESS;
140
0
}
141
142
/**
 * Merge an auxiliary (chroma, v1 layout) frame into the full resolution U/V planes.
 *
 * B4/B5: plane 0 of the auxiliary frame is read line by line in groups of 16 lines; the
 *        first 8 lines of each group go to odd rows of U, the other 8 to odd rows of V.
 * B6/B7: planes 1 and 2 provide the odd columns of the even rows of U and V.
 * Finally general_ChromaFilter is run on the region-adjusted plane pointers.
 *
 * @param pSrcRaw auxiliary frame planes; the region offset is applied by this function
 * @param srcStep line stride in bytes of each source plane
 * @param pDstRaw destination planes (Y, U, V); the region offset is applied by this function
 * @param dstStep line stride in bytes of each destination plane
 * @param roi     region to process
 *
 * @return the result of general_ChromaFilter
 */
static pstatus_t general_ChromaV1ToYUV444(const BYTE* const WINPR_RESTRICT pSrcRaw[3],
                                          const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDstRaw[3],
                                          const UINT32 dstStep[3],
                                          const RECTANGLE_16* WINPR_RESTRICT roi)
{
  const UINT32 width = roi->right - roi->left;
  const UINT32 height = roi->bottom - roi->top;
  /* The auxiliary frame is aligned to multiples of 16x16.
   * We need the padded height for B4 and B5 conversion. */
  const UINT32 paddedHeight = height + 16 - height % 16;
  const BYTE* pSrc[3] = { pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left,
                          pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2,
                          pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2 };
  BYTE* pDst[3] = { pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left,
                    pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left,
                    pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left };
  UINT32 linesForU = 0;
  UINT32 linesForV = 0;

  /* The second half of U and V is a bit more tricky... */
  /* B4 and B5 */
  for (size_t y = 0; y < paddedHeight; y++)
  {
    /* Lines 0..7 of every 16 line group belong to U, lines 8..15 to V. */
    const int forU = (y % 16) < 8;
    const size_t plane = forU ? 1 : 2;
    UINT32* consumed = forU ? &linesForU : &linesForV;
    /* The counter advances even for lines that are dropped below. */
    const size_t dstRow = 2 * (*consumed)++ + 1;

    if (dstRow >= height)
      continue;

    memcpy(pDst[plane] + dstStep[plane] * dstRow, pSrc[0] + y * srcStep[0], width);
  }

  /* B6 and B7 */
  for (size_t y = 0; y < height / 2; y++)
  {
    const UINT32 dstRow = y * 2ULL;
    const BYTE* auxU = pSrc[1] + y * srcStep[1];
    const BYTE* auxV = pSrc[2] + y * srcStep[2];
    BYTE* outU = pDst[1] + 1ULL * dstStep[1] * dstRow;
    BYTE* outV = pDst[2] + 1ULL * dstStep[2] * dstRow;

    for (size_t x = 0; x < width / 2; x++)
    {
      const UINT32 oddCol = x * 2 + 1;
      outU[oddCol] = auxU[x];
      outV[oddCol] = auxV[x];
    }
  }

  /* Filter */
  return general_ChromaFilter(pDst, dstStep, roi);
}
216
217
/**
 * Merge an auxiliary (chroma, v2 layout) frame into the full resolution U/V planes.
 *
 * B4/B5: each line of source plane 0 holds the odd-column U samples, followed
 *        nTotalWidth / 2 bytes later by the odd-column V samples of the same line.
 * B6-B9: each line of source planes 1 and 2 holds samples for U, followed nTotalWidth / 4
 *        bytes later by samples for V; plane 1 fills columns 4x, plane 2 columns 4x + 2 of
 *        the odd destination rows.
 * Finally general_ChromaFilter is run on the unmodified destination plane pointers.
 *
 * @param pSrc         auxiliary frame planes
 * @param srcStep      line stride in bytes of each source plane
 * @param nTotalWidth  full frame width, used to locate the V half of a source line
 * @param nTotalHeight full frame height (not used by this implementation)
 * @param pDst         destination planes (Y, U, V)
 * @param dstStep      line stride in bytes of each destination plane
 * @param roi          region to process
 *
 * @return the result of general_ChromaFilter
 */
static pstatus_t general_ChromaV2ToYUV444(const BYTE* const WINPR_RESTRICT pSrc[3],
                                          const UINT32 srcStep[3], UINT32 nTotalWidth,
                                          UINT32 nTotalHeight, BYTE* WINPR_RESTRICT pDst[3],
                                          const UINT32 dstStep[3],
                                          const RECTANGLE_16* WINPR_RESTRICT roi)
{
  const UINT32 width = roi->right - roi->left;
  const UINT32 height = roi->bottom - roi->top;
  const UINT32 halfWidth = (width + 1) / 2;
  const UINT32 halfHeight = (height + 1) / 2;
  const UINT32 quarterWidth = (width + 3) / 4;

  /* B4 and B5: odd UV values for width/2, height */
  for (UINT32 y = 0; y < height; y++)
  {
    const UINT32 row = y + roi->top;
    const BYTE* auxU = pSrc[0] + 1ULL * srcStep[0] * row + roi->left / 2;
    const BYTE* auxV = auxU + nTotalWidth / 2;
    BYTE* outU = pDst[1] + 1ULL * dstStep[1] * row + roi->left;
    BYTE* outV = pDst[2] + 1ULL * dstStep[2] * row + roi->left;

    for (size_t x = 0; x < halfWidth; x++)
    {
      const UINT32 oddCol = 2 * x + 1;
      outU[oddCol] = auxU[x];
      outV[oddCol] = auxV[x];
    }
  }

  /* B6 - B9 */
  for (size_t y = 0; y < halfHeight; y++)
  {
    const BYTE* auxUforU = pSrc[1] + srcStep[1] * (y + roi->top / 2) + roi->left / 4;
    const BYTE* auxUforV = auxUforU + nTotalWidth / 4;
    const BYTE* auxVforU = pSrc[2] + srcStep[2] * (y + roi->top / 2) + roi->left / 4;
    const BYTE* auxVforV = auxVforU + nTotalWidth / 4;
    BYTE* outU = pDst[1] + 1ULL * dstStep[1] * (2ULL * y + 1 + roi->top) + roi->left;
    BYTE* outV = pDst[2] + 1ULL * dstStep[2] * (2ULL * y + 1 + roi->top) + roi->left;

    for (size_t x = 0; x < quarterWidth; x++)
    {
      const size_t col = 4 * x;
      outU[col + 0] = auxUforU[x];
      outV[col + 0] = auxUforV[x];
      outU[col + 2] = auxVforU[x];
      outV[col + 2] = auxVforV[x];
    }
  }

  return general_ChromaFilter(pDst, dstStep, roi);
}
267
268
static pstatus_t general_YUV420CombineToYUV444(avc444_frame_type type,
269
                                               const BYTE* const WINPR_RESTRICT pSrc[3],
270
                                               const UINT32 srcStep[3], UINT32 nWidth,
271
                                               UINT32 nHeight, BYTE* WINPR_RESTRICT pDst[3],
272
                                               const UINT32 dstStep[3],
273
                                               const RECTANGLE_16* WINPR_RESTRICT roi)
274
0
{
275
0
  if (!pSrc || !pSrc[0] || !pSrc[1] || !pSrc[2])
276
0
    return -1;
277
278
0
  if (!pDst || !pDst[0] || !pDst[1] || !pDst[2])
279
0
    return -1;
280
281
0
  if (!roi)
282
0
    return -1;
283
284
0
  switch (type)
285
0
  {
286
0
    case AVC444_LUMA:
287
0
      return general_LumaToYUV444(pSrc, srcStep, pDst, dstStep, roi);
288
289
0
    case AVC444_CHROMAv1:
290
0
      return general_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
291
292
0
    case AVC444_CHROMAv2:
293
0
      return general_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi);
294
295
0
    default:
296
0
      return -1;
297
0
  }
298
0
}
299
300
static pstatus_t
301
general_YUV444SplitToYUV420(const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3],
302
                            BYTE* WINPR_RESTRICT pMainDst[3], const UINT32 dstMainStep[3],
303
                            BYTE* WINPR_RESTRICT pAuxDst[3], const UINT32 dstAuxStep[3],
304
                            const prim_size_t* WINPR_RESTRICT roi)
305
0
{
306
0
  UINT32 uY = 0;
307
0
  UINT32 vY = 0;
308
0
  UINT32 halfWidth = 0;
309
0
  UINT32 halfHeight = 0;
310
  /* The auxilary frame is aligned to multiples of 16x16.
311
   * We need the padded height for B4 and B5 conversion. */
312
0
  const UINT32 padHeigth = roi->height + 16 - roi->height % 16;
313
0
  halfWidth = (roi->width + 1) / 2;
314
0
  halfHeight = (roi->height + 1) / 2;
315
316
  /* B1 */
317
0
  for (size_t y = 0; y < roi->height; y++)
318
0
  {
319
0
    const BYTE* pSrcY = pSrc[0] + y * srcStep[0];
320
0
    BYTE* pY = pMainDst[0] + y * dstMainStep[0];
321
0
    memcpy(pY, pSrcY, roi->width);
322
0
  }
323
324
  /* B2 and B3 */
325
0
  for (size_t y = 0; y < halfHeight; y++)
326
0
  {
327
0
    const BYTE* pSrcU = pSrc[1] + 2ULL * y * srcStep[1];
328
0
    const BYTE* pSrcV = pSrc[2] + 2ULL * y * srcStep[2];
329
0
    const BYTE* pSrcU1 = pSrc[1] + (2ULL * y + 1ULL) * srcStep[1];
330
0
    const BYTE* pSrcV1 = pSrc[2] + (2ULL * y + 1ULL) * srcStep[2];
331
0
    BYTE* pU = pMainDst[1] + y * dstMainStep[1];
332
0
    BYTE* pV = pMainDst[2] + y * dstMainStep[2];
333
334
0
    for (size_t x = 0; x < halfWidth; x++)
335
0
    {
336
      /* Filter */
337
0
      const INT32 u = pSrcU[2 * x] + pSrcU[2 * x + 1] + pSrcU1[2 * x] + pSrcU1[2 * x + 1];
338
0
      const INT32 v = pSrcV[2 * x] + pSrcV[2 * x + 1] + pSrcV1[2 * x] + pSrcV1[2 * x + 1];
339
0
      pU[x] = CLIP(u / 4L);
340
0
      pV[x] = CLIP(v / 4L);
341
0
    }
342
0
  }
343
344
  /* B4 and B5 */
345
0
  for (size_t y = 0; y < padHeigth; y++)
346
0
  {
347
0
    BYTE* pY = pAuxDst[0] + y * dstAuxStep[0];
348
349
0
    if (y % 16 < 8)
350
0
    {
351
0
      const size_t pos = (2 * uY++ + 1);
352
0
      const BYTE* pSrcU = pSrc[1] + pos * srcStep[1];
353
354
0
      if (pos >= roi->height)
355
0
        continue;
356
357
0
      memcpy(pY, pSrcU, roi->width);
358
0
    }
359
0
    else
360
0
    {
361
0
      const size_t pos = (2 * vY++ + 1);
362
0
      const BYTE* pSrcV = pSrc[2] + pos * srcStep[2];
363
364
0
      if (pos >= roi->height)
365
0
        continue;
366
367
0
      memcpy(pY, pSrcV, roi->width);
368
0
    }
369
0
  }
370
371
  /* B6 and B7 */
372
0
  for (size_t y = 0; y < halfHeight; y++)
373
0
  {
374
0
    const BYTE* pSrcU = pSrc[1] + 2 * y * srcStep[1];
375
0
    const BYTE* pSrcV = pSrc[2] + 2 * y * srcStep[2];
376
0
    BYTE* pU = pAuxDst[1] + y * dstAuxStep[1];
377
0
    BYTE* pV = pAuxDst[2] + y * dstAuxStep[2];
378
379
0
    for (size_t x = 0; x < halfWidth; x++)
380
0
    {
381
0
      pU[x] = pSrcU[2 * x + 1];
382
0
      pV[x] = pSrcV[2 * x + 1];
383
0
    }
384
0
  }
385
386
0
  return PRIMITIVES_SUCCESS;
387
0
}
388
389
/**
 * Convert full resolution planar Y/U/V to packed pixels of an arbitrary destination format.
 *
 * Each pixel is converted with YUV2R / YUV2G / YUV2B and stored through the write function
 * returned by getPixelWriteFunction for DstFormat; the alpha argument is always 0.
 *
 * @param pSrc      source planes (Y, U, V)
 * @param srcStep   line stride in bytes of each source plane
 * @param pDst      destination buffer
 * @param dstStep   line stride in bytes of the destination
 * @param DstFormat destination pixel format
 * @param roi       size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static pstatus_t general_YUV444ToRGB_8u_P3AC4R_general(const BYTE* const WINPR_RESTRICT pSrc[3],
                                                       const UINT32 srcStep[3],
                                                       BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
                                                       UINT32 DstFormat,
                                                       const prim_size_t* WINPR_RESTRICT roi)
{
  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);

  WINPR_ASSERT(pSrc);
  WINPR_ASSERT(pDst);
  WINPR_ASSERT(roi);

  const UINT32 nWidth = roi->width;
  const UINT32 nHeight = roi->height;

  for (size_t row = 0; row < nHeight; row++)
  {
    const BYTE* lineY = pSrc[0] + row * srcStep[0];
    const BYTE* lineU = pSrc[1] + row * srcStep[1];
    const BYTE* lineV = pSrc[2] + row * srcStep[2];
    const BYTE* lineEnd = lineY + nWidth;
    BYTE* out = pDst + row * dstStep;

    while (lineY < lineEnd)
    {
      const BYTE Y = *lineY++;
      const BYTE U = *lineU++;
      const BYTE V = *lineV++;
      const BYTE r = YUV2R(Y, U, V);
      const BYTE g = YUV2G(Y, U, V);
      const BYTE b = YUV2B(Y, U, V);
      /* The write function hands back the position of the next pixel. */
      out = writePixel(out, formatSize, DstFormat, r, g, b, 0);
    }
  }

  return PRIMITIVES_SUCCESS;
}
426
427
/**
 * Convert full resolution planar Y/U/V to packed pixels, storing them with writePixelBGRX.
 *
 * Same conversion as the generic variant, but the store routine is called directly instead
 * of through a function pointer; the alpha argument is always 0.
 *
 * @param pSrc      source planes (Y, U, V)
 * @param srcStep   line stride in bytes of each source plane
 * @param pDst      destination buffer
 * @param dstStep   line stride in bytes of the destination
 * @param DstFormat destination pixel format, forwarded to writePixelBGRX
 * @param roi       size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static pstatus_t general_YUV444ToRGB_8u_P3AC4R_BGRX(const BYTE* const WINPR_RESTRICT pSrc[3],
                                                    const UINT32 srcStep[3],
                                                    BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
                                                    UINT32 DstFormat,
                                                    const prim_size_t* WINPR_RESTRICT roi)
{
  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);

  WINPR_ASSERT(pSrc);
  WINPR_ASSERT(pDst);
  WINPR_ASSERT(roi);

  const UINT32 nWidth = roi->width;
  const UINT32 nHeight = roi->height;

  for (size_t row = 0; row < nHeight; row++)
  {
    const BYTE* lineY = pSrc[0] + row * srcStep[0];
    const BYTE* lineU = pSrc[1] + row * srcStep[1];
    const BYTE* lineV = pSrc[2] + row * srcStep[2];
    const BYTE* lineEnd = lineY + nWidth;
    BYTE* out = pDst + row * dstStep;

    while (lineY < lineEnd)
    {
      const BYTE Y = *lineY++;
      const BYTE U = *lineU++;
      const BYTE V = *lineV++;
      const BYTE r = YUV2R(Y, U, V);
      const BYTE g = YUV2G(Y, U, V);
      const BYTE b = YUV2B(Y, U, V);
      /* The write function hands back the position of the next pixel. */
      out = writePixelBGRX(out, formatSize, DstFormat, r, g, b, 0);
    }
  }

  return PRIMITIVES_SUCCESS;
}
463
464
/**
 * Convert full resolution planar Y/U/V to packed pixels.
 *
 * PIXEL_FORMAT_BGRA32 and PIXEL_FORMAT_BGRX32 use the dedicated BGRX routine, every other
 * destination format uses the generic routine.
 *
 * @param pSrc      source planes (Y, U, V)
 * @param srcStep   line stride in bytes of each source plane
 * @param pDst      destination buffer
 * @param dstStep   line stride in bytes of the destination
 * @param DstFormat destination pixel format
 * @param roi       size of the area to convert
 *
 * @return the result of the selected routine
 */
static pstatus_t general_YUV444ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
                                               const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
                                               UINT32 dstStep, UINT32 DstFormat,
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  const int useBGRX = (DstFormat == PIXEL_FORMAT_BGRA32) || (DstFormat == PIXEL_FORMAT_BGRX32);

  if (useBGRX)
    return general_YUV444ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);

  return general_YUV444ToRGB_8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
}
480
/**
481
 * | R |   ( | 256     0    403 | |    Y    | )
482
 * | G | = ( | 256   -48   -120 | | U - 128 | ) >> 8
483
 * | B |   ( | 256   475      0 | | V - 128 | )
484
 */
485
static pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
486
                                               const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
487
                                               UINT32 dstStep, UINT32 DstFormat,
488
                                               const prim_size_t* WINPR_RESTRICT roi)
489
0
{
490
0
  UINT32 dstPad = 0;
491
0
  UINT32 srcPad[3];
492
0
  BYTE Y = 0;
493
0
  BYTE U = 0;
494
0
  BYTE V = 0;
495
0
  UINT32 halfWidth = 0;
496
0
  UINT32 halfHeight = 0;
497
0
  const BYTE* pY = NULL;
498
0
  const BYTE* pU = NULL;
499
0
  const BYTE* pV = NULL;
500
0
  BYTE* pRGB = pDst;
501
0
  UINT32 nWidth = 0;
502
0
  UINT32 nHeight = 0;
503
0
  UINT32 lastRow = 0;
504
0
  UINT32 lastCol = 0;
505
0
  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
506
0
  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
507
0
  pY = pSrc[0];
508
0
  pU = pSrc[1];
509
0
  pV = pSrc[2];
510
0
  lastCol = roi->width & 0x01;
511
0
  lastRow = roi->height & 0x01;
512
0
  nWidth = (roi->width + 1) & ~0x0001;
513
0
  nHeight = (roi->height + 1) & ~0x0001;
514
0
  halfWidth = nWidth / 2;
515
0
  halfHeight = nHeight / 2;
516
0
  srcPad[0] = (srcStep[0] - nWidth);
517
0
  srcPad[1] = (srcStep[1] - halfWidth);
518
0
  srcPad[2] = (srcStep[2] - halfWidth);
519
0
  dstPad = (dstStep - (nWidth * 4));
520
521
0
  for (UINT32 y = 0; y < halfHeight;)
522
0
  {
523
0
    if (++y == halfHeight)
524
0
      lastRow <<= 1;
525
526
0
    for (UINT32 x = 0; x < halfWidth;)
527
0
    {
528
0
      BYTE r = 0;
529
0
      BYTE g = 0;
530
0
      BYTE b = 0;
531
532
0
      if (++x == halfWidth)
533
0
        lastCol <<= 1;
534
535
0
      U = *pU++;
536
0
      V = *pV++;
537
      /* 1st pixel */
538
0
      Y = *pY++;
539
0
      r = YUV2R(Y, U, V);
540
0
      g = YUV2G(Y, U, V);
541
0
      b = YUV2B(Y, U, V);
542
0
      pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
543
544
      /* 2nd pixel */
545
0
      if (!(lastCol & 0x02))
546
0
      {
547
0
        Y = *pY++;
548
0
        r = YUV2R(Y, U, V);
549
0
        g = YUV2G(Y, U, V);
550
0
        b = YUV2B(Y, U, V);
551
0
        pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
552
0
      }
553
0
      else
554
0
      {
555
0
        pY++;
556
0
        pRGB += formatSize;
557
0
        lastCol >>= 1;
558
0
      }
559
0
    }
560
561
0
    pY += srcPad[0];
562
0
    pU -= halfWidth;
563
0
    pV -= halfWidth;
564
0
    pRGB += dstPad;
565
566
0
    if (lastRow & 0x02)
567
0
      break;
568
569
0
    for (UINT32 x = 0; x < halfWidth;)
570
0
    {
571
0
      BYTE r = 0;
572
0
      BYTE g = 0;
573
0
      BYTE b = 0;
574
575
0
      if (++x == halfWidth)
576
0
        lastCol <<= 1;
577
578
0
      U = *pU++;
579
0
      V = *pV++;
580
      /* 3rd pixel */
581
0
      Y = *pY++;
582
0
      r = YUV2R(Y, U, V);
583
0
      g = YUV2G(Y, U, V);
584
0
      b = YUV2B(Y, U, V);
585
0
      pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
586
587
      /* 4th pixel */
588
0
      if (!(lastCol & 0x02))
589
0
      {
590
0
        Y = *pY++;
591
0
        r = YUV2R(Y, U, V);
592
0
        g = YUV2G(Y, U, V);
593
0
        b = YUV2B(Y, U, V);
594
0
        pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
595
0
      }
596
0
      else
597
0
      {
598
0
        pY++;
599
0
        pRGB += formatSize;
600
0
        lastCol >>= 1;
601
0
      }
602
0
    }
603
604
0
    pY += srcPad[0];
605
0
    pU += srcPad[1];
606
0
    pV += srcPad[2];
607
0
    pRGB += dstPad;
608
0
  }
609
610
0
  return PRIMITIVES_SUCCESS;
611
0
}
612
613
/**
614
 * | Y |    ( |  54   183     18 | | R | )        |  0  |
615
 * | U | =  ( | -29   -99    128 | | G | ) >> 8 + | 128 |
616
 * | V |    ( | 128  -116    -12 | | B | )        | 128 |
617
 */
618
static INLINE BYTE RGB2Y(BYTE R, BYTE G, BYTE B)
619
0
{
620
0
  return (54 * R + 183 * G + 18 * B) >> 8;
621
0
}
622
623
/** U chroma of an RGB triple: weighted sum scaled back by 256, then offset by 128. */
static INLINE BYTE RGB2U(BYTE R, BYTE G, BYTE B)
{
  const int weighted = -29 * R - 99 * G + 128 * B;
  /* The sum may be negative; the shift is applied before the +128 offset. */
  return (weighted >> 8) + 128;
}
627
628
/** V chroma of an RGB triple: weighted sum scaled back by 256, then offset by 128. */
static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B)
{
  const INT32 weighted = 128 * R - 116 * G - 12 * B;
  /* The sum may be negative; the shift is applied before the +128 offset. */
  return (weighted >> 8) + 128;
}
632
633
/**
 * Convert packed pixels of any source format to full resolution planar Y/U/V.
 *
 * Every pixel is read with FreeRDPReadColor, split into R, G and B with FreeRDPSplitColor
 * and converted with RGB2Y / RGB2U / RGB2V.
 *
 * @param pSrc      source pixel buffer
 * @param SrcFormat source pixel format
 * @param srcStep   line stride in bytes of the source
 * @param pDst      destination planes (Y, U, V)
 * @param dstStep   line stride in bytes of each destination plane
 * @param roi       size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static pstatus_t general_RGBToYUV444_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat,
                                               const UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
                                               UINT32 dstStep[3],
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  const UINT32 bpp = FreeRDPGetBytesPerPixel(SrcFormat);
  const UINT32 nWidth = roi->width;
  const UINT32 nHeight = roi->height;

  for (size_t row = 0; row < nHeight; row++)
  {
    const BYTE* pixel = pSrc + row * srcStep;
    BYTE* outY = pDst[0] + row * dstStep[0];
    BYTE* outU = pDst[1] + row * dstStep[1];
    BYTE* outV = pDst[2] + row * dstStep[2];

    for (size_t col = 0; col < nWidth; col++)
    {
      BYTE R = 0;
      BYTE G = 0;
      BYTE B = 0;
      const UINT32 color = FreeRDPReadColor(pixel, SrcFormat);

      FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
      outY[col] = RGB2Y(R, G, B);
      outU[col] = RGB2U(R, G, B);
      outV[col] = RGB2V(R, G, B);
      pixel += bpp;
    }
  }

  return PRIMITIVES_SUCCESS;
}
666
667
/**
 * Convert packed 4 byte pixels stored as B, G, R, X to planar YUV with half resolution
 * chroma.
 *
 * The image is walked in 2x2 cells. Every existing pixel of a cell gets its own luma
 * value; the channel sums of the cell are divided by 4 (also for partial cells at the
 * right/bottom edge of odd sized images) and produce one U and one V sample.
 *
 * @param pSrc    source pixel buffer
 * @param srcStep line stride in bytes of the source
 * @param pDst    destination planes (Y, U, V)
 * @param dstStep line stride in bytes of each destination plane
 * @param roi     size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t general_RGBToYUV420_BGRX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                 BYTE* WINPR_RESTRICT pDst[3],
                                                 const UINT32 dstStep[3],
                                                 const prim_size_t* WINPR_RESTRICT roi)
{
  /* Byte offsets of the cell's other pixels relative to its top-left pixel. */
  const size_t srcRight = 4;
  const size_t srcBelow = srcStep;
  const size_t srcBelowRight = srcStep + 4;
  /* Offsets of the matching luma samples. */
  const size_t lumaRight = 1;
  const size_t lumaBelow = dstStep[0];
  const size_t lumaBelowRight = dstStep[0] + 1;
  const UINT32 lastX = roi->width - 1;
  const UINT32 lastY = roi->height - 1;
  size_t chromaRow = 0;

  for (size_t y = 0; y < roi->height; y += 2)
  {
    const BYTE* cell = pSrc + y * srcStep;
    BYTE* luma = pDst[0] + y * dstStep[0];
    BYTE* outU = pDst[1] + chromaRow * dstStep[1];
    BYTE* outV = pDst[2] + chromaRow * dstStep[2];
    const int hasRowBelow = y < lastY;

    for (size_t x = 0; x < roi->width; x += 2)
    {
      const int hasColRight = x < lastX;
      /* row 1, pixel 1 */
      BYTE B = cell[0];
      BYTE G = cell[1];
      BYTE R = cell[2];
      INT32 sumB = B;
      INT32 sumG = G;
      INT32 sumR = R;

      luma[0] = RGB2Y(R, G, B);

      if (hasColRight)
      {
        /* row 1, pixel 2 */
        B = cell[srcRight + 0];
        G = cell[srcRight + 1];
        R = cell[srcRight + 2];
        sumB += B;
        sumG += G;
        sumR += R;
        luma[lumaRight] = RGB2Y(R, G, B);
      }

      if (hasRowBelow)
      {
        /* row 2, pixel 1 */
        B = cell[srcBelow + 0];
        G = cell[srcBelow + 1];
        R = cell[srcBelow + 2];
        sumB += B;
        sumG += G;
        sumR += R;
        luma[lumaBelow] = RGB2Y(R, G, B);

        if (hasColRight)
        {
          /* row 2, pixel 2 */
          B = cell[srcBelowRight + 0];
          G = cell[srcBelowRight + 1];
          R = cell[srcBelowRight + 2];
          sumB += B;
          sumG += G;
          sumR += R;
          luma[lumaBelowRight] = RGB2Y(R, G, B);
        }
      }

      sumB >>= 2;
      sumG >>= 2;
      sumR >>= 2;
      *outU++ = RGB2U(sumR, sumG, sumB);
      *outV++ = RGB2V(sumR, sumG, sumB);
      luma += 2;
      cell += 8;
    }

    chromaRow++;
  }

  return PRIMITIVES_SUCCESS;
}
744
745
/**
 * Convert packed 4 byte pixels stored as R, G, B, X to planar YUV with half resolution
 * chroma.
 *
 * The image is walked in 2x2 cells. Every existing pixel of a cell gets its own luma
 * value; the channel sums of the cell are divided by 4 (also for partial cells at the
 * right/bottom edge of odd sized images) and produce one U and one V sample.
 *
 * @param pSrc    source pixel buffer
 * @param srcStep line stride in bytes of the source
 * @param pDst    destination planes (Y, U, V)
 * @param dstStep line stride in bytes of each destination plane
 * @param roi     size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t general_RGBToYUV420_RGBX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                 BYTE* WINPR_RESTRICT pDst[3],
                                                 const UINT32 dstStep[3],
                                                 const prim_size_t* WINPR_RESTRICT roi)
{
  /* Byte offsets of the cell's other pixels relative to its top-left pixel. */
  const size_t srcRight = 4;
  const size_t srcBelow = srcStep;
  const size_t srcBelowRight = srcStep + 4;
  /* Offsets of the matching luma samples. */
  const size_t lumaRight = 1;
  const size_t lumaBelow = dstStep[0];
  const size_t lumaBelowRight = dstStep[0] + 1;
  const UINT32 lastX = roi->width - 1;
  const UINT32 lastY = roi->height - 1;
  size_t chromaRow = 0;

  for (size_t y = 0; y < roi->height; y += 2)
  {
    const BYTE* cell = pSrc + y * srcStep;
    BYTE* luma = pDst[0] + y * dstStep[0];
    BYTE* outU = pDst[1] + chromaRow * dstStep[1];
    BYTE* outV = pDst[2] + chromaRow * dstStep[2];
    const int hasRowBelow = y < lastY;

    for (UINT32 x = 0; x < roi->width; x += 2)
    {
      const int hasColRight = x < lastX;
      /* row 1, pixel 1 */
      BYTE R = cell[0];
      BYTE G = cell[1];
      BYTE B = cell[2];
      INT32 sumR = R;
      INT32 sumG = G;
      INT32 sumB = B;

      luma[0] = RGB2Y(R, G, B);

      if (hasColRight)
      {
        /* row 1, pixel 2 */
        R = cell[srcRight + 0];
        G = cell[srcRight + 1];
        B = cell[srcRight + 2];
        sumR += R;
        sumG += G;
        sumB += B;
        luma[lumaRight] = RGB2Y(R, G, B);
      }

      if (hasRowBelow)
      {
        /* row 2, pixel 1 */
        R = cell[srcBelow + 0];
        G = cell[srcBelow + 1];
        B = cell[srcBelow + 2];
        sumR += R;
        sumG += G;
        sumB += B;
        luma[lumaBelow] = RGB2Y(R, G, B);

        if (hasColRight)
        {
          /* row 2, pixel 2 */
          R = cell[srcBelowRight + 0];
          G = cell[srcBelowRight + 1];
          B = cell[srcBelowRight + 2];
          sumR += R;
          sumG += G;
          sumB += B;
          luma[lumaBelowRight] = RGB2Y(R, G, B);
        }
      }

      sumB >>= 2;
      sumG >>= 2;
      sumR >>= 2;
      *outU++ = RGB2U(sumR, sumG, sumB);
      *outV++ = RGB2V(sumR, sumG, sumB);
      luma += 2;
      cell += 8;
    }

    chromaRow++;
  }

  return PRIMITIVES_SUCCESS;
}
821
822
/**
 * Convert packed pixels of any source format to planar YUV with half resolution chroma.
 *
 * The image is walked in 2x2 cells. Pixels are read with FreeRDPReadColor and split with
 * FreeRDPSplitColor. Every existing pixel of a cell gets its own luma value; the channel
 * sums of the cell are divided by 4 (also for partial cells at the right/bottom edge of
 * odd sized images) and produce one U and one V sample.
 *
 * @param pSrc      source pixel buffer
 * @param srcFormat source pixel format
 * @param srcStep   line stride in bytes of the source
 * @param pDst      destination planes (Y, U, V)
 * @param dstStep   line stride in bytes of each destination plane
 * @param roi       size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t general_RGBToYUV420_ANY(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                                UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
                                                const UINT32 dstStep[3],
                                                const prim_size_t* WINPR_RESTRICT roi)
{
  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
  /* Byte offsets of the cell's other pixels relative to its top-left pixel. */
  const size_t srcRight = bpp;
  const size_t srcBelow = srcStep;
  const size_t srcBelowRight = srcStep + bpp;
  /* Offsets of the matching luma samples. */
  const size_t lumaRight = 1;
  const size_t lumaBelow = dstStep[0];
  const size_t lumaBelowRight = dstStep[0] + 1;
  const UINT32 lastX = roi->width - 1;
  const UINT32 lastY = roi->height - 1;
  size_t chromaRow = 0;

  for (size_t y = 0; y < roi->height; y += 2)
  {
    const BYTE* cell = pSrc + y * srcStep;
    BYTE* luma = pDst[0] + y * dstStep[0];
    BYTE* outU = pDst[1] + chromaRow * dstStep[1];
    BYTE* outV = pDst[2] + chromaRow * dstStep[2];
    const int hasRowBelow = y < lastY;

    for (size_t x = 0; x < roi->width; x += 2)
    {
      const int hasColRight = x < lastX;
      /* R, G and B are shared by all reads of the cell. */
      BYTE R = 0;
      BYTE G = 0;
      BYTE B = 0;
      INT32 sumR = 0;
      INT32 sumG = 0;
      INT32 sumB = 0;
      UINT32 color = 0;

      /* row 1, pixel 1 */
      color = FreeRDPReadColor(cell, srcFormat);
      FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
      sumR = R;
      sumG = G;
      sumB = B;
      luma[0] = RGB2Y(R, G, B);

      if (hasColRight)
      {
        /* row 1, pixel 2 */
        color = FreeRDPReadColor(cell + srcRight, srcFormat);
        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
        sumR += R;
        sumG += G;
        sumB += B;
        luma[lumaRight] = RGB2Y(R, G, B);
      }

      if (hasRowBelow)
      {
        /* row 2, pixel 1 */
        color = FreeRDPReadColor(cell + srcBelow, srcFormat);
        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
        sumR += R;
        sumG += G;
        sumB += B;
        luma[lumaBelow] = RGB2Y(R, G, B);

        if (hasColRight)
        {
          /* row 2, pixel 2 */
          color = FreeRDPReadColor(cell + srcBelowRight, srcFormat);
          FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
          sumR += R;
          sumG += G;
          sumB += B;
          luma[lumaBelowRight] = RGB2Y(R, G, B);
        }
      }

      sumR >>= 2;
      sumG >>= 2;
      sumB >>= 2;
      *outU++ = RGB2U(sumR, sumG, sumB);
      *outV++ = RGB2V(sumR, sumG, sumB);
      luma += 2;
      cell += 2ULL * bpp;
    }

    chromaRow++;
  }

  return PRIMITIVES_SUCCESS;
}
908
909
/**
 * Convert a packed RGB image to planar YUV 4:2:0, selecting the converter by pixel format.
 *
 * BGRA32/BGRX32 and RGBA32/RGBX32 sources are handed to their dedicated
 * converters, every other format goes through the format-agnostic one.
 * All arguments are forwarded unchanged.
 *
 * @param pSrc      source pixel data
 * @param srcFormat source pixel format (one of the PIXEL_FORMAT_* values)
 * @param srcStep   source step, forwarded to the selected converter
 * @param pDst      the three destination planes
 * @param dstStep   the steps of the three destination planes
 * @param roi       size of the area to convert
 *
 * @return the status returned by the selected converter
 */
static pstatus_t general_RGBToYUV420_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                               UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
                                               const UINT32 dstStep[3],
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  if ((srcFormat == PIXEL_FORMAT_BGRA32) || (srcFormat == PIXEL_FORMAT_BGRX32))
    return general_RGBToYUV420_BGRX(pSrc, srcStep, pDst, dstStep, roi);

  if ((srcFormat == PIXEL_FORMAT_RGBA32) || (srcFormat == PIXEL_FORMAT_RGBX32))
    return general_RGBToYUV420_RGBX(pSrc, srcStep, pDst, dstStep, roi);

  return general_RGBToYUV420_ANY(pSrc, srcFormat, srcStep, pDst, dstStep, roi);
}
928
929
static INLINE void general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(
930
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd,
931
    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
932
    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
933
    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
934
0
{
935
0
  for (UINT32 x = 0; x < width; x += 2)
936
0
  {
937
0
    const BOOL lastX = (x + 1) >= width;
938
0
    BYTE Y1e = 0;
939
0
    BYTE Y2e = 0;
940
0
    BYTE U1e = 0;
941
0
    BYTE V1e = 0;
942
0
    BYTE U2e = 0;
943
0
    BYTE V2e = 0;
944
0
    BYTE Y1o = 0;
945
0
    BYTE Y2o = 0;
946
0
    BYTE U1o = 0;
947
0
    BYTE V1o = 0;
948
0
    BYTE U2o = 0;
949
0
    BYTE V2o = 0;
950
    /* Read 4 pixels, 2 from even, 2 from odd lines */
951
0
    {
952
0
      const BYTE b = *srcEven++;
953
0
      const BYTE g = *srcEven++;
954
0
      const BYTE r = *srcEven++;
955
0
      srcEven++;
956
0
      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
957
0
      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
958
0
      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
959
0
    }
960
961
0
    if (!lastX)
962
0
    {
963
0
      const BYTE b = *srcEven++;
964
0
      const BYTE g = *srcEven++;
965
0
      const BYTE r = *srcEven++;
966
0
      srcEven++;
967
0
      Y2e = RGB2Y(r, g, b);
968
0
      U2e = RGB2U(r, g, b);
969
0
      V2e = RGB2V(r, g, b);
970
0
    }
971
972
0
    if (b1Odd)
973
0
    {
974
0
      const BYTE b = *srcOdd++;
975
0
      const BYTE g = *srcOdd++;
976
0
      const BYTE r = *srcOdd++;
977
0
      srcOdd++;
978
0
      Y1o = Y2o = RGB2Y(r, g, b);
979
0
      U1o = U2o = RGB2U(r, g, b);
980
0
      V1o = V2o = RGB2V(r, g, b);
981
0
    }
982
983
0
    if (b1Odd && !lastX)
984
0
    {
985
0
      const BYTE b = *srcOdd++;
986
0
      const BYTE g = *srcOdd++;
987
0
      const BYTE r = *srcOdd++;
988
0
      srcOdd++;
989
0
      Y2o = RGB2Y(r, g, b);
990
0
      U2o = RGB2U(r, g, b);
991
0
      V2o = RGB2V(r, g, b);
992
0
    }
993
994
    /* We have 4 Y pixels, so store them. */
995
0
    *b1Even++ = Y1e;
996
0
    *b1Even++ = Y2e;
997
998
0
    if (b1Odd)
999
0
    {
1000
0
      *b1Odd++ = Y1o;
1001
0
      *b1Odd++ = Y2o;
1002
0
    }
1003
1004
    /* 2x 2y pixel in luma UV plane use averaging
1005
     */
1006
0
    {
1007
0
      const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4;
1008
0
      const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4;
1009
0
      *b2++ = Uavg;
1010
0
      *b3++ = Vavg;
1011
0
    }
1012
1013
    /* UV from 2x, 2y+1 */
1014
0
    if (b1Odd)
1015
0
    {
1016
0
      *b4++ = U1o;
1017
0
      *b5++ = V1o;
1018
1019
0
      if (!lastX)
1020
0
      {
1021
0
        *b4++ = U2o;
1022
0
        *b5++ = V2o;
1023
0
      }
1024
0
    }
1025
1026
    /* UV from 2x+1, 2y */
1027
0
    if (!lastX)
1028
0
    {
1029
0
      *b6++ = U2e;
1030
0
      *b7++ = V2e;
1031
0
    }
1032
0
  }
1033
0
}
1034
1035
/**
 * Convert a 4-byte B,G,R,X image into the two YUV420 frames used by AVC444 (v1).
 *
 * The image is walked two rows at a time; each pair of rows is converted by
 * general_RGBToAVC444YUV_BGRX_DOUBLE_ROW. The destination layout and the
 * buffer alignment requirements are described in general_RGBToAVC444YUV_ANY.
 *
 * @param pSrc     source pixel data
 * @param srcStep  distance in bytes between two source rows
 * @param pDst1    planes of the main view (Y, U, V)
 * @param dst1Step steps of the main view planes
 * @param pDst2    planes of the auxiliary view
 * @param dst2Step steps of the auxiliary view planes
 * @param roi      size of the area to convert; an empty area converts nothing
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t general_RGBToAVC444YUV_BGRX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                    BYTE* WINPR_RESTRICT pDst1[3],
                                                    const UINT32 dst1Step[3],
                                                    BYTE* WINPR_RESTRICT pDst2[3],
                                                    const UINT32 dst2Step[3],
                                                    const prim_size_t* WINPR_RESTRICT roi)
{
  const size_t height = roi->height;

  for (size_t y = 0; y < height; y += 2)
  {
    /* The final row of an odd-height image has no odd partner row. */
    const BOOL last = (y + 1) >= height;
    const BYTE* srcEven = pSrc + y * srcStep;
    /* Not read for the last row (b1Odd is NULL then); just keep it inside the image. */
    const BYTE* srcOdd = last ? srcEven : srcEven + srcStep;
    const size_t halfRow = y / 2;
    /* Odd-row chroma is stored in blocks of 8 U lines followed by 8 V lines:
     * line = (halfRow / 8) * 16 + (halfRow % 8) */
    const size_t auxRow = (halfRow & ~(size_t)7) + halfRow;
    BYTE* b1Even = pDst1[0] + y * dst1Step[0];
    BYTE* b1Odd = last ? NULL : (b1Even + dst1Step[0]);
    BYTE* b2 = pDst1[1] + halfRow * dst1Step[1];
    BYTE* b3 = pDst1[2] + halfRow * dst1Step[2];
    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * auxRow;
    BYTE* b5 = b4 + 8ULL * dst2Step[0];
    BYTE* b6 = pDst2[1] + halfRow * dst2Step[1];
    BYTE* b7 = pDst2[2] + halfRow * dst2Step[2];

    general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6,
                                           b7, roi->width);
  }

  return PRIMITIVES_SUCCESS;
}
1069
1070
static INLINE void general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(
1071
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd,
1072
    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
1073
    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
1074
    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
1075
0
{
1076
0
  for (UINT32 x = 0; x < width; x += 2)
1077
0
  {
1078
0
    const BOOL lastX = (x + 1) >= width;
1079
0
    BYTE Y1e = 0;
1080
0
    BYTE Y2e = 0;
1081
0
    BYTE U1e = 0;
1082
0
    BYTE V1e = 0;
1083
0
    BYTE U2e = 0;
1084
0
    BYTE V2e = 0;
1085
0
    BYTE Y1o = 0;
1086
0
    BYTE Y2o = 0;
1087
0
    BYTE U1o = 0;
1088
0
    BYTE V1o = 0;
1089
0
    BYTE U2o = 0;
1090
0
    BYTE V2o = 0;
1091
    /* Read 4 pixels, 2 from even, 2 from odd lines */
1092
0
    {
1093
0
      const BYTE r = *srcEven++;
1094
0
      const BYTE g = *srcEven++;
1095
0
      const BYTE b = *srcEven++;
1096
0
      srcEven++;
1097
0
      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
1098
0
      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
1099
0
      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
1100
0
    }
1101
1102
0
    if (!lastX)
1103
0
    {
1104
0
      const BYTE r = *srcEven++;
1105
0
      const BYTE g = *srcEven++;
1106
0
      const BYTE b = *srcEven++;
1107
0
      srcEven++;
1108
0
      Y2e = RGB2Y(r, g, b);
1109
0
      U2e = RGB2U(r, g, b);
1110
0
      V2e = RGB2V(r, g, b);
1111
0
    }
1112
1113
0
    if (b1Odd)
1114
0
    {
1115
0
      const BYTE r = *srcOdd++;
1116
0
      const BYTE g = *srcOdd++;
1117
0
      const BYTE b = *srcOdd++;
1118
0
      srcOdd++;
1119
0
      Y1o = Y2o = RGB2Y(r, g, b);
1120
0
      U1o = U2o = RGB2U(r, g, b);
1121
0
      V1o = V2o = RGB2V(r, g, b);
1122
0
    }
1123
1124
0
    if (b1Odd && !lastX)
1125
0
    {
1126
0
      const BYTE r = *srcOdd++;
1127
0
      const BYTE g = *srcOdd++;
1128
0
      const BYTE b = *srcOdd++;
1129
0
      srcOdd++;
1130
0
      Y2o = RGB2Y(r, g, b);
1131
0
      U2o = RGB2U(r, g, b);
1132
0
      V2o = RGB2V(r, g, b);
1133
0
    }
1134
1135
    /* We have 4 Y pixels, so store them. */
1136
0
    *b1Even++ = Y1e;
1137
0
    *b1Even++ = Y2e;
1138
1139
0
    if (b1Odd)
1140
0
    {
1141
0
      *b1Odd++ = Y1o;
1142
0
      *b1Odd++ = Y2o;
1143
0
    }
1144
1145
    /* 2x 2y pixel in luma UV plane use averaging
1146
     */
1147
0
    {
1148
0
      const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4;
1149
0
      const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4;
1150
0
      *b2++ = Uavg;
1151
0
      *b3++ = Vavg;
1152
0
    }
1153
1154
    /* UV from 2x, 2y+1 */
1155
0
    if (b1Odd)
1156
0
    {
1157
0
      *b4++ = U1o;
1158
0
      *b5++ = V1o;
1159
1160
0
      if (!lastX)
1161
0
      {
1162
0
        *b4++ = U2o;
1163
0
        *b5++ = V2o;
1164
0
      }
1165
0
    }
1166
1167
    /* UV from 2x+1, 2y */
1168
0
    if (!lastX)
1169
0
    {
1170
0
      *b6++ = U2e;
1171
0
      *b7++ = V2e;
1172
0
    }
1173
0
  }
1174
0
}
1175
1176
/**
 * Convert a 4-byte R,G,B,X image into the two YUV420 frames used by AVC444 (v1).
 *
 * The image is walked two rows at a time; each pair of rows is converted by
 * general_RGBToAVC444YUV_RGBX_DOUBLE_ROW. The destination layout and the
 * buffer alignment requirements are described in general_RGBToAVC444YUV_ANY.
 *
 * @param pSrc     source pixel data
 * @param srcStep  distance in bytes between two source rows
 * @param pDst1    planes of the main view (Y, U, V)
 * @param dst1Step steps of the main view planes
 * @param pDst2    planes of the auxiliary view
 * @param dst2Step steps of the auxiliary view planes
 * @param roi      size of the area to convert; an empty area converts nothing
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t general_RGBToAVC444YUV_RGBX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                    BYTE* WINPR_RESTRICT pDst1[3],
                                                    const UINT32 dst1Step[3],
                                                    BYTE* WINPR_RESTRICT pDst2[3],
                                                    const UINT32 dst2Step[3],
                                                    const prim_size_t* WINPR_RESTRICT roi)
{
  const size_t height = roi->height;

  for (size_t y = 0; y < height; y += 2)
  {
    /* The final row of an odd-height image has no odd partner row. */
    const BOOL last = (y + 1) >= height;
    const BYTE* srcEven = pSrc + y * srcStep;
    /* Not read for the last row (b1Odd is NULL then); just keep it inside the image. */
    const BYTE* srcOdd = last ? srcEven : srcEven + srcStep;
    const size_t halfRow = y / 2;
    /* Odd-row chroma is stored in blocks of 8 U lines followed by 8 V lines:
     * line = (halfRow / 8) * 16 + (halfRow % 8) */
    const size_t auxRow = (halfRow & ~(size_t)7) + halfRow;
    BYTE* b1Even = pDst1[0] + y * dst1Step[0];
    BYTE* b1Odd = last ? NULL : (b1Even + dst1Step[0]);
    BYTE* b2 = pDst1[1] + halfRow * dst1Step[1];
    BYTE* b3 = pDst1[2] + halfRow * dst1Step[2];
    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * auxRow;
    BYTE* b5 = b4 + 8ULL * dst2Step[0];
    BYTE* b6 = pDst2[1] + halfRow * dst2Step[1];
    BYTE* b7 = pDst2[2] + halfRow * dst2Step[2];

    general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6,
                                           b7, roi->width);
  }

  return PRIMITIVES_SUCCESS;
}
1210
1211
static INLINE void general_RGBToAVC444YUV_ANY_DOUBLE_ROW(
1212
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat,
1213
    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
1214
    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
1215
    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
1216
0
{
1217
0
  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
1218
0
  for (UINT32 x = 0; x < width; x += 2)
1219
0
  {
1220
0
    const BOOL lastX = (x + 1) >= width;
1221
0
    BYTE Y1e = 0;
1222
0
    BYTE Y2e = 0;
1223
0
    BYTE U1e = 0;
1224
0
    BYTE V1e = 0;
1225
0
    BYTE U2e = 0;
1226
0
    BYTE V2e = 0;
1227
0
    BYTE Y1o = 0;
1228
0
    BYTE Y2o = 0;
1229
0
    BYTE U1o = 0;
1230
0
    BYTE V1o = 0;
1231
0
    BYTE U2o = 0;
1232
0
    BYTE V2o = 0;
1233
    /* Read 4 pixels, 2 from even, 2 from odd lines */
1234
0
    {
1235
0
      BYTE r = 0;
1236
0
      BYTE g = 0;
1237
0
      BYTE b = 0;
1238
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1239
0
      srcEven += bpp;
1240
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1241
0
      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
1242
0
      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
1243
0
      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
1244
0
    }
1245
1246
0
    if (!lastX)
1247
0
    {
1248
0
      BYTE r = 0;
1249
0
      BYTE g = 0;
1250
0
      BYTE b = 0;
1251
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1252
0
      srcEven += bpp;
1253
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1254
0
      Y2e = RGB2Y(r, g, b);
1255
0
      U2e = RGB2U(r, g, b);
1256
0
      V2e = RGB2V(r, g, b);
1257
0
    }
1258
1259
0
    if (b1Odd)
1260
0
    {
1261
0
      BYTE r = 0;
1262
0
      BYTE g = 0;
1263
0
      BYTE b = 0;
1264
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1265
0
      srcOdd += bpp;
1266
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1267
0
      Y1o = Y2o = RGB2Y(r, g, b);
1268
0
      U1o = U2o = RGB2U(r, g, b);
1269
0
      V1o = V2o = RGB2V(r, g, b);
1270
0
    }
1271
1272
0
    if (b1Odd && !lastX)
1273
0
    {
1274
0
      BYTE r = 0;
1275
0
      BYTE g = 0;
1276
0
      BYTE b = 0;
1277
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1278
0
      srcOdd += bpp;
1279
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1280
0
      Y2o = RGB2Y(r, g, b);
1281
0
      U2o = RGB2U(r, g, b);
1282
0
      V2o = RGB2V(r, g, b);
1283
0
    }
1284
1285
    /* We have 4 Y pixels, so store them. */
1286
0
    *b1Even++ = Y1e;
1287
0
    *b1Even++ = Y2e;
1288
1289
0
    if (b1Odd)
1290
0
    {
1291
0
      *b1Odd++ = Y1o;
1292
0
      *b1Odd++ = Y2o;
1293
0
    }
1294
1295
    /* 2x 2y pixel in luma UV plane use averaging
1296
     */
1297
0
    {
1298
0
      const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4;
1299
0
      const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4;
1300
0
      *b2++ = Uavg;
1301
0
      *b3++ = Vavg;
1302
0
    }
1303
1304
    /* UV from 2x, 2y+1 */
1305
0
    if (b1Odd)
1306
0
    {
1307
0
      *b4++ = U1o;
1308
0
      *b5++ = V1o;
1309
1310
0
      if (!lastX)
1311
0
      {
1312
0
        *b4++ = U2o;
1313
0
        *b5++ = V2o;
1314
0
      }
1315
0
    }
1316
1317
    /* UV from 2x+1, 2y */
1318
0
    if (!lastX)
1319
0
    {
1320
0
      *b6++ = U2e;
1321
0
      *b7++ = V2e;
1322
0
    }
1323
0
  }
1324
0
}
1325
1326
/**
 * Convert an image in an arbitrary pixel format into the two YUV420 frames used by AVC444 (v1).
 *
 * The image is walked two rows at a time; each pair of rows is converted by
 * general_RGBToAVC444YUV_ANY_DOUBLE_ROW.
 *
 * @param pSrc      source pixel data
 * @param srcFormat source pixel format
 * @param srcStep   distance in bytes between two source rows
 * @param pDst1     planes of the main view (Y, U, V)
 * @param dst1Step  steps of the main view planes
 * @param pDst2     planes of the auxiliary view
 * @param dst2Step  steps of the auxiliary view planes
 * @param roi       size of the area to convert; an empty area converts nothing
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t general_RGBToAVC444YUV_ANY(
    const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep,
    BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3],
    const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi)
{
  /**
   * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the
   * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned
   * to a multiple of 16.
   * Hence the passed destination YUV420/CHROMA420 buffers must have been
   * allocated accordingly !!
   */
  /**
   * [MS-RDPEGFX 3.3.8.3.2 YUV420p Stream Combination] defines the following "Bx areas":
   *
   * YUV420 frame (main view):
   * B1:  From Y444 all pixels
   * B2:  From U444 all pixels in even rows with even columns
   * B3:  From V444 all pixels in even rows with even columns
   *
   * Chroma420 frame (auxiliary view):
   * B45: From U444 and V444 all pixels from all odd rows
   *      (The odd U444 and V444 rows must be interleaved in 8-line blocks in B45 !!!)
   * B6:  From U444 all pixels in even rows with odd columns
   * B7:  From V444 all pixels in even rows with odd columns
   *
   * The description in MS-RDPEGFX translated to pseudo code looks like this:
   *
   * for (y = 0; y < fullHeight; y++)
   * {
   *     for (x = 0; x < fullWidth; x++)
   *     {
   *         B1[x,y] = Y444[x,y];
   *     }
   * }
   *
   * for (y = 0; y < halfHeight; y++)
   * {
   *     for (x = 0; x < halfWidth; x++)
   *     {
   *         B2[x,y] = U444[2 * x,     2 * y];
   *         B3[x,y] = V444[2 * x,     2 * y];
   *         B6[x,y] = U444[2 * x + 1, 2 * y];
   *         B7[x,y] = V444[2 * x + 1, 2 * y];
   *     }
   * }
   *
   * for (y = 0; y < halfHeight; y++)
   * {
   *     yU  = (y / 8) * 16;   // identify first row of correct 8-line U block in B45
   *     yU += (y % 8);        // add offset rows in destination block
   *     yV  = yU + 8;         // the corresponding v line is always 8 rows ahead
   *
   *     for (x = 0; x < fullWidth; x++)
   *     {
   *         B45[x,yU] = U444[x, 2 * y + 1];
   *         B45[x,yV] = V444[x, 2 * y + 1];
   *     }
   * }
   *
   * Note that the row helper called below stores the average of the 2x2 block
   * in B2/B3 instead of the single sample at [2 * x, 2 * y].
   */
  const size_t height = roi->height;

  for (size_t y = 0; y < height; y += 2)
  {
    /* The final row of an odd-height image has no odd partner row. */
    const BOOL last = (y + 1) >= height;
    const BYTE* srcEven = pSrc + y * srcStep;
    /* Not read for the last row (b1Odd is NULL then); just keep it inside the image. */
    const BYTE* srcOdd = last ? srcEven : srcEven + srcStep;
    const size_t halfRow = y / 2;
    /* yU from the pseudo code above: (halfRow / 8) * 16 + (halfRow % 8) */
    const size_t auxRow = (halfRow & ~(size_t)7) + halfRow;
    BYTE* b1Even = pDst1[0] + y * dst1Step[0];
    BYTE* b1Odd = last ? NULL : (b1Even + dst1Step[0]);
    BYTE* b2 = pDst1[1] + halfRow * dst1Step[1];
    BYTE* b3 = pDst1[2] + halfRow * dst1Step[2];
    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * auxRow;
    BYTE* b5 = b4 + 8ULL * dst2Step[0];
    BYTE* b6 = pDst2[1] + halfRow * dst2Step[1];
    BYTE* b7 = pDst2[2] + halfRow * dst2Step[2];

    general_RGBToAVC444YUV_ANY_DOUBLE_ROW(srcEven, srcOdd, srcFormat, b1Even, b1Odd, b2, b3, b4,
                                          b5, b6, b7, roi->width);
  }

  return PRIMITIVES_SUCCESS;
}
1411
1412
/**
 * Validate the arguments and convert an RGB image into the two YUV420 frames of AVC444 (v1).
 *
 * BGRA32/BGRX32 and RGBA32/RGBX32 sources use their dedicated converters,
 * every other format uses the format-agnostic converter.
 *
 * @param pSrc      source pixel data, must not be NULL
 * @param srcFormat source pixel format (one of the PIXEL_FORMAT_* values)
 * @param srcStep   source step, forwarded to the selected converter
 * @param pDst1     planes of the main view, neither the array nor any plane may be NULL
 * @param dst1Step  steps of the main view planes, none may be 0
 * @param pDst2     planes of the auxiliary view, neither the array nor any plane may be NULL
 * @param dst2Step  steps of the auxiliary view planes, none may be 0
 * @param roi       size of the area to convert, must not be NULL
 *
 * @return -1 if an argument is invalid, otherwise the status of the selected converter
 */
static INLINE pstatus_t general_RGBToAVC444YUV(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                               UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
                                               const UINT32 dst1Step[3],
                                               BYTE* WINPR_RESTRICT pDst2[3],
                                               const UINT32 dst2Step[3],
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  /* roi is dereferenced by every converter below, so reject NULL like the other pointers */
  if (!pSrc || !pDst1 || !dst1Step || !pDst2 || !dst2Step || !roi)
    return -1;

  if (!pDst1[0] || !pDst1[1] || !pDst1[2])
    return -1;

  if (!dst1Step[0] || !dst1Step[1] || !dst1Step[2])
    return -1;

  if (!pDst2[0] || !pDst2[1] || !pDst2[2])
    return -1;

  if (!dst2Step[0] || !dst2Step[1] || !dst2Step[2])
    return -1;

  switch (srcFormat)
  {
    case PIXEL_FORMAT_BGRA32:
    case PIXEL_FORMAT_BGRX32:
      return general_RGBToAVC444YUV_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
                                         roi);

    case PIXEL_FORMAT_RGBA32:
    case PIXEL_FORMAT_RGBX32:
      return general_RGBToAVC444YUV_RGBX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
                                         roi);

    default:
      break;
  }

  return general_RGBToAVC444YUV_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2, dst2Step,
                                    roi);
}
1453
1454
static INLINE void general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
1455
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat,
1456
    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
1457
    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
1458
    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
1459
    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
1460
    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
1461
    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
1462
0
{
1463
0
  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
1464
1465
0
  for (UINT32 x = 0; x < width; x += 2)
1466
0
  {
1467
0
    BYTE Ya = 0;
1468
0
    BYTE Ua = 0;
1469
0
    BYTE Va = 0;
1470
0
    BYTE Yb = 0;
1471
0
    BYTE Ub = 0;
1472
0
    BYTE Vb = 0;
1473
0
    BYTE Yc = 0;
1474
0
    BYTE Uc = 0;
1475
0
    BYTE Vc = 0;
1476
0
    BYTE Yd = 0;
1477
0
    BYTE Ud = 0;
1478
0
    BYTE Vd = 0;
1479
0
    {
1480
0
      BYTE b = 0;
1481
0
      BYTE g = 0;
1482
0
      BYTE r = 0;
1483
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1484
0
      srcEven += bpp;
1485
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1486
0
      Ya = RGB2Y(r, g, b);
1487
0
      Ua = RGB2U(r, g, b);
1488
0
      Va = RGB2V(r, g, b);
1489
0
    }
1490
1491
0
    if (x < width - 1)
1492
0
    {
1493
0
      BYTE b = 0;
1494
0
      BYTE g = 0;
1495
0
      BYTE r = 0;
1496
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1497
0
      srcEven += bpp;
1498
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1499
0
      Yb = RGB2Y(r, g, b);
1500
0
      Ub = RGB2U(r, g, b);
1501
0
      Vb = RGB2V(r, g, b);
1502
0
    }
1503
0
    else
1504
0
    {
1505
0
      Yb = Ya;
1506
0
      Ub = Ua;
1507
0
      Vb = Va;
1508
0
    }
1509
1510
0
    if (srcOdd)
1511
0
    {
1512
0
      BYTE b = 0;
1513
0
      BYTE g = 0;
1514
0
      BYTE r = 0;
1515
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1516
0
      srcOdd += bpp;
1517
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1518
0
      Yc = RGB2Y(r, g, b);
1519
0
      Uc = RGB2U(r, g, b);
1520
0
      Vc = RGB2V(r, g, b);
1521
0
    }
1522
0
    else
1523
0
    {
1524
0
      Yc = Ya;
1525
0
      Uc = Ua;
1526
0
      Vc = Va;
1527
0
    }
1528
1529
0
    if (srcOdd && (x < width - 1))
1530
0
    {
1531
0
      BYTE b = 0;
1532
0
      BYTE g = 0;
1533
0
      BYTE r = 0;
1534
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1535
0
      srcOdd += bpp;
1536
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1537
0
      Yd = RGB2Y(r, g, b);
1538
0
      Ud = RGB2U(r, g, b);
1539
0
      Vd = RGB2V(r, g, b);
1540
0
    }
1541
0
    else
1542
0
    {
1543
0
      Yd = Ya;
1544
0
      Ud = Ua;
1545
0
      Vd = Va;
1546
0
    }
1547
1548
    /* Y [b1] */
1549
0
    *yLumaDstEven++ = Ya;
1550
1551
0
    if (x < width - 1)
1552
0
      *yLumaDstEven++ = Yb;
1553
1554
0
    if (srcOdd)
1555
0
      *yLumaDstOdd++ = Yc;
1556
1557
0
    if (srcOdd && (x < width - 1))
1558
0
      *yLumaDstOdd++ = Yd;
1559
1560
    /* 2x 2y [b2,b3] */
1561
0
    *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
1562
0
    *vLumaDst++ = (Va + Vb + Vc + Vd) / 4;
1563
1564
    /* 2x+1, y [b4,b5] even */
1565
0
    if (x < width - 1)
1566
0
    {
1567
0
      *yEvenChromaDst1++ = Ub;
1568
0
      *yEvenChromaDst2++ = Vb;
1569
0
    }
1570
1571
0
    if (srcOdd)
1572
0
    {
1573
      /* 2x+1, y [b4,b5] odd */
1574
0
      if (x < width - 1)
1575
0
      {
1576
0
        *yOddChromaDst1++ = Ud;
1577
0
        *yOddChromaDst2++ = Vd;
1578
0
      }
1579
1580
      /* 4x 2y+1 [b6, b7] */
1581
0
      if (x % 4 == 0)
1582
0
      {
1583
0
        *uChromaDst1++ = Uc;
1584
0
        *uChromaDst2++ = Vc;
1585
0
      }
1586
      /* 4x+2 2y+1 [b8, b9] */
1587
0
      else
1588
0
      {
1589
0
        *vChromaDst1++ = Uc;
1590
0
        *vChromaDst2++ = Vc;
1591
0
      }
1592
0
    }
1593
0
  }
1594
0
}
1595
1596
/**
 * Convert an image in an arbitrary pixel format into the two YUV420 frames used by AVC444v2.
 *
 * The image is walked two rows at a time; each pair of rows is converted by
 * general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW.
 *
 * @param pSrc      source pixel data
 * @param srcFormat source pixel format
 * @param srcStep   distance in bytes between two source rows
 * @param pDst1     planes of the main view (Y, U, V)
 * @param dst1Step  steps of the main view planes
 * @param pDst2     planes of the auxiliary view
 * @param dst2Step  steps of the auxiliary view planes
 * @param roi       size of the area to convert
 *
 * @return !PRIMITIVES_SUCCESS if the area is empty, PRIMITIVES_SUCCESS otherwise
 */
static INLINE pstatus_t general_RGBToAVC444YUVv2_ANY(
    const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep,
    BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3],
    const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi)
{
  /**
   * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the
   * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned
   * to a multiple of 16.
   * Hence the passed destination YUV420/CHROMA420 buffers must have been
   * allocated accordingly !!
   */
  /**
   * [MS-RDPEGFX 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode] defines the following
   * "Bx areas":
   *
   * YUV420 frame (main view):
   * B1:  From Y444 all pixels
   * B2:  From U444 all pixels in even rows with even columns
   * B3:  From V444 all pixels in even rows with even columns
   *
   * Chroma420 frame (auxiliary view):
   * B45: From U444 and V444 all pixels from all odd columns
   * B67: From U444 and V444 every 4th pixel in odd rows
   * B89: From U444 and V444 every 4th pixel (initial offset of 2) in odd rows
   *
   * Each pair of chroma areas shares one plane of the auxiliary view: the first
   * area occupies the left half of the plane row, the second one the right half.
   *
   * for (y = 0; y < fullHeight; y++)
   * {
   *     for (x = 0; x < fullWidth; x++)
   *     {
   *         B1[x,y] = Y444[x,y];
   *     }
   *
   *     for (x = 0; x < halfWidth; x++)
   *     {
   *         B4[x,y] = U444[2 * x + 1, y];
   *         B5[x,y] = V444[2 * x + 1, y];
   *     }
   * }
   *
   * for (y = 0; y < halfHeight; y++)
   * {
   *     for (x = 0; x < halfWidth; x++)
   *     {
   *         B2[x,y] = U444[2 * x,     2 * y];
   *         B3[x,y] = V444[2 * x,     2 * y];
   *     }
   *
   *     for (x = 0; x < quarterWidth; x++)
   *     {
   *         B6[x,y] = U444[4 * x,     2 * y + 1];
   *         B7[x,y] = V444[4 * x,     2 * y + 1];
   *         B8[x,y] = U444[4 * x + 2, 2 * y + 1];
   *         B9[x,y] = V444[4 * x + 2, 2 * y + 1];
   *     }
   * }
   *
   * Note that the row helper called below stores the average of the 2x2 block
   * in B2/B3 instead of the single sample at [2 * x, 2 * y].
   */
  if (roi->height < 1 || roi->width < 1)
    return !PRIMITIVES_SUCCESS;

  for (size_t row = 0; row < roi->height; row += 2)
  {
    const size_t halfRow = row / 2;
    const BYTE* evenLine = pSrc + row * srcStep;
    /* The final row of an odd-height image has no odd partner row. */
    const BYTE* oddLine = NULL;

    /* B1 */
    BYTE* lumaYEven = pDst1[0] + row * dst1Step[0];
    BYTE* lumaYOdd = lumaYEven + dst1Step[0];
    /* B2, B3 */
    BYTE* lumaU = pDst1[1] + halfRow * dst1Step[1];
    BYTE* lumaV = pDst1[2] + halfRow * dst1Step[2];
    /* B4 in the left half, B5 in the right half of the auxiliary Y plane */
    BYTE* chromaYEvenLeft = pDst2[0] + row * dst2Step[0];
    BYTE* chromaYEvenRight = chromaYEvenLeft + roi->width / 2;
    BYTE* chromaYOddLeft = chromaYEvenLeft + dst2Step[0];
    BYTE* chromaYOddRight = chromaYEvenRight + dst2Step[0];
    /* B6/B7 share the auxiliary U plane, B8/B9 the auxiliary V plane */
    BYTE* chromaULeft = pDst2[1] + halfRow * dst2Step[1];
    BYTE* chromaVLeft = pDst2[2] + halfRow * dst2Step[2];
    BYTE* chromaURight = chromaULeft + roi->width / 4;
    BYTE* chromaVRight = chromaVLeft + roi->width / 4;

    if (row < roi->height - 1)
      oddLine = evenLine + srcStep;

    general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
        evenLine, oddLine, srcFormat, lumaYEven, lumaYOdd, lumaU, lumaV, chromaYEvenLeft,
        chromaYEvenRight, chromaYOddLeft, chromaYOddRight, chromaULeft, chromaURight,
        chromaVLeft, chromaVRight, roi->width);
  }

  return PRIMITIVES_SUCCESS;
}
1678
1679
static INLINE void general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
1680
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd,
1681
    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
1682
    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
1683
    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
1684
    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
1685
    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
1686
    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
1687
0
{
1688
0
  for (UINT32 x = 0; x < width; x += 2)
1689
0
  {
1690
0
    BYTE Ya = 0;
1691
0
    BYTE Ua = 0;
1692
0
    BYTE Va = 0;
1693
0
    BYTE Yb = 0;
1694
0
    BYTE Ub = 0;
1695
0
    BYTE Vb = 0;
1696
0
    BYTE Yc = 0;
1697
0
    BYTE Uc = 0;
1698
0
    BYTE Vc = 0;
1699
0
    BYTE Yd = 0;
1700
0
    BYTE Ud = 0;
1701
0
    BYTE Vd = 0;
1702
0
    {
1703
0
      const BYTE b = *srcEven++;
1704
0
      const BYTE g = *srcEven++;
1705
0
      const BYTE r = *srcEven++;
1706
0
      srcEven++;
1707
0
      Ya = RGB2Y(r, g, b);
1708
0
      Ua = RGB2U(r, g, b);
1709
0
      Va = RGB2V(r, g, b);
1710
0
    }
1711
1712
0
    if (x < width - 1)
1713
0
    {
1714
0
      const BYTE b = *srcEven++;
1715
0
      const BYTE g = *srcEven++;
1716
0
      const BYTE r = *srcEven++;
1717
0
      srcEven++;
1718
0
      Yb = RGB2Y(r, g, b);
1719
0
      Ub = RGB2U(r, g, b);
1720
0
      Vb = RGB2V(r, g, b);
1721
0
    }
1722
0
    else
1723
0
    {
1724
0
      Yb = Ya;
1725
0
      Ub = Ua;
1726
0
      Vb = Va;
1727
0
    }
1728
1729
0
    if (srcOdd)
1730
0
    {
1731
0
      const BYTE b = *srcOdd++;
1732
0
      const BYTE g = *srcOdd++;
1733
0
      const BYTE r = *srcOdd++;
1734
0
      srcOdd++;
1735
0
      Yc = RGB2Y(r, g, b);
1736
0
      Uc = RGB2U(r, g, b);
1737
0
      Vc = RGB2V(r, g, b);
1738
0
    }
1739
0
    else
1740
0
    {
1741
0
      Yc = Ya;
1742
0
      Uc = Ua;
1743
0
      Vc = Va;
1744
0
    }
1745
1746
0
    if (srcOdd && (x < width - 1))
1747
0
    {
1748
0
      const BYTE b = *srcOdd++;
1749
0
      const BYTE g = *srcOdd++;
1750
0
      const BYTE r = *srcOdd++;
1751
0
      srcOdd++;
1752
0
      Yd = RGB2Y(r, g, b);
1753
0
      Ud = RGB2U(r, g, b);
1754
0
      Vd = RGB2V(r, g, b);
1755
0
    }
1756
0
    else
1757
0
    {
1758
0
      Yd = Ya;
1759
0
      Ud = Ua;
1760
0
      Vd = Va;
1761
0
    }
1762
1763
    /* Y [b1] */
1764
0
    *yLumaDstEven++ = Ya;
1765
1766
0
    if (x < width - 1)
1767
0
      *yLumaDstEven++ = Yb;
1768
1769
0
    if (srcOdd)
1770
0
      *yLumaDstOdd++ = Yc;
1771
1772
0
    if (srcOdd && (x < width - 1))
1773
0
      *yLumaDstOdd++ = Yd;
1774
1775
    /* 2x 2y [b2,b3] */
1776
0
    *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
1777
0
    *vLumaDst++ = (Va + Vb + Vc + Vd) / 4;
1778
1779
    /* 2x+1, y [b4,b5] even */
1780
0
    if (x < width - 1)
1781
0
    {
1782
0
      *yEvenChromaDst1++ = Ub;
1783
0
      *yEvenChromaDst2++ = Vb;
1784
0
    }
1785
1786
0
    if (srcOdd)
1787
0
    {
1788
      /* 2x+1, y [b4,b5] odd */
1789
0
      if (x < width - 1)
1790
0
      {
1791
0
        *yOddChromaDst1++ = Ud;
1792
0
        *yOddChromaDst2++ = Vd;
1793
0
      }
1794
1795
      /* 4x 2y+1 [b6, b7] */
1796
0
      if (x % 4 == 0)
1797
0
      {
1798
0
        *uChromaDst1++ = Uc;
1799
0
        *uChromaDst2++ = Vc;
1800
0
      }
1801
      /* 4x+2 2y+1 [b8, b9] */
1802
0
      else
1803
0
      {
1804
0
        *vChromaDst1++ = Uc;
1805
0
        *vChromaDst2++ = Vc;
1806
0
      }
1807
0
    }
1808
0
  }
1809
0
}
1810
1811
/**
 * Convert a 4-byte-per-pixel BGRX/BGRA image into the two planar output
 * frames (pDst1 and pDst2) used by the AVC444v2 encoder path.
 *
 * The image is walked two source rows at a time; each pair is handed to
 * general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW together with the matching
 * destination row pointers.
 *
 * @param pSrc     first byte of the source image
 * @param srcStep  distance in bytes between two source rows
 * @param pDst1    three plane pointers of the first output frame
 * @param dst1Step per-plane row strides (bytes) of the first output frame
 * @param pDst2    three plane pointers of the second output frame
 * @param dst2Step per-plane row strides (bytes) of the second output frame
 * @param roi      width and height of the region to convert
 *
 * @return PRIMITIVES_SUCCESS, or !PRIMITIVES_SUCCESS when the region has a
 *         width or height below 1.
 */
static INLINE pstatus_t general_RGBToAVC444YUVv2_BGRX(const BYTE* WINPR_RESTRICT pSrc,
                                                      UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
                                                      const UINT32 dst1Step[3],
                                                      BYTE* WINPR_RESTRICT pDst2[3],
                                                      const UINT32 dst2Step[3],
                                                      const prim_size_t* WINPR_RESTRICT roi)
{
  /* Nothing can be converted for an empty region. */
  if ((roi->height < 1) || (roi->width < 1))
    return !PRIMITIVES_SUCCESS;

  /* 'pair' counts the processed row pairs, i.e. it always equals y / 2. */
  for (size_t y = 0, pair = 0; y < roi->height; y += 2, pair++)
  {
    const BYTE* rowEven = pSrc + y * srcStep;
    const BYTE* rowOdd = NULL;

    /* The last row of an odd-height image has no partner row. */
    if (y < roi->height - 1)
      rowOdd = rowEven + srcStep;

    /* First output frame: full resolution Y, one U/V row per row pair. */
    BYTE* lumaYEven = pDst1[0] + y * dst1Step[0];
    BYTE* lumaYOdd = lumaYEven + dst1Step[0];
    BYTE* lumaU = pDst1[1] + pair * dst1Step[1];
    BYTE* lumaV = pDst1[2] + pair * dst1Step[2];

    /* Second output frame, Y plane: each row is split at width / 2. */
    BYTE* chromaYEvenLeft = pDst2[0] + y * dst2Step[0];
    BYTE* chromaYEvenRight = chromaYEvenLeft + roi->width / 2;
    BYTE* chromaYOddLeft = chromaYEvenLeft + dst2Step[0];
    BYTE* chromaYOddRight = chromaYEvenRight + dst2Step[0];

    /* Second output frame, U/V planes: each row is split at width / 4. */
    BYTE* chromaULeft = pDst2[1] + pair * dst2Step[1];
    BYTE* chromaVLeft = pDst2[2] + pair * dst2Step[2];
    BYTE* chromaURight = chromaULeft + roi->width / 4;
    BYTE* chromaVRight = chromaVLeft + roi->width / 4;

    general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(rowEven, rowOdd, lumaYEven, lumaYOdd, lumaU, lumaV,
                                             chromaYEvenLeft, chromaYEvenRight, chromaYOddLeft,
                                             chromaYOddRight, chromaULeft, chromaURight,
                                             chromaVLeft, chromaVRight, roi->width);
  }

  return PRIMITIVES_SUCCESS;
}
1845
1846
/**
 * Generic RGB to AVC444v2 conversion entry point.
 *
 * Dispatches on the source pixel format: PIXEL_FORMAT_BGRA32 and
 * PIXEL_FORMAT_BGRX32 are routed to the dedicated BGRX implementation,
 * every other format goes through the format-agnostic implementation.
 *
 * @param pSrc      first byte of the source image
 * @param srcFormat pixel format of the source image
 * @param srcStep   distance in bytes between two source rows
 * @param pDst1     three plane pointers of the first output frame
 * @param dst1Step  per-plane row strides of the first output frame
 * @param pDst2     three plane pointers of the second output frame
 * @param dst2Step  per-plane row strides of the second output frame
 * @param roi       width and height of the region to convert
 *
 * @return the status reported by the selected implementation
 */
static INLINE pstatus_t general_RGBToAVC444YUVv2(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                                 UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
                                                 const UINT32 dst1Step[3],
                                                 BYTE* WINPR_RESTRICT pDst2[3],
                                                 const UINT32 dst2Step[3],
                                                 const prim_size_t* WINPR_RESTRICT roi)
{
  /* Anything that is not BGRA32/BGRX32 takes the generic path. */
  if ((srcFormat != PIXEL_FORMAT_BGRA32) && (srcFormat != PIXEL_FORMAT_BGRX32))
  {
    return general_RGBToAVC444YUVv2_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2,
                                        dst2Step, roi);
  }

  return general_RGBToAVC444YUVv2_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step, roi);
}
1867
1868
/**
 * Install the generic (portable C) YUV primitives into a primitives table.
 *
 * Each assignment is independent of the others; they are grouped here by
 * conversion direction.
 *
 * @param prims primitives table whose YUV entries are overwritten
 */
void primitives_init_YUV(primitives_t* WINPR_RESTRICT prims)
{
  /* YUV -> RGB */
  prims->YUV444ToRGB_8u_P3AC4R = general_YUV444ToRGB_8u_P3AC4R;
  prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R;

  /* RGB -> YUV */
  prims->RGBToYUV444_8u_P3AC4R = general_RGBToYUV444_8u_P3AC4R;
  prims->RGBToYUV420_8u_P3AC4R = general_RGBToYUV420_8u_P3AC4R;

  /* YUV444 <-> 2x YUV420 */
  prims->YUV444SplitToYUV420 = general_YUV444SplitToYUV420;
  prims->YUV420CombineToYUV444 = general_YUV420CombineToYUV444;

  /* RGB -> AVC444 (v1 and v2 frame layouts) */
  prims->RGBToAVC444YUVv2 = general_RGBToAVC444YUVv2;
  prims->RGBToAVC444YUV = general_RGBToAVC444YUV;
}
1879
1880
/**
 * Run the architecture specific YUV initialisers on a primitives table.
 *
 * The SSSE3 initialiser is called first, then the NEON one; the order is
 * kept as is because a later initialiser may replace entries set earlier.
 * NOTE(review): presumably each initialiser only installs its routines when
 * the instruction set is available - confirm in their definitions.
 *
 * @param prims primitives table passed on to both initialisers
 */
void primitives_init_YUV_opt(primitives_t* WINPR_RESTRICT prims)
{
  primitives_init_YUV_ssse3(prims); /* x86 SSSE3 variants */
  primitives_init_YUV_neon(prims);  /* ARM NEON variants */
}