Coverage Report

Created: 2025-07-01 06:46

/src/FreeRDP/libfreerdp/primitives/prim_YUV.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * FreeRDP: A Remote Desktop Protocol Implementation
3
 * Generic YUV/RGB conversion operations
4
 *
5
 * Copyright 2014 Marc-Andre Moreau <marcandre.moreau@gmail.com>
6
 * Copyright 2015-2017 Armin Novak <armin.novak@thincast.com>
7
 * Copyright 2015-2017 Norbert Federa <norbert.federa@thincast.com>
8
 * Copyright 2015-2017 Vic Lee
9
 * Copyright 2015-2017 Thincast Technologies GmbH
10
 *
11
 * Licensed under the Apache License, Version 2.0 (the "License");
12
 * you may not use this file except in compliance with the License.
13
 * You may obtain a copy of the License at
14
 *
15
 *     http://www.apache.org/licenses/LICENSE-2.0
16
 *
17
 * Unless required by applicable law or agreed to in writing, software
18
 * distributed under the License is distributed on an "AS IS" BASIS,
19
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
 * See the License for the specific language governing permissions and
21
 * limitations under the License.
22
 */
23
24
#include <winpr/wtypes.h>
25
#include <winpr/assert.h>
26
#include <winpr/cast.h>
27
28
#include <freerdp/config.h>
29
30
#include <freerdp/types.h>
31
#include <freerdp/primitives.h>
32
#include <freerdp/codec/color.h>
33
#include "prim_internal.h"
34
#include "prim_YUV.h"
35
36
/**
 * @brief Expand a YUV420 luma frame into full resolution YUV444 planes.
 *
 * The Y rows of the region are copied unchanged (B1). Every sample of the
 * half resolution U and V source planes (B2, B3) is replicated into a 2x2
 * block of the destination U and V planes.
 *
 * @note Row and column counts of the chroma planes are rounded up, so for an
 *       odd region width or height the 2x2 replication touches one column or
 *       row beyond the region. The destination planes must provide that room.
 *
 * @param pSrcRaw source planes (Y, U, V); U and V are addressed at half resolution
 * @param srcStep byte stride of each source plane
 * @param pDstRaw destination planes (Y, U, V), all addressed at full resolution
 * @param dstStep byte stride of each destination plane
 * @param roi     region to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_LumaToYUV444(const BYTE* WINPR_RESTRICT pSrcRaw[3],
                                             const UINT32 srcStep[3],
                                             BYTE* WINPR_RESTRICT pDstRaw[3],
                                             const UINT32 dstStep[3],
                                             const RECTANGLE_16* WINPR_RESTRICT roi)
{
  const UINT32 roiWidth = roi->right - roi->left;
  const UINT32 roiHeight = roi->bottom - roi->top;
  const UINT32 chromaCols = (roiWidth + 1) / 2;
  const UINT32 chromaRows = (roiHeight + 1) / 2;

  /* Move every plane pointer to the top left corner of the region. */
  const BYTE* srcY = pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left;
  const BYTE* srcU = pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2;
  const BYTE* srcV = pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2;
  BYTE* dstY = pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left;
  BYTE* dstU = pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left;
  BYTE* dstV = pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left;

  /* B1: luma is taken over row by row without modification. */
  for (size_t row = 0; row < roiHeight; row++)
  {
    memcpy(dstY + dstStep[0] * row, srcY + row * srcStep[0], roiWidth);
  }

  /* B2 and B3: each chroma sample fills a 2x2 block of the destination. */
  for (UINT32 cy = 0; cy < chromaRows; cy++)
  {
    const UINT32 upper = 2UL * cy;
    const UINT32 lower = upper + 1;
    const BYTE* uLine = srcU + 1ULL * cy * srcStep[1];
    const BYTE* vLine = srcV + 1ULL * cy * srcStep[2];
    BYTE* uUpper = dstU + 1ULL * dstStep[1] * upper;
    BYTE* vUpper = dstV + 1ULL * dstStep[2] * upper;
    BYTE* uLower = dstU + 1ULL * dstStep[1] * lower;
    BYTE* vLower = dstV + 1ULL * dstStep[2] * lower;

    for (UINT32 cx = 0; cx < chromaCols; cx++)
    {
      const UINT32 left = 2UL * cx;
      const UINT32 right = left + 1;
      const BYTE u = uLine[cx];
      const BYTE v = vLine[cx];

      uUpper[left] = u;
      vUpper[left] = v;
      uUpper[right] = u;
      vUpper[right] = v;
      uLower[left] = u;
      vLower[left] = v;
      uLower[right] = u;
      vLower[right] = v;
    }
  }

  return PRIMITIVES_SUCCESS;
}
96
97
/**
 * @brief Merge an auxiliary (chroma v1) YUV420 frame into YUV444 planes.
 *
 * B4 and B5: the rows of source plane 0 are organised in groups of 16. The
 * first 8 rows of each group are copied to consecutive odd rows of the
 * destination U plane, the remaining 8 rows to consecutive odd rows of the
 * destination V plane. Rows that would land outside the region are skipped.
 *
 * B6 and B7: source planes 1 and 2 supply the odd columns of the even rows
 * of the destination U and V planes.
 *
 * @param pSrcRaw source planes of the auxiliary frame
 * @param srcStep byte stride of each source plane
 * @param pDstRaw destination planes (Y, U, V); only U and V are written
 * @param dstStep byte stride of each destination plane
 * @param roi     region to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_ChromaV1ToYUV444(const BYTE* WINPR_RESTRICT pSrcRaw[3],
                                                 const UINT32 srcStep[3],
                                                 BYTE* WINPR_RESTRICT pDstRaw[3],
                                                 const UINT32 dstStep[3],
                                                 const RECTANGLE_16* WINPR_RESTRICT roi)
{
  const UINT32 groupRows = 16;
  const UINT32 roiWidth = roi->right - roi->left;
  const UINT32 roiHeight = roi->bottom - roi->top;
  const UINT32 pairCols = roiWidth / 2;
  const UINT32 pairRows = roiHeight / 2;
  /* The auxiliary frame is aligned to multiples of 16x16.
   * The padded height is required to walk all B4 and B5 rows. */
  const UINT32 paddedHeight = roiHeight + 16 - roiHeight % 16;
  UINT32 nextU = 0;
  UINT32 nextV = 0;

  const BYTE* src[3] = { pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left,
                         pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2,
                         pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2 };
  BYTE* dst[3] = { pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left,
                   pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left,
                   pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left };

  /* B4 and B5 */
  for (size_t row = 0; row < paddedHeight; row++)
  {
    /* First half of every 16 row group carries U, second half carries V. */
    const int carriesU = (row % groupRows) < (groupRows + 1) / 2;
    UINT32* counter = carriesU ? &nextU : &nextV;
    const size_t plane = carriesU ? 1 : 2;
    const size_t dstRow = 2 * (*counter)++ + 1;

    if (dstRow >= roiHeight)
      continue;

    memcpy(dst[plane] + dstStep[plane] * dstRow, src[0] + row * srcStep[0], roiWidth);
  }

  /* B6 and B7 */
  for (UINT32 py = 0; py < pairRows; py++)
  {
    const UINT32 evenRow = py * 2UL;
    const BYTE* uAux = src[1] + 1ULL * py * srcStep[1];
    const BYTE* vAux = src[2] + 1ULL * py * srcStep[2];
    BYTE* uOut = dst[1] + 1ULL * dstStep[1] * evenRow;
    BYTE* vOut = dst[2] + 1ULL * dstStep[2] * evenRow;

    for (UINT32 px = 0; px < pairCols; px++)
    {
      const UINT32 oddCol = px * 2 + 1;
      uOut[oddCol] = uAux[px];
      vOut[oddCol] = vAux[px];
    }
  }

  return PRIMITIVES_SUCCESS;
}
171
172
/**
 * @brief Merge an auxiliary (chroma v2) YUV420 frame into YUV444 planes.
 *
 * B4 and B5: every row of source plane 0 holds U samples starting at
 * roi->left / 2 and V samples nTotalWidth / 2 bytes further right. They are
 * written to the odd columns of the matching destination U and V rows.
 *
 * B6 - B9: source planes 1 and 2 each hold U samples followed
 * (nTotalWidth / 4 bytes later) by V samples. Plane 1 fills columns 4x and
 * plane 2 fills columns 4x + 2 of the odd destination rows.
 *
 * @note Column and row counts are rounded up, so writes may reach slightly
 *       past an odd sized region.
 *
 * @param pSrc         source planes of the auxiliary frame
 * @param srcStep      byte stride of each source plane
 * @param nTotalWidth  width used to locate the V half inside a source row
 * @param nTotalHeight unused
 * @param pDst         destination planes (Y, U, V); only U and V are written
 * @param dstStep      byte stride of each destination plane
 * @param roi          region to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_ChromaV2ToYUV444(const BYTE* WINPR_RESTRICT pSrc[3],
                                                 const UINT32 srcStep[3], UINT32 nTotalWidth,
                                                 WINPR_ATTR_UNUSED UINT32 nTotalHeight,
                                                 BYTE* WINPR_RESTRICT pDst[3],
                                                 const UINT32 dstStep[3],
                                                 const RECTANGLE_16* WINPR_RESTRICT roi)
{
  const UINT32 roiWidth = roi->right - roi->left;
  const UINT32 roiHeight = roi->bottom - roi->top;
  const UINT32 halfCols = (roiWidth + 1) / 2;
  const UINT32 halfRows = (roiHeight + 1) / 2;
  const UINT32 quarterCols = (roiWidth + 3) / 4;

  /* B4 and B5: odd UV values for width/2, height */
  for (UINT32 row = 0; row < roiHeight; row++)
  {
    const UINT32 absRow = row + roi->top;
    const BYTE* oddU = pSrc[0] + 1ULL * srcStep[0] * absRow + roi->left / 2;
    const BYTE* oddV = oddU + nTotalWidth / 2;
    BYTE* outU = pDst[1] + 1ULL * dstStep[1] * absRow + roi->left;
    BYTE* outV = pDst[2] + 1ULL * dstStep[2] * absRow + roi->left;

    for (UINT32 col = 0; col < halfCols; col++)
    {
      const UINT32 target = 2UL * col + 1UL;
      outU[target] = oddU[col];
      outV[target] = oddV[col];
    }
  }

  /* B6 - B9 */
  for (size_t row = 0; row < halfRows; row++)
  {
    const size_t srcRow = row + roi->top / 2;
    const unsigned long long dstRow = 2ULL * row + 1 + roi->top;
    const BYTE* firstU = pSrc[1] + srcStep[1] * srcRow + roi->left / 4;
    const BYTE* firstV = firstU + nTotalWidth / 4;
    const BYTE* secondU = pSrc[2] + srcStep[2] * srcRow + roi->left / 4;
    const BYTE* secondV = secondU + nTotalWidth / 4;
    BYTE* outU = pDst[1] + 1ULL * dstStep[1] * dstRow + roi->left;
    BYTE* outV = pDst[2] + 1ULL * dstStep[2] * dstRow + roi->left;

    for (size_t q = 0; q < quarterCols; q++)
    {
      const size_t base = 4 * q;
      outU[base + 0] = firstU[q];
      outV[base + 0] = firstV[q];
      outU[base + 2] = secondU[q];
      outV[base + 2] = secondV[q];
    }
  }

  return PRIMITIVES_SUCCESS;
}
223
224
static pstatus_t general_YUV420CombineToYUV444(avc444_frame_type type,
225
                                               const BYTE* WINPR_RESTRICT pSrc[3],
226
                                               const UINT32 srcStep[3], UINT32 nWidth,
227
                                               UINT32 nHeight, BYTE* WINPR_RESTRICT pDst[3],
228
                                               const UINT32 dstStep[3],
229
                                               const RECTANGLE_16* WINPR_RESTRICT roi)
230
0
{
231
0
  if (!pSrc || !pSrc[0] || !pSrc[1] || !pSrc[2])
232
0
    return -1;
233
234
0
  if (!pDst || !pDst[0] || !pDst[1] || !pDst[2])
235
0
    return -1;
236
237
0
  if (!roi)
238
0
    return -1;
239
240
0
  switch (type)
241
0
  {
242
0
    case AVC444_LUMA:
243
0
      return general_LumaToYUV444(pSrc, srcStep, pDst, dstStep, roi);
244
245
0
    case AVC444_CHROMAv1:
246
0
      return general_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
247
248
0
    case AVC444_CHROMAv2:
249
0
      return general_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi);
250
251
0
    default:
252
0
      return -1;
253
0
  }
254
0
}
255
256
static pstatus_t
257
general_YUV444SplitToYUV420(const BYTE* WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3],
258
                            BYTE* WINPR_RESTRICT pMainDst[3], const UINT32 dstMainStep[3],
259
                            BYTE* WINPR_RESTRICT pAuxDst[3], const UINT32 dstAuxStep[3],
260
                            const prim_size_t* WINPR_RESTRICT roi)
261
0
{
262
0
  UINT32 uY = 0;
263
0
  UINT32 vY = 0;
264
265
  /* The auxiliary frame is aligned to multiples of 16x16.
266
   * We need the padded height for B4 and B5 conversion. */
267
0
  const UINT32 padHeigth = roi->height + 16 - roi->height % 16;
268
0
  const UINT32 halfWidth = (roi->width + 1) / 2;
269
0
  const UINT32 halfHeight = (roi->height + 1) / 2;
270
271
  /* B1 */
272
0
  for (size_t y = 0; y < roi->height; y++)
273
0
  {
274
0
    const BYTE* pSrcY = pSrc[0] + y * srcStep[0];
275
0
    BYTE* pY = pMainDst[0] + y * dstMainStep[0];
276
0
    memcpy(pY, pSrcY, roi->width);
277
0
  }
278
279
  /* B2 and B3 */
280
0
  for (size_t y = 0; y < halfHeight; y++)
281
0
  {
282
0
    const BYTE* pSrcU = pSrc[1] + 2ULL * y * srcStep[1];
283
0
    const BYTE* pSrcV = pSrc[2] + 2ULL * y * srcStep[2];
284
0
    BYTE* pU = pMainDst[1] + y * dstMainStep[1];
285
0
    BYTE* pV = pMainDst[2] + y * dstMainStep[2];
286
287
0
    for (size_t x = 0; x < halfWidth; x++)
288
0
    {
289
0
      pU[x] = pSrcV[2 * x];
290
0
      pV[x] = pSrcU[2 * x];
291
0
    }
292
0
  }
293
294
  /* B4 and B5 */
295
0
  for (size_t y = 0; y < padHeigth; y++)
296
0
  {
297
0
    BYTE* pY = pAuxDst[0] + y * dstAuxStep[0];
298
299
0
    if (y % 16 < 8)
300
0
    {
301
0
      const size_t pos = (2 * uY++ + 1);
302
0
      const BYTE* pSrcU = pSrc[1] + pos * srcStep[1];
303
304
0
      if (pos >= roi->height)
305
0
        continue;
306
307
0
      memcpy(pY, pSrcU, roi->width);
308
0
    }
309
0
    else
310
0
    {
311
0
      const size_t pos = (2 * vY++ + 1);
312
0
      const BYTE* pSrcV = pSrc[2] + pos * srcStep[2];
313
314
0
      if (pos >= roi->height)
315
0
        continue;
316
317
0
      memcpy(pY, pSrcV, roi->width);
318
0
    }
319
0
  }
320
321
  /* B6 and B7 */
322
0
  for (size_t y = 0; y < halfHeight; y++)
323
0
  {
324
0
    const BYTE* pSrcU = pSrc[1] + 2 * y * srcStep[1];
325
0
    const BYTE* pSrcV = pSrc[2] + 2 * y * srcStep[2];
326
0
    BYTE* pU = pAuxDst[1] + y * dstAuxStep[1];
327
0
    BYTE* pV = pAuxDst[2] + y * dstAuxStep[2];
328
329
0
    for (size_t x = 0; x < halfWidth; x++)
330
0
    {
331
0
      pU[x] = pSrcU[2 * x + 1];
332
0
      pV[x] = pSrcV[2 * x + 1];
333
0
    }
334
0
  }
335
336
0
  return PRIMITIVES_SUCCESS;
337
0
}
338
339
static inline void general_YUV444ToRGB_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2], UINT32 DstFormat,
340
                                                  const BYTE* WINPR_RESTRICT pY[2],
341
                                                  const BYTE* WINPR_RESTRICT pU[2],
342
                                                  const BYTE* WINPR_RESTRICT pV[2], size_t nWidth)
343
0
{
344
0
  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
345
346
0
  WINPR_ASSERT(nWidth % 2 == 0);
347
0
  for (size_t x = 0; x < nWidth; x += 2)
348
0
  {
349
0
    for (size_t i = 0; i < 2; i++)
350
0
    {
351
0
      for (size_t j = 0; j < 2; j++)
352
0
      {
353
0
        const BYTE y = pY[i][x + j];
354
0
        INT32 u = pU[i][x + j];
355
0
        INT32 v = pV[i][x + j];
356
0
        if ((i == 0) && (j == 0))
357
0
        {
358
0
          const INT32 subU = (INT32)pU[0][x + 1] + pU[1][x] + pU[1][x + 1];
359
0
          const INT32 avgU = ((4 * u) - subU);
360
0
          u = CONDITIONAL_CLIP(avgU, WINPR_ASSERTING_INT_CAST(BYTE, u));
361
362
0
          const INT32 subV = (INT32)pV[0][x + 1] + pV[1][x] + pV[1][x + 1];
363
0
          const INT32 avgV = ((4 * v) - subV);
364
0
          v = CONDITIONAL_CLIP(avgV, WINPR_ASSERTING_INT_CAST(BYTE, v));
365
0
        }
366
0
        pRGB[i] = writeYUVPixel(pRGB[i], DstFormat, y, u, v, writePixel);
367
0
      }
368
0
    }
369
0
  }
370
0
}
371
372
static inline void general_YUV444ToRGB_SINGLE_ROW(BYTE* WINPR_RESTRICT pRGB, UINT32 DstFormat,
373
                                                  const BYTE* WINPR_RESTRICT pY,
374
                                                  const BYTE* WINPR_RESTRICT pU,
375
                                                  const BYTE* WINPR_RESTRICT pV, size_t nWidth)
376
0
{
377
0
  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
378
379
0
  WINPR_ASSERT(nWidth % 2 == 0);
380
0
  for (size_t x = 0; x < nWidth; x += 2)
381
0
  {
382
0
    for (size_t j = 0; j < 2; j++)
383
0
    {
384
0
      const BYTE y = pY[x + j];
385
0
      const BYTE u = pU[x + j];
386
0
      const BYTE v = pV[x + j];
387
0
      pRGB = writeYUVPixel(pRGB, DstFormat, y, u, v, writePixel);
388
0
    }
389
0
  }
390
0
}
391
392
/**
 * @brief Convert YUV444 planes to an arbitrary pixel format.
 *
 * Rows are handled in pairs by general_YUV444ToRGB_DOUBLE_ROW. If the height
 * is odd, the last row is handled by general_YUV444ToRGB_SINGLE_ROW.
 *
 * @param pSrc      source planes (Y, U, V)
 * @param srcStep   byte stride of each source plane
 * @param pDst      destination pixel buffer
 * @param dstStep   byte stride of the destination buffer
 * @param DstFormat destination pixel format
 * @param roi       size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_YUV444ToRGB_8u_P3AC4R_general(const BYTE* WINPR_RESTRICT pSrc[3],
                                                              const UINT32 srcStep[3],
                                                              BYTE* WINPR_RESTRICT pDst,
                                                              UINT32 dstStep, UINT32 DstFormat,
                                                              const prim_size_t* WINPR_RESTRICT roi)
{
  WINPR_ASSERT(pSrc);
  WINPR_ASSERT(pDst);
  WINPR_ASSERT(roi);

  const UINT32 width = roi->width;
  const UINT32 height = roi->height;
  /* Number of rows that can be processed as complete pairs. */
  const UINT32 pairedRows = height - height % 2;

  size_t row = 0;
  while (row < pairedRows)
  {
    const size_t below = row + 1;
    const BYTE* WINPR_RESTRICT lumaRows[2] = { pSrc[0] + row * srcStep[0],
                                               pSrc[0] + below * srcStep[0] };
    const BYTE* WINPR_RESTRICT uRows[2] = { pSrc[1] + row * srcStep[1],
                                            pSrc[1] + below * srcStep[1] };
    const BYTE* WINPR_RESTRICT vRows[2] = { pSrc[2] + row * srcStep[2],
                                            pSrc[2] + below * srcStep[2] };
    BYTE* WINPR_RESTRICT outRows[] = { pDst + row * dstStep, pDst + below * dstStep };

    general_YUV444ToRGB_DOUBLE_ROW(outRows, DstFormat, lumaRows, uRows, vRows, width);
    row += 2;
  }

  /* Remaining row for odd heights */
  while (row < height)
  {
    const BYTE* WINPR_RESTRICT lumaRow = pSrc[0] + row * srcStep[0];
    const BYTE* WINPR_RESTRICT uRow = pSrc[1] + row * srcStep[1];
    const BYTE* WINPR_RESTRICT vRow = pSrc[2] + row * srcStep[2];
    BYTE* WINPR_RESTRICT outRow = pDst + row * dstStep;

    general_YUV444ToRGB_SINGLE_ROW(outRow, DstFormat, lumaRow, uRow, vRow, width);
    row++;
  }

  return PRIMITIVES_SUCCESS;
}
430
431
static inline void general_YUV444ToBGRX_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2], UINT32 DstFormat,
432
                                                   const BYTE* WINPR_RESTRICT pY[2],
433
                                                   const BYTE* WINPR_RESTRICT pU[2],
434
                                                   const BYTE* WINPR_RESTRICT pV[2], size_t nWidth)
435
0
{
436
0
  WINPR_ASSERT(nWidth % 2 == 0);
437
0
  for (size_t x = 0; x < nWidth; x += 2)
438
0
  {
439
0
    const INT32 subU = pU[0][x + 1] + pU[1][x] + pU[1][x + 1];
440
0
    const INT32 avgU = ((4 * pU[0][x]) - subU);
441
0
    const BYTE useU = CONDITIONAL_CLIP(avgU, pU[0][x]);
442
0
    const INT32 subV = pV[0][x + 1] + pV[1][x] + pV[1][x + 1];
443
0
    const INT32 avgV = ((4 * pV[0][x]) - subV);
444
0
    const BYTE useV = CONDITIONAL_CLIP(avgV, pV[0][x]);
445
446
0
    const BYTE U[2][2] = { { useU, pU[0][x + 1] }, { pU[1][x], pU[1][x + 1] } };
447
0
    const BYTE V[2][2] = { { useV, pV[0][x + 1] }, { pV[1][x], pV[1][x + 1] } };
448
449
0
    for (size_t i = 0; i < 2; i++)
450
0
    {
451
0
      for (size_t j = 0; j < 2; j++)
452
0
      {
453
0
        const BYTE y = pY[i][x + j];
454
0
        const BYTE u = U[i][j];
455
0
        const BYTE v = V[i][j];
456
0
        pRGB[i] = writeYUVPixel(pRGB[i], DstFormat, y, u, v, writePixelBGRX);
457
0
      }
458
0
    }
459
0
  }
460
0
}
461
462
static inline void general_YUV444ToBGRX_SINGLE_ROW(BYTE* WINPR_RESTRICT pRGB, UINT32 DstFormat,
463
                                                   const BYTE* WINPR_RESTRICT pY,
464
                                                   const BYTE* WINPR_RESTRICT pU,
465
                                                   const BYTE* WINPR_RESTRICT pV, size_t nWidth)
466
0
{
467
0
  WINPR_ASSERT(nWidth % 2 == 0);
468
0
  for (size_t x = 0; x < nWidth; x += 2)
469
0
  {
470
0
    for (size_t j = 0; j < 2; j++)
471
0
    {
472
0
      const BYTE Y = pY[x + j];
473
0
      const BYTE U = pU[x + j];
474
0
      const BYTE V = pV[x + j];
475
0
      pRGB = writeYUVPixel(pRGB, DstFormat, Y, U, V, writePixelBGRX);
476
0
    }
477
0
  }
478
0
}
479
480
/**
 * @brief Convert YUV444 planes to BGRX pixels.
 *
 * Rows are handled in pairs by general_YUV444ToBGRX_DOUBLE_ROW. If the height
 * is odd, the last row is handled by general_YUV444ToBGRX_SINGLE_ROW.
 *
 * @param pSrc      source planes (Y, U, V)
 * @param srcStep   byte stride of each source plane
 * @param pDst      destination pixel buffer
 * @param dstStep   byte stride of the destination buffer
 * @param DstFormat destination pixel format
 * @param roi       size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_YUV444ToRGB_8u_P3AC4R_BGRX(const BYTE* WINPR_RESTRICT pSrc[3],
                                                           const UINT32 srcStep[3],
                                                           BYTE* WINPR_RESTRICT pDst,
                                                           UINT32 dstStep, UINT32 DstFormat,
                                                           const prim_size_t* WINPR_RESTRICT roi)
{
  WINPR_ASSERT(pSrc);
  WINPR_ASSERT(pDst);
  WINPR_ASSERT(roi);

  const UINT32 width = roi->width;
  const UINT32 height = roi->height;
  /* Number of rows that can be processed as complete pairs. */
  const UINT32 pairedRows = height - height % 2;

  size_t row = 0;
  while (row < pairedRows)
  {
    const size_t below = row + 1;
    const BYTE* lumaRows[2] = { pSrc[0] + row * srcStep[0], pSrc[0] + below * srcStep[0] };
    const BYTE* uRows[2] = { pSrc[1] + row * srcStep[1], pSrc[1] + below * srcStep[1] };
    const BYTE* vRows[2] = { pSrc[2] + row * srcStep[2], pSrc[2] + below * srcStep[2] };
    BYTE* outRows[] = { pDst + row * dstStep, pDst + below * dstStep };

    general_YUV444ToBGRX_DOUBLE_ROW(outRows, DstFormat, lumaRows, uRows, vRows, width);
    row += 2;
  }

  /* Remaining row for odd heights */
  while (row < height)
  {
    const BYTE* WINPR_RESTRICT lumaRow = pSrc[0] + row * srcStep[0];
    const BYTE* WINPR_RESTRICT uRow = pSrc[1] + row * srcStep[1];
    const BYTE* WINPR_RESTRICT vRow = pSrc[2] + row * srcStep[2];
    BYTE* WINPR_RESTRICT outRow = pDst + row * dstStep;

    general_YUV444ToBGRX_SINGLE_ROW(outRow, DstFormat, lumaRow, uRow, vRow, width);
    row++;
  }

  return PRIMITIVES_SUCCESS;
}
515
516
/**
 * @brief Convert YUV444 planes to the requested pixel format.
 *
 * PIXEL_FORMAT_BGRA32 and PIXEL_FORMAT_BGRX32 use the dedicated BGRX
 * converter, every other format uses the generic converter.
 *
 * @param pSrc      source planes (Y, U, V)
 * @param srcStep   byte stride of each source plane
 * @param pDst      destination pixel buffer
 * @param dstStep   byte stride of the destination buffer
 * @param DstFormat destination pixel format
 * @param roi       size of the area to convert
 *
 * @return result of the selected converter
 */
static pstatus_t general_YUV444ToRGB_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc[3],
                                               const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
                                               UINT32 dstStep, UINT32 DstFormat,
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  if ((DstFormat == PIXEL_FORMAT_BGRA32) || (DstFormat == PIXEL_FORMAT_BGRX32))
    return general_YUV444ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);

  return general_YUV444ToRGB_8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
}
532
/**
533
 * | R |   ( | 256     0    403 | |    Y    | )
534
 * | G | = ( | 256   -48   -120 | | U - 128 | ) >> 8
535
 * | B |   ( | 256   475      0 | | V - 128 | )
536
 */
537
static void general_YUV420ToRGB_8u_P3AC4R_double_line(BYTE* WINPR_RESTRICT pEven,
538
                                                      BYTE* WINPR_RESTRICT pOdd, UINT32 DstFormat,
539
                                                      const BYTE* WINPR_RESTRICT pYeven,
540
                                                      const BYTE* WINPR_RESTRICT pYodd,
541
                                                      const BYTE* WINPR_RESTRICT pU,
542
                                                      const BYTE* WINPR_RESTRICT pV, UINT32 width,
543
                                                      fkt_writePixel writePixel, UINT32 formatSize)
544
0
{
545
546
0
  UINT32 x = 0;
547
0
  for (; x < width / 2; x++)
548
0
  {
549
0
    const BYTE U = pU[x];
550
0
    const BYTE V = pV[x];
551
0
    const BYTE eY0 = pYeven[2ULL * x + 0];
552
0
    const BYTE eY1 = pYeven[2ULL * x + 1];
553
0
    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
554
0
    writeYUVPixel(&pEven[(2ULL * x + 1) * formatSize], DstFormat, eY1, U, V, writePixel);
555
556
0
    const BYTE oY0 = pYodd[2ULL * x + 0];
557
0
    const BYTE oY1 = pYodd[2ULL * x + 1];
558
0
    writeYUVPixel(&pOdd[2ULL * x * formatSize], DstFormat, oY0, U, V, writePixel);
559
0
    writeYUVPixel(&pOdd[(2ULL * x + 1) * formatSize], DstFormat, oY1, U, V, writePixel);
560
0
  }
561
562
0
  for (; x < (width + 1) / 2; x++)
563
0
  {
564
0
    const BYTE U = pU[x];
565
0
    const BYTE V = pV[x];
566
0
    const BYTE eY0 = pYeven[2ULL * x + 0];
567
0
    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
568
569
0
    const BYTE oY0 = pYodd[2ULL * x + 0];
570
0
    writeYUVPixel(&pOdd[2ULL * x * formatSize], DstFormat, oY0, U, V, writePixel);
571
0
  }
572
0
}
573
574
static void general_YUV420ToRGB_8u_P3AC4R_single_line(BYTE* WINPR_RESTRICT pEven, UINT32 DstFormat,
575
                                                      const BYTE* WINPR_RESTRICT pYeven,
576
                                                      const BYTE* WINPR_RESTRICT pU,
577
                                                      const BYTE* WINPR_RESTRICT pV, UINT32 width,
578
                                                      fkt_writePixel writePixel, UINT32 formatSize)
579
0
{
580
581
0
  UINT32 x = 0;
582
0
  for (; x < width / 2; x++)
583
0
  {
584
0
    const BYTE U = pU[x];
585
0
    const BYTE V = pV[x];
586
0
    const BYTE eY0 = pYeven[2ULL * x + 0];
587
0
    const BYTE eY1 = pYeven[2ULL * x + 1];
588
0
    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
589
0
    writeYUVPixel(&pEven[(2ULL * x + 1) * formatSize], DstFormat, eY1, U, V, writePixel);
590
0
  }
591
592
0
  for (; x < (width + 1) / 2; x++)
593
0
  {
594
0
    const BYTE U = pU[x];
595
0
    const BYTE V = pV[x];
596
0
    const BYTE eY0 = pYeven[2ULL * x + 0];
597
0
    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
598
0
  }
599
0
}
600
601
/**
 * @brief Convert YUV420 planes to the requested pixel format.
 *
 * Two luma rows share one chroma row and are converted together. If the
 * height is odd, the last luma row is converted on its own.
 *
 * @param pSrc      source planes (Y, U, V)
 * @param srcStep   byte stride of each source plane
 * @param pDst      destination pixel buffer
 * @param dstStep   byte stride of the destination buffer
 * @param DstFormat destination pixel format
 * @param roi       size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc[3],
                                               const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
                                               UINT32 dstStep, UINT32 DstFormat,
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  WINPR_ASSERT(roi);
  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
  const UINT32 nWidth = roi->width;
  const UINT32 nHeight = roi->height;
  const UINT32 rowPairs = nHeight / 2;
  const UINT32 chromaRows = (nHeight + 1) / 2;

  UINT32 cy = 0;
  while (cy < rowPairs)
  {
    const unsigned long long upper = 2ULL * cy;
    const unsigned long long lower = upper + 1;
    const BYTE* lumaUpper = &pSrc[0][upper * srcStep[0]];
    const BYTE* lumaLower = &pSrc[0][lower * srcStep[0]];
    const BYTE* uRow = &pSrc[1][1ULL * srcStep[1] * cy];
    const BYTE* vRow = &pSrc[2][1ULL * srcStep[2] * cy];
    BYTE* outUpper = &pDst[upper * dstStep];
    BYTE* outLower = &pDst[lower * dstStep];

    general_YUV420ToRGB_8u_P3AC4R_double_line(outUpper, outLower, DstFormat, lumaUpper,
                                              lumaLower, uRow, vRow, nWidth, writePixel,
                                              formatSize);
    cy++;
  }

  /* Last row (if the height is odd) */
  while (cy < chromaRows)
  {
    const BYTE* lumaRow = &pSrc[0][2ULL * srcStep[0] * cy];
    const BYTE* uRow = &pSrc[1][1ULL * srcStep[1] * cy];
    const BYTE* vRow = &pSrc[2][1ULL * srcStep[2] * cy];
    BYTE* outRow = &pDst[2ULL * cy * dstStep];

    general_YUV420ToRGB_8u_P3AC4R_single_line(outRow, DstFormat, lumaRow, uRow, vRow, nWidth,
                                              writePixel, formatSize);
    cy++;
  }

  return PRIMITIVES_SUCCESS;
}
639
640
/**
 * @brief Convert a 2x2 block of BGRX pixels to YUV444 samples.
 *
 * All four pixels are converted and stored. Afterwards the top left U and V
 * samples are replaced by CONDITIONAL_CLIP of the block average and the
 * value just stored.
 *
 * @param offset column of the left pixels of the block
 * @param pRGB   the two source rows (4 bytes per pixel)
 * @param pY     the two destination luma rows
 * @param pU     the two destination U rows
 * @param pV     the two destination V rows
 */
static inline void BGRX_fillYUV(size_t offset, const BYTE* WINPR_RESTRICT pRGB[2],
                                BYTE* WINPR_RESTRICT pY[2], BYTE* WINPR_RESTRICT pU[2],
                                BYTE* WINPR_RESTRICT pV[2])
{
  WINPR_ASSERT(pRGB);
  WINPR_ASSERT(pY);
  WINPR_ASSERT(pU);
  WINPR_ASSERT(pV);

  const UINT32 SrcFormat = PIXEL_FORMAT_BGRX32;
  const UINT32 bpp = 4;
  INT32 sumU = 0;
  INT32 sumV = 0;

  for (size_t row = 0; row < 2; row++)
  {
    for (size_t col = 0; col < 2; col++)
    {
      const size_t pos = offset + col;
      BYTE B = 0;
      BYTE G = 0;
      BYTE R = 0;
      const UINT32 color = FreeRDPReadColor(&pRGB[row][pos * bpp], SrcFormat);
      FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);

      const BYTE u = RGB2U(R, G, B);
      const BYTE v = RGB2V(R, G, B);
      pY[row][pos] = RGB2Y(R, G, B);
      pU[row][pos] = u;
      pV[row][pos] = v;

      /* Collect the chroma of the block for the filter below */
      sumU += u;
      sumV += v;
    }
  }

  /* Apply chroma filter */
  const INT32 avgU = sumU / 4;
  pU[0][offset] = CONDITIONAL_CLIP(avgU, pU[0][offset]);
  const INT32 avgV = sumV / 4;
  pV[0][offset] = CONDITIONAL_CLIP(avgV, pV[0][offset]);
}
673
674
/**
 * @brief Convert two neighbouring BGRX pixels of one row to YUV444 samples.
 *
 * No chroma filter is applied.
 *
 * @param offset column of the first pixel
 * @param pRGB   source row (4 bytes per pixel)
 * @param pY     destination luma row
 * @param pU     destination U row
 * @param pV     destination V row
 */
static inline void BGRX_fillYUV_single(size_t offset, const BYTE* WINPR_RESTRICT pRGB,
                                       BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
                                       BYTE* WINPR_RESTRICT pV)
{
  WINPR_ASSERT(pRGB);
  WINPR_ASSERT(pY);
  WINPR_ASSERT(pU);
  WINPR_ASSERT(pV);

  const UINT32 SrcFormat = PIXEL_FORMAT_BGRX32;
  const UINT32 bpp = 4;

  size_t col = 0;
  while (col < 2)
  {
    const size_t pos = offset + col;
    BYTE B = 0;
    BYTE G = 0;
    BYTE R = 0;
    const UINT32 color = FreeRDPReadColor(&pRGB[pos * bpp], SrcFormat);

    FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
    pY[pos] = RGB2Y(R, G, B);
    pU[pos] = RGB2U(R, G, B);
    pV[pos] = RGB2V(R, G, B);
    col++;
  }
}
698
699
/**
 * @brief Convert two adjacent BGRX rows to YUV444, one 2x2 block at a time.
 *
 * @param pRGB   the two source rows
 * @param pY     the two destination luma rows
 * @param pU     the two destination U rows
 * @param pV     the two destination V rows
 * @param nWidth number of pixels per row, must be even
 */
static inline void general_BGRXToYUV444_DOUBLE_ROW(const BYTE* WINPR_RESTRICT pRGB[2],
                                                   BYTE* WINPR_RESTRICT pY[2],
                                                   BYTE* WINPR_RESTRICT pU[2],
                                                   BYTE* WINPR_RESTRICT pV[2], UINT32 nWidth)
{
  WINPR_ASSERT((nWidth % 2) == 0);

  size_t column = 0;
  while (column < nWidth)
  {
    BGRX_fillYUV(column, pRGB, pY, pU, pV);
    column += 2;
  }
}
711
712
/**
 * @brief Convert a single BGRX row to YUV444, two pixels at a time.
 *
 * @param pRGB   source row
 * @param pY     destination luma row
 * @param pU     destination U row
 * @param pV     destination V row
 * @param nWidth number of pixels in the row, must be even
 */
static inline void general_BGRXToYUV444_SINGLE_ROW(const BYTE* WINPR_RESTRICT pRGB,
                                                   BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
                                                   BYTE* WINPR_RESTRICT pV, UINT32 nWidth)
{
  WINPR_ASSERT((nWidth % 2) == 0);

  size_t column = 0;
  while (column < nWidth)
  {
    BGRX_fillYUV_single(column, pRGB, pY, pU, pV);
    column += 2;
  }
}
723
724
/**
 * @brief Convert a BGRX image to YUV444 planes.
 *
 * Rows are handled in pairs by general_BGRXToYUV444_DOUBLE_ROW. If the height
 * is odd, the last row is handled by general_BGRXToYUV444_SINGLE_ROW.
 *
 * @param pSrc    source pixel buffer
 * @param srcStep byte stride of the source buffer
 * @param pDst    destination planes (Y, U, V)
 * @param dstStep byte stride of each destination plane
 * @param roi     size of the area to convert
 *
 * @return PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_RGBToYUV444_8u_P3AC4R_BGRX(const BYTE* WINPR_RESTRICT pSrc,
                                                           const UINT32 srcStep,
                                                           BYTE* WINPR_RESTRICT pDst[3],
                                                           const UINT32 dstStep[3],
                                                           const prim_size_t* WINPR_RESTRICT roi)
{
  const UINT32 width = roi->width;
  const UINT32 height = roi->height;
  /* Number of rows that can be processed as complete pairs. */
  const UINT32 pairedRows = height - height % 2;

  size_t row = 0;
  while (row < pairedRows)
  {
    const size_t below = row + 1;
    const BYTE* inRows[] = { pSrc + row * srcStep, pSrc + below * srcStep };
    BYTE* lumaRows[] = { pDst[0] + row * dstStep[0], pDst[0] + below * dstStep[0] };
    BYTE* uRows[] = { pDst[1] + row * dstStep[1], pDst[1] + below * dstStep[1] };
    BYTE* vRows[] = { pDst[2] + row * dstStep[2], pDst[2] + below * dstStep[2] };

    general_BGRXToYUV444_DOUBLE_ROW(inRows, lumaRows, uRows, vRows, width);
    row += 2;
  }

  /* Remaining row for odd heights */
  while (row < height)
  {
    const BYTE* inRow = pSrc + row * srcStep;
    BYTE* lumaRow = pDst[0] + row * dstStep[0];
    BYTE* uRow = pDst[1] + row * dstStep[1];
    BYTE* vRow = pDst[2] + row * dstStep[2];

    general_BGRXToYUV444_SINGLE_ROW(inRow, lumaRow, uRow, vRow, width);
    row++;
  }

  return PRIMITIVES_SUCCESS;
}
756
757
/**
 * Convert a 2x2 pixel block of an arbitrary pixel format to YUV444.
 *
 * Each of the four pixels gets its own Y, U and V sample. Afterwards the U and
 * V samples at the top left position are overwritten with the clipped average
 * of the four U respectively V values of the block.
 *
 * @param offset    column of the left pixels of the block
 * @param pRGB      the two source rows
 * @param SrcFormat pixel format of the source rows
 * @param pY        the two destination luma rows
 * @param pU        the two destination U rows
 * @param pV        the two destination V rows
 */
static inline void fillYUV(size_t offset, const BYTE* WINPR_RESTRICT pRGB[2], UINT32 SrcFormat,
                           BYTE* WINPR_RESTRICT pY[2], BYTE* WINPR_RESTRICT pU[2],
                           BYTE* WINPR_RESTRICT pV[2])
{
  WINPR_ASSERT(pRGB);
  WINPR_ASSERT(pY);
  WINPR_ASSERT(pU);
  WINPR_ASSERT(pV);
  const UINT32 bpp = FreeRDPGetBytesPerPixel(SrcFormat);

  INT32 sumU = 0;
  INT32 sumV = 0;

  /* Walk the block in row-major order: k / 2 is the row, k % 2 the column. */
  for (size_t k = 0; k < 4; k++)
  {
    const size_t row = k / 2;
    const size_t pos = offset + (k % 2);
    BYTE R = 0;
    BYTE G = 0;
    BYTE B = 0;
    const UINT32 color = FreeRDPReadColor(&pRGB[row][pos * bpp], SrcFormat);
    FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);

    const BYTE luma = RGB2Y(R, G, B);
    const BYTE cb = RGB2U(R, G, B);
    const BYTE cr = RGB2V(R, G, B);
    sumU += cb;
    sumV += cr;
    pY[row][pos] = luma;
    pU[row][pos] = cb;
    pV[row][pos] = cr;
  }

  /* Apply chroma filter */
  sumU /= 4;
  pU[0][offset] = CLIP(sumU);

  sumV /= 4;
  pV[0][offset] = CLIP(sumV);
}
796
797
/**
 * Convert two horizontally adjacent pixels of an arbitrary pixel format to
 * YUV444. No chroma averaging is applied here.
 *
 * @param offset    column of the first of the two pixels
 * @param pRGB      source row
 * @param SrcFormat pixel format of the source row
 * @param pY        destination luma row
 * @param pU        destination U row
 * @param pV        destination V row
 */
static inline void fillYUV_single(size_t offset, const BYTE* WINPR_RESTRICT pRGB, UINT32 SrcFormat,
                                  BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
                                  BYTE* WINPR_RESTRICT pV)
{
  WINPR_ASSERT(pRGB);
  WINPR_ASSERT(pY);
  WINPR_ASSERT(pU);
  WINPR_ASSERT(pV);
  const UINT32 bpp = FreeRDPGetBytesPerPixel(SrcFormat);

  size_t k = 0;
  while (k < 2)
  {
    const size_t pos = offset + k;
    BYTE R = 0;
    BYTE G = 0;
    BYTE B = 0;
    const UINT32 color = FreeRDPReadColor(&pRGB[pos * bpp], SrcFormat);
    FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);

    pY[pos] = RGB2Y(R, G, B);
    pU[pos] = RGB2U(R, G, B);
    pV[pos] = RGB2V(R, G, B);
    k++;
  }
}
822
823
/**
 * Convert two rows of an arbitrary pixel format to YUV444, one 2x2 block
 * (see fillYUV) at a time.
 *
 * @param pRGB      the two source rows
 * @param SrcFormat pixel format of the source rows
 * @param pY        the two destination luma rows
 * @param pU        the two destination U rows
 * @param pV        the two destination V rows
 * @param nWidth    number of pixels per row, asserted to be even
 */
static inline void general_RGBToYUV444_DOUBLE_ROW(const BYTE* WINPR_RESTRICT pRGB[2],
                                                  UINT32 SrcFormat, BYTE* WINPR_RESTRICT pY[2],
                                                  BYTE* WINPR_RESTRICT pU[2],
                                                  BYTE* WINPR_RESTRICT pV[2], UINT32 nWidth)
{
  WINPR_ASSERT((nWidth % 2) == 0);

  size_t column = 0;
  while (column < nWidth)
  {
    fillYUV(column, pRGB, SrcFormat, pY, pU, pV);
    column += 2;
  }
}
835
836
/**
 * Convert one row of an arbitrary pixel format to YUV444, two pixels
 * (see fillYUV_single) at a time.
 *
 * @param pRGB      source row
 * @param SrcFormat pixel format of the source row
 * @param pY        destination luma row
 * @param pU        destination U row
 * @param pV        destination V row
 * @param nWidth    number of pixels in the row, asserted to be even
 */
static inline void general_RGBToYUV444_SINGLE_ROW(const BYTE* WINPR_RESTRICT pRGB, UINT32 SrcFormat,
                                                  BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
                                                  BYTE* WINPR_RESTRICT pV, UINT32 nWidth)
{
  WINPR_ASSERT((nWidth % 2) == 0);

  size_t column = 0;
  while (column < nWidth)
  {
    fillYUV_single(column, pRGB, SrcFormat, pY, pU, pV);
    column += 2;
  }
}
847
848
/**
 * Generic (any pixel format) RGB to planar YUV444 conversion.
 *
 * Rows are processed in pairs through general_RGBToYUV444_DOUBLE_ROW; when the
 * height is odd the remaining row goes through general_RGBToYUV444_SINGLE_ROW.
 *
 * @param pSrc      first byte of the source image
 * @param SrcFormat pixel format of the source image
 * @param srcStep   distance in bytes between two source rows
 * @param pDst      the three destination planes (Y, U, V)
 * @param dstStep   distance in bytes between two rows, one entry per plane
 * @param roi       width and height of the area to convert
 *
 * @return always PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_RGBToYUV444_8u_P3AC4R_RGB(const BYTE* WINPR_RESTRICT pSrc,
                                                          UINT32 SrcFormat, const UINT32 srcStep,
                                                          BYTE* WINPR_RESTRICT pDst[3],
                                                          const UINT32 dstStep[3],
                                                          const prim_size_t* WINPR_RESTRICT roi)
{
  const UINT32 width = roi->width;
  const UINT32 height = roi->height;
  /* largest even number of rows that fits into the image */
  const UINT32 pairedRows = height - height % 2;
  size_t row = 0;

  while (row < pairedRows)
  {
    const size_t below = row + 1;
    const BYTE* rgbRows[] = { &pSrc[row * srcStep], &pSrc[below * srcStep] };
    BYTE* lumaRows[] = { pDst[0] + row * dstStep[0], pDst[0] + below * dstStep[0] };
    BYTE* uRows[] = { pDst[1] + row * dstStep[1], pDst[1] + below * dstStep[1] };
    BYTE* vRows[] = { pDst[2] + row * dstStep[2], pDst[2] + below * dstStep[2] };

    general_RGBToYUV444_DOUBLE_ROW(rgbRows, SrcFormat, lumaRows, uRows, vRows, width);
    row += 2;
  }

  /* At most one row is left over (odd height). */
  if (row < height)
  {
    general_RGBToYUV444_SINGLE_ROW(&pSrc[row * srcStep], SrcFormat, pDst[0] + row * dstStep[0],
                                   pDst[1] + row * dstStep[1], pDst[2] + row * dstStep[2],
                                   width);
  }

  return PRIMITIVES_SUCCESS;
}
879
880
/**
 * RGB to planar YUV444 conversion entry point.
 *
 * PIXEL_FORMAT_BGRA32 and PIXEL_FORMAT_BGRX32 are routed to the dedicated
 * BGRX implementation, every other format to the generic one.
 *
 * @param pSrc      first byte of the source image
 * @param SrcFormat pixel format of the source image
 * @param srcStep   distance in bytes between two source rows
 * @param pDst      the three destination planes (Y, U, V)
 * @param dstStep   distance in bytes between two rows, one entry per plane
 * @param roi       width and height of the area to convert
 *
 * @return the status reported by the selected implementation
 */
static pstatus_t general_RGBToYUV444_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat,
                                               const UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
                                               const UINT32 dstStep[3],
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  const BOOL isBgrx =
      (SrcFormat == PIXEL_FORMAT_BGRA32) || (SrcFormat == PIXEL_FORMAT_BGRX32);

  if (isBgrx)
    return general_RGBToYUV444_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, roi);

  return general_RGBToYUV444_8u_P3AC4R_RGB(pSrc, SrcFormat, srcStep, pDst, dstStep, roi);
}
894
895
/**
 * Convert a 32 bit image with B, G, R byte order to planar YUV420.
 *
 * Every pixel yields one luma sample. One U and one V sample are produced per
 * 2x2 block from the component sums of the pixels that were read. The sum is
 * always divided by four, also when fewer than four pixels contributed (last
 * column of an odd width, trailing row of an odd height).
 *
 * @param pSrc    first byte of the source image
 * @param srcStep distance in bytes between two source rows
 * @param pDst    the three destination planes (Y, U, V)
 * @param dstStep distance in bytes between two rows, one entry per plane
 * @param roi     width and height of the area to convert
 *
 * @return always PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_RGBToYUV420_BGRX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                 BYTE* WINPR_RESTRICT pDst[3],
                                                 const UINT32 dstStep[3],
                                                 const prim_size_t* WINPR_RESTRICT roi)
{
  const UINT32 lastColumn = roi->width - 1;
  const UINT32 pairedRows = roi->height - roi->height % 2;
  size_t y = 0;

  /* Two source rows at a time: two luma rows and one chroma row each. */
  for (size_t chromaRow = 0; y < pairedRows; y += 2, chromaRow++)
  {
    const BYTE* upper = &pSrc[y * srcStep];
    const BYTE* lower = &upper[srcStep];
    BYTE* lumaUpper = &pDst[0][y * dstStep[0]];
    BYTE* lumaLower = &lumaUpper[dstStep[0]];
    BYTE* uOut = &pDst[1][chromaRow * dstStep[1]];
    BYTE* vOut = &pDst[2][chromaRow * dstStep[2]];

    for (size_t x = 0; x < roi->width; x += 2)
    {
      /* the right column of the block is missing at the end of an odd width */
      const BOOL hasRight = (x < lastColumn);

      /* upper row, left pixel */
      BYTE B = upper[0];
      BYTE G = upper[1];
      BYTE R = upper[2];
      INT32 sumB = B;
      INT32 sumG = G;
      INT32 sumR = R;
      lumaUpper[0] = RGB2Y(R, G, B);

      if (hasRight)
      {
        /* upper row, right pixel */
        B = upper[4];
        G = upper[5];
        R = upper[6];
        sumB += B;
        sumG += G;
        sumR += R;
        lumaUpper[1] = RGB2Y(R, G, B);
      }

      /* lower row, left pixel */
      B = lower[0];
      G = lower[1];
      R = lower[2];
      sumB += B;
      sumG += G;
      sumR += R;
      lumaLower[0] = RGB2Y(R, G, B);

      if (hasRight)
      {
        /* lower row, right pixel */
        B = lower[4];
        G = lower[5];
        R = lower[6];
        sumB += B;
        sumG += G;
        sumR += R;
        lumaLower[1] = RGB2Y(R, G, B);
      }

      sumB >>= 2;
      sumG >>= 2;
      sumR >>= 2;
      *uOut++ = RGB2U(sumR, sumG, sumB);
      *vOut++ = RGB2V(sumR, sumG, sumB);

      upper += 8;
      lower += 8;
      lumaUpper += 2;
      lumaLower += 2;
    }
  }

  /* Odd height: one row is left, it shares the chroma row y / 2. */
  if (y < roi->height)
  {
    const BYTE* line = &pSrc[y * srcStep];
    BYTE* luma = &pDst[0][y * dstStep[0]];
    BYTE* uOut = &pDst[1][(y / 2) * dstStep[1]];
    BYTE* vOut = &pDst[2][(y / 2) * dstStep[2]];

    for (size_t x = 0; x < roi->width; x += 2)
    {
      /* left pixel */
      BYTE B = line[0];
      BYTE G = line[1];
      BYTE R = line[2];
      INT32 sumB = B;
      INT32 sumG = G;
      INT32 sumR = R;
      luma[0] = RGB2Y(R, G, B);

      if (x < lastColumn)
      {
        /* right pixel */
        B = line[4];
        G = line[5];
        R = line[6];
        sumB += B;
        sumG += G;
        sumR += R;
        luma[1] = RGB2Y(R, G, B);
      }

      sumB >>= 2;
      sumG >>= 2;
      sumR >>= 2;
      *uOut++ = RGB2U(sumR, sumG, sumB);
      *vOut++ = RGB2V(sumR, sumG, sumB);

      line += 8;
      luma += 2;
    }
  }

  return PRIMITIVES_SUCCESS;
}
1008
1009
/**
 * Convert a 32 bit image with R, G, B byte order to planar YUV420.
 *
 * Every pixel yields one luma sample. One U and one V sample are produced per
 * 2x2 block from the component sums of the pixels that were read. The sum is
 * always divided by four, also when fewer than four pixels contributed (last
 * column of an odd width, trailing row of an odd height).
 *
 * @param pSrc    first byte of the source image
 * @param srcStep distance in bytes between two source rows
 * @param pDst    the three destination planes (Y, U, V)
 * @param dstStep distance in bytes between two rows, one entry per plane
 * @param roi     width and height of the area to convert
 *
 * @return always PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_RGBToYUV420_RGBX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                 BYTE* WINPR_RESTRICT pDst[3],
                                                 const UINT32 dstStep[3],
                                                 const prim_size_t* WINPR_RESTRICT roi)
{
  const UINT32 lastColumn = roi->width - 1;
  const UINT32 pairedRows = roi->height - roi->height % 2;
  size_t y = 0;

  /* Two source rows at a time: two luma rows and one chroma row each. */
  for (size_t chromaRow = 0; y < pairedRows; y += 2, chromaRow++)
  {
    const BYTE* upper = &pSrc[y * srcStep];
    const BYTE* lower = &upper[srcStep];
    BYTE* lumaUpper = &pDst[0][y * dstStep[0]];
    BYTE* lumaLower = &lumaUpper[dstStep[0]];
    BYTE* uOut = &pDst[1][chromaRow * dstStep[1]];
    BYTE* vOut = &pDst[2][chromaRow * dstStep[2]];

    for (UINT32 x = 0; x < roi->width; x += 2)
    {
      /* the right column of the block is missing at the end of an odd width */
      const BOOL hasRight = (x < lastColumn);

      /* upper row, left pixel */
      BYTE R = upper[0];
      BYTE G = upper[1];
      BYTE B = upper[2];
      INT32 sumR = R;
      INT32 sumG = G;
      INT32 sumB = B;
      lumaUpper[0] = RGB2Y(R, G, B);

      if (hasRight)
      {
        /* upper row, right pixel */
        R = upper[4];
        G = upper[5];
        B = upper[6];
        sumR += R;
        sumG += G;
        sumB += B;
        lumaUpper[1] = RGB2Y(R, G, B);
      }

      /* lower row, left pixel */
      R = lower[0];
      G = lower[1];
      B = lower[2];
      sumR += R;
      sumG += G;
      sumB += B;
      lumaLower[0] = RGB2Y(R, G, B);

      if (hasRight)
      {
        /* lower row, right pixel */
        R = lower[4];
        G = lower[5];
        B = lower[6];
        sumR += R;
        sumG += G;
        sumB += B;
        lumaLower[1] = RGB2Y(R, G, B);
      }

      sumB >>= 2;
      sumG >>= 2;
      sumR >>= 2;
      *uOut++ = RGB2U(sumR, sumG, sumB);
      *vOut++ = RGB2V(sumR, sumG, sumB);

      upper += 8;
      lower += 8;
      lumaUpper += 2;
      lumaLower += 2;
    }
  }

  /* Odd height: one row is left, it shares the chroma row y / 2. */
  if (y < roi->height)
  {
    const BYTE* line = &pSrc[y * srcStep];
    BYTE* luma = &pDst[0][y * dstStep[0]];
    BYTE* uOut = &pDst[1][(y / 2) * dstStep[1]];
    BYTE* vOut = &pDst[2][(y / 2) * dstStep[2]];

    for (UINT32 x = 0; x < roi->width; x += 2)
    {
      /* left pixel */
      BYTE R = line[0];
      BYTE G = line[1];
      BYTE B = line[2];
      INT32 sumR = R;
      INT32 sumG = G;
      INT32 sumB = B;
      luma[0] = RGB2Y(R, G, B);

      if (x < lastColumn)
      {
        /* right pixel */
        R = line[4];
        G = line[5];
        B = line[6];
        sumR += R;
        sumG += G;
        sumB += B;
        luma[1] = RGB2Y(R, G, B);
      }

      sumB >>= 2;
      sumG >>= 2;
      sumR >>= 2;
      *uOut++ = RGB2U(sumR, sumG, sumB);
      *vOut++ = RGB2V(sumR, sumG, sumB);

      line += 8;
      luma += 2;
    }
  }

  return PRIMITIVES_SUCCESS;
}
1130
1131
/**
 * Convert an image of an arbitrary pixel format to planar YUV420.
 *
 * Pixels are fetched with FreeRDPReadColor and split with FreeRDPSplitColor.
 * Every pixel yields one luma sample. One U and one V sample are produced per
 * 2x2 block from the component sums of the pixels that were read. The sum is
 * always divided by four, also when fewer than four pixels contributed (last
 * column of an odd width, trailing row of an odd height).
 *
 * @param pSrc      first byte of the source image
 * @param srcFormat pixel format of the source image
 * @param srcStep   distance in bytes between two source rows
 * @param pDst      the three destination planes (Y, U, V)
 * @param dstStep   distance in bytes between two rows, one entry per plane
 * @param roi       width and height of the area to convert
 *
 * @return always PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_RGBToYUV420_ANY(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                                UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
                                                const UINT32 dstStep[3],
                                                const prim_size_t* WINPR_RESTRICT roi)
{
  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
  const UINT32 lastColumn = roi->width - 1;
  const UINT32 pairedRows = roi->height - roi->height % 2;
  size_t y = 0;

  /* Two source rows at a time: two luma rows and one chroma row each. */
  for (size_t chromaRow = 0; y < pairedRows; y += 2, chromaRow++)
  {
    const BYTE* upper = &pSrc[y * srcStep];
    const BYTE* lower = &upper[srcStep];
    BYTE* lumaUpper = &pDst[0][y * dstStep[0]];
    BYTE* lumaLower = &lumaUpper[dstStep[0]];
    BYTE* uOut = &pDst[1][chromaRow * dstStep[1]];
    BYTE* vOut = &pDst[2][chromaRow * dstStep[2]];

    for (size_t x = 0; x < roi->width; x += 2)
    {
      /* the right column of the block is missing at the end of an odd width */
      const BOOL hasRight = (x < lastColumn);
      /* R, G and B are shared by all reads of this block and only zeroed once */
      BYTE R = 0;
      BYTE G = 0;
      BYTE B = 0;

      /* upper row, left pixel */
      UINT32 color = FreeRDPReadColor(upper, srcFormat);
      FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
      INT32 sumR = R;
      INT32 sumG = G;
      INT32 sumB = B;
      lumaUpper[0] = RGB2Y(R, G, B);

      if (hasRight)
      {
        /* upper row, right pixel */
        color = FreeRDPReadColor(upper + bpp, srcFormat);
        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
        sumR += R;
        sumG += G;
        sumB += B;
        lumaUpper[1] = RGB2Y(R, G, B);
      }

      /* lower row, left pixel */
      color = FreeRDPReadColor(lower, srcFormat);
      FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
      sumR += R;
      sumG += G;
      sumB += B;
      lumaLower[0] = RGB2Y(R, G, B);

      if (hasRight)
      {
        /* lower row, right pixel */
        color = FreeRDPReadColor(lower + bpp, srcFormat);
        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
        sumR += R;
        sumG += G;
        sumB += B;
        lumaLower[1] = RGB2Y(R, G, B);
      }

      sumR >>= 2;
      sumG >>= 2;
      sumB >>= 2;
      *uOut++ = RGB2U(sumR, sumG, sumB);
      *vOut++ = RGB2V(sumR, sumG, sumB);

      upper += 2ULL * bpp;
      lower += 2ULL * bpp;
      lumaUpper += 2;
      lumaLower += 2;
    }
  }

  /* Odd height: one row is left, it shares the chroma row y / 2. */
  if (y < roi->height)
  {
    const BYTE* line = &pSrc[y * srcStep];
    BYTE* luma = &pDst[0][y * dstStep[0]];
    BYTE* uOut = &pDst[1][(y / 2) * dstStep[1]];
    BYTE* vOut = &pDst[2][(y / 2) * dstStep[2]];

    for (size_t x = 0; x < roi->width; x += 2)
    {
      BYTE R = 0;
      BYTE G = 0;
      BYTE B = 0;

      /* left pixel */
      UINT32 color = FreeRDPReadColor(line, srcFormat);
      FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
      INT32 sumR = R;
      INT32 sumG = G;
      INT32 sumB = B;
      luma[0] = RGB2Y(R, G, B);

      if (x < lastColumn)
      {
        /* right pixel */
        color = FreeRDPReadColor(line + bpp, srcFormat);
        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
        sumR += R;
        sumG += G;
        sumB += B;
        luma[1] = RGB2Y(R, G, B);
      }

      sumR >>= 2;
      sumG >>= 2;
      sumB >>= 2;
      *uOut++ = RGB2U(sumR, sumG, sumB);
      *vOut++ = RGB2V(sumR, sumG, sumB);

      line += 2ULL * bpp;
      luma += 2;
    }
  }

  return PRIMITIVES_SUCCESS;
}
1255
1256
/**
 * RGB to planar YUV420 conversion entry point.
 *
 * PIXEL_FORMAT_BGRA32/BGRX32 and PIXEL_FORMAT_RGBA32/RGBX32 are routed to
 * their dedicated implementations, every other format to the generic one.
 *
 * @param pSrc      first byte of the source image
 * @param srcFormat pixel format of the source image
 * @param srcStep   distance in bytes between two source rows
 * @param pDst      the three destination planes (Y, U, V)
 * @param dstStep   distance in bytes between two rows, one entry per plane
 * @param roi       width and height of the area to convert
 *
 * @return the status reported by the selected implementation
 */
static pstatus_t general_RGBToYUV420_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                               UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
                                               const UINT32 dstStep[3],
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  if ((srcFormat == PIXEL_FORMAT_BGRA32) || (srcFormat == PIXEL_FORMAT_BGRX32))
    return general_RGBToYUV420_BGRX(pSrc, srcStep, pDst, dstStep, roi);

  if ((srcFormat == PIXEL_FORMAT_RGBA32) || (srcFormat == PIXEL_FORMAT_RGBX32))
    return general_RGBToYUV420_RGBX(pSrc, srcStep, pDst, dstStep, roi);

  return general_RGBToYUV420_ANY(pSrc, srcFormat, srcStep, pDst, dstStep, roi);
}
1275
1276
/**
 * Convert two BGRX rows (or one, when there is no odd row) into the AVC444
 * "Bx" areas.
 *
 * Source pixels are addressed by the absolute column x, starting at offset.
 * All destination pointers are written sequentially from where they point.
 *
 * @param offset   first source column to convert
 * @param pSrcEven even source row
 * @param pSrcOdd  odd source row, only accessed when b1Odd is not NULL
 * @param b1Even   luma output for the even row
 * @param b1Odd    luma output for the odd row, NULL if there is no odd row
 * @param b2       U output, average of the 2x2 block
 * @param b3       V output, average of the 2x2 block
 * @param b4       U output of the odd row, only written when b1Odd is not NULL
 * @param b5       V output of the odd row, only written when b1Odd is not NULL
 * @param b6       U output of the right pixel of the even row
 * @param b7       V output of the right pixel of the even row
 * @param width    row width in pixels, asserted to be even
 */
static inline void int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(
    size_t offset, const BYTE* WINPR_RESTRICT pSrcEven, const BYTE* WINPR_RESTRICT pSrcOdd,
    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
{
  WINPR_ASSERT((width % 2) == 0);
  for (size_t x = offset; x < width; x += 2)
  {
    const BYTE* srcEven = &pSrcEven[4ULL * x];
    const BOOL lastX = (x + 1) >= width;
    BYTE Y1e = 0;
    BYTE Y2e = 0;
    BYTE U1e = 0;
    BYTE V1e = 0;
    BYTE U2e = 0;
    BYTE V2e = 0;
    BYTE Y1o = 0;
    BYTE Y2o = 0;
    BYTE U1o = 0;
    BYTE V1o = 0;
    BYTE U2o = 0;
    BYTE V2o = 0;

    /* Read 4 pixels, 2 from even, 2 from odd lines.
     * The left even pixel seeds all samples so that missing pixels (last
     * column, missing odd row) fall back to it. */
    {
      const BYTE b = srcEven[0];
      const BYTE g = srcEven[1];
      const BYTE r = srcEven[2];
      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
    }

    if (!lastX)
    {
      const BYTE b = srcEven[4];
      const BYTE g = srcEven[5];
      const BYTE r = srcEven[6];
      Y2e = RGB2Y(r, g, b);
      U2e = RGB2U(r, g, b);
      V2e = RGB2V(r, g, b);
    }

    if (b1Odd)
    {
      /* Form the odd row pointer only here: callers pass pSrcOdd == NULL
       * together with b1Odd == NULL, and offsetting a NULL pointer is
       * undefined behaviour. */
      const BYTE* srcOdd = &pSrcOdd[4ULL * x];
      {
        const BYTE b = srcOdd[0];
        const BYTE g = srcOdd[1];
        const BYTE r = srcOdd[2];
        Y1o = Y2o = RGB2Y(r, g, b);
        U1o = U2o = RGB2U(r, g, b);
        V1o = V2o = RGB2V(r, g, b);
      }

      if (!lastX)
      {
        const BYTE b = srcOdd[4];
        const BYTE g = srcOdd[5];
        const BYTE r = srcOdd[6];
        Y2o = RGB2Y(r, g, b);
        U2o = RGB2U(r, g, b);
        V2o = RGB2V(r, g, b);
      }
    }

    /* We have 4 Y pixels, so store them. */
    *b1Even++ = Y1e;
    *b1Even++ = Y2e;

    if (b1Odd)
    {
      *b1Odd++ = Y1o;
      *b1Odd++ = Y2o;
    }

    /* 2x 2y pixel in luma UV plane use averaging
     */
    {
      const BYTE Uavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)U1e + U2e + U1o + U2o) / 4);
      const BYTE Vavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)V1e + V2e + V1o + V2o) / 4);
      *b2++ = Uavg;
      *b3++ = Vavg;
    }

    /* UV from 2x, 2y+1 */
    if (b1Odd)
    {
      *b4++ = U1o;
      *b5++ = V1o;

      if (!lastX)
      {
        *b4++ = U2o;
        *b5++ = V2o;
      }
    }

    /* UV from 2x+1, 2y */
    if (!lastX)
    {
      *b6++ = U2e;
      *b7++ = V2e;
    }
  }
}
1384
1385
/**
 * Externally visible wrapper around int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW.
 * All arguments are forwarded unchanged.
 */
void general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(size_t offset, const BYTE* WINPR_RESTRICT pSrcEven,
                                            const BYTE* WINPR_RESTRICT pSrcOdd,
                                            BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd,
                                            BYTE* WINPR_RESTRICT b2, BYTE* WINPR_RESTRICT b3,
                                            BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
                                            BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7,
                                            UINT32 width)
{
  int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(offset, pSrcEven, pSrcOdd, /* source rows */
                                             b1Even, b1Odd,             /* luma */
                                             b2, b3,                    /* averaged chroma */
                                             b4, b5,                    /* odd row chroma */
                                             b6, b7,                    /* odd column chroma */
                                             width);
}
1396
1397
/**
 * Convert a BGRX image into the two YUV420 frames of the AVC444 encoding.
 *
 * Rows are processed in pairs. With half = y / 2, the U samples of the odd
 * row go to row (half & ~7) + half of pDst2[0], which equals
 * 16 * (half / 8) + half % 8, and the V samples 8 rows below that. A trailing
 * row of an odd height is converted without an odd row partner.
 *
 * @param pSrc     first byte of the source image
 * @param srcStep  distance in bytes between two source rows
 * @param pDst1    planes of the first destination frame
 * @param dst1Step distance in bytes between two rows, one entry per pDst1 plane
 * @param pDst2    planes of the second destination frame
 * @param dst2Step distance in bytes between two rows, one entry per pDst2 plane
 * @param roi      width and height of the area to convert
 *
 * @return always PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_RGBToAVC444YUV_BGRX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                    BYTE* WINPR_RESTRICT pDst1[3],
                                                    const UINT32 dst1Step[3],
                                                    BYTE* WINPR_RESTRICT pDst2[3],
                                                    const UINT32 dst2Step[3],
                                                    const prim_size_t* WINPR_RESTRICT roi)
{
  /**
   * Note:
   * Read information in function general_RGBToAVC444YUV_ANY below !
   */
  size_t y = 0;
  for (; y < roi->height - roi->height % 2; y += 2)
  {
    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
    const BYTE* srcOdd = pSrc + 1ULL * (y + 1) * srcStep;
    const size_t i = y >> 1;
    /* mask in the width of i, same as the RGBX variant of this function */
    const size_t n = (i & (size_t)~7) + i;
    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
    BYTE* b1Odd = (b1Even + dst1Step[0]);
    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n;
    BYTE* b5 = b4 + 8ULL * dst2Step[0];
    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
    int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(0, srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4,
                                               b5, b6, b7, roi->width);
  }

  /* Odd height: the last row has no odd partner, so b1Odd, b4 and b5 are NULL. */
  for (; y < roi->height; y++)
  {
    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
    int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(0, srcEven, NULL, b1Even, NULL, b2, b3, NULL,
                                               NULL, b6, b7, roi->width);
  }

  return PRIMITIVES_SUCCESS;
}
1440
1441
static inline void general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(
1442
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd,
1443
    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
1444
    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
1445
    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
1446
0
{
1447
0
  WINPR_ASSERT((width % 2) == 0);
1448
0
  for (UINT32 x = 0; x < width; x += 2)
1449
0
  {
1450
0
    const BOOL lastX = (x + 1) >= width;
1451
0
    BYTE Y1e = 0;
1452
0
    BYTE Y2e = 0;
1453
0
    BYTE U1e = 0;
1454
0
    BYTE V1e = 0;
1455
0
    BYTE U2e = 0;
1456
0
    BYTE V2e = 0;
1457
0
    BYTE Y1o = 0;
1458
0
    BYTE Y2o = 0;
1459
0
    BYTE U1o = 0;
1460
0
    BYTE V1o = 0;
1461
0
    BYTE U2o = 0;
1462
0
    BYTE V2o = 0;
1463
    /* Read 4 pixels, 2 from even, 2 from odd lines */
1464
0
    {
1465
0
      const BYTE r = *srcEven++;
1466
0
      const BYTE g = *srcEven++;
1467
0
      const BYTE b = *srcEven++;
1468
0
      srcEven++;
1469
0
      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
1470
0
      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
1471
0
      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
1472
0
    }
1473
1474
0
    if (!lastX)
1475
0
    {
1476
0
      const BYTE r = *srcEven++;
1477
0
      const BYTE g = *srcEven++;
1478
0
      const BYTE b = *srcEven++;
1479
0
      srcEven++;
1480
0
      Y2e = RGB2Y(r, g, b);
1481
0
      U2e = RGB2U(r, g, b);
1482
0
      V2e = RGB2V(r, g, b);
1483
0
    }
1484
1485
0
    if (b1Odd)
1486
0
    {
1487
0
      const BYTE r = *srcOdd++;
1488
0
      const BYTE g = *srcOdd++;
1489
0
      const BYTE b = *srcOdd++;
1490
0
      srcOdd++;
1491
0
      Y1o = Y2o = RGB2Y(r, g, b);
1492
0
      U1o = U2o = RGB2U(r, g, b);
1493
0
      V1o = V2o = RGB2V(r, g, b);
1494
0
    }
1495
1496
0
    if (b1Odd && !lastX)
1497
0
    {
1498
0
      const BYTE r = *srcOdd++;
1499
0
      const BYTE g = *srcOdd++;
1500
0
      const BYTE b = *srcOdd++;
1501
0
      srcOdd++;
1502
0
      Y2o = RGB2Y(r, g, b);
1503
0
      U2o = RGB2U(r, g, b);
1504
0
      V2o = RGB2V(r, g, b);
1505
0
    }
1506
1507
    /* We have 4 Y pixels, so store them. */
1508
0
    *b1Even++ = Y1e;
1509
0
    *b1Even++ = Y2e;
1510
1511
0
    if (b1Odd)
1512
0
    {
1513
0
      *b1Odd++ = Y1o;
1514
0
      *b1Odd++ = Y2o;
1515
0
    }
1516
1517
    /* 2x 2y pixel in luma UV plane use averaging
1518
     */
1519
0
    {
1520
0
      const BYTE Uavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)U1e + U2e + U1o + U2o) / 4);
1521
0
      const BYTE Vavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)V1e + V2e + V1o + V2o) / 4);
1522
0
      *b2++ = Uavg;
1523
0
      *b3++ = Vavg;
1524
0
    }
1525
1526
    /* UV from 2x, 2y+1 */
1527
0
    if (b1Odd)
1528
0
    {
1529
0
      *b4++ = U1o;
1530
0
      *b5++ = V1o;
1531
1532
0
      if (!lastX)
1533
0
      {
1534
0
        *b4++ = U2o;
1535
0
        *b5++ = V2o;
1536
0
      }
1537
0
    }
1538
1539
    /* UV from 2x+1, 2y */
1540
0
    if (!lastX)
1541
0
    {
1542
0
      *b6++ = U2e;
1543
0
      *b7++ = V2e;
1544
0
    }
1545
0
  }
1546
0
}
1547
1548
/**
 * Convert an RGBX image into the two YUV420 frames of the AVC444 encoding.
 *
 * Rows are processed in pairs. With half = y / 2, the U samples of the odd
 * row go to row (half & ~7) + half of pDst2[0], which equals
 * 16 * (half / 8) + half % 8, and the V samples 8 rows below that. A trailing
 * row of an odd height is converted without an odd row partner.
 *
 * @param pSrc     first byte of the source image
 * @param srcStep  distance in bytes between two source rows
 * @param pDst1    planes of the first destination frame
 * @param dst1Step distance in bytes between two rows, one entry per pDst1 plane
 * @param pDst2    planes of the second destination frame
 * @param dst2Step distance in bytes between two rows, one entry per pDst2 plane
 * @param roi      width and height of the area to convert
 *
 * @return always PRIMITIVES_SUCCESS
 */
static inline pstatus_t general_RGBToAVC444YUV_RGBX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
                                                    BYTE* WINPR_RESTRICT pDst1[3],
                                                    const UINT32 dst1Step[3],
                                                    BYTE* WINPR_RESTRICT pDst2[3],
                                                    const UINT32 dst2Step[3],
                                                    const prim_size_t* WINPR_RESTRICT roi)
{
  /**
   * Note:
   * Read information in function general_RGBToAVC444YUV_ANY below !
   */

  size_t y = 0;
  for (; y < roi->height - roi->height % 2; y += 2)
  {
    /* The loop bound guarantees y + 1 < roi->height, so the odd row always
     * exists here; the trailing row of an odd height is handled below. */
    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
    const BYTE* srcOdd = pSrc + 1ULL * (y + 1) * srcStep;
    const size_t i = y >> 1;
    const size_t n = (i & (size_t)~7) + i;
    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
    BYTE* b1Odd = b1Even + dst1Step[0];
    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n;
    BYTE* b5 = b4 + 8ULL * dst2Step[0];
    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
    general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6,
                                           b7, roi->width);
  }

  /* Odd height: the last row has no odd partner, so b1Odd, b4 and b5 are NULL. */
  for (; y < roi->height; y++)
  {
    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
    general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(srcEven, NULL, b1Even, NULL, b2, b3, NULL, NULL, b6,
                                           b7, roi->width);
  }
  return PRIMITIVES_SUCCESS;
}
1592
1593
static inline void general_RGBToAVC444YUV_ANY_DOUBLE_ROW(
1594
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat,
1595
    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
1596
    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
1597
    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
1598
0
{
1599
0
  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
1600
0
  for (UINT32 x = 0; x < width; x += 2)
1601
0
  {
1602
0
    const BOOL lastX = (x + 1) >= width;
1603
0
    BYTE Y1e = 0;
1604
0
    BYTE Y2e = 0;
1605
0
    BYTE U1e = 0;
1606
0
    BYTE V1e = 0;
1607
0
    BYTE U2e = 0;
1608
0
    BYTE V2e = 0;
1609
0
    BYTE Y1o = 0;
1610
0
    BYTE Y2o = 0;
1611
0
    BYTE U1o = 0;
1612
0
    BYTE V1o = 0;
1613
0
    BYTE U2o = 0;
1614
0
    BYTE V2o = 0;
1615
    /* Read 4 pixels, 2 from even, 2 from odd lines */
1616
0
    {
1617
0
      BYTE r = 0;
1618
0
      BYTE g = 0;
1619
0
      BYTE b = 0;
1620
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1621
0
      srcEven += bpp;
1622
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1623
0
      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
1624
0
      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
1625
0
      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
1626
0
    }
1627
1628
0
    if (!lastX)
1629
0
    {
1630
0
      BYTE r = 0;
1631
0
      BYTE g = 0;
1632
0
      BYTE b = 0;
1633
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1634
0
      srcEven += bpp;
1635
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1636
0
      Y2e = RGB2Y(r, g, b);
1637
0
      U2e = RGB2U(r, g, b);
1638
0
      V2e = RGB2V(r, g, b);
1639
0
    }
1640
1641
0
    if (b1Odd)
1642
0
    {
1643
0
      BYTE r = 0;
1644
0
      BYTE g = 0;
1645
0
      BYTE b = 0;
1646
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1647
0
      srcOdd += bpp;
1648
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1649
0
      Y1o = Y2o = RGB2Y(r, g, b);
1650
0
      U1o = U2o = RGB2U(r, g, b);
1651
0
      V1o = V2o = RGB2V(r, g, b);
1652
0
    }
1653
1654
0
    if (b1Odd && !lastX)
1655
0
    {
1656
0
      BYTE r = 0;
1657
0
      BYTE g = 0;
1658
0
      BYTE b = 0;
1659
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1660
0
      srcOdd += bpp;
1661
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1662
0
      Y2o = RGB2Y(r, g, b);
1663
0
      U2o = RGB2U(r, g, b);
1664
0
      V2o = RGB2V(r, g, b);
1665
0
    }
1666
1667
    /* We have 4 Y pixels, so store them. */
1668
0
    *b1Even++ = Y1e;
1669
0
    *b1Even++ = Y2e;
1670
1671
0
    if (b1Odd)
1672
0
    {
1673
0
      *b1Odd++ = Y1o;
1674
0
      *b1Odd++ = Y2o;
1675
0
    }
1676
1677
    /* 2x 2y pixel in luma UV plane use averaging
1678
     */
1679
0
    {
1680
0
      const BYTE Uavg = WINPR_ASSERTING_INT_CAST(
1681
0
          BYTE, ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4);
1682
0
      const BYTE Vavg = WINPR_ASSERTING_INT_CAST(
1683
0
          BYTE, ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4);
1684
0
      *b2++ = Uavg;
1685
0
      *b3++ = Vavg;
1686
0
    }
1687
1688
    /* UV from 2x, 2y+1 */
1689
0
    if (b1Odd)
1690
0
    {
1691
0
      *b4++ = U1o;
1692
0
      *b5++ = V1o;
1693
1694
0
      if (!lastX)
1695
0
      {
1696
0
        *b4++ = U2o;
1697
0
        *b5++ = V2o;
1698
0
      }
1699
0
    }
1700
1701
    /* UV from 2x+1, 2y */
1702
0
    if (!lastX)
1703
0
    {
1704
0
      *b6++ = U2e;
1705
0
      *b7++ = V2e;
1706
0
    }
1707
0
  }
1708
0
}
1709
1710
static inline pstatus_t
1711
general_RGBToAVC444YUV_ANY(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep,
1712
                           BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3],
1713
                           BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3],
1714
                           const prim_size_t* WINPR_RESTRICT roi)
1715
0
{
1716
  /**
1717
   * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the
1718
   * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned
1719
   * to a multiple of 16.
1720
   * Hence the passed destination YUV420/CHROMA420 buffers must have been
1721
   * allocated accordingly !!
1722
   */
1723
  /**
1724
   * [MS-RDPEGFX 3.3.8.3.2 YUV420p Stream Combination] defines the following "Bx areas":
1725
   *
1726
   * YUV420 frame (main view):
1727
   * B1:  From Y444 all pixels
1728
   * B2:  From U444 all pixels in even rows with even columns
1729
   * B3:  From V444 all pixels in even rows with even columns
1730
   *
1731
   * Chroma420 frame (auxiliary view):
1732
   * B45: From U444 and V444 all pixels from all odd rows
1733
   *      (The odd U444 and V444 rows must be interleaved in 8-line blocks in B45 !!!)
1734
   * B6:  From U444 all pixels in even rows with odd columns
1735
   * B7:  From V444 all pixels in even rows with odd columns
1736
   *
1737
   * Microsoft's horrible unclear description in MS-RDPEGFX translated to pseudo code looks like
1738
   * this:
1739
   *
1740
   * for (y = 0; y < fullHeight; y++)
1741
   * {
1742
   *     for (x = 0; x < fullWidth; x++)
1743
   *     {
1744
   *         B1[x,y] = Y444[x,y];
1745
   *     }
1746
   *  }
1747
   *
1748
   * for (y = 0; y < halfHeight; y++)
1749
   * {
1750
   *     for (x = 0; x < halfWidth; x++)
1751
   *     {
1752
   *         B2[x,y] = U444[2 * x,     2 * y];
1753
   *         B3[x,y] = V444[2 * x,     2 * y];
1754
   *         B6[x,y] = U444[2 * x + 1, 2 * y];
1755
   *         B7[x,y] = V444[2 * x + 1, 2 * y];
1756
   *     }
1757
   *  }
1758
   *
1759
   * for (y = 0; y < halfHeight; y++)
1760
   * {
1761
   *     yU  = (y / 8) * 16;   // identify first row of correct 8-line U block in B45
1762
   *     yU += (y % 8);        // add offset rows in destination block
1763
   *     yV  = yU + 8;         // the corresponding v line is always 8 rows ahead
1764
   *
1765
   *     for (x = 0; x < fullWidth; x++)
1766
   *     {
1767
   *         B45[x,yU] = U444[x, 2 * y + 1];
1768
   *         B45[x,yV] = V444[x, 2 * y + 1];
1769
   *     }
1770
   *  }
1771
   *
1772
   */
1773
0
  const BYTE* pMaxSrc = pSrc + 1ULL * (roi->height - 1) * srcStep;
1774
1775
0
  for (size_t y = 0; y < roi->height; y += 2)
1776
0
  {
1777
0
    WINPR_ASSERT(y < UINT32_MAX);
1778
1779
0
    const BOOL last = (y >= (roi->height - 1));
1780
0
    const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc;
1781
0
    const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc;
1782
0
    const UINT32 i = (UINT32)y >> 1;
1783
0
    const UINT32 n = (i & (uint32_t)~7) + i;
1784
0
    BYTE* b1Even = pDst1[0] + y * dst1Step[0];
1785
0
    BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL;
1786
0
    BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1];
1787
0
    BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2];
1788
0
    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n;
1789
0
    BYTE* b5 = b4 + 8ULL * dst2Step[0];
1790
0
    BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1];
1791
0
    BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2];
1792
0
    general_RGBToAVC444YUV_ANY_DOUBLE_ROW(srcEven, srcOdd, srcFormat, b1Even, b1Odd, b2, b3, b4,
1793
0
                                          b5, b6, b7, roi->width);
1794
0
  }
1795
1796
0
  return PRIMITIVES_SUCCESS;
1797
0
}
1798
1799
/**
 * Validate the arguments and convert an RGB image to the AVC444 (v1) main and auxiliary
 * YUV420 frames, dispatching to the converter matching the source pixel format.
 *
 * @param pSrc      first byte of the source image
 * @param srcFormat pixel format of the source; BGRA32/BGRX32 and RGBA32/RGBX32 use
 *                  dedicated converters, everything else the generic one
 * @param srcStep   forwarded unchanged to the selected converter
 * @param pDst1     Y, U and V plane of the main view, all must be non-NULL
 * @param dst1Step  step of each main view plane, all must be non-zero
 * @param pDst2     Y, U and V plane of the auxiliary view, all must be non-NULL
 * @param dst2Step  step of each auxiliary view plane, all must be non-zero
 * @param roi       size of the region to convert, must be non-NULL
 *
 * @return -1 if an argument is rejected, otherwise the result of the selected converter
 */
static inline pstatus_t general_RGBToAVC444YUV(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                               UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
                                               const UINT32 dst1Step[3],
                                               BYTE* WINPR_RESTRICT pDst2[3],
                                               const UINT32 dst2Step[3],
                                               const prim_size_t* WINPR_RESTRICT roi)
{
  /* roi is dereferenced by every converter below, so reject NULL just like the others. */
  if (!pSrc || !pDst1 || !dst1Step || !pDst2 || !dst2Step || !roi)
    return -1;

  if (!pDst1[0] || !pDst1[1] || !pDst1[2])
    return -1;

  if (!dst1Step[0] || !dst1Step[1] || !dst1Step[2])
    return -1;

  if (!pDst2[0] || !pDst2[1] || !pDst2[2])
    return -1;

  if (!dst2Step[0] || !dst2Step[1] || !dst2Step[2])
    return -1;

  /* Every case returns, so nothing follows the switch. */
  switch (srcFormat)
  {
    case PIXEL_FORMAT_BGRA32:
    case PIXEL_FORMAT_BGRX32:
      return general_RGBToAVC444YUV_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
                                         roi);

    case PIXEL_FORMAT_RGBA32:
    case PIXEL_FORMAT_RGBX32:
      return general_RGBToAVC444YUV_RGBX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
                                         roi);

    default:
      return general_RGBToAVC444YUV_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2,
                                        dst2Step, roi);
  }
}
1841
1842
static inline void general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
1843
    const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat,
1844
    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
1845
    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
1846
    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
1847
    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
1848
    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
1849
    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
1850
0
{
1851
0
  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
1852
1853
0
  WINPR_ASSERT((width % 2) == 0);
1854
0
  for (UINT32 x = 0; x < width; x += 2)
1855
0
  {
1856
0
    BYTE Ya = 0;
1857
0
    BYTE Ua = 0;
1858
0
    BYTE Va = 0;
1859
0
    BYTE Yb = 0;
1860
0
    BYTE Ub = 0;
1861
0
    BYTE Vb = 0;
1862
0
    BYTE Yc = 0;
1863
0
    BYTE Uc = 0;
1864
0
    BYTE Vc = 0;
1865
0
    BYTE Yd = 0;
1866
0
    BYTE Ud = 0;
1867
0
    BYTE Vd = 0;
1868
0
    {
1869
0
      BYTE b = 0;
1870
0
      BYTE g = 0;
1871
0
      BYTE r = 0;
1872
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1873
0
      srcEven += bpp;
1874
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1875
0
      Ya = RGB2Y(r, g, b);
1876
0
      Ua = RGB2U(r, g, b);
1877
0
      Va = RGB2V(r, g, b);
1878
0
    }
1879
1880
0
    if (x < width - 1)
1881
0
    {
1882
0
      BYTE b = 0;
1883
0
      BYTE g = 0;
1884
0
      BYTE r = 0;
1885
0
      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
1886
0
      srcEven += bpp;
1887
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1888
0
      Yb = RGB2Y(r, g, b);
1889
0
      Ub = RGB2U(r, g, b);
1890
0
      Vb = RGB2V(r, g, b);
1891
0
    }
1892
0
    else
1893
0
    {
1894
0
      Yb = Ya;
1895
0
      Ub = Ua;
1896
0
      Vb = Va;
1897
0
    }
1898
1899
0
    if (srcOdd)
1900
0
    {
1901
0
      BYTE b = 0;
1902
0
      BYTE g = 0;
1903
0
      BYTE r = 0;
1904
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1905
0
      srcOdd += bpp;
1906
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1907
0
      Yc = RGB2Y(r, g, b);
1908
0
      Uc = RGB2U(r, g, b);
1909
0
      Vc = RGB2V(r, g, b);
1910
0
    }
1911
0
    else
1912
0
    {
1913
0
      Yc = Ya;
1914
0
      Uc = Ua;
1915
0
      Vc = Va;
1916
0
    }
1917
1918
0
    if (srcOdd && (x < width - 1))
1919
0
    {
1920
0
      BYTE b = 0;
1921
0
      BYTE g = 0;
1922
0
      BYTE r = 0;
1923
0
      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
1924
0
      srcOdd += bpp;
1925
0
      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1926
0
      Yd = RGB2Y(r, g, b);
1927
0
      Ud = RGB2U(r, g, b);
1928
0
      Vd = RGB2V(r, g, b);
1929
0
    }
1930
0
    else
1931
0
    {
1932
0
      Yd = Ya;
1933
0
      Ud = Ua;
1934
0
      Vd = Va;
1935
0
    }
1936
1937
    /* Y [b1] */
1938
0
    *yLumaDstEven++ = Ya;
1939
1940
0
    if (x < width - 1)
1941
0
      *yLumaDstEven++ = Yb;
1942
1943
0
    if (srcOdd)
1944
0
      *yLumaDstOdd++ = Yc;
1945
1946
0
    if (srcOdd && (x < width - 1))
1947
0
      *yLumaDstOdd++ = Yd;
1948
1949
    /* 2x 2y [b2,b3] */
1950
0
    *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
1951
0
    *vLumaDst++ = (Va + Vb + Vc + Vd) / 4;
1952
1953
    /* 2x+1, y [b4,b5] even */
1954
0
    if (x < width - 1)
1955
0
    {
1956
0
      *yEvenChromaDst1++ = Ub;
1957
0
      *yEvenChromaDst2++ = Vb;
1958
0
    }
1959
1960
0
    if (srcOdd)
1961
0
    {
1962
      /* 2x+1, y [b4,b5] odd */
1963
0
      if (x < width - 1)
1964
0
      {
1965
0
        *yOddChromaDst1++ = Ud;
1966
0
        *yOddChromaDst2++ = Vd;
1967
0
      }
1968
1969
      /* 4x 2y+1 [b6, b7] */
1970
0
      if (x % 4 == 0)
1971
0
      {
1972
0
        *uChromaDst1++ = Uc;
1973
0
        *uChromaDst2++ = Vc;
1974
0
      }
1975
      /* 4x+2 2y+1 [b8, b9] */
1976
0
      else
1977
0
      {
1978
0
        *vChromaDst1++ = Uc;
1979
0
        *vChromaDst2++ = Vc;
1980
0
      }
1981
0
    }
1982
0
  }
1983
0
}
1984
1985
static inline pstatus_t
1986
general_RGBToAVC444YUVv2_ANY(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep,
1987
                             BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3],
1988
                             BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3],
1989
                             const prim_size_t* WINPR_RESTRICT roi)
1990
0
{
1991
  /**
1992
   * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the
1993
   * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned
1994
   * to a multiple of 16.
1995
   * Hence the passed destination YUV420/CHROMA420 buffers must have been
1996
   * allocated accordingly !!
1997
   */
1998
  /**
1999
   * [MS-RDPEGFX 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode] defines the following "Bx
2000
   * areas":
2001
   *
2002
   * YUV420 frame (main view):
2003
   * B1:  From Y444 all pixels
2004
   * B2:  From U444 all pixels in even rows with even rows and columns
2005
   * B3:  From V444 all pixels in even rows with even rows and columns
2006
   *
2007
   * Chroma420 frame (auxiliary view):
2008
   * B45: From U444 and V444 all pixels from all odd columns
2009
   * B67: From U444 and V444 every 4th pixel in odd rows
2010
   * B89:  From U444 and V444 every 4th pixel (initial offset of 2) in odd rows
2011
   *
2012
   * Chroma Bxy areas correspond to the left and right half of the YUV420 plane.
2013
   * for (y = 0; y < fullHeight; y++)
2014
   * {
2015
   *     for (x = 0; x < fullWidth; x++)
2016
   *     {
2017
   *         B1[x,y] = Y444[x,y];
2018
   *     }
2019
   *
2020
   *     for (x = 0; x < halfWidth; x++)
2021
   *     {
2022
   *         B4[x,y] = U444[2 * x, 2 * y];
2023
   *         B5[x,y] = V444[2 * x, 2 * y];
2024
   *     }
2025
   *  }
2026
   *
2027
   * for (y = 0; y < halfHeight; y++)
2028
   * {
2029
   *     for (x = 0; x < halfWidth; x++)
2030
   *     {
2031
   *         B2[x,y] = U444[2 * x,     2 * y];
2032
   *         B3[x,y] = V444[2 * x,     2 * y];
2033
   *         B6[x,y] = U444[4 * x,     2 * y + 1];
2034
   *         B7[x,y] = V444[4 * x,     2 * y + 1];
2035
   *         B8[x,y] = V444[4 * x + 2, 2 * y + 1];
2036
   *         B9[x,y] = V444[4 * x + 2, 2 * y] + 1;
2037
   *     }
2038
   *  }
2039
   *
2040
   */
2041
0
  if (roi->height < 1 || roi->width < 1)
2042
0
    return !PRIMITIVES_SUCCESS;
2043
2044
0
  size_t y = 0;
2045
0
  for (; y < roi->height - roi->height % 2; y += 2)
2046
0
  {
2047
0
    const BYTE* srcEven = (pSrc + y * srcStep);
2048
0
    const BYTE* srcOdd = (y < roi->height - 1) ? (srcEven + srcStep) : NULL;
2049
0
    BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
2050
0
    BYTE* dstLumaYOdd = (dstLumaYEven + dst1Step[0]);
2051
0
    BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
2052
0
    BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
2053
0
    BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
2054
0
    BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
2055
0
    BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0];
2056
0
    BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0];
2057
0
    BYTE* dstChromaU1 = (pDst2[1] + (y / 2) * dst2Step[1]);
2058
0
    BYTE* dstChromaV1 = (pDst2[2] + (y / 2) * dst2Step[2]);
2059
0
    BYTE* dstChromaU2 = dstChromaU1 + roi->width / 4;
2060
0
    BYTE* dstChromaV2 = dstChromaV1 + roi->width / 4;
2061
0
    general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
2062
0
        srcEven, srcOdd, srcFormat, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV,
2063
0
        dstEvenChromaY1, dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1,
2064
0
        dstChromaU2, dstChromaV1, dstChromaV2, roi->width);
2065
0
  }
2066
0
  for (; y < roi->height; y++)
2067
0
  {
2068
0
    const BYTE* srcEven = (pSrc + y * srcStep);
2069
0
    BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
2070
0
    BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
2071
0
    BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
2072
0
    BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
2073
0
    BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
2074
0
    general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
2075
0
        srcEven, NULL, srcFormat, dstLumaYEven, NULL, dstLumaU, dstLumaV, dstEvenChromaY1,
2076
0
        dstEvenChromaY2, NULL, NULL, NULL, NULL, NULL, NULL, roi->width);
2077
0
  }
2078
2079
0
  return PRIMITIVES_SUCCESS;
2080
0
}
2081
2082
/* Convert one 4 byte pixel stored as B, G, R, X into its Y, U and V components. */
static inline void int_general_RGBToAVC444YUVv2_BGRX_PixelToYUV(const BYTE* px, BYTE* pY,
                                                                 BYTE* pU, BYTE* pV)
{
  const BYTE b = px[0];
  const BYTE g = px[1];
  const BYTE r = px[2];
  *pY = RGB2Y(r, g, b);
  *pU = RGB2U(r, g, b);
  *pV = RGB2V(r, g, b);
}

/**
 * Convert up to two rows of 4 byte BGRX pixels into the AVC444v2 areas.
 *
 * The rows are walked in 2x2 blocks named a (even row, left), b (even row, right),
 * c (odd row, left) and d (odd row, right). A sample that does not exist (no odd row, or
 * no right column) takes the values of sample a for the averaging step.
 *
 * @param offset          first column to process; only the source rows are indexed with
 *                        it, the destination pointers are used as passed in
 * @param pSrcEven        even source row, must not be NULL
 * @param pSrcOdd         odd source row, NULL if there is none
 * @param yLumaDstEven    [b1] luma of the even row, must not be NULL
 * @param yLumaDstOdd     [b1] luma of the odd row, skipped when NULL
 * @param uLumaDst        [b2] average U of each 2x2 block, must not be NULL
 * @param vLumaDst        [b3] average V of each 2x2 block, must not be NULL
 * @param yEvenChromaDst1 [b4] U of the right column, even row
 * @param yEvenChromaDst2 [b5] V of the right column, even row
 * @param yOddChromaDst1  [b4] U of the right column, odd row, only written if pSrcOdd is set
 * @param yOddChromaDst2  [b5] V of the right column, odd row, only written if pSrcOdd is set
 * @param uChromaDst1     [b6] U of the odd row at columns divisible by 4
 * @param uChromaDst2     [b7] V of the odd row at columns divisible by 4
 * @param vChromaDst1     [b8] U of the odd row at the remaining columns
 * @param vChromaDst2     [b9] V of the odd row at the remaining columns
 * @param width           number of pixels per row, asserted to be even
 */
static inline void int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
    size_t offset, const BYTE* WINPR_RESTRICT pSrcEven, const BYTE* WINPR_RESTRICT pSrcOdd,
    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
{
  WINPR_ASSERT((width % 2) == 0);
  WINPR_ASSERT(pSrcEven);
  WINPR_ASSERT(yLumaDstEven);
  WINPR_ASSERT(uLumaDst);
  WINPR_ASSERT(vLumaDst);

  const BOOL haveOddRow = (pSrcOdd != NULL);

  for (size_t x = offset; x < width; x += 2)
  {
    /* x < width holds here, so x + 1 cannot overflow. */
    const BOOL haveRight = (x + 1) < width;
    /* Each pixel occupies 4 bytes; the right neighbour follows directly. */
    const BYTE* evenPx = pSrcEven + 4ULL * x;
    const BYTE* oddPx = haveOddRow ? pSrcOdd + 4ULL * x : NULL;

    /* a: always present */
    BYTE Ya = 0;
    BYTE Ua = 0;
    BYTE Va = 0;
    int_general_RGBToAVC444YUVv2_BGRX_PixelToYUV(evenPx, &Ya, &Ua, &Va);

    /* b, c and d start out as copies of a and are replaced when the sample exists */
    BYTE Yb = Ya;
    BYTE Ub = Ua;
    BYTE Vb = Va;
    if (haveRight)
      int_general_RGBToAVC444YUVv2_BGRX_PixelToYUV(evenPx + 4, &Yb, &Ub, &Vb);

    BYTE Yc = Ya;
    BYTE Uc = Ua;
    BYTE Vc = Va;
    if (haveOddRow)
      int_general_RGBToAVC444YUVv2_BGRX_PixelToYUV(oddPx, &Yc, &Uc, &Vc);

    BYTE Yd = Ya;
    BYTE Ud = Ua;
    BYTE Vd = Va;
    if (haveOddRow && haveRight)
      int_general_RGBToAVC444YUVv2_BGRX_PixelToYUV(oddPx + 4, &Yd, &Ud, &Vd);

    /* Y [b1] */
    *yLumaDstEven++ = Ya;
    if (haveRight)
      *yLumaDstEven++ = Yb;

    if (haveOddRow && yLumaDstOdd)
    {
      *yLumaDstOdd++ = Yc;
      if (haveRight)
        *yLumaDstOdd++ = Yd;
    }

    /* 2x 2y [b2,b3] */
    *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
    *vLumaDst++ = (Va + Vb + Vc + Vd) / 4;

    /* 2x+1, y [b4,b5] even */
    if (haveRight)
    {
      *yEvenChromaDst1++ = Ub;
      *yEvenChromaDst2++ = Vb;
    }

    /* Everything below is taken from the odd row. */
    if (!haveOddRow)
      continue;

    /* 2x+1, y [b4,b5] odd */
    if (haveRight)
    {
      *yOddChromaDst1++ = Ud;
      *yOddChromaDst2++ = Vd;
    }

    if ((x % 4) == 0)
    {
      /* 4x 2y+1 [b6, b7] */
      *uChromaDst1++ = Uc;
      *uChromaDst2++ = Vc;
    }
    else
    {
      /* 4x+2 2y+1 [b8, b9] */
      *vChromaDst1++ = Uc;
      *vChromaDst2++ = Vc;
    }
  }
}
2221
2222
/**
 * Externally visible entry point for the BGRX AVC444v2 double row conversion.
 *
 * All arguments are forwarded unchanged, in the same order, to
 * int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW.
 */
void general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
    size_t offset, const BYTE* WINPR_RESTRICT pSrcEven, const BYTE* WINPR_RESTRICT pSrcOdd,
    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
{
  int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(offset,
                                               /* source rows */
                                               pSrcEven, pSrcOdd,
                                               /* main view: Y (even, odd), U, V */
                                               yLumaDstEven, yLumaDstOdd, uLumaDst, vLumaDst,
                                               /* auxiliary view Y plane: even row, odd row */
                                               yEvenChromaDst1, yEvenChromaDst2,
                                               yOddChromaDst1, yOddChromaDst2,
                                               /* auxiliary view U and V plane */
                                               uChromaDst1, uChromaDst2, vChromaDst1,
                                               vChromaDst2, width);
}
2236
2237
/**
 * Convert a BGRX/BGRA image (4 bytes per pixel) into the two YUV420 frames of the
 * AVC444v2 encoding: the main view (pDst1) and the auxiliary view (pDst2).
 *
 * Rows are handed to int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW in pairs; a trailing
 * single row of an odd sized image is passed without an odd row and without the odd row
 * targets.
 *
 * @param pSrc     first byte of the source image
 * @param srcStep  distance in bytes between two source rows
 * @param pDst1    Y, U and V plane of the main view
 * @param dst1Step distance in bytes between two rows of each main view plane
 * @param pDst2    Y, U and V plane of the auxiliary view
 * @param dst2Step distance in bytes between two rows of each auxiliary view plane
 * @param roi      width and height of the region to convert
 *
 * @return !PRIMITIVES_SUCCESS if width or height is zero, PRIMITIVES_SUCCESS otherwise
 */
static inline pstatus_t general_RGBToAVC444YUVv2_BGRX(const BYTE* WINPR_RESTRICT pSrc,
                                                      UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
                                                      const UINT32 dst1Step[3],
                                                      BYTE* WINPR_RESTRICT pDst2[3],
                                                      const UINT32 dst2Step[3],
                                                      const prim_size_t* WINPR_RESTRICT roi)
{
  if (roi->height < 1 || roi->width < 1)
    return !PRIMITIVES_SUCCESS;

  /* The second area of each auxiliary plane starts half (Y plane) respectively a quarter
   * (U and V plane) of the image width into the row. */
  const UINT32 halfWidth = roi->width / 2;
  const UINT32 quarterWidth = roi->width / 4;

  for (size_t y = 0; y < roi->height; y += 2)
  {
    /* Only the last row of an odd sized image lacks a partner row. */
    const BOOL paired = (y + 1) < roi->height;
    const size_t halfY = y / 2;

    const BYTE* srcEven = pSrc + y * srcStep;
    BYTE* dstLumaYEven = pDst1[0] + y * dst1Step[0];
    BYTE* dstLumaU = pDst1[1] + halfY * dst1Step[1];
    BYTE* dstLumaV = pDst1[2] + halfY * dst1Step[2];
    BYTE* dstEvenChromaY1 = pDst2[0] + y * dst2Step[0];
    BYTE* dstEvenChromaY2 = dstEvenChromaY1 + halfWidth;

    if (!paired)
    {
      /* Single trailing row: nothing is read from or written for an odd row. */
      int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
          0, srcEven, NULL, dstLumaYEven, NULL, dstLumaU, dstLumaV, dstEvenChromaY1,
          dstEvenChromaY2, NULL, NULL, NULL, NULL, NULL, NULL, roi->width);
      /* Leave explicitly instead of stepping y past the image height. */
      break;
    }

    const BYTE* srcOdd = srcEven + srcStep;
    BYTE* dstLumaYOdd = dstLumaYEven + dst1Step[0];
    BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0];
    BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0];
    BYTE* dstChromaU1 = pDst2[1] + halfY * dst2Step[1];
    BYTE* dstChromaV1 = pDst2[2] + halfY * dst2Step[2];
    BYTE* dstChromaU2 = dstChromaU1 + quarterWidth;
    BYTE* dstChromaV2 = dstChromaV1 + quarterWidth;

    int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
        0, srcEven, srcOdd, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV, dstEvenChromaY1,
        dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1, dstChromaU2, dstChromaV1,
        dstChromaV2, roi->width);
  }

  return PRIMITIVES_SUCCESS;
}
2284
2285
/**
 * Convert an RGB image to the AVC444v2 main and auxiliary YUV420 frames, dispatching to
 * the converter matching the source pixel format.
 *
 * @param pSrc      first byte of the source image, must be non-NULL
 * @param srcFormat pixel format of the source; BGRA32/BGRX32 use the dedicated converter,
 *                  everything else the generic one
 * @param srcStep   forwarded unchanged to the selected converter
 * @param pDst1     planes of the main view, must be non-NULL
 * @param dst1Step  steps of the main view planes, must be non-NULL
 * @param pDst2     planes of the auxiliary view, must be non-NULL
 * @param dst2Step  steps of the auxiliary view planes, must be non-NULL
 * @param roi       size of the region to convert, must be non-NULL
 *
 * @return -1 if one of the pointer arguments is NULL, otherwise the result of the
 *         selected converter
 */
static pstatus_t general_RGBToAVC444YUVv2(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
                                          UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
                                          const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3],
                                          const UINT32 dst2Step[3],
                                          const prim_size_t* WINPR_RESTRICT roi)
{
  /* Both converters dereference these right away; fail the same way
   * general_RGBToAVC444YUV does instead of crashing. The individual planes are not
   * checked here because a single row image never touches all of them. */
  if (!pSrc || !pDst1 || !dst1Step || !pDst2 || !dst2Step || !roi)
    return -1;

  /* Every case returns, so nothing follows the switch. */
  switch (srcFormat)
  {
    case PIXEL_FORMAT_BGRA32:
    case PIXEL_FORMAT_BGRX32:
      return general_RGBToAVC444YUVv2_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
                                           roi);

    default:
      return general_RGBToAVC444YUVv2_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2,
                                          dst2Step, roi);
  }
}
2305
2306
/**
 * Install the generic (plain C) YUV conversion routines of this file in prims.
 *
 * @param prims primitives table whose YUV entries are overwritten
 */
void primitives_init_YUV(primitives_t* WINPR_RESTRICT prims)
{
  /* YUV -> RGB */
  prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R;
  prims->YUV444ToRGB_8u_P3AC4R = general_YUV444ToRGB_8u_P3AC4R;

  /* RGB -> YUV */
  prims->RGBToYUV420_8u_P3AC4R = general_RGBToYUV420_8u_P3AC4R;
  prims->RGBToYUV444_8u_P3AC4R = general_RGBToYUV444_8u_P3AC4R;

  /* RGB -> AVC444 frame pairs */
  prims->RGBToAVC444YUV = general_RGBToAVC444YUV;
  prims->RGBToAVC444YUVv2 = general_RGBToAVC444YUVv2;

  /* YUV420 <-> YUV444 */
  prims->YUV420CombineToYUV444 = general_YUV420CombineToYUV444;
  prims->YUV444SplitToYUV420 = general_YUV444SplitToYUV420;
}
2317
2318
/**
 * Set up the YUV entries of prims: the generic routines are installed first, then the
 * SSE4.1 and NEON initializers are called on the same table, in that order.
 *
 * @param prims primitives table to initialize
 */
void primitives_init_YUV_opt(primitives_t* WINPR_RESTRICT prims)
{
  /* Generic routines go in first so the table is fully populated. */
  primitives_init_YUV(prims);

  /* Architecture specific initializers run afterwards on the populated table. */
  primitives_init_YUV_sse41(prims);
  primitives_init_YUV_neon(prims);
}