Coverage Report

Created: 2023-06-07 07:34

/src/libhevc/decoder/ihevcd_fmt_conv.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
*******************************************************************************
20
* @file
21
*  ihevcd_fmt_conv.c
22
*
23
* @brief
24
*  Contains functions for format conversion or frame copy of output buffer
25
*
26
* @author
27
*  Harish
28
*
29
* @par List of Functions:
30
*
31
* @remarks
32
*  None
33
*
34
*******************************************************************************
35
*/
36
/*****************************************************************************/
37
/* File Includes                                                             */
38
/*****************************************************************************/
39
#include <stdio.h>
40
#include <stddef.h>
41
#include <stdlib.h>
42
#include <string.h>
43
#include <assert.h>
44
45
#include "ihevc_typedefs.h"
46
#include "iv.h"
47
#include "ivd.h"
48
#include "ihevcd_cxa.h"
49
#include "ithread.h"
50
51
#include "ihevc_defs.h"
52
#include "ihevc_debug.h"
53
#include "ihevc_structs.h"
54
#include "ihevc_macros.h"
55
#include "ihevc_platform_macros.h"
56
#include "ihevc_cabac_tables.h"
57
#include "ihevc_disp_mgr.h"
58
59
#include "ihevcd_defs.h"
60
#include "ihevcd_function_selector.h"
61
#include "ihevcd_structs.h"
62
#include "ihevcd_error.h"
63
#include "ihevcd_nal.h"
64
#include "ihevcd_bitstream.h"
65
#include "ihevcd_fmt_conv.h"
66
#include "ihevcd_profile.h"
67
68
/* SIMD variants of format conversion modules do not support width less than 32 */
69
52.0k
#define MIN_FMT_CONV_SIMD_WIDTH 32
70
/**
71
*******************************************************************************
72
*
73
* @brief Function used from copying a 420SP buffer
74
*
75
* @par   Description
76
* Function used from copying a 420SP buffer
77
*
78
* @param[in] pu1_y_src
79
*   Input Y pointer
80
*
81
* @param[in] pu1_uv_src
82
*   Input UV pointer (UV is interleaved either in UV or VU format)
83
*
84
* @param[in] pu1_y_dst
85
*   Output Y pointer
86
*
87
* @param[in] pu1_uv_dst
88
*   Output UV pointer (UV is interleaved in the same format as that of input)
89
*
90
* @param[in] wd
91
*   Width
92
*
93
* @param[in] ht
94
*   Height
95
*
96
* @param[in] src_y_strd
97
*   Input Y Stride
98
*
99
* @param[in] src_uv_strd
100
*   Input UV stride
101
*
102
* @param[in] dst_y_strd
103
*   Output Y stride
104
*
105
* @param[in] dst_uv_strd
106
*   Output UV stride
107
*
108
* @returns None
109
*
110
* @remarks In case there is a need to perform partial frame copy then
111
* by passion appropriate source and destination pointers and appropriate
112
* values for wd and ht it can be done
113
*
114
*******************************************************************************
115
*/
116
void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
117
                                     UWORD8 *pu1_uv_src,
118
                                     UWORD16 *pu2_rgb_dst,
119
                                     WORD32 wd,
120
                                     WORD32 ht,
121
                                     WORD32 src_y_strd,
122
                                     WORD32 src_uv_strd,
123
                                     WORD32 dst_strd,
124
                                     WORD32 is_u_first)
125
1.13k
{
126
127
128
1.13k
    WORD16  i2_r, i2_g, i2_b;
129
1.13k
    UWORD32  u4_r, u4_g, u4_b;
130
1.13k
    WORD16  i2_i, i2_j;
131
1.13k
    UWORD8  *pu1_y_src_nxt;
132
1.13k
    UWORD16 *pu2_rgb_dst_NextRow;
133
134
1.13k
    UWORD8 *pu1_u_src, *pu1_v_src;
135
136
1.13k
    if(is_u_first)
137
1.13k
    {
138
1.13k
        pu1_u_src = (UWORD8 *)pu1_uv_src;
139
1.13k
        pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
140
1.13k
    }
141
0
    else
142
0
    {
143
0
        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
144
0
        pu1_v_src = (UWORD8 *)pu1_uv_src;
145
0
    }
146
147
1.13k
    pu1_y_src_nxt   = pu1_y_src + src_y_strd;
148
1.13k
    pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
149
150
46.2k
    for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
151
45.1k
    {
152
14.3M
        for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
153
14.3M
        {
154
14.3M
            i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
155
14.3M
            i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
156
14.3M
            i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
157
158
14.3M
            pu1_u_src += 2;
159
14.3M
            pu1_v_src += 2;
160
            /* pixel 0 */
161
            /* B */
162
14.3M
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
163
14.3M
            u4_b >>= 3;
164
            /* G */
165
14.3M
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
166
14.3M
            u4_g >>= 2;
167
            /* R */
168
14.3M
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
169
14.3M
            u4_r >>= 3;
170
171
14.3M
            pu1_y_src++;
172
14.3M
            *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
173
174
            /* pixel 1 */
175
            /* B */
176
14.3M
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
177
14.3M
            u4_b >>= 3;
178
            /* G */
179
14.3M
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
180
14.3M
            u4_g >>= 2;
181
            /* R */
182
14.3M
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
183
14.3M
            u4_r >>= 3;
184
185
14.3M
            pu1_y_src++;
186
14.3M
            *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
187
188
            /* pixel 2 */
189
            /* B */
190
14.3M
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
191
14.3M
            u4_b >>= 3;
192
            /* G */
193
14.3M
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
194
14.3M
            u4_g >>= 2;
195
            /* R */
196
14.3M
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
197
14.3M
            u4_r >>= 3;
198
199
14.3M
            pu1_y_src_nxt++;
200
14.3M
            *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
201
202
            /* pixel 3 */
203
            /* B */
204
14.3M
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
205
14.3M
            u4_b >>= 3;
206
            /* G */
207
14.3M
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
208
14.3M
            u4_g >>= 2;
209
            /* R */
210
14.3M
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
211
14.3M
            u4_r >>= 3;
212
213
14.3M
            pu1_y_src_nxt++;
214
14.3M
            *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
215
216
14.3M
        }
217
218
45.1k
        pu1_u_src = pu1_u_src + src_uv_strd - wd;
219
45.1k
        pu1_v_src = pu1_v_src + src_uv_strd - wd;
220
221
45.1k
        pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
222
45.1k
        pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
223
224
45.1k
        pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
225
45.1k
        pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
226
45.1k
    }
227
228
229
1.13k
}
230
231
void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
232
                                       UWORD8 *pu1_uv_src,
233
                                       UWORD32 *pu4_rgba_dst,
234
                                       WORD32 wd,
235
                                       WORD32 ht,
236
                                       WORD32 src_y_strd,
237
                                       WORD32 src_uv_strd,
238
                                       WORD32 dst_strd,
239
                                       WORD32 is_u_first)
240
0
{
241
242
243
0
    WORD16  i2_r, i2_g, i2_b;
244
0
    UWORD32  u4_r, u4_g, u4_b;
245
0
    WORD16  i2_i, i2_j;
246
0
    UWORD8  *pu1_y_src_nxt;
247
0
    UWORD32 *pu4_rgba_dst_NextRow;
248
249
0
    UWORD8 *pu1_u_src, *pu1_v_src;
250
251
0
    if(is_u_first)
252
0
    {
253
0
        pu1_u_src = (UWORD8 *)pu1_uv_src;
254
0
        pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
255
0
    }
256
0
    else
257
0
    {
258
0
        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
259
0
        pu1_v_src = (UWORD8 *)pu1_uv_src;
260
0
    }
261
262
0
    pu1_y_src_nxt   = pu1_y_src + src_y_strd;
263
0
    pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd;
264
265
0
    for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
266
0
    {
267
0
        for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
268
0
        {
269
0
            i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
270
0
            i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
271
0
            i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
272
273
0
            pu1_u_src += 2;
274
0
            pu1_v_src += 2;
275
            /* pixel 0 */
276
            /* B */
277
0
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
278
            /* G */
279
0
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
280
            /* R */
281
0
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
282
283
0
            pu1_y_src++;
284
0
            *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
285
286
            /* pixel 1 */
287
            /* B */
288
0
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
289
            /* G */
290
0
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
291
            /* R */
292
0
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
293
294
0
            pu1_y_src++;
295
0
            *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
296
297
            /* pixel 2 */
298
            /* B */
299
0
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
300
            /* G */
301
0
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
302
            /* R */
303
0
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
304
305
0
            pu1_y_src_nxt++;
306
0
            *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
307
308
            /* pixel 3 */
309
            /* B */
310
0
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
311
            /* G */
312
0
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
313
            /* R */
314
0
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
315
316
0
            pu1_y_src_nxt++;
317
0
            *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
318
319
0
        }
320
321
0
        pu1_u_src = pu1_u_src + src_uv_strd - wd;
322
0
        pu1_v_src = pu1_v_src + src_uv_strd - wd;
323
324
0
        pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
325
0
        pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
326
327
0
        pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd;
328
0
        pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd;
329
0
    }
330
331
332
0
}
333
334
/**
335
*******************************************************************************
336
*
337
* @brief Function used from copying a 420SP buffer
338
*
339
* @par   Description
340
* Function used from copying a 420SP buffer
341
*
342
* @param[in] pu1_y_src
343
*   Input Y pointer
344
*
345
* @param[in] pu1_uv_src
346
*   Input UV pointer (UV is interleaved either in UV or VU format)
347
*
348
* @param[in] pu1_y_dst
349
*   Output Y pointer
350
*
351
* @param[in] pu1_uv_dst
352
*   Output UV pointer (UV is interleaved in the same format as that of input)
353
*
354
* @param[in] wd
355
*   Width
356
*
357
* @param[in] ht
358
*   Height
359
*
360
* @param[in] src_y_strd
361
*   Input Y Stride
362
*
363
* @param[in] src_uv_strd
364
*   Input UV stride
365
*
366
* @param[in] dst_y_strd
367
*   Output Y stride
368
*
369
* @param[in] dst_uv_strd
370
*   Output UV stride
371
*
372
* @returns None
373
*
374
* @remarks In case there is a need to perform partial frame copy then
375
* by passion appropriate source and destination pointers and appropriate
376
* values for wd and ht it can be done
377
*
378
*******************************************************************************
379
*/
380
381
void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
382
                                    UWORD8 *pu1_uv_src,
383
                                    UWORD8 *pu1_y_dst,
384
                                    UWORD8 *pu1_uv_dst,
385
                                    WORD32 wd,
386
                                    WORD32 ht,
387
                                    WORD32 src_y_strd,
388
                                    WORD32 src_uv_strd,
389
                                    WORD32 dst_y_strd,
390
                                    WORD32 dst_uv_strd)
391
40.4k
{
392
40.4k
    UWORD8 *pu1_src, *pu1_dst;
393
40.4k
    WORD32 num_rows, num_cols, src_strd, dst_strd;
394
40.4k
    WORD32 i;
395
396
    /* copy luma */
397
40.4k
    pu1_src = (UWORD8 *)pu1_y_src;
398
40.4k
    pu1_dst = (UWORD8 *)pu1_y_dst;
399
400
40.4k
    num_rows = ht;
401
40.4k
    num_cols = wd;
402
403
40.4k
    src_strd = src_y_strd;
404
40.4k
    dst_strd = dst_y_strd;
405
406
1.46M
    for(i = 0; i < num_rows; i++)
407
1.42M
    {
408
1.42M
        memcpy(pu1_dst, pu1_src, num_cols);
409
1.42M
        pu1_dst += dst_strd;
410
1.42M
        pu1_src += src_strd;
411
1.42M
    }
412
413
    /* copy U and V */
414
40.4k
    pu1_src = (UWORD8 *)pu1_uv_src;
415
40.4k
    pu1_dst = (UWORD8 *)pu1_uv_dst;
416
417
40.4k
    num_rows = ht >> 1;
418
40.4k
    num_cols = wd;
419
420
40.4k
    src_strd = src_uv_strd;
421
40.4k
    dst_strd = dst_uv_strd;
422
423
818k
    for(i = 0; i < num_rows; i++)
424
778k
    {
425
778k
        memcpy(pu1_dst, pu1_src, num_cols);
426
778k
        pu1_dst += dst_strd;
427
778k
        pu1_src += src_strd;
428
778k
    }
429
40.4k
    return;
430
40.4k
}
431
432
433
434
/**
435
*******************************************************************************
436
*
437
* @brief Function used from copying a 420SP buffer
438
*
439
* @par   Description
440
* Function used from copying a 420SP buffer
441
*
442
* @param[in] pu1_y_src
443
*   Input Y pointer
444
*
445
* @param[in] pu1_uv_src
446
*   Input UV pointer (UV is interleaved either in UV or VU format)
447
*
448
* @param[in] pu1_y_dst
449
*   Output Y pointer
450
*
451
* @param[in] pu1_uv_dst
452
*   Output UV pointer (UV is interleaved in the same format as that of input)
453
*
454
* @param[in] wd
455
*   Width
456
*
457
* @param[in] ht
458
*   Height
459
*
460
* @param[in] src_y_strd
461
*   Input Y Stride
462
*
463
* @param[in] src_uv_strd
464
*   Input UV stride
465
*
466
* @param[in] dst_y_strd
467
*   Output Y stride
468
*
469
* @param[in] dst_uv_strd
470
*   Output UV stride
471
*
472
* @returns None
473
*
474
* @remarks In case there is a need to perform partial frame copy then
475
* by passion appropriate source and destination pointers and appropriate
476
* values for wd and ht it can be done
477
*
478
*******************************************************************************
479
*/
480
void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
481
                                            UWORD8 *pu1_uv_src,
482
                                            UWORD8 *pu1_y_dst,
483
                                            UWORD8 *pu1_uv_dst,
484
                                            WORD32 wd,
485
                                            WORD32 ht,
486
                                            WORD32 src_y_strd,
487
                                            WORD32 src_uv_strd,
488
                                            WORD32 dst_y_strd,
489
                                            WORD32 dst_uv_strd)
490
0
{
491
0
    UWORD8 *pu1_src, *pu1_dst;
492
0
    WORD32 num_rows, num_cols, src_strd, dst_strd;
493
0
    WORD32 i;
494
495
    /* copy luma */
496
0
    pu1_src = (UWORD8 *)pu1_y_src;
497
0
    pu1_dst = (UWORD8 *)pu1_y_dst;
498
499
0
    num_rows = ht;
500
0
    num_cols = wd;
501
502
0
    src_strd = src_y_strd;
503
0
    dst_strd = dst_y_strd;
504
505
0
    for(i = 0; i < num_rows; i++)
506
0
    {
507
0
        memcpy(pu1_dst, pu1_src, num_cols);
508
0
        pu1_dst += dst_strd;
509
0
        pu1_src += src_strd;
510
0
    }
511
512
    /* copy U and V */
513
0
    pu1_src = (UWORD8 *)pu1_uv_src;
514
0
    pu1_dst = (UWORD8 *)pu1_uv_dst;
515
516
0
    num_rows = ht >> 1;
517
0
    num_cols = wd;
518
519
0
    src_strd = src_uv_strd;
520
0
    dst_strd = dst_uv_strd;
521
522
0
    for(i = 0; i < num_rows; i++)
523
0
    {
524
0
        WORD32 j;
525
0
        for(j = 0; j < num_cols; j += 2)
526
0
        {
527
0
            pu1_dst[j + 0] = pu1_src[j + 1];
528
0
            pu1_dst[j + 1] = pu1_src[j + 0];
529
0
        }
530
0
        pu1_dst += dst_strd;
531
0
        pu1_src += src_strd;
532
0
    }
533
0
    return;
534
0
}
535
/**
536
*******************************************************************************
537
*
538
* @brief Function used from copying a 420SP buffer
539
*
540
* @par   Description
541
* Function used from copying a 420SP buffer
542
*
543
* @param[in] pu1_y_src
544
*   Input Y pointer
545
*
546
* @param[in] pu1_uv_src
547
*   Input UV pointer (UV is interleaved either in UV or VU format)
548
*
549
* @param[in] pu1_y_dst
550
*   Output Y pointer
551
*
552
* @param[in] pu1_u_dst
553
*   Output U pointer
554
*
555
* @param[in] pu1_v_dst
556
*   Output V pointer
557
*
558
* @param[in] wd
559
*   Width
560
*
561
* @param[in] ht
562
*   Height
563
*
564
* @param[in] src_y_strd
565
*   Input Y Stride
566
*
567
* @param[in] src_uv_strd
568
*   Input UV stride
569
*
570
* @param[in] dst_y_strd
571
*   Output Y stride
572
*
573
* @param[in] dst_uv_strd
574
*   Output UV stride
575
*
576
* @param[in] is_u_first
577
*   Flag to indicate if U is the first byte in input chroma part
578
*
579
* @returns none
580
*
581
* @remarks In case there is a need to perform partial frame copy then
582
* by passion appropriate source and destination pointers and appropriate
583
* values for wd and ht it can be done
584
*
585
*******************************************************************************
586
*/
587
588
589
void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
590
                                   UWORD8 *pu1_uv_src,
591
                                   UWORD8 *pu1_y_dst,
592
                                   UWORD8 *pu1_u_dst,
593
                                   UWORD8 *pu1_v_dst,
594
                                   WORD32 wd,
595
                                   WORD32 ht,
596
                                   WORD32 src_y_strd,
597
                                   WORD32 src_uv_strd,
598
                                   WORD32 dst_y_strd,
599
                                   WORD32 dst_uv_strd,
600
                                   WORD32 is_u_first,
601
                                   WORD32 disable_luma_copy)
602
9.56k
{
603
9.56k
    UWORD8 *pu1_src, *pu1_dst;
604
9.56k
    UWORD8 *pu1_u_src, *pu1_v_src;
605
9.56k
    WORD32 num_rows, num_cols, src_strd, dst_strd;
606
9.56k
    WORD32 i, j;
607
608
9.56k
    if(0 == disable_luma_copy)
609
0
    {
610
        /* copy luma */
611
0
        pu1_src = (UWORD8 *)pu1_y_src;
612
0
        pu1_dst = (UWORD8 *)pu1_y_dst;
613
614
0
        num_rows = ht;
615
0
        num_cols = wd;
616
617
0
        src_strd = src_y_strd;
618
0
        dst_strd = dst_y_strd;
619
620
0
        for(i = 0; i < num_rows; i++)
621
0
        {
622
0
            memcpy(pu1_dst, pu1_src, num_cols);
623
0
            pu1_dst += dst_strd;
624
0
            pu1_src += src_strd;
625
0
        }
626
0
    }
627
    /* de-interleave U and V and copy to destination */
628
9.56k
    if(is_u_first)
629
9.56k
    {
630
9.56k
        pu1_u_src = (UWORD8 *)pu1_uv_src;
631
9.56k
        pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
632
9.56k
    }
633
18.4E
    else
634
18.4E
    {
635
18.4E
        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
636
18.4E
        pu1_v_src = (UWORD8 *)pu1_uv_src;
637
18.4E
    }
638
639
640
9.56k
    num_rows = ht >> 1;
641
9.56k
    num_cols = wd >> 1;
642
643
9.56k
    src_strd = src_uv_strd;
644
9.56k
    dst_strd = dst_uv_strd;
645
646
721k
    for(i = 0; i < num_rows; i++)
647
711k
    {
648
261M
        for(j = 0; j < num_cols; j++)
649
260M
        {
650
260M
            pu1_u_dst[j] = pu1_u_src[j * 2];
651
260M
            pu1_v_dst[j] = pu1_v_src[j * 2];
652
260M
        }
653
654
711k
        pu1_u_dst += dst_strd;
655
711k
        pu1_v_dst += dst_strd;
656
711k
        pu1_u_src += src_strd;
657
711k
        pu1_v_src += src_strd;
658
711k
    }
659
9.56k
    return;
660
9.56k
}
661
662
663
664
/**
665
*******************************************************************************
666
*
667
* @brief Function used from format conversion or frame copy
668
*
669
* @par   Description
670
* Function used from copying or converting a reference frame to display buffer
671
* in non shared mode
672
*
673
* @param[in] pu1_y_dst
674
*   Output Y pointer
675
*
676
* @param[in] pu1_u_dst
677
*   Output U/UV pointer ( UV is interleaved in the same format as that of input)
678
*
679
* @param[in] pu1_v_dst
680
*   Output V pointer ( used in 420P output case)
681
*
682
* @param[in] blocking
683
*   To indicate whether format conversion should wait till frame is reconstructed
684
*   and then return after complete copy is done. To be set to 1 when called at the
685
*   end of frame processing and set to 0 when called between frame processing modules
686
*   in order to utilize available MCPS
687
*
688
* @returns Error from IHEVCD_ERROR_T
689
*
690
*******************************************************************************
691
*/
692
IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
693
                               process_ctxt_t *ps_proc,
694
                               UWORD8 *pu1_y_dst,
695
                               UWORD8 *pu1_u_dst,
696
                               UWORD8 *pu1_v_dst,
697
                               WORD32 cur_row,
698
                               WORD32 num_rows)
699
57.9k
{
700
57.9k
    IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
701
57.9k
    pic_buf_t *ps_disp_pic;
702
57.9k
    UWORD8 *pu1_y_src, *pu1_uv_src;
703
57.9k
    UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
704
57.9k
    UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
705
57.9k
    UWORD16 *pu2_rgb_dst_tmp;
706
57.9k
    UWORD32 *pu4_rgb_dst_tmp;
707
57.9k
    WORD32 is_u_first;
708
57.9k
    UWORD8 *pu1_luma;
709
57.9k
    UWORD8 *pu1_chroma;
710
57.9k
    sps_t *ps_sps;
711
57.9k
    WORD32 disable_luma_copy;
712
57.9k
    WORD32 crop_unit_x, crop_unit_y;
713
714
57.9k
    if(0 == num_rows)
715
0
        return ret;
716
717
    /* In case processing is disabled, then no need to format convert/copy */
718
57.9k
    PROFILE_DISABLE_FMT_CONV();
719
57.9k
    ps_sps = ps_proc->ps_sps;
720
721
57.9k
    crop_unit_x = 1;
722
57.9k
    crop_unit_y = 1;
723
724
57.9k
    if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc)
725
57.9k
    {
726
57.9k
        crop_unit_x = 2;
727
57.9k
        crop_unit_y = 2;
728
57.9k
    }
729
730
57.9k
    ps_disp_pic = ps_codec->ps_disp_buf;
731
57.9k
    pu1_luma = ps_disp_pic->pu1_luma;
732
57.9k
    pu1_chroma = ps_disp_pic->pu1_chroma;
733
734
735
    /* Take care of cropping */
736
57.9k
    pu1_luma    += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x;
737
738
    /* Left offset is multiplied by 2 because buffer is UV interleaved */
739
57.9k
    pu1_chroma  += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2;
740
741
742
57.9k
    is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0;
743
744
    /* In case of 420P output luma copy is disabled for shared mode */
745
57.9k
    disable_luma_copy = 0;
746
57.9k
    if(1 == ps_codec->i4_share_disp_buf)
747
0
    {
748
0
        disable_luma_copy = 1;
749
0
    }
750
751
752
753
57.9k
    {
754
57.9k
        pu1_y_src   = pu1_luma + cur_row * ps_codec->i4_strd;
755
57.9k
        pu1_uv_src  = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd;
756
757
        /* In case of shared mode, with 420P output, get chroma destination */
758
57.9k
        if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt))
759
0
        {
760
0
            WORD32 i;
761
0
            for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++)
762
0
            {
763
0
                WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0];
764
0
                if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT))
765
0
                {
766
0
                    pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1];
767
0
                    pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
768
769
0
                    pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2];
770
0
                    pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
771
0
                    break;
772
0
                }
773
0
            }
774
0
        }
775
57.9k
        pu2_rgb_dst_tmp  = (UWORD16 *)pu1_y_dst;
776
57.9k
        pu2_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
777
57.9k
        pu4_rgb_dst_tmp  = (UWORD32 *)pu1_y_dst;
778
57.9k
        pu4_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
779
57.9k
        pu1_y_dst_tmp  = pu1_y_dst  + cur_row * ps_codec->i4_disp_strd;
780
57.9k
        pu1_uv_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd;
781
57.9k
        pu1_u_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
782
57.9k
        pu1_v_dst_tmp = pu1_v_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
783
784
        /* In case of multi threaded implementation, format conversion might be called
785
         * before reconstruction is completed. If the frame being converted/copied
786
         * is same as the frame being reconstructed,
787
         * Check how many rows can be format converted
788
         * Convert those many rows and then check for remaining rows and so on
789
         */
790
791
57.9k
        if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores))
792
53.6k
        {
793
53.6k
            WORD32 idx;
794
53.6k
            UWORD8 *pu1_buf;
795
53.6k
            WORD32 status;
796
53.6k
            WORD32 last_row = cur_row + num_rows;
797
53.6k
            WORD32 last_ctb_y;
798
53.6k
            UWORD32 ctb_in_row;
799
800
3.42M
            while(1)
801
3.42M
            {
802
3.42M
                last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) +
803
3.42M
                                ps_sps->i2_pic_crop_top_offset * crop_unit_y;
804
3.42M
                last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1;
805
                /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */
806
3.42M
                last_ctb_y++;
807
                /* In case of a  conformance window, an extra wait of one row might be needed */
808
3.42M
                last_ctb_y++;
809
3.42M
                last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1));
810
811
3.42M
                idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb);
812
813
                /*Check if the row below is completely processed before proceeding with format conversion*/
814
3.42M
                status = 1;
815
205M
                for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++)
816
202M
                {
817
202M
                    pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row);
818
202M
                    status &= *pu1_buf;
819
202M
                }
820
821
3.42M
                if(status)
822
53.5k
                {
823
53.5k
                    break;
824
53.5k
                }
825
3.37M
                else
826
3.37M
                {
827
3.37M
                    ithread_yield();
828
3.37M
                }
829
3.42M
            }
830
53.6k
        }
831
832
833
57.9k
        if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt))
834
41.1k
        {
835
41.1k
            ihevcd_fmt_conv_420sp_to_420sp_ft *fmt_conv_fptr;
836
41.1k
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
837
40.9k
            {
838
40.9k
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr;
839
40.9k
            }
840
175
            else
841
175
            {
842
175
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420sp;
843
175
            }
844
41.1k
            fmt_conv_fptr(pu1_y_src, pu1_uv_src,
845
41.1k
                          pu1_y_dst_tmp, pu1_uv_dst_tmp,
846
41.1k
                          ps_codec->i4_disp_wd,
847
41.1k
                          num_rows,
848
41.1k
                          ps_codec->i4_strd,
849
41.1k
                          ps_codec->i4_strd,
850
41.1k
                          ps_codec->i4_disp_strd,
851
41.1k
                          ps_codec->i4_disp_strd);
852
41.1k
        }
853
16.8k
        else if(IV_YUV_420P == ps_codec->e_chroma_fmt)
854
9.76k
        {
855
9.76k
            ihevcd_fmt_conv_420sp_to_420p_ft *fmt_conv_fptr;
856
9.76k
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
857
7.30k
            {
858
7.30k
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr;
859
7.30k
            }
860
2.46k
            else
861
2.46k
            {
862
2.46k
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420p;
863
2.46k
            }
864
865
9.76k
            if(0 == disable_luma_copy)
866
9.73k
            {
867
                // copy luma
868
9.73k
                WORD32 i;
869
9.73k
                WORD32 num_cols = ps_codec->i4_disp_wd;
870
871
1.65M
                for(i = 0; i < num_rows; i++)
872
1.64M
                {
873
1.64M
                    memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols);
874
1.64M
                    pu1_y_dst_tmp += ps_codec->i4_disp_strd;
875
1.64M
                    pu1_y_src += ps_codec->i4_strd;
876
1.64M
                }
877
878
9.73k
                disable_luma_copy = 1;
879
9.73k
            }
880
9.76k
            fmt_conv_fptr(pu1_y_src, pu1_uv_src,
881
9.76k
                          pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp,
882
9.76k
                          ps_codec->i4_disp_wd,
883
9.76k
                          num_rows,
884
9.76k
                          ps_codec->i4_strd,
885
9.76k
                          ps_codec->i4_strd,
886
9.76k
                          ps_codec->i4_disp_strd,
887
9.76k
                          (ps_codec->i4_disp_strd / 2),
888
9.76k
                          is_u_first,
889
9.76k
                          disable_luma_copy);
890
9.76k
        }
891
7.04k
        else if(IV_RGB_565 == ps_codec->e_chroma_fmt)
892
1.13k
        {
893
1.13k
            ihevcd_fmt_conv_420sp_to_rgb565_ft *fmt_conv_fptr;
894
1.13k
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
895
1.09k
            {
896
1.09k
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr;
897
1.09k
            }
898
40
            else
899
40
            {
900
40
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgb565;
901
40
            }
902
903
1.13k
            fmt_conv_fptr(pu1_y_src, pu1_uv_src,
904
1.13k
                          pu2_rgb_dst_tmp,
905
1.13k
                          ps_codec->i4_disp_wd,
906
1.13k
                          num_rows,
907
1.13k
                          ps_codec->i4_strd,
908
1.13k
                          ps_codec->i4_strd,
909
1.13k
                          ps_codec->i4_disp_strd,
910
1.13k
                          is_u_first);
911
1.13k
        }
912
5.91k
        else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt)
913
0
        {
914
0
            ihevcd_fmt_conv_420sp_to_rgba8888_ft *fmt_conv_fptr;
915
0
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
916
0
            {
917
0
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr;
918
0
            }
919
0
            else
920
0
            {
921
0
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgba8888;
922
0
            }
923
924
0
            ASSERT(is_u_first == 1);
925
0
            fmt_conv_fptr(pu1_y_src,
926
0
                          pu1_uv_src,
927
0
                          pu4_rgb_dst_tmp,
928
0
                          ps_codec->i4_disp_wd,
929
0
                          num_rows,
930
0
                          ps_codec->i4_strd,
931
0
                          ps_codec->i4_strd,
932
0
                          ps_codec->i4_disp_strd,
933
0
                          is_u_first);
934
0
        }
935
936
937
938
57.9k
    }
939
57.9k
    return (ret);
940
57.9k
}
941