Coverage Report

Created: 2023-09-25 07:43

/src/libhevc/decoder/ihevcd_fmt_conv.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
*******************************************************************************
20
* @file
21
*  ihevcd_fmt_conv.c
22
*
23
* @brief
24
*  Contains functions for format conversion or frame copy of output buffer
25
*
26
* @author
27
*  Harish
28
*
29
* @par List of Functions:
30
*
31
* @remarks
32
*  None
33
*
34
*******************************************************************************
35
*/
36
/*****************************************************************************/
37
/* File Includes                                                             */
38
/*****************************************************************************/
39
#include <stdio.h>
40
#include <stddef.h>
41
#include <stdlib.h>
42
#include <string.h>
43
#include <assert.h>
44
45
#include "ihevc_typedefs.h"
46
#include "iv.h"
47
#include "ivd.h"
48
#include "ihevcd_cxa.h"
49
#include "ithread.h"
50
51
#include "ihevc_defs.h"
52
#include "ihevc_debug.h"
53
#include "ihevc_structs.h"
54
#include "ihevc_macros.h"
55
#include "ihevc_platform_macros.h"
56
#include "ihevc_cabac_tables.h"
57
#include "ihevc_disp_mgr.h"
58
59
#include "ihevcd_defs.h"
60
#include "ihevcd_function_selector.h"
61
#include "ihevcd_structs.h"
62
#include "ihevcd_error.h"
63
#include "ihevcd_nal.h"
64
#include "ihevcd_bitstream.h"
65
#include "ihevcd_fmt_conv.h"
66
#include "ihevcd_profile.h"
67
68
/* SIMD variants of format conversion modules do not support width less than 32 */
69
109k
/* ihevcd_fmt_conv() compares the display width against this threshold: at or
 * above it the routines installed in s_func_selector are used, below it the
 * C implementations defined in this file are called directly */
#define MIN_FMT_CONV_SIMD_WIDTH 32
70
/**
71
*******************************************************************************
72
*
73
* @brief Function used from copying a 420SP buffer
74
*
75
* @par   Description
76
* Function used from copying a 420SP buffer
77
*
78
* @param[in] pu1_y_src
79
*   Input Y pointer
80
*
81
* @param[in] pu1_uv_src
82
*   Input UV pointer (UV is interleaved either in UV or VU format)
83
*
84
* @param[in] pu1_y_dst
85
*   Output Y pointer
86
*
87
* @param[in] pu1_uv_dst
88
*   Output UV pointer (UV is interleaved in the same format as that of input)
89
*
90
* @param[in] wd
91
*   Width
92
*
93
* @param[in] ht
94
*   Height
95
*
96
* @param[in] src_y_strd
97
*   Input Y Stride
98
*
99
* @param[in] src_uv_strd
100
*   Input UV stride
101
*
102
* @param[in] dst_y_strd
103
*   Output Y stride
104
*
105
* @param[in] dst_uv_strd
106
*   Output UV stride
107
*
108
* @returns None
109
*
110
* @remarks In case there is a need to perform partial frame copy then
111
* by passion appropriate source and destination pointers and appropriate
112
* values for wd and ht it can be done
113
*
114
*******************************************************************************
115
*/
116
void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
117
                                     UWORD8 *pu1_uv_src,
118
                                     UWORD16 *pu2_rgb_dst,
119
                                     WORD32 wd,
120
                                     WORD32 ht,
121
                                     WORD32 src_y_strd,
122
                                     WORD32 src_uv_strd,
123
                                     WORD32 dst_strd,
124
                                     WORD32 is_u_first)
125
2.78k
{
126
127
128
2.78k
    WORD16  i2_r, i2_g, i2_b;
129
2.78k
    UWORD32  u4_r, u4_g, u4_b;
130
2.78k
    WORD16  i2_i, i2_j;
131
2.78k
    UWORD8  *pu1_y_src_nxt;
132
2.78k
    UWORD16 *pu2_rgb_dst_NextRow;
133
134
2.78k
    UWORD8 *pu1_u_src, *pu1_v_src;
135
136
2.78k
    if(is_u_first)
137
2.78k
    {
138
2.78k
        pu1_u_src = (UWORD8 *)pu1_uv_src;
139
2.78k
        pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
140
2.78k
    }
141
0
    else
142
0
    {
143
0
        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
144
0
        pu1_v_src = (UWORD8 *)pu1_uv_src;
145
0
    }
146
147
2.78k
    pu1_y_src_nxt   = pu1_y_src + src_y_strd;
148
2.78k
    pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
149
150
98.7k
    for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
151
95.9k
    {
152
88.7M
        for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
153
88.6M
        {
154
88.6M
            i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
155
88.6M
            i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
156
88.6M
            i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
157
158
88.6M
            pu1_u_src += 2;
159
88.6M
            pu1_v_src += 2;
160
            /* pixel 0 */
161
            /* B */
162
88.6M
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
163
88.6M
            u4_b >>= 3;
164
            /* G */
165
88.6M
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
166
88.6M
            u4_g >>= 2;
167
            /* R */
168
88.6M
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
169
88.6M
            u4_r >>= 3;
170
171
88.6M
            pu1_y_src++;
172
88.6M
            *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
173
174
            /* pixel 1 */
175
            /* B */
176
88.6M
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
177
88.6M
            u4_b >>= 3;
178
            /* G */
179
88.6M
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
180
88.6M
            u4_g >>= 2;
181
            /* R */
182
88.6M
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
183
88.6M
            u4_r >>= 3;
184
185
88.6M
            pu1_y_src++;
186
88.6M
            *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
187
188
            /* pixel 2 */
189
            /* B */
190
88.6M
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
191
88.6M
            u4_b >>= 3;
192
            /* G */
193
88.6M
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
194
88.6M
            u4_g >>= 2;
195
            /* R */
196
88.6M
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
197
88.6M
            u4_r >>= 3;
198
199
88.6M
            pu1_y_src_nxt++;
200
88.6M
            *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
201
202
            /* pixel 3 */
203
            /* B */
204
88.6M
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
205
88.6M
            u4_b >>= 3;
206
            /* G */
207
88.6M
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
208
88.6M
            u4_g >>= 2;
209
            /* R */
210
88.6M
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
211
88.6M
            u4_r >>= 3;
212
213
88.6M
            pu1_y_src_nxt++;
214
88.6M
            *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
215
216
88.6M
        }
217
218
95.9k
        pu1_u_src = pu1_u_src + src_uv_strd - wd;
219
95.9k
        pu1_v_src = pu1_v_src + src_uv_strd - wd;
220
221
95.9k
        pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
222
95.9k
        pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
223
224
95.9k
        pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
225
95.9k
        pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
226
95.9k
    }
227
228
229
2.78k
}
230
231
void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
232
                                       UWORD8 *pu1_uv_src,
233
                                       UWORD32 *pu4_rgba_dst,
234
                                       WORD32 wd,
235
                                       WORD32 ht,
236
                                       WORD32 src_y_strd,
237
                                       WORD32 src_uv_strd,
238
                                       WORD32 dst_strd,
239
                                       WORD32 is_u_first)
240
0
{
241
242
243
0
    WORD16  i2_r, i2_g, i2_b;
244
0
    UWORD32  u4_r, u4_g, u4_b;
245
0
    WORD16  i2_i, i2_j;
246
0
    UWORD8  *pu1_y_src_nxt;
247
0
    UWORD32 *pu4_rgba_dst_NextRow;
248
249
0
    UWORD8 *pu1_u_src, *pu1_v_src;
250
251
0
    if(is_u_first)
252
0
    {
253
0
        pu1_u_src = (UWORD8 *)pu1_uv_src;
254
0
        pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
255
0
    }
256
0
    else
257
0
    {
258
0
        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
259
0
        pu1_v_src = (UWORD8 *)pu1_uv_src;
260
0
    }
261
262
0
    pu1_y_src_nxt   = pu1_y_src + src_y_strd;
263
0
    pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd;
264
265
0
    for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
266
0
    {
267
0
        for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
268
0
        {
269
0
            i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
270
0
            i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
271
0
            i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
272
273
0
            pu1_u_src += 2;
274
0
            pu1_v_src += 2;
275
            /* pixel 0 */
276
            /* B */
277
0
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
278
            /* G */
279
0
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
280
            /* R */
281
0
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
282
283
0
            pu1_y_src++;
284
0
            *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
285
286
            /* pixel 1 */
287
            /* B */
288
0
            u4_b = CLIP_U8(*pu1_y_src + i2_b);
289
            /* G */
290
0
            u4_g = CLIP_U8(*pu1_y_src + i2_g);
291
            /* R */
292
0
            u4_r = CLIP_U8(*pu1_y_src + i2_r);
293
294
0
            pu1_y_src++;
295
0
            *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
296
297
            /* pixel 2 */
298
            /* B */
299
0
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
300
            /* G */
301
0
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
302
            /* R */
303
0
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
304
305
0
            pu1_y_src_nxt++;
306
0
            *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
307
308
            /* pixel 3 */
309
            /* B */
310
0
            u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
311
            /* G */
312
0
            u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
313
            /* R */
314
0
            u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
315
316
0
            pu1_y_src_nxt++;
317
0
            *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
318
319
0
        }
320
321
0
        pu1_u_src = pu1_u_src + src_uv_strd - wd;
322
0
        pu1_v_src = pu1_v_src + src_uv_strd - wd;
323
324
0
        pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
325
0
        pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
326
327
0
        pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd;
328
0
        pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd;
329
0
    }
330
331
332
0
}
333
334
/**
335
*******************************************************************************
336
*
337
* @brief Function used from copying a 420SP buffer
338
*
339
* @par   Description
340
* Function used from copying a 420SP buffer
341
*
342
* @param[in] pu1_y_src
343
*   Input Y pointer
344
*
345
* @param[in] pu1_uv_src
346
*   Input UV pointer (UV is interleaved either in UV or VU format)
347
*
348
* @param[in] pu1_y_dst
349
*   Output Y pointer
350
*
351
* @param[in] pu1_uv_dst
352
*   Output UV pointer (UV is interleaved in the same format as that of input)
353
*
354
* @param[in] wd
355
*   Width
356
*
357
* @param[in] ht
358
*   Height
359
*
360
* @param[in] src_y_strd
361
*   Input Y Stride
362
*
363
* @param[in] src_uv_strd
364
*   Input UV stride
365
*
366
* @param[in] dst_y_strd
367
*   Output Y stride
368
*
369
* @param[in] dst_uv_strd
370
*   Output UV stride
371
*
372
* @returns None
373
*
374
* @remarks In case there is a need to perform partial frame copy then
375
* by passion appropriate source and destination pointers and appropriate
376
* values for wd and ht it can be done
377
*
378
*******************************************************************************
379
*/
380
381
void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
382
                                    UWORD8 *pu1_uv_src,
383
                                    UWORD8 *pu1_y_dst,
384
                                    UWORD8 *pu1_uv_dst,
385
                                    WORD32 wd,
386
                                    WORD32 ht,
387
                                    WORD32 src_y_strd,
388
                                    WORD32 src_uv_strd,
389
                                    WORD32 dst_y_strd,
390
                                    WORD32 dst_uv_strd)
391
81.5k
{
392
81.5k
    UWORD8 *pu1_src, *pu1_dst;
393
81.5k
    WORD32 num_rows, num_cols, src_strd, dst_strd;
394
81.5k
    WORD32 i;
395
396
    /* copy luma */
397
81.5k
    pu1_src = (UWORD8 *)pu1_y_src;
398
81.5k
    pu1_dst = (UWORD8 *)pu1_y_dst;
399
400
81.5k
    num_rows = ht;
401
81.5k
    num_cols = wd;
402
403
81.5k
    src_strd = src_y_strd;
404
81.5k
    dst_strd = dst_y_strd;
405
406
3.01M
    for(i = 0; i < num_rows; i++)
407
2.93M
    {
408
2.93M
        memcpy(pu1_dst, pu1_src, num_cols);
409
2.93M
        pu1_dst += dst_strd;
410
2.93M
        pu1_src += src_strd;
411
2.93M
    }
412
413
    /* copy U and V */
414
81.5k
    pu1_src = (UWORD8 *)pu1_uv_src;
415
81.5k
    pu1_dst = (UWORD8 *)pu1_uv_dst;
416
417
81.5k
    num_rows = ht >> 1;
418
81.5k
    num_cols = wd;
419
420
81.5k
    src_strd = src_uv_strd;
421
81.5k
    dst_strd = dst_uv_strd;
422
423
1.65M
    for(i = 0; i < num_rows; i++)
424
1.57M
    {
425
1.57M
        memcpy(pu1_dst, pu1_src, num_cols);
426
1.57M
        pu1_dst += dst_strd;
427
1.57M
        pu1_src += src_strd;
428
1.57M
    }
429
81.5k
    return;
430
81.5k
}
431
432
433
434
/**
435
*******************************************************************************
436
*
437
* @brief Function used from copying a 420SP buffer
438
*
439
* @par   Description
440
* Function used from copying a 420SP buffer
441
*
442
* @param[in] pu1_y_src
443
*   Input Y pointer
444
*
445
* @param[in] pu1_uv_src
446
*   Input UV pointer (UV is interleaved either in UV or VU format)
447
*
448
* @param[in] pu1_y_dst
449
*   Output Y pointer
450
*
451
* @param[in] pu1_uv_dst
452
*   Output UV pointer (UV is interleaved in the same format as that of input)
453
*
454
* @param[in] wd
455
*   Width
456
*
457
* @param[in] ht
458
*   Height
459
*
460
* @param[in] src_y_strd
461
*   Input Y Stride
462
*
463
* @param[in] src_uv_strd
464
*   Input UV stride
465
*
466
* @param[in] dst_y_strd
467
*   Output Y stride
468
*
469
* @param[in] dst_uv_strd
470
*   Output UV stride
471
*
472
* @returns None
473
*
474
* @remarks In case there is a need to perform partial frame copy then
475
* by passion appropriate source and destination pointers and appropriate
476
* values for wd and ht it can be done
477
*
478
*******************************************************************************
479
*/
480
void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
481
                                            UWORD8 *pu1_uv_src,
482
                                            UWORD8 *pu1_y_dst,
483
                                            UWORD8 *pu1_uv_dst,
484
                                            WORD32 wd,
485
                                            WORD32 ht,
486
                                            WORD32 src_y_strd,
487
                                            WORD32 src_uv_strd,
488
                                            WORD32 dst_y_strd,
489
                                            WORD32 dst_uv_strd)
490
0
{
491
0
    UWORD8 *pu1_src, *pu1_dst;
492
0
    WORD32 num_rows, num_cols, src_strd, dst_strd;
493
0
    WORD32 i;
494
495
    /* copy luma */
496
0
    pu1_src = (UWORD8 *)pu1_y_src;
497
0
    pu1_dst = (UWORD8 *)pu1_y_dst;
498
499
0
    num_rows = ht;
500
0
    num_cols = wd;
501
502
0
    src_strd = src_y_strd;
503
0
    dst_strd = dst_y_strd;
504
505
0
    for(i = 0; i < num_rows; i++)
506
0
    {
507
0
        memcpy(pu1_dst, pu1_src, num_cols);
508
0
        pu1_dst += dst_strd;
509
0
        pu1_src += src_strd;
510
0
    }
511
512
    /* copy U and V */
513
0
    pu1_src = (UWORD8 *)pu1_uv_src;
514
0
    pu1_dst = (UWORD8 *)pu1_uv_dst;
515
516
0
    num_rows = ht >> 1;
517
0
    num_cols = wd;
518
519
0
    src_strd = src_uv_strd;
520
0
    dst_strd = dst_uv_strd;
521
522
0
    for(i = 0; i < num_rows; i++)
523
0
    {
524
0
        WORD32 j;
525
0
        for(j = 0; j < num_cols; j += 2)
526
0
        {
527
0
            pu1_dst[j + 0] = pu1_src[j + 1];
528
0
            pu1_dst[j + 1] = pu1_src[j + 0];
529
0
        }
530
0
        pu1_dst += dst_strd;
531
0
        pu1_src += src_strd;
532
0
    }
533
0
    return;
534
0
}
535
/**
536
*******************************************************************************
537
*
538
* @brief Function used from copying a 420SP buffer
539
*
540
* @par   Description
541
* Function used from copying a 420SP buffer
542
*
543
* @param[in] pu1_y_src
544
*   Input Y pointer
545
*
546
* @param[in] pu1_uv_src
547
*   Input UV pointer (UV is interleaved either in UV or VU format)
548
*
549
* @param[in] pu1_y_dst
550
*   Output Y pointer
551
*
552
* @param[in] pu1_u_dst
553
*   Output U pointer
554
*
555
* @param[in] pu1_v_dst
556
*   Output V pointer
557
*
558
* @param[in] wd
559
*   Width
560
*
561
* @param[in] ht
562
*   Height
563
*
564
* @param[in] src_y_strd
565
*   Input Y Stride
566
*
567
* @param[in] src_uv_strd
568
*   Input UV stride
569
*
570
* @param[in] dst_y_strd
571
*   Output Y stride
572
*
573
* @param[in] dst_uv_strd
574
*   Output UV stride
575
*
576
* @param[in] is_u_first
577
*   Flag to indicate if U is the first byte in input chroma part
578
*
579
* @returns none
580
*
581
* @remarks In case there is a need to perform partial frame copy then
582
* by passion appropriate source and destination pointers and appropriate
583
* values for wd and ht it can be done
584
*
585
*******************************************************************************
586
*/
587
588
589
void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
590
                                   UWORD8 *pu1_uv_src,
591
                                   UWORD8 *pu1_y_dst,
592
                                   UWORD8 *pu1_u_dst,
593
                                   UWORD8 *pu1_v_dst,
594
                                   WORD32 wd,
595
                                   WORD32 ht,
596
                                   WORD32 src_y_strd,
597
                                   WORD32 src_uv_strd,
598
                                   WORD32 dst_y_strd,
599
                                   WORD32 dst_uv_strd,
600
                                   WORD32 is_u_first,
601
                                   WORD32 disable_luma_copy)
602
20.4k
{
603
20.4k
    UWORD8 *pu1_src, *pu1_dst;
604
20.4k
    UWORD8 *pu1_u_src, *pu1_v_src;
605
20.4k
    WORD32 num_rows, num_cols, src_strd, dst_strd;
606
20.4k
    WORD32 i, j;
607
608
20.4k
    if(0 == disable_luma_copy)
609
0
    {
610
        /* copy luma */
611
0
        pu1_src = (UWORD8 *)pu1_y_src;
612
0
        pu1_dst = (UWORD8 *)pu1_y_dst;
613
614
0
        num_rows = ht;
615
0
        num_cols = wd;
616
617
0
        src_strd = src_y_strd;
618
0
        dst_strd = dst_y_strd;
619
620
0
        for(i = 0; i < num_rows; i++)
621
0
        {
622
0
            memcpy(pu1_dst, pu1_src, num_cols);
623
0
            pu1_dst += dst_strd;
624
0
            pu1_src += src_strd;
625
0
        }
626
0
    }
627
    /* de-interleave U and V and copy to destination */
628
20.4k
    if(is_u_first)
629
20.4k
    {
630
20.4k
        pu1_u_src = (UWORD8 *)pu1_uv_src;
631
20.4k
        pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
632
20.4k
    }
633
18.4E
    else
634
18.4E
    {
635
18.4E
        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
636
18.4E
        pu1_v_src = (UWORD8 *)pu1_uv_src;
637
18.4E
    }
638
639
640
20.4k
    num_rows = ht >> 1;
641
20.4k
    num_cols = wd >> 1;
642
643
20.4k
    src_strd = src_uv_strd;
644
20.4k
    dst_strd = dst_uv_strd;
645
646
1.69M
    for(i = 0; i < num_rows; i++)
647
1.67M
    {
648
591M
        for(j = 0; j < num_cols; j++)
649
589M
        {
650
589M
            pu1_u_dst[j] = pu1_u_src[j * 2];
651
589M
            pu1_v_dst[j] = pu1_v_src[j * 2];
652
589M
        }
653
654
1.67M
        pu1_u_dst += dst_strd;
655
1.67M
        pu1_v_dst += dst_strd;
656
1.67M
        pu1_u_src += src_strd;
657
1.67M
        pu1_v_src += src_strd;
658
1.67M
    }
659
20.4k
    return;
660
20.4k
}
661
662
663
664
/**
665
*******************************************************************************
666
*
667
* @brief Function used from format conversion or frame copy
668
*
669
* @par   Description
670
* Function used from copying or converting a reference frame to display buffer
671
* in non shared mode
672
*
673
* @param[in] pu1_y_dst
674
*   Output Y pointer
675
*
676
* @param[in] pu1_u_dst
677
*   Output U/UV pointer ( UV is interleaved in the same format as that of input)
678
*
679
* @param[in] pu1_v_dst
680
*   Output V pointer ( used in 420P output case)
681
*
682
* @param[in] blocking
683
*   To indicate whether format conversion should wait till frame is reconstructed
684
*   and then return after complete copy is done. To be set to 1 when called at the
685
*   end of frame processing and set to 0 when called between frame processing modules
686
*   in order to utilize available MCPS
687
*
688
* @returns Error from IHEVCD_ERROR_T
689
*
690
*******************************************************************************
691
*/
692
IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
693
                               process_ctxt_t *ps_proc,
694
                               UWORD8 *pu1_y_dst,
695
                               UWORD8 *pu1_u_dst,
696
                               UWORD8 *pu1_v_dst,
697
                               WORD32 cur_row,
698
                               WORD32 num_rows)
699
191k
{
700
191k
    IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
701
191k
    pic_buf_t *ps_disp_pic;
702
191k
    UWORD8 *pu1_y_src, *pu1_uv_src;
703
191k
    UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
704
191k
    UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
705
191k
    UWORD16 *pu2_rgb_dst_tmp;
706
191k
    UWORD32 *pu4_rgb_dst_tmp;
707
191k
    WORD32 is_u_first;
708
191k
    UWORD8 *pu1_luma;
709
191k
    UWORD8 *pu1_chroma;
710
191k
    sps_t *ps_sps;
711
191k
    WORD32 disable_luma_copy;
712
191k
    WORD32 crop_unit_x, crop_unit_y;
713
714
191k
    if(0 == num_rows)
715
44.6k
        return ret;
716
717
    /* In case processing is disabled, then no need to format convert/copy */
718
147k
    PROFILE_DISABLE_FMT_CONV();
719
147k
    ps_sps = ps_proc->ps_sps;
720
721
147k
    crop_unit_x = 1;
722
147k
    crop_unit_y = 1;
723
724
147k
    if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc)
725
147k
    {
726
147k
        crop_unit_x = 2;
727
147k
        crop_unit_y = 2;
728
147k
    }
729
730
147k
    ps_disp_pic = ps_codec->ps_disp_buf;
731
147k
    pu1_luma = ps_disp_pic->pu1_luma;
732
147k
    pu1_chroma = ps_disp_pic->pu1_chroma;
733
734
735
    /* Take care of cropping */
736
147k
    pu1_luma    += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x;
737
738
    /* Left offset is multiplied by 2 because buffer is UV interleaved */
739
147k
    pu1_chroma  += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2;
740
741
742
147k
    is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0;
743
744
    /* In case of 420P output luma copy is disabled for shared mode */
745
147k
    disable_luma_copy = 0;
746
147k
    if(1 == ps_codec->i4_share_disp_buf)
747
0
    {
748
0
        disable_luma_copy = 1;
749
0
    }
750
751
752
753
147k
    {
754
147k
        pu1_y_src   = pu1_luma + cur_row * ps_codec->i4_strd;
755
147k
        pu1_uv_src  = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd;
756
757
        /* In case of shared mode, with 420P output, get chroma destination */
758
147k
        if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt))
759
0
        {
760
0
            WORD32 i;
761
0
            for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++)
762
0
            {
763
0
                WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0];
764
0
                if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT))
765
0
                {
766
0
                    pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1];
767
0
                    pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
768
769
0
                    pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2];
770
0
                    pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
771
0
                    break;
772
0
                }
773
0
            }
774
0
        }
775
147k
        pu2_rgb_dst_tmp  = (UWORD16 *)pu1_y_dst;
776
147k
        pu2_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
777
147k
        pu4_rgb_dst_tmp  = (UWORD32 *)pu1_y_dst;
778
147k
        pu4_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
779
147k
        pu1_y_dst_tmp  = pu1_y_dst  + cur_row * ps_codec->i4_disp_strd;
780
147k
        pu1_uv_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd;
781
147k
        pu1_u_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
782
147k
        pu1_v_dst_tmp = pu1_v_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
783
784
        /* In case of multi threaded implementation, format conversion might be called
785
         * before reconstruction is completed. If the frame being converted/copied
786
         * is same as the frame being reconstructed,
787
         * Check how many rows can be format converted
788
         * Convert those many rows and then check for remaining rows and so on
789
         */
790
791
147k
        if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores))
792
125k
        {
793
125k
            WORD32 idx;
794
125k
            UWORD8 *pu1_buf;
795
125k
            WORD32 status;
796
125k
            WORD32 last_row = cur_row + num_rows;
797
125k
            WORD32 last_ctb_y;
798
125k
            UWORD32 ctb_in_row;
799
800
2.65M
            while(1)
801
2.65M
            {
802
2.65M
                last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) +
803
2.65M
                                ps_sps->i2_pic_crop_top_offset * crop_unit_y;
804
2.65M
                last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1;
805
                /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */
806
2.65M
                last_ctb_y++;
807
                /* In case of a  conformance window, an extra wait of one row might be needed */
808
2.65M
                last_ctb_y++;
809
2.65M
                last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1));
810
811
2.65M
                idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb);
812
813
                /*Check if the row below is completely processed before proceeding with format conversion*/
814
2.65M
                status = 1;
815
162M
                for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++)
816
159M
                {
817
159M
                    pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row);
818
159M
                    status &= *pu1_buf;
819
159M
                }
820
821
2.65M
                if(status)
822
124k
                {
823
124k
                    break;
824
124k
                }
825
2.53M
                else
826
2.53M
                {
827
2.53M
                    ithread_yield();
828
2.53M
                }
829
2.65M
            }
830
125k
        }
831
832
833
147k
        if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt))
834
81.5k
        {
835
81.5k
            ihevcd_fmt_conv_420sp_to_420sp_ft *fmt_conv_fptr;
836
81.5k
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
837
81.2k
            {
838
81.2k
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr;
839
81.2k
            }
840
331
            else
841
331
            {
842
331
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420sp;
843
331
            }
844
81.5k
            fmt_conv_fptr(pu1_y_src, pu1_uv_src,
845
81.5k
                          pu1_y_dst_tmp, pu1_uv_dst_tmp,
846
81.5k
                          ps_codec->i4_disp_wd,
847
81.5k
                          num_rows,
848
81.5k
                          ps_codec->i4_strd,
849
81.5k
                          ps_codec->i4_strd,
850
81.5k
                          ps_codec->i4_disp_strd,
851
81.5k
                          ps_codec->i4_disp_strd);
852
81.5k
        }
853
65.6k
        else if(IV_YUV_420P == ps_codec->e_chroma_fmt)
854
25.5k
        {
855
25.5k
            ihevcd_fmt_conv_420sp_to_420p_ft *fmt_conv_fptr;
856
25.5k
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
857
25.0k
            {
858
25.0k
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr;
859
25.0k
            }
860
434
            else
861
434
            {
862
434
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420p;
863
434
            }
864
865
25.5k
            if(0 == disable_luma_copy)
866
25.5k
            {
867
                // copy luma
868
25.5k
                WORD32 i;
869
25.5k
                WORD32 num_cols = ps_codec->i4_disp_wd;
870
871
4.00M
                for(i = 0; i < num_rows; i++)
872
3.98M
                {
873
3.98M
                    memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols);
874
3.98M
                    pu1_y_dst_tmp += ps_codec->i4_disp_strd;
875
3.98M
                    pu1_y_src += ps_codec->i4_strd;
876
3.98M
                }
877
878
25.5k
                disable_luma_copy = 1;
879
25.5k
            }
880
25.5k
            fmt_conv_fptr(pu1_y_src, pu1_uv_src,
881
25.5k
                          pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp,
882
25.5k
                          ps_codec->i4_disp_wd,
883
25.5k
                          num_rows,
884
25.5k
                          ps_codec->i4_strd,
885
25.5k
                          ps_codec->i4_strd,
886
25.5k
                          ps_codec->i4_disp_strd,
887
25.5k
                          (ps_codec->i4_disp_strd / 2),
888
25.5k
                          is_u_first,
889
25.5k
                          disable_luma_copy);
890
25.5k
        }
891
40.1k
        else if(IV_RGB_565 == ps_codec->e_chroma_fmt)
892
2.78k
        {
893
2.78k
            ihevcd_fmt_conv_420sp_to_rgb565_ft *fmt_conv_fptr;
894
2.78k
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
895
2.78k
            {
896
2.78k
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr;
897
2.78k
            }
898
0
            else
899
0
            {
900
0
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgb565;
901
0
            }
902
903
2.78k
            fmt_conv_fptr(pu1_y_src, pu1_uv_src,
904
2.78k
                          pu2_rgb_dst_tmp,
905
2.78k
                          ps_codec->i4_disp_wd,
906
2.78k
                          num_rows,
907
2.78k
                          ps_codec->i4_strd,
908
2.78k
                          ps_codec->i4_strd,
909
2.78k
                          ps_codec->i4_disp_strd,
910
2.78k
                          is_u_first);
911
2.78k
        }
912
37.3k
        else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt)
913
0
        {
914
0
            ihevcd_fmt_conv_420sp_to_rgba8888_ft *fmt_conv_fptr;
915
0
            if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
916
0
            {
917
0
                fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr;
918
0
            }
919
0
            else
920
0
            {
921
0
                fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgba8888;
922
0
            }
923
924
0
            ASSERT(is_u_first == 1);
925
0
            fmt_conv_fptr(pu1_y_src,
926
0
                          pu1_uv_src,
927
0
                          pu4_rgb_dst_tmp,
928
0
                          ps_codec->i4_disp_wd,
929
0
                          num_rows,
930
0
                          ps_codec->i4_strd,
931
0
                          ps_codec->i4_strd,
932
0
                          ps_codec->i4_disp_strd,
933
0
                          is_u_first);
934
0
        }
935
936
937
938
147k
    }
939
147k
    return (ret);
940
147k
}
941