/src/libhevc/decoder/ihevcd_fmt_conv.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevcd_fmt_conv.c |
22 | | * |
23 | | * @brief |
24 | | * Contains functions for format conversion or frame copy of output buffer |
25 | | * |
26 | | * @author |
27 | | * Harish |
28 | | * |
29 | | * @par List of Functions: |
30 | | * |
31 | | * @remarks |
32 | | * None |
33 | | * |
34 | | ******************************************************************************* |
35 | | */ |
36 | | /*****************************************************************************/ |
37 | | /* File Includes */ |
38 | | /*****************************************************************************/ |
39 | | #include <stdio.h> |
40 | | #include <stddef.h> |
41 | | #include <stdlib.h> |
42 | | #include <string.h> |
43 | | #include <assert.h> |
44 | | |
45 | | #include "ihevc_typedefs.h" |
46 | | #include "iv.h" |
47 | | #include "ivd.h" |
48 | | #include "ihevcd_cxa.h" |
49 | | #include "ithread.h" |
50 | | |
51 | | #include "ihevc_defs.h" |
52 | | #include "ihevc_debug.h" |
53 | | #include "ihevc_structs.h" |
54 | | #include "ihevc_macros.h" |
55 | | #include "ihevc_platform_macros.h" |
56 | | #include "ihevc_cabac_tables.h" |
57 | | #include "ihevc_disp_mgr.h" |
58 | | |
59 | | #include "ihevcd_defs.h" |
60 | | #include "ihevcd_function_selector.h" |
61 | | #include "ihevcd_structs.h" |
62 | | #include "ihevcd_error.h" |
63 | | #include "ihevcd_nal.h" |
64 | | #include "ihevcd_bitstream.h" |
65 | | #include "ihevcd_fmt_conv.h" |
66 | | #include "ihevcd_profile.h" |
67 | | |
68 | | /* SIMD variants of format conversion modules do not support width less than 32 */ |
69 | 109k | #define MIN_FMT_CONV_SIMD_WIDTH 32 |
70 | | /** |
71 | | ******************************************************************************* |
72 | | * |
73 | | * @brief Function used from copying a 420SP buffer |
74 | | * |
75 | | * @par Description |
76 | | * Function used from copying a 420SP buffer |
77 | | * |
78 | | * @param[in] pu1_y_src |
79 | | * Input Y pointer |
80 | | * |
81 | | * @param[in] pu1_uv_src |
82 | | * Input UV pointer (UV is interleaved either in UV or VU format) |
83 | | * |
84 | | * @param[in] pu1_y_dst |
85 | | * Output Y pointer |
86 | | * |
87 | | * @param[in] pu1_uv_dst |
88 | | * Output UV pointer (UV is interleaved in the same format as that of input) |
89 | | * |
90 | | * @param[in] wd |
91 | | * Width |
92 | | * |
93 | | * @param[in] ht |
94 | | * Height |
95 | | * |
96 | | * @param[in] src_y_strd |
97 | | * Input Y Stride |
98 | | * |
99 | | * @param[in] src_uv_strd |
100 | | * Input UV stride |
101 | | * |
102 | | * @param[in] dst_y_strd |
103 | | * Output Y stride |
104 | | * |
105 | | * @param[in] dst_uv_strd |
106 | | * Output UV stride |
107 | | * |
108 | | * @returns None |
109 | | * |
110 | | * @remarks In case there is a need to perform partial frame copy then |
111 | | * by passion appropriate source and destination pointers and appropriate |
112 | | * values for wd and ht it can be done |
113 | | * |
114 | | ******************************************************************************* |
115 | | */ |
116 | | void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src, |
117 | | UWORD8 *pu1_uv_src, |
118 | | UWORD16 *pu2_rgb_dst, |
119 | | WORD32 wd, |
120 | | WORD32 ht, |
121 | | WORD32 src_y_strd, |
122 | | WORD32 src_uv_strd, |
123 | | WORD32 dst_strd, |
124 | | WORD32 is_u_first) |
125 | 2.78k | { |
126 | | |
127 | | |
128 | 2.78k | WORD16 i2_r, i2_g, i2_b; |
129 | 2.78k | UWORD32 u4_r, u4_g, u4_b; |
130 | 2.78k | WORD16 i2_i, i2_j; |
131 | 2.78k | UWORD8 *pu1_y_src_nxt; |
132 | 2.78k | UWORD16 *pu2_rgb_dst_NextRow; |
133 | | |
134 | 2.78k | UWORD8 *pu1_u_src, *pu1_v_src; |
135 | | |
136 | 2.78k | if(is_u_first) |
137 | 2.78k | { |
138 | 2.78k | pu1_u_src = (UWORD8 *)pu1_uv_src; |
139 | 2.78k | pu1_v_src = (UWORD8 *)pu1_uv_src + 1; |
140 | 2.78k | } |
141 | 0 | else |
142 | 0 | { |
143 | 0 | pu1_u_src = (UWORD8 *)pu1_uv_src + 1; |
144 | 0 | pu1_v_src = (UWORD8 *)pu1_uv_src; |
145 | 0 | } |
146 | | |
147 | 2.78k | pu1_y_src_nxt = pu1_y_src + src_y_strd; |
148 | 2.78k | pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd; |
149 | | |
150 | 98.7k | for(i2_i = 0; i2_i < (ht >> 1); i2_i++) |
151 | 95.9k | { |
152 | 88.7M | for(i2_j = (wd >> 1); i2_j > 0; i2_j--) |
153 | 88.6M | { |
154 | 88.6M | i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); |
155 | 88.6M | i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13; |
156 | 88.6M | i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; |
157 | | |
158 | 88.6M | pu1_u_src += 2; |
159 | 88.6M | pu1_v_src += 2; |
160 | | /* pixel 0 */ |
161 | | /* B */ |
162 | 88.6M | u4_b = CLIP_U8(*pu1_y_src + i2_b); |
163 | 88.6M | u4_b >>= 3; |
164 | | /* G */ |
165 | 88.6M | u4_g = CLIP_U8(*pu1_y_src + i2_g); |
166 | 88.6M | u4_g >>= 2; |
167 | | /* R */ |
168 | 88.6M | u4_r = CLIP_U8(*pu1_y_src + i2_r); |
169 | 88.6M | u4_r >>= 3; |
170 | | |
171 | 88.6M | pu1_y_src++; |
172 | 88.6M | *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); |
173 | | |
174 | | /* pixel 1 */ |
175 | | /* B */ |
176 | 88.6M | u4_b = CLIP_U8(*pu1_y_src + i2_b); |
177 | 88.6M | u4_b >>= 3; |
178 | | /* G */ |
179 | 88.6M | u4_g = CLIP_U8(*pu1_y_src + i2_g); |
180 | 88.6M | u4_g >>= 2; |
181 | | /* R */ |
182 | 88.6M | u4_r = CLIP_U8(*pu1_y_src + i2_r); |
183 | 88.6M | u4_r >>= 3; |
184 | | |
185 | 88.6M | pu1_y_src++; |
186 | 88.6M | *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); |
187 | | |
188 | | /* pixel 2 */ |
189 | | /* B */ |
190 | 88.6M | u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); |
191 | 88.6M | u4_b >>= 3; |
192 | | /* G */ |
193 | 88.6M | u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); |
194 | 88.6M | u4_g >>= 2; |
195 | | /* R */ |
196 | 88.6M | u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); |
197 | 88.6M | u4_r >>= 3; |
198 | | |
199 | 88.6M | pu1_y_src_nxt++; |
200 | 88.6M | *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b); |
201 | | |
202 | | /* pixel 3 */ |
203 | | /* B */ |
204 | 88.6M | u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); |
205 | 88.6M | u4_b >>= 3; |
206 | | /* G */ |
207 | 88.6M | u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); |
208 | 88.6M | u4_g >>= 2; |
209 | | /* R */ |
210 | 88.6M | u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); |
211 | 88.6M | u4_r >>= 3; |
212 | | |
213 | 88.6M | pu1_y_src_nxt++; |
214 | 88.6M | *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b); |
215 | | |
216 | 88.6M | } |
217 | | |
218 | 95.9k | pu1_u_src = pu1_u_src + src_uv_strd - wd; |
219 | 95.9k | pu1_v_src = pu1_v_src + src_uv_strd - wd; |
220 | | |
221 | 95.9k | pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; |
222 | 95.9k | pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; |
223 | | |
224 | 95.9k | pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd; |
225 | 95.9k | pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd; |
226 | 95.9k | } |
227 | | |
228 | | |
229 | 2.78k | } |
230 | | |
231 | | void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src, |
232 | | UWORD8 *pu1_uv_src, |
233 | | UWORD32 *pu4_rgba_dst, |
234 | | WORD32 wd, |
235 | | WORD32 ht, |
236 | | WORD32 src_y_strd, |
237 | | WORD32 src_uv_strd, |
238 | | WORD32 dst_strd, |
239 | | WORD32 is_u_first) |
240 | 0 | { |
241 | | |
242 | |
|
243 | 0 | WORD16 i2_r, i2_g, i2_b; |
244 | 0 | UWORD32 u4_r, u4_g, u4_b; |
245 | 0 | WORD16 i2_i, i2_j; |
246 | 0 | UWORD8 *pu1_y_src_nxt; |
247 | 0 | UWORD32 *pu4_rgba_dst_NextRow; |
248 | |
|
249 | 0 | UWORD8 *pu1_u_src, *pu1_v_src; |
250 | |
|
251 | 0 | if(is_u_first) |
252 | 0 | { |
253 | 0 | pu1_u_src = (UWORD8 *)pu1_uv_src; |
254 | 0 | pu1_v_src = (UWORD8 *)pu1_uv_src + 1; |
255 | 0 | } |
256 | 0 | else |
257 | 0 | { |
258 | 0 | pu1_u_src = (UWORD8 *)pu1_uv_src + 1; |
259 | 0 | pu1_v_src = (UWORD8 *)pu1_uv_src; |
260 | 0 | } |
261 | |
|
262 | 0 | pu1_y_src_nxt = pu1_y_src + src_y_strd; |
263 | 0 | pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd; |
264 | |
|
265 | 0 | for(i2_i = 0; i2_i < (ht >> 1); i2_i++) |
266 | 0 | { |
267 | 0 | for(i2_j = (wd >> 1); i2_j > 0; i2_j--) |
268 | 0 | { |
269 | 0 | i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); |
270 | 0 | i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13; |
271 | 0 | i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; |
272 | |
|
273 | 0 | pu1_u_src += 2; |
274 | 0 | pu1_v_src += 2; |
275 | | /* pixel 0 */ |
276 | | /* B */ |
277 | 0 | u4_b = CLIP_U8(*pu1_y_src + i2_b); |
278 | | /* G */ |
279 | 0 | u4_g = CLIP_U8(*pu1_y_src + i2_g); |
280 | | /* R */ |
281 | 0 | u4_r = CLIP_U8(*pu1_y_src + i2_r); |
282 | |
|
283 | 0 | pu1_y_src++; |
284 | 0 | *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); |
285 | | |
286 | | /* pixel 1 */ |
287 | | /* B */ |
288 | 0 | u4_b = CLIP_U8(*pu1_y_src + i2_b); |
289 | | /* G */ |
290 | 0 | u4_g = CLIP_U8(*pu1_y_src + i2_g); |
291 | | /* R */ |
292 | 0 | u4_r = CLIP_U8(*pu1_y_src + i2_r); |
293 | |
|
294 | 0 | pu1_y_src++; |
295 | 0 | *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); |
296 | | |
297 | | /* pixel 2 */ |
298 | | /* B */ |
299 | 0 | u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); |
300 | | /* G */ |
301 | 0 | u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); |
302 | | /* R */ |
303 | 0 | u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); |
304 | |
|
305 | 0 | pu1_y_src_nxt++; |
306 | 0 | *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); |
307 | | |
308 | | /* pixel 3 */ |
309 | | /* B */ |
310 | 0 | u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); |
311 | | /* G */ |
312 | 0 | u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); |
313 | | /* R */ |
314 | 0 | u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); |
315 | |
|
316 | 0 | pu1_y_src_nxt++; |
317 | 0 | *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); |
318 | |
|
319 | 0 | } |
320 | |
|
321 | 0 | pu1_u_src = pu1_u_src + src_uv_strd - wd; |
322 | 0 | pu1_v_src = pu1_v_src + src_uv_strd - wd; |
323 | |
|
324 | 0 | pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; |
325 | 0 | pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; |
326 | |
|
327 | 0 | pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd; |
328 | 0 | pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd; |
329 | 0 | } |
330 | | |
331 | |
|
332 | 0 | } |
333 | | |
334 | | /** |
335 | | ******************************************************************************* |
336 | | * |
337 | | * @brief Function used from copying a 420SP buffer |
338 | | * |
339 | | * @par Description |
340 | | * Function used from copying a 420SP buffer |
341 | | * |
342 | | * @param[in] pu1_y_src |
343 | | * Input Y pointer |
344 | | * |
345 | | * @param[in] pu1_uv_src |
346 | | * Input UV pointer (UV is interleaved either in UV or VU format) |
347 | | * |
348 | | * @param[in] pu1_y_dst |
349 | | * Output Y pointer |
350 | | * |
351 | | * @param[in] pu1_uv_dst |
352 | | * Output UV pointer (UV is interleaved in the same format as that of input) |
353 | | * |
354 | | * @param[in] wd |
355 | | * Width |
356 | | * |
357 | | * @param[in] ht |
358 | | * Height |
359 | | * |
360 | | * @param[in] src_y_strd |
361 | | * Input Y Stride |
362 | | * |
363 | | * @param[in] src_uv_strd |
364 | | * Input UV stride |
365 | | * |
366 | | * @param[in] dst_y_strd |
367 | | * Output Y stride |
368 | | * |
369 | | * @param[in] dst_uv_strd |
370 | | * Output UV stride |
371 | | * |
372 | | * @returns None |
373 | | * |
374 | | * @remarks In case there is a need to perform partial frame copy then |
375 | | * by passion appropriate source and destination pointers and appropriate |
376 | | * values for wd and ht it can be done |
377 | | * |
378 | | ******************************************************************************* |
379 | | */ |
380 | | |
381 | | void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src, |
382 | | UWORD8 *pu1_uv_src, |
383 | | UWORD8 *pu1_y_dst, |
384 | | UWORD8 *pu1_uv_dst, |
385 | | WORD32 wd, |
386 | | WORD32 ht, |
387 | | WORD32 src_y_strd, |
388 | | WORD32 src_uv_strd, |
389 | | WORD32 dst_y_strd, |
390 | | WORD32 dst_uv_strd) |
391 | 81.5k | { |
392 | 81.5k | UWORD8 *pu1_src, *pu1_dst; |
393 | 81.5k | WORD32 num_rows, num_cols, src_strd, dst_strd; |
394 | 81.5k | WORD32 i; |
395 | | |
396 | | /* copy luma */ |
397 | 81.5k | pu1_src = (UWORD8 *)pu1_y_src; |
398 | 81.5k | pu1_dst = (UWORD8 *)pu1_y_dst; |
399 | | |
400 | 81.5k | num_rows = ht; |
401 | 81.5k | num_cols = wd; |
402 | | |
403 | 81.5k | src_strd = src_y_strd; |
404 | 81.5k | dst_strd = dst_y_strd; |
405 | | |
406 | 3.01M | for(i = 0; i < num_rows; i++) |
407 | 2.93M | { |
408 | 2.93M | memcpy(pu1_dst, pu1_src, num_cols); |
409 | 2.93M | pu1_dst += dst_strd; |
410 | 2.93M | pu1_src += src_strd; |
411 | 2.93M | } |
412 | | |
413 | | /* copy U and V */ |
414 | 81.5k | pu1_src = (UWORD8 *)pu1_uv_src; |
415 | 81.5k | pu1_dst = (UWORD8 *)pu1_uv_dst; |
416 | | |
417 | 81.5k | num_rows = ht >> 1; |
418 | 81.5k | num_cols = wd; |
419 | | |
420 | 81.5k | src_strd = src_uv_strd; |
421 | 81.5k | dst_strd = dst_uv_strd; |
422 | | |
423 | 1.65M | for(i = 0; i < num_rows; i++) |
424 | 1.57M | { |
425 | 1.57M | memcpy(pu1_dst, pu1_src, num_cols); |
426 | 1.57M | pu1_dst += dst_strd; |
427 | 1.57M | pu1_src += src_strd; |
428 | 1.57M | } |
429 | 81.5k | return; |
430 | 81.5k | } |
431 | | |
432 | | |
433 | | |
434 | | /** |
435 | | ******************************************************************************* |
436 | | * |
437 | | * @brief Function used from copying a 420SP buffer |
438 | | * |
439 | | * @par Description |
440 | | * Function used from copying a 420SP buffer |
441 | | * |
442 | | * @param[in] pu1_y_src |
443 | | * Input Y pointer |
444 | | * |
445 | | * @param[in] pu1_uv_src |
446 | | * Input UV pointer (UV is interleaved either in UV or VU format) |
447 | | * |
448 | | * @param[in] pu1_y_dst |
449 | | * Output Y pointer |
450 | | * |
451 | | * @param[in] pu1_uv_dst |
452 | | * Output UV pointer (UV is interleaved in the same format as that of input) |
453 | | * |
454 | | * @param[in] wd |
455 | | * Width |
456 | | * |
457 | | * @param[in] ht |
458 | | * Height |
459 | | * |
460 | | * @param[in] src_y_strd |
461 | | * Input Y Stride |
462 | | * |
463 | | * @param[in] src_uv_strd |
464 | | * Input UV stride |
465 | | * |
466 | | * @param[in] dst_y_strd |
467 | | * Output Y stride |
468 | | * |
469 | | * @param[in] dst_uv_strd |
470 | | * Output UV stride |
471 | | * |
472 | | * @returns None |
473 | | * |
474 | | * @remarks In case there is a need to perform partial frame copy then |
475 | | * by passion appropriate source and destination pointers and appropriate |
476 | | * values for wd and ht it can be done |
477 | | * |
478 | | ******************************************************************************* |
479 | | */ |
480 | | void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src, |
481 | | UWORD8 *pu1_uv_src, |
482 | | UWORD8 *pu1_y_dst, |
483 | | UWORD8 *pu1_uv_dst, |
484 | | WORD32 wd, |
485 | | WORD32 ht, |
486 | | WORD32 src_y_strd, |
487 | | WORD32 src_uv_strd, |
488 | | WORD32 dst_y_strd, |
489 | | WORD32 dst_uv_strd) |
490 | 0 | { |
491 | 0 | UWORD8 *pu1_src, *pu1_dst; |
492 | 0 | WORD32 num_rows, num_cols, src_strd, dst_strd; |
493 | 0 | WORD32 i; |
494 | | |
495 | | /* copy luma */ |
496 | 0 | pu1_src = (UWORD8 *)pu1_y_src; |
497 | 0 | pu1_dst = (UWORD8 *)pu1_y_dst; |
498 | |
|
499 | 0 | num_rows = ht; |
500 | 0 | num_cols = wd; |
501 | |
|
502 | 0 | src_strd = src_y_strd; |
503 | 0 | dst_strd = dst_y_strd; |
504 | |
|
505 | 0 | for(i = 0; i < num_rows; i++) |
506 | 0 | { |
507 | 0 | memcpy(pu1_dst, pu1_src, num_cols); |
508 | 0 | pu1_dst += dst_strd; |
509 | 0 | pu1_src += src_strd; |
510 | 0 | } |
511 | | |
512 | | /* copy U and V */ |
513 | 0 | pu1_src = (UWORD8 *)pu1_uv_src; |
514 | 0 | pu1_dst = (UWORD8 *)pu1_uv_dst; |
515 | |
|
516 | 0 | num_rows = ht >> 1; |
517 | 0 | num_cols = wd; |
518 | |
|
519 | 0 | src_strd = src_uv_strd; |
520 | 0 | dst_strd = dst_uv_strd; |
521 | |
|
522 | 0 | for(i = 0; i < num_rows; i++) |
523 | 0 | { |
524 | 0 | WORD32 j; |
525 | 0 | for(j = 0; j < num_cols; j += 2) |
526 | 0 | { |
527 | 0 | pu1_dst[j + 0] = pu1_src[j + 1]; |
528 | 0 | pu1_dst[j + 1] = pu1_src[j + 0]; |
529 | 0 | } |
530 | 0 | pu1_dst += dst_strd; |
531 | 0 | pu1_src += src_strd; |
532 | 0 | } |
533 | 0 | return; |
534 | 0 | } |
535 | | /** |
536 | | ******************************************************************************* |
537 | | * |
538 | | * @brief Function used from copying a 420SP buffer |
539 | | * |
540 | | * @par Description |
541 | | * Function used from copying a 420SP buffer |
542 | | * |
543 | | * @param[in] pu1_y_src |
544 | | * Input Y pointer |
545 | | * |
546 | | * @param[in] pu1_uv_src |
547 | | * Input UV pointer (UV is interleaved either in UV or VU format) |
548 | | * |
549 | | * @param[in] pu1_y_dst |
550 | | * Output Y pointer |
551 | | * |
552 | | * @param[in] pu1_u_dst |
553 | | * Output U pointer |
554 | | * |
555 | | * @param[in] pu1_v_dst |
556 | | * Output V pointer |
557 | | * |
558 | | * @param[in] wd |
559 | | * Width |
560 | | * |
561 | | * @param[in] ht |
562 | | * Height |
563 | | * |
564 | | * @param[in] src_y_strd |
565 | | * Input Y Stride |
566 | | * |
567 | | * @param[in] src_uv_strd |
568 | | * Input UV stride |
569 | | * |
570 | | * @param[in] dst_y_strd |
571 | | * Output Y stride |
572 | | * |
573 | | * @param[in] dst_uv_strd |
574 | | * Output UV stride |
575 | | * |
576 | | * @param[in] is_u_first |
577 | | * Flag to indicate if U is the first byte in input chroma part |
578 | | * |
579 | | * @returns none |
580 | | * |
581 | | * @remarks In case there is a need to perform partial frame copy then |
582 | | * by passion appropriate source and destination pointers and appropriate |
583 | | * values for wd and ht it can be done |
584 | | * |
585 | | ******************************************************************************* |
586 | | */ |
587 | | |
588 | | |
589 | | void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src, |
590 | | UWORD8 *pu1_uv_src, |
591 | | UWORD8 *pu1_y_dst, |
592 | | UWORD8 *pu1_u_dst, |
593 | | UWORD8 *pu1_v_dst, |
594 | | WORD32 wd, |
595 | | WORD32 ht, |
596 | | WORD32 src_y_strd, |
597 | | WORD32 src_uv_strd, |
598 | | WORD32 dst_y_strd, |
599 | | WORD32 dst_uv_strd, |
600 | | WORD32 is_u_first, |
601 | | WORD32 disable_luma_copy) |
602 | 20.4k | { |
603 | 20.4k | UWORD8 *pu1_src, *pu1_dst; |
604 | 20.4k | UWORD8 *pu1_u_src, *pu1_v_src; |
605 | 20.4k | WORD32 num_rows, num_cols, src_strd, dst_strd; |
606 | 20.4k | WORD32 i, j; |
607 | | |
608 | 20.4k | if(0 == disable_luma_copy) |
609 | 0 | { |
610 | | /* copy luma */ |
611 | 0 | pu1_src = (UWORD8 *)pu1_y_src; |
612 | 0 | pu1_dst = (UWORD8 *)pu1_y_dst; |
613 | |
|
614 | 0 | num_rows = ht; |
615 | 0 | num_cols = wd; |
616 | |
|
617 | 0 | src_strd = src_y_strd; |
618 | 0 | dst_strd = dst_y_strd; |
619 | |
|
620 | 0 | for(i = 0; i < num_rows; i++) |
621 | 0 | { |
622 | 0 | memcpy(pu1_dst, pu1_src, num_cols); |
623 | 0 | pu1_dst += dst_strd; |
624 | 0 | pu1_src += src_strd; |
625 | 0 | } |
626 | 0 | } |
627 | | /* de-interleave U and V and copy to destination */ |
628 | 20.4k | if(is_u_first) |
629 | 20.4k | { |
630 | 20.4k | pu1_u_src = (UWORD8 *)pu1_uv_src; |
631 | 20.4k | pu1_v_src = (UWORD8 *)pu1_uv_src + 1; |
632 | 20.4k | } |
633 | 18.4E | else |
634 | 18.4E | { |
635 | 18.4E | pu1_u_src = (UWORD8 *)pu1_uv_src + 1; |
636 | 18.4E | pu1_v_src = (UWORD8 *)pu1_uv_src; |
637 | 18.4E | } |
638 | | |
639 | | |
640 | 20.4k | num_rows = ht >> 1; |
641 | 20.4k | num_cols = wd >> 1; |
642 | | |
643 | 20.4k | src_strd = src_uv_strd; |
644 | 20.4k | dst_strd = dst_uv_strd; |
645 | | |
646 | 1.69M | for(i = 0; i < num_rows; i++) |
647 | 1.67M | { |
648 | 591M | for(j = 0; j < num_cols; j++) |
649 | 589M | { |
650 | 589M | pu1_u_dst[j] = pu1_u_src[j * 2]; |
651 | 589M | pu1_v_dst[j] = pu1_v_src[j * 2]; |
652 | 589M | } |
653 | | |
654 | 1.67M | pu1_u_dst += dst_strd; |
655 | 1.67M | pu1_v_dst += dst_strd; |
656 | 1.67M | pu1_u_src += src_strd; |
657 | 1.67M | pu1_v_src += src_strd; |
658 | 1.67M | } |
659 | 20.4k | return; |
660 | 20.4k | } |
661 | | |
662 | | |
663 | | |
664 | | /** |
665 | | ******************************************************************************* |
666 | | * |
667 | | * @brief Function used from format conversion or frame copy |
668 | | * |
669 | | * @par Description |
670 | | * Function used from copying or converting a reference frame to display buffer |
671 | | * in non shared mode |
672 | | * |
673 | | * @param[in] pu1_y_dst |
674 | | * Output Y pointer |
675 | | * |
676 | | * @param[in] pu1_u_dst |
677 | | * Output U/UV pointer ( UV is interleaved in the same format as that of input) |
678 | | * |
679 | | * @param[in] pu1_v_dst |
680 | | * Output V pointer ( used in 420P output case) |
681 | | * |
682 | | * @param[in] blocking |
683 | | * To indicate whether format conversion should wait till frame is reconstructed |
684 | | * and then return after complete copy is done. To be set to 1 when called at the |
685 | | * end of frame processing and set to 0 when called between frame processing modules |
686 | | * in order to utilize available MCPS |
687 | | * |
688 | | * @returns Error from IHEVCD_ERROR_T |
689 | | * |
690 | | ******************************************************************************* |
691 | | */ |
692 | | IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, |
693 | | process_ctxt_t *ps_proc, |
694 | | UWORD8 *pu1_y_dst, |
695 | | UWORD8 *pu1_u_dst, |
696 | | UWORD8 *pu1_v_dst, |
697 | | WORD32 cur_row, |
698 | | WORD32 num_rows) |
699 | 191k | { |
700 | 191k | IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; |
701 | 191k | pic_buf_t *ps_disp_pic; |
702 | 191k | UWORD8 *pu1_y_src, *pu1_uv_src; |
703 | 191k | UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp; |
704 | 191k | UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp; |
705 | 191k | UWORD16 *pu2_rgb_dst_tmp; |
706 | 191k | UWORD32 *pu4_rgb_dst_tmp; |
707 | 191k | WORD32 is_u_first; |
708 | 191k | UWORD8 *pu1_luma; |
709 | 191k | UWORD8 *pu1_chroma; |
710 | 191k | sps_t *ps_sps; |
711 | 191k | WORD32 disable_luma_copy; |
712 | 191k | WORD32 crop_unit_x, crop_unit_y; |
713 | | |
714 | 191k | if(0 == num_rows) |
715 | 44.6k | return ret; |
716 | | |
717 | | /* In case processing is disabled, then no need to format convert/copy */ |
718 | 147k | PROFILE_DISABLE_FMT_CONV(); |
719 | 147k | ps_sps = ps_proc->ps_sps; |
720 | | |
721 | 147k | crop_unit_x = 1; |
722 | 147k | crop_unit_y = 1; |
723 | | |
724 | 147k | if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) |
725 | 147k | { |
726 | 147k | crop_unit_x = 2; |
727 | 147k | crop_unit_y = 2; |
728 | 147k | } |
729 | | |
730 | 147k | ps_disp_pic = ps_codec->ps_disp_buf; |
731 | 147k | pu1_luma = ps_disp_pic->pu1_luma; |
732 | 147k | pu1_chroma = ps_disp_pic->pu1_chroma; |
733 | | |
734 | | |
735 | | /* Take care of cropping */ |
736 | 147k | pu1_luma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x; |
737 | | |
738 | | /* Left offset is multiplied by 2 because buffer is UV interleaved */ |
739 | 147k | pu1_chroma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2; |
740 | | |
741 | | |
742 | 147k | is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0; |
743 | | |
744 | | /* In case of 420P output luma copy is disabled for shared mode */ |
745 | 147k | disable_luma_copy = 0; |
746 | 147k | if(1 == ps_codec->i4_share_disp_buf) |
747 | 0 | { |
748 | 0 | disable_luma_copy = 1; |
749 | 0 | } |
750 | | |
751 | | |
752 | | |
753 | 147k | { |
754 | 147k | pu1_y_src = pu1_luma + cur_row * ps_codec->i4_strd; |
755 | 147k | pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd; |
756 | | |
757 | | /* In case of shared mode, with 420P output, get chroma destination */ |
758 | 147k | if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt)) |
759 | 0 | { |
760 | 0 | WORD32 i; |
761 | 0 | for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++) |
762 | 0 | { |
763 | 0 | WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0]; |
764 | 0 | if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT)) |
765 | 0 | { |
766 | 0 | pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1]; |
767 | 0 | pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2); |
768 | |
|
769 | 0 | pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2]; |
770 | 0 | pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2); |
771 | 0 | break; |
772 | 0 | } |
773 | 0 | } |
774 | 0 | } |
775 | 147k | pu2_rgb_dst_tmp = (UWORD16 *)pu1_y_dst; |
776 | 147k | pu2_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd; |
777 | 147k | pu4_rgb_dst_tmp = (UWORD32 *)pu1_y_dst; |
778 | 147k | pu4_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd; |
779 | 147k | pu1_y_dst_tmp = pu1_y_dst + cur_row * ps_codec->i4_disp_strd; |
780 | 147k | pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd; |
781 | 147k | pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2; |
782 | 147k | pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2; |
783 | | |
784 | | /* In case of multi threaded implementation, format conversion might be called |
785 | | * before reconstruction is completed. If the frame being converted/copied |
786 | | * is same as the frame being reconstructed, |
787 | | * Check how many rows can be format converted |
788 | | * Convert those many rows and then check for remaining rows and so on |
789 | | */ |
790 | | |
791 | 147k | if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores)) |
792 | 125k | { |
793 | 125k | WORD32 idx; |
794 | 125k | UWORD8 *pu1_buf; |
795 | 125k | WORD32 status; |
796 | 125k | WORD32 last_row = cur_row + num_rows; |
797 | 125k | WORD32 last_ctb_y; |
798 | 125k | UWORD32 ctb_in_row; |
799 | | |
800 | 2.65M | while(1) |
801 | 2.65M | { |
802 | 2.65M | last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) + |
803 | 2.65M | ps_sps->i2_pic_crop_top_offset * crop_unit_y; |
804 | 2.65M | last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1; |
805 | | /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */ |
806 | 2.65M | last_ctb_y++; |
807 | | /* In case of a conformance window, an extra wait of one row might be needed */ |
808 | 2.65M | last_ctb_y++; |
809 | 2.65M | last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1)); |
810 | | |
811 | 2.65M | idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
812 | | |
813 | | /*Check if the row below is completely processed before proceeding with format conversion*/ |
814 | 2.65M | status = 1; |
815 | 162M | for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++) |
816 | 159M | { |
817 | 159M | pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row); |
818 | 159M | status &= *pu1_buf; |
819 | 159M | } |
820 | | |
821 | 2.65M | if(status) |
822 | 124k | { |
823 | 124k | break; |
824 | 124k | } |
825 | 2.53M | else |
826 | 2.53M | { |
827 | 2.53M | ithread_yield(); |
828 | 2.53M | } |
829 | 2.65M | } |
830 | 125k | } |
831 | | |
832 | | |
833 | 147k | if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt)) |
834 | 81.5k | { |
835 | 81.5k | ihevcd_fmt_conv_420sp_to_420sp_ft *fmt_conv_fptr; |
836 | 81.5k | if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) |
837 | 81.2k | { |
838 | 81.2k | fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr; |
839 | 81.2k | } |
840 | 331 | else |
841 | 331 | { |
842 | 331 | fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420sp; |
843 | 331 | } |
844 | 81.5k | fmt_conv_fptr(pu1_y_src, pu1_uv_src, |
845 | 81.5k | pu1_y_dst_tmp, pu1_uv_dst_tmp, |
846 | 81.5k | ps_codec->i4_disp_wd, |
847 | 81.5k | num_rows, |
848 | 81.5k | ps_codec->i4_strd, |
849 | 81.5k | ps_codec->i4_strd, |
850 | 81.5k | ps_codec->i4_disp_strd, |
851 | 81.5k | ps_codec->i4_disp_strd); |
852 | 81.5k | } |
853 | 65.6k | else if(IV_YUV_420P == ps_codec->e_chroma_fmt) |
854 | 25.5k | { |
855 | 25.5k | ihevcd_fmt_conv_420sp_to_420p_ft *fmt_conv_fptr; |
856 | 25.5k | if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) |
857 | 25.0k | { |
858 | 25.0k | fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr; |
859 | 25.0k | } |
860 | 434 | else |
861 | 434 | { |
862 | 434 | fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420p; |
863 | 434 | } |
864 | | |
865 | 25.5k | if(0 == disable_luma_copy) |
866 | 25.5k | { |
867 | | // copy luma |
868 | 25.5k | WORD32 i; |
869 | 25.5k | WORD32 num_cols = ps_codec->i4_disp_wd; |
870 | | |
871 | 4.00M | for(i = 0; i < num_rows; i++) |
872 | 3.98M | { |
873 | 3.98M | memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols); |
874 | 3.98M | pu1_y_dst_tmp += ps_codec->i4_disp_strd; |
875 | 3.98M | pu1_y_src += ps_codec->i4_strd; |
876 | 3.98M | } |
877 | | |
878 | 25.5k | disable_luma_copy = 1; |
879 | 25.5k | } |
880 | 25.5k | fmt_conv_fptr(pu1_y_src, pu1_uv_src, |
881 | 25.5k | pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp, |
882 | 25.5k | ps_codec->i4_disp_wd, |
883 | 25.5k | num_rows, |
884 | 25.5k | ps_codec->i4_strd, |
885 | 25.5k | ps_codec->i4_strd, |
886 | 25.5k | ps_codec->i4_disp_strd, |
887 | 25.5k | (ps_codec->i4_disp_strd / 2), |
888 | 25.5k | is_u_first, |
889 | 25.5k | disable_luma_copy); |
890 | 25.5k | } |
891 | 40.1k | else if(IV_RGB_565 == ps_codec->e_chroma_fmt) |
892 | 2.78k | { |
893 | 2.78k | ihevcd_fmt_conv_420sp_to_rgb565_ft *fmt_conv_fptr; |
894 | 2.78k | if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) |
895 | 2.78k | { |
896 | 2.78k | fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr; |
897 | 2.78k | } |
898 | 0 | else |
899 | 0 | { |
900 | 0 | fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgb565; |
901 | 0 | } |
902 | | |
903 | 2.78k | fmt_conv_fptr(pu1_y_src, pu1_uv_src, |
904 | 2.78k | pu2_rgb_dst_tmp, |
905 | 2.78k | ps_codec->i4_disp_wd, |
906 | 2.78k | num_rows, |
907 | 2.78k | ps_codec->i4_strd, |
908 | 2.78k | ps_codec->i4_strd, |
909 | 2.78k | ps_codec->i4_disp_strd, |
910 | 2.78k | is_u_first); |
911 | 2.78k | } |
912 | 37.3k | else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt) |
913 | 0 | { |
914 | 0 | ihevcd_fmt_conv_420sp_to_rgba8888_ft *fmt_conv_fptr; |
915 | 0 | if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) |
916 | 0 | { |
917 | 0 | fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr; |
918 | 0 | } |
919 | 0 | else |
920 | 0 | { |
921 | 0 | fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgba8888; |
922 | 0 | } |
923 | |
|
924 | 0 | ASSERT(is_u_first == 1); |
925 | 0 | fmt_conv_fptr(pu1_y_src, |
926 | 0 | pu1_uv_src, |
927 | 0 | pu4_rgb_dst_tmp, |
928 | 0 | ps_codec->i4_disp_wd, |
929 | 0 | num_rows, |
930 | 0 | ps_codec->i4_strd, |
931 | 0 | ps_codec->i4_strd, |
932 | 0 | ps_codec->i4_disp_strd, |
933 | 0 | is_u_first); |
934 | 0 | } |
935 | | |
936 | | |
937 | | |
938 | 147k | } |
939 | 147k | return (ret); |
940 | 147k | } |
941 | | |