/src/libhevc/encoder/ihevce_stasino_helpers.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * ihevce_stasino_helpers.c |
24 | | * |
25 | | * @brief |
26 | | * |
27 | | * @author |
28 | | * Ittiam |
29 | | * |
30 | | * @par List of Functions: |
31 | | * |
32 | | * @remarks |
33 | | * None |
34 | | * |
35 | | ******************************************************************************* |
36 | | */ |
37 | | |
38 | | /*****************************************************************************/ |
39 | | /* File Includes */ |
40 | | /*****************************************************************************/ |
41 | | /* System include files */ |
42 | | #include <stdio.h> |
43 | | #include <stdlib.h> |
44 | | #include <assert.h> |
45 | | #include <string.h> |
46 | | |
47 | | /* User include files */ |
48 | | #include "ihevc_typedefs.h" |
49 | | #include "itt_video_api.h" |
50 | | #include "ihevce_api.h" |
51 | | |
52 | | #include "rc_cntrl_param.h" |
53 | | #include "rc_frame_info_collector.h" |
54 | | #include "rc_look_ahead_params.h" |
55 | | |
56 | | #include "ihevc_defs.h" |
57 | | #include "ihevc_structs.h" |
58 | | #include "ihevc_platform_macros.h" |
59 | | #include "ihevc_deblk.h" |
60 | | #include "ihevc_itrans_recon.h" |
61 | | #include "ihevc_chroma_itrans_recon.h" |
62 | | #include "ihevc_chroma_intra_pred.h" |
63 | | #include "ihevc_intra_pred.h" |
64 | | #include "ihevc_inter_pred.h" |
65 | | #include "ihevc_mem_fns.h" |
66 | | #include "ihevc_padding.h" |
67 | | #include "ihevc_weighted_pred.h" |
68 | | #include "ihevc_sao.h" |
69 | | #include "ihevc_resi_trans.h" |
70 | | #include "ihevc_quant_iquant_ssd.h" |
71 | | #include "ihevc_cabac_tables.h" |
72 | | |
73 | | #include "ihevce_defs.h" |
74 | | #include "ihevce_lap_enc_structs.h" |
75 | | #include "ihevce_multi_thrd_structs.h" |
76 | | #include "ihevce_me_common_defs.h" |
77 | | #include "ihevce_had_satd.h" |
78 | | #include "ihevce_error_codes.h" |
79 | | #include "ihevce_bitstream.h" |
80 | | #include "ihevce_cabac.h" |
81 | | #include "ihevce_rdoq_macros.h" |
82 | | #include "ihevce_function_selector.h" |
83 | | #include "ihevce_enc_structs.h" |
84 | | #include "ihevce_entropy_structs.h" |
85 | | #include "ihevce_cmn_utils_instr_set_router.h" |
86 | | #include "ihevce_enc_loop_structs.h" |
87 | | #include "ihevce_stasino_helpers.h" |
88 | | |
89 | | /*****************************************************************************/ |
90 | | /* Function Definitions */ |
91 | | /*****************************************************************************/ |
92 | | |
93 | | /** |
94 | | ******************************************************************************* |
95 | | * |
96 | | * @brief |
97 | | * This function calculates the variance of given data set. |
98 | | * |
99 | | * @par Description: |
100 | | * This function is mainly used to find the variance of the block of pixel values. |
101 | | * The block can be rectangular also. Single pass variance calculation |
102 | | * implementation. |
103 | | * |
104 | | * @param[in] p_input |
105 | | * The input buffer to calculate the variance. |
106 | | * |
107 | | * @param[out] pi4_mean |
108 | | * Pointer ot the mean of the datset |
109 | | * |
110 | | * @param[out] pi4_variance |
111 | | * Pointer tot he variabce of the data set |
112 | | * |
113 | | * @param[in] u1_is_hbd |
114 | | * 1 if the data is in high bit depth |
115 | | * |
116 | | * @param[in] stride |
117 | | * Stride for the input buffer |
118 | | * |
119 | | * @param[in] block_height |
120 | | * height of the pixel block |
121 | | * |
122 | | * @param[in] block_width |
123 | | * width of the pixel block |
124 | | * |
125 | | * @remarks |
126 | | * None |
127 | | * |
128 | | ******************************************************************************* |
129 | | */ |
130 | | void ihevce_calc_variance( |
131 | | void *pv_input, |
132 | | WORD32 i4_stride, |
133 | | WORD32 *pi4_mean, |
134 | | UWORD32 *pu4_variance, |
135 | | UWORD8 u1_block_height, |
136 | | UWORD8 u1_block_width, |
137 | | UWORD8 u1_is_hbd, |
138 | | UWORD8 u1_disable_normalization) |
139 | 0 | { |
140 | 0 | UWORD8 *pui1_buffer; // pointer for 8 bit usecase |
141 | 0 | WORD32 i, j; |
142 | 0 | WORD32 total_elements; |
143 | |
|
144 | 0 | LWORD64 mean; |
145 | 0 | ULWORD64 variance; |
146 | 0 | ULWORD64 sum; |
147 | 0 | ULWORD64 sq_sum; |
148 | | |
149 | | /* intialisation */ |
150 | 0 | total_elements = u1_block_height * u1_block_width; |
151 | 0 | mean = 0; |
152 | 0 | variance = 0; |
153 | 0 | sum = 0; |
154 | 0 | sq_sum = 0; |
155 | | |
156 | | /* handle the case of 8/10 bit depth separately */ |
157 | 0 | if(!u1_is_hbd) |
158 | 0 | { |
159 | 0 | pui1_buffer = (UWORD8 *)pv_input; |
160 | | |
161 | | /* loop over all the values in the block */ |
162 | 0 | for(i = 0; i < u1_block_height; i++) |
163 | 0 | { |
164 | | /* loop over a row in the block */ |
165 | 0 | for(j = 0; j < u1_block_width; j++) |
166 | 0 | { |
167 | 0 | sum += pui1_buffer[i * i4_stride + j]; |
168 | 0 | sq_sum += (pui1_buffer[i * i4_stride + j] * pui1_buffer[i * i4_stride + j]); |
169 | 0 | } |
170 | 0 | } |
171 | |
|
172 | 0 | if(!u1_disable_normalization) |
173 | 0 | { |
174 | 0 | mean = sum / total_elements; |
175 | 0 | variance = |
176 | 0 | ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements)); |
177 | 0 | } |
178 | 0 | else |
179 | 0 | { |
180 | 0 | mean = sum; |
181 | 0 | variance = ((total_elements * sq_sum) - (sum * sum)); |
182 | 0 | } |
183 | 0 | } |
184 | | |
185 | | /* copy back the values to the output variables */ |
186 | 0 | *pi4_mean = mean; |
187 | 0 | *pu4_variance = variance; |
188 | 0 | } |
189 | | |
190 | | /** |
191 | | ******************************************************************************* |
192 | | * |
193 | | * @brief |
194 | | * This function calcluates the variance of given data set which is WORD16 |
195 | | * |
196 | | * @par Description: |
197 | | * This function is mainly used to find the variance of the block of pixel values. |
198 | | * Single pass variance calculation implementation. |
199 | | * |
200 | | * @param[in] pv_input |
201 | | * The input buffer to calculate the variance. |
202 | | * |
203 | | * |
204 | | * @param[in] stride |
205 | | * Stride for the input buffer |
206 | | * |
207 | | * @param[out] pi4_mean |
208 | | * Pointer ot the mean of the datset |
209 | | * |
210 | | * @param[out] pi4_variance |
211 | | * Pointer tot he variabce of the data set |
212 | | * |
213 | | * @param[in] block_height |
214 | | * height of the pixel block |
215 | | * |
216 | | * @param[in] block_width |
217 | | * width of the pixel block |
218 | | * |
219 | | * |
220 | | * @remarks |
221 | | * None |
222 | | * |
223 | | *******************************************************************************/ |
224 | | void ihevce_calc_variance_signed( |
225 | | WORD16 *pv_input, |
226 | | WORD32 i4_stride, |
227 | | WORD32 *pi4_mean, |
228 | | UWORD32 *pu4_variance, |
229 | | UWORD8 u1_block_height, |
230 | | UWORD8 u1_block_width) |
231 | 0 | { |
232 | 0 | WORD16 *pi2_buffer; // poinbter for 10 bit use case |
233 | |
|
234 | 0 | WORD32 i, j; |
235 | 0 | WORD32 total_elements; |
236 | |
|
237 | 0 | LWORD64 mean; |
238 | 0 | LWORD64 variance; |
239 | 0 | LWORD64 sum; |
240 | 0 | LWORD64 sq_sum; |
241 | | |
242 | | /* intialisation */ |
243 | 0 | total_elements = u1_block_height * u1_block_width; |
244 | 0 | mean = 0; |
245 | 0 | variance = 0; |
246 | 0 | sum = 0; |
247 | 0 | sq_sum = 0; |
248 | |
|
249 | 0 | pi2_buffer = pv_input; |
250 | |
|
251 | 0 | for(i = 0; i < u1_block_height; i++) |
252 | 0 | { |
253 | 0 | for(j = 0; j < u1_block_width; j++) |
254 | 0 | { |
255 | 0 | sum += pi2_buffer[i * i4_stride + j]; |
256 | 0 | sq_sum += (pi2_buffer[i * i4_stride + j] * pi2_buffer[i * i4_stride + j]); |
257 | 0 | } |
258 | 0 | } |
259 | |
|
260 | 0 | mean = sum; /// total_elements; |
261 | 0 | variance = ((total_elements * sq_sum) - (sum * sum)); // / (total_elements * (total_elements) ) |
262 | | |
263 | | /* copy back the values to the output variables */ |
264 | 0 | *pi4_mean = mean; |
265 | 0 | *pu4_variance = variance; |
266 | 0 | } |
267 | | |
268 | | /** |
269 | | ******************************************************************************* |
270 | | * |
271 | | * @brief |
272 | | * This function calculates the variance of a chrominance plane for 420SP data |
273 | | * |
274 | | * @par Description: |
275 | | * This function is mainly used to find the variance of the block of pixel values. |
276 | | * The block can be rectangular also. Single pass variance calculation |
277 | | * implementation. |
278 | | * |
279 | | * @param[in] p_input |
280 | | * The input buffer to calculate the variance. |
281 | | * |
282 | | * @param[in] stride |
283 | | * Stride for the input buffer |
284 | | * |
285 | | * @param[out] pi4_mean |
286 | | * Pointer ot the mean of the datset |
287 | | * |
288 | | * @param[out] pi4_variance |
289 | | * Pointer tot he variabce of the data set |
290 | | * |
291 | | * @param[in] block_height |
292 | | * height of the pixel block |
293 | | * |
294 | | * @param[in] block_width |
295 | | * width of the pixel block |
296 | | * |
297 | | * @param[in] u1_is_hbd |
298 | | * 1 if the data is in high bit depth |
299 | | * |
300 | | * @param[in] e_chroma_plane |
301 | | * is U or V |
302 | | * |
303 | | * @remarks |
304 | | * None |
305 | | * |
306 | | ******************************************************************************* |
307 | | */ |
308 | | void ihevce_calc_chroma_variance( |
309 | | void *pv_input, |
310 | | WORD32 i4_stride, |
311 | | WORD32 *pi4_mean, |
312 | | UWORD32 *pu4_variance, |
313 | | UWORD8 u1_block_height, |
314 | | UWORD8 u1_block_width, |
315 | | UWORD8 u1_is_hbd, |
316 | | CHROMA_PLANE_ID_T e_chroma_plane) |
317 | 0 | { |
318 | 0 | UWORD8 *pui1_buffer; // pointer for 8 bit usecase |
319 | 0 | WORD32 i, j; |
320 | 0 | WORD32 total_elements; |
321 | |
|
322 | 0 | LWORD64 mean; |
323 | 0 | ULWORD64 variance; |
324 | 0 | LWORD64 sum; |
325 | 0 | LWORD64 sq_sum; |
326 | | |
327 | | /* intialisation */ |
328 | 0 | total_elements = u1_block_height * u1_block_width; |
329 | 0 | mean = 0; |
330 | 0 | variance = 0; |
331 | 0 | sum = 0; |
332 | 0 | sq_sum = 0; |
333 | | |
334 | | /* handle the case of 8/10 bit depth separately */ |
335 | 0 | if(!u1_is_hbd) |
336 | 0 | { |
337 | 0 | pui1_buffer = (UWORD8 *)pv_input; |
338 | |
|
339 | 0 | pui1_buffer += e_chroma_plane; |
340 | | |
341 | | /* loop over all the values in the block */ |
342 | 0 | for(i = 0; i < u1_block_height; i++) |
343 | 0 | { |
344 | | /* loop over a row in the block */ |
345 | 0 | for(j = 0; j < u1_block_width; j++) |
346 | 0 | { |
347 | 0 | sum += pui1_buffer[i * i4_stride + j * 2]; |
348 | 0 | sq_sum += (pui1_buffer[i * i4_stride + j * 2] * pui1_buffer[i * i4_stride + j * 2]); |
349 | 0 | } |
350 | 0 | } |
351 | |
|
352 | 0 | mean = sum / total_elements; |
353 | 0 | variance = ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements)); |
354 | 0 | } |
355 | | |
356 | | /* copy back the values to the output variables */ |
357 | 0 | *pi4_mean = mean; |
358 | 0 | *pu4_variance = variance; |
359 | 0 | } |
360 | | |
361 | | LWORD64 ihevce_inject_stim_into_distortion( |
362 | | void *pv_src, |
363 | | WORD32 i4_src_stride, |
364 | | void *pv_pred, |
365 | | WORD32 i4_pred_stride, |
366 | | LWORD64 i8_distortion, |
367 | | WORD32 i4_alpha_stim_multiplier, |
368 | | UWORD8 u1_blk_size, |
369 | | UWORD8 u1_is_hbd, |
370 | | UWORD8 u1_enable_psyRDOPT, |
371 | | CHROMA_PLANE_ID_T e_chroma_plane) |
372 | 0 | { |
373 | 0 | if(!u1_enable_psyRDOPT) |
374 | 0 | { |
375 | 0 | UWORD32 u4_src_variance; |
376 | 0 | UWORD32 u4_pred_variance; |
377 | 0 | WORD32 i4_mean; |
378 | 0 | WORD32 i4_noise_term; |
379 | |
|
380 | 0 | if(NULL_PLANE == e_chroma_plane) |
381 | 0 | { |
382 | 0 | ihevce_calc_variance( |
383 | 0 | pv_src, |
384 | 0 | i4_src_stride, |
385 | 0 | &i4_mean, |
386 | 0 | &u4_src_variance, |
387 | 0 | u1_blk_size, |
388 | 0 | u1_blk_size, |
389 | 0 | u1_is_hbd, |
390 | 0 | 0); |
391 | |
|
392 | 0 | ihevce_calc_variance( |
393 | 0 | pv_pred, |
394 | 0 | i4_pred_stride, |
395 | 0 | &i4_mean, |
396 | 0 | &u4_pred_variance, |
397 | 0 | u1_blk_size, |
398 | 0 | u1_blk_size, |
399 | 0 | u1_is_hbd, |
400 | 0 | 0); |
401 | 0 | } |
402 | 0 | else |
403 | 0 | { |
404 | 0 | ihevce_calc_chroma_variance( |
405 | 0 | pv_src, |
406 | 0 | i4_src_stride, |
407 | 0 | &i4_mean, |
408 | 0 | &u4_src_variance, |
409 | 0 | u1_blk_size, |
410 | 0 | u1_blk_size, |
411 | 0 | u1_is_hbd, |
412 | 0 | e_chroma_plane); |
413 | |
|
414 | 0 | ihevce_calc_chroma_variance( |
415 | 0 | pv_pred, |
416 | 0 | i4_pred_stride, |
417 | 0 | &i4_mean, |
418 | 0 | &u4_pred_variance, |
419 | 0 | u1_blk_size, |
420 | 0 | u1_blk_size, |
421 | 0 | u1_is_hbd, |
422 | 0 | e_chroma_plane); |
423 | 0 | } |
424 | |
|
425 | 0 | i4_noise_term = |
426 | 0 | ihevce_compute_noise_term(i4_alpha_stim_multiplier, u4_src_variance, u4_pred_variance); |
427 | |
|
428 | 0 | MULTIPLY_STIM_WITH_DISTORTION(i8_distortion, i4_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT); |
429 | |
|
430 | 0 | return i8_distortion; |
431 | 0 | } |
432 | 0 | else |
433 | 0 | { |
434 | 0 | return i8_distortion; |
435 | 0 | } |
436 | 0 | } |
437 | | |
438 | | UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data( |
439 | | UWORD8 *pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size) |
440 | 0 | { |
441 | 0 | UWORD8 u1_num_noisy_children = 0; |
442 | 0 | UWORD8 u1_start_index = (u1_cu_x_pos / 8) + u1_cu_y_pos; |
443 | |
|
444 | 0 | if(8 == u1_cu_size) |
445 | 0 | { |
446 | 0 | return pu1_is_8x8Blk_noisy[u1_start_index]; |
447 | 0 | } |
448 | | |
449 | 0 | u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
450 | 0 | pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos, u1_cu_size / 2); |
451 | |
|
452 | 0 | u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
453 | 0 | pu1_is_8x8Blk_noisy, u1_cu_x_pos + (u1_cu_size / 2), u1_cu_y_pos, u1_cu_size / 2); |
454 | |
|
455 | 0 | u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
456 | 0 | pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos + (u1_cu_size / 2), u1_cu_size / 2); |
457 | |
|
458 | 0 | u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
459 | 0 | pu1_is_8x8Blk_noisy, |
460 | 0 | u1_cu_x_pos + (u1_cu_size / 2), |
461 | 0 | u1_cu_y_pos + (u1_cu_size / 2), |
462 | 0 | u1_cu_size / 2); |
463 | |
|
464 | 0 | return (u1_num_noisy_children >= 2); |
465 | 0 | } |
466 | | |
467 | | /*! |
468 | | ****************************************************************************** |
469 | | * \if Function name : ihevce_psy_rd_cost_croma \endif |
470 | | * |
471 | | * \brief |
472 | | * Calculates the psyco visual cost for RD opt. This is |
473 | | * |
474 | | * \param[in] pui4_source_satd |
475 | | * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated. |
476 | | * \param[in] *pui1_recon |
477 | | * This si the pointer to the pred data. |
478 | | * \param[in] recon_stride |
479 | | * This si the pred stride |
480 | | * \param[in] pic_type |
481 | | * Picture type. |
482 | | * \param[in] layer_id |
483 | | * Indicates the temporal layer. |
484 | | * \param[in] lambda |
485 | | * This is the weighting factor for the cost. |
486 | | * \param[in] is_hbd |
487 | | * This is the high bit depth flag which indicates if the bit depth of the pixels is 10 bit or 8 bit. |
488 | | * \param[in] sub_sampling_type |
489 | | * This is the chroma subsampling type. 11 - for 420 and 13 for 422 |
490 | | * \return |
491 | | * the cost for the psyRDopt |
492 | | * |
493 | | * \author |
494 | | * Ittiam |
495 | | * |
496 | | ***************************************************************************** |
497 | | */ |
498 | | LWORD64 ihevce_psy_rd_cost_croma( |
499 | | LWORD64 *pui4_source_satd, |
500 | | void *p_recon, |
501 | | WORD32 recon_stride_vert, |
502 | | WORD32 recond_stride_horz, |
503 | | WORD32 cu_size_luma, |
504 | | WORD32 pic_type, |
505 | | WORD32 layer_id, |
506 | | WORD32 lambda, |
507 | | WORD32 start_index, |
508 | | WORD32 is_hbd, |
509 | | WORD32 sub_sampling_type, |
510 | | ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
511 | 0 | { |
512 | | /* declare local variables to store the SATD values for the pred for the current block. */ |
513 | 0 | LWORD64 psy_rd_cost; |
514 | 0 | UWORD32 lambda_mod; |
515 | 0 | WORD32 psy_factor; |
516 | | |
517 | | /* declare local variables */ |
518 | 0 | WORD32 i; |
519 | 0 | WORD32 cu_total_size; |
520 | 0 | WORD32 num_comp_had_blocks; |
521 | |
|
522 | 0 | UWORD8 *pu1_l0_block; |
523 | 0 | UWORD8 *pu1_l0_block_prev; |
524 | 0 | UWORD8 *pu1_recon; |
525 | 0 | WORD32 ht_offset; |
526 | 0 | WORD32 wd_offset; |
527 | 0 | WORD32 cu_ht; |
528 | 0 | WORD32 cu_wd; |
529 | |
|
530 | 0 | WORD32 num_horz_blocks; |
531 | |
|
532 | 0 | WORD16 pi2_residue_had[64]; |
533 | | /* this is used as a buffer with all values equal to 0. This is emulate the case with |
534 | | pred being zero in HAD fucntion */ |
535 | 0 | UWORD8 ai1_zeros_buffer[64]; |
536 | |
|
537 | 0 | WORD32 had_block_size; |
538 | 0 | LWORD64 source_satd; // to hold source for current 8x8 block |
539 | 0 | LWORD64 recon_satd; // holds the current recon 8x8 satd |
540 | |
|
541 | 0 | WORD32 index_for_src_satd; |
542 | |
|
543 | 0 | (void)recond_stride_horz; |
544 | 0 | (void)pic_type; |
545 | 0 | (void)layer_id; |
546 | 0 | if(!is_hbd) |
547 | 0 | { |
548 | 0 | pu1_recon = (UWORD8 *)p_recon; |
549 | 0 | } |
550 | | |
551 | | /**** initialize the variables ****/ |
552 | 0 | had_block_size = 4; |
553 | |
|
554 | 0 | if(sub_sampling_type == 1) // 420 |
555 | 0 | { |
556 | 0 | cu_ht = cu_size_luma / 2; |
557 | 0 | cu_wd = cu_size_luma / 2; |
558 | 0 | } |
559 | 0 | else |
560 | 0 | { |
561 | 0 | cu_ht = cu_size_luma; |
562 | 0 | cu_wd = cu_size_luma / 2; |
563 | 0 | } |
564 | |
|
565 | 0 | num_horz_blocks = 2 * cu_wd / had_block_size; //ctb_width / had_block_size; |
566 | 0 | ht_offset = -had_block_size; |
567 | 0 | wd_offset = 0; //-had_block_size; |
568 | |
|
569 | 0 | cu_total_size = cu_ht * cu_wd; |
570 | 0 | num_comp_had_blocks = 2 * cu_total_size / (had_block_size * had_block_size); |
571 | |
|
572 | 0 | index_for_src_satd = start_index; |
573 | |
|
574 | 0 | for(i = 0; i < 64; i++) |
575 | 0 | { |
576 | 0 | ai1_zeros_buffer[i] = 0; |
577 | 0 | } |
578 | |
|
579 | 0 | psy_factor = PSY_STRENGTH_CHROMA; |
580 | 0 | psy_rd_cost = 0; |
581 | 0 | lambda_mod = lambda * psy_factor; |
582 | | |
583 | | /************************************************************/ |
584 | | /* loop over for every 4x4 blocks in the CU for Cb */ |
585 | 0 | for(i = 0; i < num_comp_had_blocks; i++) |
586 | 0 | { |
587 | 0 | if(i % num_horz_blocks == 0) |
588 | 0 | { |
589 | 0 | wd_offset = -had_block_size; |
590 | 0 | ht_offset += had_block_size; |
591 | 0 | } |
592 | 0 | wd_offset += had_block_size; |
593 | | |
594 | | /* source satd for the current 8x8 block */ |
595 | 0 | source_satd = pui4_source_satd[index_for_src_satd]; |
596 | |
|
597 | 0 | if(i % 2 != 0) |
598 | 0 | { |
599 | 0 | if(!is_hbd) |
600 | 0 | { |
601 | 0 | pu1_l0_block = pu1_l0_block_prev + 1; |
602 | 0 | } |
603 | 0 | } |
604 | 0 | else |
605 | 0 | { |
606 | 0 | if(!is_hbd) |
607 | 0 | { |
608 | | /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ |
609 | 0 | pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset; |
610 | 0 | pu1_l0_block_prev = pu1_l0_block; |
611 | 0 | } |
612 | 0 | } |
613 | |
|
614 | 0 | if(had_block_size == 4) |
615 | 0 | { |
616 | 0 | if(!is_hbd) |
617 | 0 | { |
618 | 0 | recon_satd = ps_cmn_utils_optimised_function_list->pf_chroma_AC_HAD_4x4_8bit( |
619 | 0 | pu1_l0_block, |
620 | 0 | recon_stride_vert, |
621 | 0 | ai1_zeros_buffer, |
622 | 0 | had_block_size, |
623 | 0 | pi2_residue_had, |
624 | 0 | had_block_size); |
625 | 0 | } |
626 | | |
627 | | /* get the additional cost function based on the absolute SATD diff of source and recon. */ |
628 | 0 | psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd)); |
629 | |
|
630 | 0 | index_for_src_satd++; |
631 | |
|
632 | 0 | if((i % num_horz_blocks) == (num_horz_blocks - 1)) |
633 | 0 | { |
634 | 0 | index_for_src_satd -= num_horz_blocks; |
635 | 0 | index_for_src_satd += |
636 | 0 | (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */ |
637 | 0 | } |
638 | |
|
639 | 0 | } // if had block size ==4 |
640 | 0 | } // for loop for all 4x4 block in the cu |
641 | |
|
642 | 0 | psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH_CHROMA + LAMBDA_Q_SHIFT); |
643 | | /* reutrn the additional cost for the psy RD opt */ |
644 | 0 | return (psy_rd_cost); |
645 | 0 | } |
646 | | |
647 | | /*! |
648 | | ****************************************************************************** |
649 | | * \if Function name : ihevce_psy_rd_cost \endif |
650 | | * |
651 | | * \brief |
652 | | * Calculates the psyco visual cost for RD opt. This is |
653 | | * |
654 | | * \param[in] pui4_source_satd |
655 | | * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated. |
656 | | * \param[in] *pui1_recon |
657 | | * This si the pointer to the pred data. |
658 | | * \param[in] recon_stride |
659 | | * This si the pred stride |
660 | | * \param[in] pic_type |
661 | | * Picture type. |
662 | | * \param[in] layer_id |
663 | | * Indicates the temporal layer. |
664 | | * \param[in] lambda |
665 | | * This is the weighting factor for the cost. |
666 | | * |
667 | | * \return |
668 | | * the cost for the psyRDopt |
669 | | * |
670 | | * \author |
671 | | * Ittiam |
672 | | * |
673 | | ***************************************************************************** |
674 | | */ |
675 | | LWORD64 ihevce_psy_rd_cost( |
676 | | LWORD64 *pui4_source_satd, |
677 | | void *pv_recon, |
678 | | WORD32 recon_stride_vert, |
679 | | WORD32 recond_stride_horz, |
680 | | WORD32 cu_size, |
681 | | WORD32 pic_type, |
682 | | WORD32 layer_id, |
683 | | WORD32 lambda, |
684 | | WORD32 start_index, |
685 | | WORD32 is_hbd, |
686 | | UWORD32 u4_psy_strength, |
687 | | ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
688 | 0 | { |
689 | | /* declare local variables to store the SATD values for the pred for the current block. */ |
690 | 0 | LWORD64 psy_rd_cost; // TODO : check if overflow is there. |
691 | 0 | UWORD32 lambda_mod; |
692 | 0 | WORD32 psy_factor; |
693 | | |
694 | | /* declare local variables */ |
695 | 0 | WORD32 i; |
696 | 0 | WORD32 cu_total_size; |
697 | 0 | WORD32 num_comp_had_blocks; |
698 | |
|
699 | 0 | UWORD8 *pu1_l0_block; |
700 | 0 | UWORD8 *pu1_recon; |
701 | |
|
702 | 0 | WORD32 ht_offset; |
703 | 0 | WORD32 wd_offset; |
704 | 0 | WORD32 cu_ht; |
705 | 0 | WORD32 cu_wd; |
706 | |
|
707 | 0 | WORD32 num_horz_blocks; |
708 | | |
709 | | //WORD16 pi2_residue_had[64]; |
710 | 0 | WORD16 pi2_residue_had_zscan[64]; |
711 | | //WORD16 pi2_residue[64]; |
712 | | /* this is used as a buffer with all values equal to 0. This is emulate the case with |
713 | | pred being zero in HAD fucntion */ |
714 | 0 | UWORD8 ai1_zeros_buffer[64]; |
715 | |
|
716 | 0 | WORD32 had_block_size; |
717 | 0 | LWORD64 source_satd; // to hold source for current 8x8 block |
718 | 0 | LWORD64 recon_satd; // holds the current recon 8x8 satd |
719 | |
|
720 | 0 | WORD32 index_for_src_satd; |
721 | |
|
722 | 0 | (void)recond_stride_horz; |
723 | 0 | (void)pic_type; |
724 | 0 | (void)layer_id; |
725 | | /***** initialize the variables ****/ |
726 | 0 | had_block_size = 8; |
727 | 0 | cu_ht = cu_size; |
728 | 0 | cu_wd = cu_size; |
729 | |
|
730 | 0 | num_horz_blocks = cu_wd / had_block_size; //ctb_width / had_block_size; |
731 | |
|
732 | 0 | ht_offset = -had_block_size; |
733 | 0 | wd_offset = 0 - had_block_size; |
734 | |
|
735 | 0 | cu_total_size = cu_ht * cu_wd; |
736 | 0 | num_comp_had_blocks = cu_total_size / (had_block_size * had_block_size); |
737 | |
|
738 | 0 | index_for_src_satd = start_index; |
739 | |
|
740 | 0 | for(i = 0; i < 64; i++) |
741 | 0 | { |
742 | 0 | ai1_zeros_buffer[i] = 0; |
743 | 0 | } |
744 | 0 | psy_factor = u4_psy_strength; //PSY_STRENGTH; |
745 | 0 | psy_rd_cost = 0; |
746 | 0 | lambda_mod = lambda * psy_factor; |
747 | |
|
748 | 0 | if(!is_hbd) |
749 | 0 | { |
750 | 0 | pu1_recon = (UWORD8 *)pv_recon; |
751 | 0 | } |
752 | | |
753 | | /**************************************************************/ |
754 | | /* loop over for every 8x8 blocks in the CU */ |
755 | 0 | for(i = 0; i < num_comp_had_blocks; i++) |
756 | 0 | { |
757 | 0 | if(i % num_horz_blocks == 0) |
758 | 0 | { |
759 | 0 | wd_offset = -had_block_size; |
760 | 0 | ht_offset += had_block_size; |
761 | 0 | } |
762 | 0 | wd_offset += had_block_size; |
763 | | |
764 | | /* source satd for the current 8x8 block */ |
765 | 0 | source_satd = pui4_source_satd[index_for_src_satd]; |
766 | |
|
767 | 0 | if(had_block_size == 8) |
768 | 0 | { |
769 | | //WORD32 index; |
770 | | //WORD32 u4_satd; |
771 | | //WORD32 dst_strd = 8; |
772 | | //WORD32 i4_frm_qstep = 0; |
773 | | //WORD32 early_cbf; |
774 | 0 | if(!is_hbd) |
775 | 0 | { |
776 | | /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ |
777 | 0 | pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset; |
778 | |
|
779 | 0 | recon_satd = ps_cmn_utils_optimised_function_list->pf_AC_HAD_8x8_8bit( |
780 | 0 | pu1_l0_block, |
781 | 0 | recon_stride_vert, |
782 | 0 | ai1_zeros_buffer, |
783 | 0 | had_block_size, |
784 | 0 | pi2_residue_had_zscan, |
785 | 0 | had_block_size); |
786 | 0 | } |
787 | | |
788 | | /* get the additional cost function based on the absolute SATD diff of source and recon. */ |
789 | 0 | psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd)); |
790 | |
|
791 | 0 | index_for_src_satd++; |
792 | 0 | if((i % num_horz_blocks) == (num_horz_blocks - 1)) |
793 | 0 | { |
794 | 0 | index_for_src_satd -= num_horz_blocks; |
795 | 0 | index_for_src_satd += |
796 | 0 | (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */ |
797 | 0 | } |
798 | 0 | } // if |
799 | 0 | } // for loop |
800 | 0 | psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH + LAMBDA_Q_SHIFT); |
801 | | |
802 | | /* reutrn the additional cost for the psy RD opt */ |
803 | 0 | return (psy_rd_cost); |
804 | 0 | } |
805 | | |
806 | | unsigned long ihevce_calc_stim_injected_variance( |
807 | | ULWORD64 *pu8_sigmaX, |
808 | | ULWORD64 *pu8_sigmaXSquared, |
809 | | ULWORD64 *u8_var, |
810 | | WORD32 i4_inv_wpred_wt, |
811 | | WORD32 i4_inv_wt_shift_val, |
812 | | WORD32 i4_wpred_log_wdc, |
813 | | WORD32 i4_part_id) |
814 | 0 | { |
815 | 0 | ULWORD64 u8_X_Square, u8_temp_var; |
816 | 0 | WORD32 i4_bits_req; |
817 | |
|
818 | 0 | const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; |
819 | |
|
820 | 0 | u8_X_Square = (pu8_sigmaX[i4_part_id] * pu8_sigmaX[i4_part_id]); |
821 | 0 | u8_temp_var = pu8_sigmaXSquared[i4_part_id] - u8_X_Square; |
822 | |
|
823 | 0 | if(i4_inv_wpred_wt != i4_default_src_wt) |
824 | 0 | { |
825 | 0 | i4_inv_wpred_wt = i4_inv_wpred_wt >> i4_inv_wt_shift_val; |
826 | |
|
827 | 0 | u8_temp_var = SHR_NEG( |
828 | 0 | (u8_temp_var * i4_inv_wpred_wt * i4_inv_wpred_wt), |
829 | 0 | (30 - (2 * i4_inv_wt_shift_val) - i4_wpred_log_wdc * 2)); |
830 | 0 | } |
831 | |
|
832 | 0 | GETRANGE64(i4_bits_req, u8_temp_var); |
833 | |
|
834 | 0 | if(i4_bits_req > 27) |
835 | 0 | { |
836 | 0 | *u8_var = u8_temp_var >> (i4_bits_req - 27); |
837 | 0 | return (i4_bits_req - 27); |
838 | 0 | } |
839 | 0 | else |
840 | 0 | { |
841 | 0 | *u8_var = u8_temp_var; |
842 | 0 | return 0; |
843 | 0 | } |
844 | 0 | } |
845 | | |
846 | | unsigned long ihevce_calc_variance_for_diff_weights( |
847 | | ULWORD64 *pu8_sigmaX, |
848 | | ULWORD64 *pu8_sigmaXSquared, |
849 | | ULWORD64 *u8_var, |
850 | | WORD32 *pi4_inv_wt, |
851 | | WORD32 *pi4_inv_wt_shift_val, |
852 | | pu_result_t *ps_result, |
853 | | WORD32 i4_wpred_log_wdc, |
854 | | PART_ID_T *pe_part_id, |
855 | | UWORD8 u1_cu_size, |
856 | | UWORD8 u1_num_parts, |
857 | | UWORD8 u1_is_for_src) |
858 | 0 | { |
859 | 0 | WORD32 i4_k; |
860 | 0 | UWORD32 u4_wd, u4_ht; |
861 | 0 | UWORD8 u1_num_base_blks; |
862 | 0 | UWORD32 u4_num_pixels_in_part; |
863 | 0 | UWORD8 u1_index; |
864 | 0 | WORD32 i4_bits_req; |
865 | |
|
866 | 0 | UWORD8 u1_base_blk_size = 4; |
867 | 0 | UWORD32 u4_tot_num_pixels = u1_cu_size * u1_cu_size; |
868 | 0 | ULWORD64 u8_temp_sigmaX[MAX_NUM_INTER_PARTS] = { 0, 0 }; |
869 | 0 | ULWORD64 u8_temp_sigmaXsquared[MAX_NUM_INTER_PARTS] = { 0, 0 }; |
870 | 0 | ULWORD64 u8_z; |
871 | |
|
872 | 0 | const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; |
873 | |
|
874 | 0 | for(i4_k = 0; i4_k < u1_num_parts; i4_k++) |
875 | 0 | { |
876 | 0 | u4_wd = ps_result[i4_k].pu.b4_wd + 1; |
877 | 0 | u4_ht = ps_result[i4_k].pu.b4_ht + 1; |
878 | 0 | u1_num_base_blks = u4_wd * u4_ht; |
879 | 0 | u4_num_pixels_in_part = u1_num_base_blks * u1_base_blk_size * u1_base_blk_size; |
880 | |
|
881 | 0 | if(u1_is_for_src) |
882 | 0 | { |
883 | 0 | u1_index = pe_part_id[i4_k]; |
884 | 0 | } |
885 | 0 | else |
886 | 0 | { |
887 | 0 | u1_index = i4_k; |
888 | 0 | } |
889 | |
|
890 | 0 | u8_temp_sigmaXsquared[i4_k] = pu8_sigmaXSquared[u1_index] / u4_num_pixels_in_part; |
891 | 0 | u8_temp_sigmaX[i4_k] = pu8_sigmaX[u1_index]; |
892 | |
|
893 | 0 | if(u1_is_for_src) |
894 | 0 | { |
895 | 0 | if(pi4_inv_wt[i4_k] != i4_default_src_wt) |
896 | 0 | { |
897 | 0 | pi4_inv_wt[i4_k] = pi4_inv_wt[i4_k] >> pi4_inv_wt_shift_val[i4_k]; |
898 | 0 | u8_temp_sigmaX[i4_k] = SHR_NEG( |
899 | 0 | (u8_temp_sigmaX[i4_k] * pi4_inv_wt[i4_k]), |
900 | 0 | (15 - pi4_inv_wt_shift_val[i4_k] - i4_wpred_log_wdc)); |
901 | 0 | u8_temp_sigmaXsquared[i4_k] = SHR_NEG( |
902 | 0 | (u8_temp_sigmaXsquared[i4_k] * pi4_inv_wt[i4_k] * pi4_inv_wt[i4_k]), |
903 | 0 | (30 - (2 * pi4_inv_wt_shift_val[i4_k]) - i4_wpred_log_wdc * 2)); |
904 | 0 | } |
905 | 0 | } |
906 | 0 | } |
907 | |
|
908 | 0 | u8_z = (u4_tot_num_pixels * (u8_temp_sigmaXsquared[0] + u8_temp_sigmaXsquared[1])) - |
909 | 0 | ((u8_temp_sigmaX[0] + u8_temp_sigmaX[1]) * (u8_temp_sigmaX[0] + u8_temp_sigmaX[1])); |
910 | |
|
911 | 0 | GETRANGE64(i4_bits_req, u8_z); |
912 | |
|
913 | 0 | if(i4_bits_req > 27) |
914 | 0 | { |
915 | 0 | *u8_var = u8_z >> (i4_bits_req - 27); |
916 | 0 | return (i4_bits_req - 27); |
917 | 0 | } |
918 | 0 | else |
919 | 0 | { |
920 | 0 | *u8_var = u8_z; |
921 | 0 | return 0; |
922 | 0 | } |
923 | 0 | } |