/src/libavc/encoder/svc/isvce_intra_modes_eval.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2022 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ******************************************************************************* |
23 | | * @file |
24 | | * isvce_intra_modes_eval.c |
25 | | * |
26 | | * @brief |
27 | | * This file contains definitions of routines that perform rate distortion |
28 | | * analysis on a macroblock if they are to be coded as intra. |
29 | | * |
30 | | * @author |
31 | | * ittiam |
32 | | * |
33 | | * @par List of Functions: |
34 | | * - isvce_derive_neighbor_availability_of_mbs() |
35 | | * - isvce_derive_ngbr_avbl_of_mb_partitions() |
36 | | * - isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff() |
37 | | * - isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff() |
38 | | * - isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff() |
39 | | * - isvce_evaluate_intra4x4_modes_for_least_cost_rdopton() |
40 | | * - isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff() |
41 | | * - isvce_evaluate_intra16x16_modes() |
42 | | * - isvce_evaluate_intra4x4_modes() |
43 | | * - isvce_evaluate_intra_chroma_modes() |
44 | | * |
45 | | * @remarks |
46 | | * None |
47 | | * |
48 | | ******************************************************************************* |
49 | | */ |
50 | | |
51 | | /*****************************************************************************/ |
52 | | /* File Includes */ |
53 | | /*****************************************************************************/ |
54 | | |
55 | | /* System include files */ |
56 | | #include <stdio.h> |
57 | | #include <string.h> |
58 | | #include <limits.h> |
59 | | #include <assert.h> |
60 | | |
61 | | /* User include files */ |
62 | | #include "ih264e_config.h" |
63 | | #include "ih264_typedefs.h" |
64 | | #include "iv2.h" |
65 | | #include "ive2.h" |
66 | | #include "ih264_debug.h" |
67 | | #include "isvc_defs.h" |
68 | | #include "isvc_macros.h" |
69 | | #include "ih264_intra_pred_filters.h" |
70 | | #include "isvc_structs.h" |
71 | | #include "isvc_common_tables.h" |
72 | | #include "isvc_trans_quant_itrans_iquant.h" |
73 | | #include "isvc_inter_pred_filters.h" |
74 | | #include "isvc_mem_fns.h" |
75 | | #include "ih264_padding.h" |
76 | | #include "ih264_size_defs.h" |
77 | | #include "ih264_deblk_edge_filters.h" |
78 | | #include "isvc_cabac_tables.h" |
79 | | #include "isvce_defs.h" |
80 | | #include "ime_distortion_metrics.h" |
81 | | #include "ih264e_error.h" |
82 | | #include "ih264e_bitstream.h" |
83 | | #include "ime_defs.h" |
84 | | #include "ime_structs.h" |
85 | | #include "irc_cntrl_param.h" |
86 | | #include "irc_frame_info_collector.h" |
87 | | #include "isvce_rate_control.h" |
88 | | #include "isvce_cabac_structs.h" |
89 | | #include "isvce_structs.h" |
90 | | #include "ih264e_intra_modes_eval.h" |
91 | | #include "isvce_globals.h" |
92 | | #include "ime_platform_macros.h" |
93 | | |
94 | | /*****************************************************************************/ |
95 | | /* Function Definitions */ |
96 | | /*****************************************************************************/ |
97 | | |
98 | | /** |
99 | | ****************************************************************************** |
100 | | * |
101 | | * @brief |
102 | | * derivation process for subblock/partition availability |
103 | | * |
104 | | * @par Description |
105 | | * Calculates the availability of the left, top, topright and topleft subblock |
106 | | * or partitions. |
107 | | * |
108 | | * @param[in] ps_ngbr_avbl |
109 | | * pointer to the neighbor availability flags of the current macroblock |
110 | | * |
111 | | * @param[in] i1_pel_pos_x |
112 | | * column position of the pel wrt the current block |
113 | | * |
114 | | * @param[in] i1_pel_pos_y |
115 | | * row position of the pel wrt the current block |
116 | | * |
117 | | * @remarks Assumptions: before calling this function it is assumed that |
118 | | * the neighbor availability of the current macroblock is already derived. |
119 | | * Based on table 6-3 of H264 specification |
120 | | * |
121 | | * @return availability status (yes or no) |
122 | | * |
123 | | ****************************************************************************** |
124 | | */ |
125 | | UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, WORD8 i1_pel_pos_x, |
126 | | WORD8 i1_pel_pos_y) |
127 | 0 | { |
128 | 0 | UWORD8 u1_neighbor_avail = 0; |
129 | | |
130 | | /**********************************************************************/ |
131 | | /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */ |
132 | | /* various columns of a macroblock */ |
133 | | /* */ |
134 | | /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */ |
135 | | /* various rows of a macroblock */ |
136 | | /* */ |
137 | | /* other values of i1_pel_pos_x & i1_pel_pos_y represent elements */ |
138 | | /* outside the bounds of an mb, i.e., its neighbors. */ |
139 | | /**********************************************************************/ |
140 | 0 | if(i1_pel_pos_x < 0) |
141 | 0 | { /* column(-1) */ |
142 | 0 | if(i1_pel_pos_y < 0) |
143 | 0 | { /* row(-1) */ |
144 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */ |
145 | 0 | } |
146 | 0 | else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) |
147 | 0 | { /* all rows of a macroblock */ |
148 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */ |
149 | 0 | } |
150 | 0 | else /* if (i1_pel_pos_y >= 16) */ |
151 | 0 | { /* rows(+16) */ |
152 | 0 | u1_neighbor_avail = 0; /* bottom left neighbor: always reported unavailable */ |
153 | 0 | } |
154 | 0 | } |
155 | 0 | else if(i1_pel_pos_x >= 0 && i1_pel_pos_x < 16) |
156 | 0 | { /* all columns of a macroblock */ |
157 | 0 | if(i1_pel_pos_y < 0) |
158 | 0 | { /* row(-1) */ |
159 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */ |
160 | 0 | } |
161 | 0 | else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) |
162 | 0 | { /* all rows of a macroblock */ |
163 | 0 | u1_neighbor_avail = 1; /* position lies inside the current mb */ |
164 | | /* availability of the partition is dependent on the position of the |
165 | | * partition inside the mb */ |
166 | | /* NOTE: 1 is reported for every in-mb position; any position dependent |
167 | | * correction has to be done elsewhere and is not done here */ |
168 | 0 | } |
169 | 0 | else /* if (i1_pel_pos_y >= 16) */ |
170 | 0 | { /* rows(+16) */ |
171 | 0 | u1_neighbor_avail = 0; /* bottom neighbor: always reported unavailable */ |
172 | 0 | } |
173 | 0 | } |
174 | 0 | else if(i1_pel_pos_x >= 16) |
175 | 0 | { /* column(+16) */ |
176 | 0 | if(i1_pel_pos_y < 0) |
177 | 0 | { /* row(-1) */ |
178 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */ |
179 | 0 | } |
180 | 0 | else /* if (i1_pel_pos_y >= 0) */ |
181 | 0 | { /* all other rows */ |
182 | 0 | u1_neighbor_avail = 0; /* right & bottom right neighbors: always reported unavailable */ |
183 | 0 | } |
184 | 0 | } |
185 | |
|
186 | 0 | return u1_neighbor_avail; |
187 | 0 | } |
188 | | |
189 | | /** |
190 | | ****************************************************************************** |
191 | | * |
192 | | * @brief |
193 | | * evaluate best intra 16x16 mode (rate distortion opt off) |
194 | | * |
195 | | * @par Description |
196 | | * This function evaluates all the possible intra 16x16 modes and finds the mode |
197 | | * that best represents the macro-block (least distortion) and occupies fewer |
198 | | * bits in the bit-stream. |
199 | | * |
200 | | * @param[in] ps_proc |
201 | | * pointer to process context (handle) |
202 | | * |
203 | | * @remarks |
204 | | * Ideally the cost of encoding a macroblock is calculated as |
205 | | * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the |
206 | | * input block and the reconstructed block and rate is the number of bits taken |
207 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
208 | | * exactly point to the total number of bits it takes, rather it points to |
209 | | * header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp |
210 | | * bits and residual bits fall in to texture bits, the number of bits taken to |
211 | | * encode mbtype is considered as rate when we compute cost. Further we will |
212 | | * approximate the distortion as the deviation b/w input and the predicted block |
213 | | * as opposed to input and reconstructed block. |
214 | | * |
215 | | * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock, |
216 | | * the SAD and cost are one and the same. |
217 | | * |
218 | | * @return none |
219 | | * |
220 | | ****************************************************************************** |
221 | | */ |
222 | | |
223 | | void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) |
224 | 9.61M | { |
225 | | /* Codec Context */ |
226 | 9.61M | isvce_codec_t *ps_codec = ps_proc->ps_codec; |
227 | 9.61M | isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; |
228 | 9.61M | mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; |
229 | | |
230 | | /* SAD(distortion metric) of the 16x16 macroblock */ |
231 | 9.61M | WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX; |
232 | | |
233 | | /* lambda */ |
234 | 9.61M | UWORD32 u4_lambda = ps_proc->u4_lambda; |
235 | | |
236 | | /* cost = distortion + lambda*rate */ |
237 | 9.61M | WORD32 i4_mb_cost = INT_MAX, i4_mb_cost_least = INT_MAX; |
238 | | |
239 | | /* intra mode */ |
240 | 9.61M | UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16; |
241 | | |
242 | | /* neighbor pels for intra prediction */ |
243 | 9.61M | UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels; |
244 | | |
245 | | /* neighbor availability */ |
246 | 9.61M | WORD32 i4_ngbr_avbl; |
247 | | |
248 | | /* pointer to src macro block */ |
249 | 9.61M | UWORD8 *pu1_curr_mb = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data; |
250 | 9.61M | UWORD8 *pu1_ref_mb = ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data; |
251 | | |
252 | | /* pointer to prediction macro block */ |
253 | 9.61M | UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16; |
254 | 9.61M | UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane; |
255 | | |
256 | | /* strides */ |
257 | 9.61M | WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; |
258 | 9.61M | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
259 | 9.61M | WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride; |
260 | | |
261 | | /* pointers to the left, top and topleft neighbor pels in the recon buffer */ |
262 | 9.61M | UWORD8 *pu1_mb_a = pu1_ref_mb - 1; |
263 | 9.61M | UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd; |
264 | 9.61M | UWORD8 *pu1_mb_d = pu1_mb_b - 1; |
265 | 9.61M | UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; |
266 | | /* valid intra modes map */ |
267 | 9.61M | UWORD32 u4_valid_intra_modes; |
268 | | |
269 | | /* lut of valid intra mode bitmaps, indexed by i4_ngbr_avbl */ |
270 | 9.61M | const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15}; |
271 | | |
272 | 9.61M | UWORD32 i, u4_enable_fast_sad = 0, offset = 0; |
273 | 9.61M | isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; |
274 | 9.61M | UWORD32 u4_constrained_intra_pred = |
275 | 9.61M | ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; |
276 | | |
277 | 9.61M | if(ps_proc->i4_slice_type != ISLICE) |
278 | 1.00M | { |
279 | | /* Offset for MBtype */ |
280 | 18.4E | offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23; |
281 | 1.00M | u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad; |
282 | 1.00M | } |
283 | | |
284 | | /* locating neighbors that are available for prediction */ |
285 | | |
286 | | /* gather prediction pels from the neighbors; if a particular set is not |
287 | | * available it is set to zero */ |
288 | | /* left pels */ |
289 | 9.61M | u1_mb_a = |
290 | 9.61M | ((ps_proc->ps_ngbr_avbl->u1_mb_a) && |
291 | 9.61M | (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && |
292 | 1.97M | !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) |
293 | 9.31M | : 1)); |
294 | 9.61M | if(u1_mb_a) |
295 | 8.38M | { |
296 | 142M | for(i = 0; i < 16; i++) pu1_ngbr_pels_i16[16 - 1 - i] = pu1_mb_a[i * i4_rec_strd]; |
297 | 8.38M | } |
298 | 1.22M | else |
299 | 1.22M | { |
300 | 1.22M | ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16, 0, MB_SIZE); |
301 | 1.22M | } |
302 | | /* top pels */ |
303 | 9.61M | u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && |
304 | 9.61M | (u4_constrained_intra_pred |
305 | 9.07M | ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag) |
306 | 9.07M | : 1)); |
307 | 9.61M | if(u1_mb_b) |
308 | 8.23M | { |
309 | 8.23M | ps_mem_fxns->pf_mem_cpy_mul8(pu1_ngbr_pels_i16 + 16 + 1, pu1_mb_b, 16); |
310 | 8.23M | } |
311 | 1.38M | else |
312 | 1.38M | { |
313 | 1.38M | ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16 + 16 + 1, 0, MB_SIZE); |
314 | 1.38M | } |
315 | | /* topleft pels */ |
316 | 9.61M | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) && |
317 | 9.61M | (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra && |
318 | 1.77M | !ps_top_mb_syn_ele[-1].u1_base_mode_flag) |
319 | 8.79M | : 1)); |
320 | 9.61M | if(u1_mb_d) |
321 | 8.00M | { |
322 | 8.00M | pu1_ngbr_pels_i16[16] = *pu1_mb_d; |
323 | 8.00M | } |
324 | 1.60M | else |
325 | 1.60M | { |
326 | 1.60M | pu1_ngbr_pels_i16[16] = 0; |
327 | 1.60M | } |
328 | | |
329 | 9.61M | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); |
330 | 9.61M | ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl; |
331 | | |
332 | | /* set valid intra modes for evaluation */ |
333 | 9.61M | u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; |
334 | | |
335 | 9.61M | if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST || |
336 | 9.61M | ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) |
337 | 1.04M | u4_valid_intra_modes &= ~(1 << PLANE_I16x16); |
338 | | |
339 | | /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */ |
340 | 9.61M | ps_codec->pf_ih264e_evaluate_intra16x16_modes( |
341 | 9.61M | pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16, i4_src_strd, i4_pred_strd, |
342 | 9.61M | i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least, u4_valid_intra_modes); |
343 | | |
344 | | /* cost = distortion for now; the lambda*rate term is added once the best mode is known */ |
345 | 9.61M | i4_mb_cost_least = i4_mb_distortion_least; |
346 | | |
347 | 9.61M | if(((u4_valid_intra_modes >> 3) & 1) != 0) |
348 | 7.11M | { |
349 | | /* intra prediction for PLANE mode*/ |
350 | 7.11M | (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16]( |
351 | 7.11M | pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl); |
352 | | |
353 | | /* evaluate distortion between the actual blk and the estimated blk for the |
354 | | * given mode */ |
355 | 7.11M | ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad]( |
356 | 7.11M | pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, |
357 | 7.11M | &i4_mb_distortion); |
358 | | |
359 | | /* cost = distortion (rate term not included yet) */ |
360 | 7.11M | i4_mb_cost = i4_mb_distortion; |
361 | | |
362 | | /* update the least cost information if necessary */ |
363 | 7.11M | if(i4_mb_cost < i4_mb_distortion_least) |
364 | 295k | { |
365 | 295k | u4_intra_mode = PLANE_I16x16; |
366 | | |
367 | 295k | i4_mb_cost_least = i4_mb_cost; |
368 | 295k | i4_mb_distortion_least = i4_mb_distortion; |
369 | 295k | } |
370 | 7.11M | } |
371 | | |
372 | 9.61M | u4_best_intra_16x16_mode = u4_intra_mode; |
373 | | |
374 | 9.61M | DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode); |
375 | | |
376 | 9.61M | ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode; |
377 | | |
378 | | /* cost = distortion + lambda*rate */ |
379 | 9.61M | i4_mb_cost_least = |
380 | 9.61M | i4_mb_distortion_least + u4_lambda * u1_uev_codelength[offset + u4_best_intra_16x16_mode]; |
381 | | |
382 | | /* update the type of the mb if necessary */ |
383 | 9.61M | if(i4_mb_cost_least < ps_proc->i4_mb_cost) |
384 | 5.61M | { |
385 | 5.61M | ps_proc->i4_mb_cost = i4_mb_cost_least; |
386 | 5.61M | ps_proc->i4_mb_distortion = i4_mb_distortion_least; |
387 | 5.61M | ps_proc->ps_mb_info->u2_mb_type = I16x16; |
388 | 5.61M | } |
389 | 9.61M | } |
390 | | |
391 | | /** |
392 | | ****************************************************************************** |
393 | | * |
394 | | * @brief |
395 | | * evaluate best intra 8x8 mode (rate distortion opt off) |
396 | | * |
397 | | * @par Description |
398 | | * This function evaluates all the possible intra 8x8 modes and finds the mode |
399 | | * that best represents the macro-block (least distortion) and occupies fewer |
400 | | * bits in the bit-stream. |
401 | | * |
402 | | * @param[in] ps_proc |
403 | | * pointer to process context (handle) |
404 | | * |
405 | | * @remarks Ideally the cost of encoding a macroblock is calculated as |
406 | | * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the |
407 | | * input block and the reconstructed block and rate is the number of bits taken |
408 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
409 | | * exactly point to the total number of bits it takes, rather it points to |
410 | | * header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp |
411 | | * bits and residual bits fall in to texture bits, the number of bits taken to |
412 | | * encode mbtype is considered as rate when we compute cost. Further we will |
413 | | * approximate the distortion as the deviation b/w input and the predicted block |
414 | | * as opposed to input and reconstructed block. |
415 | | * |
416 | | * NOTE: TODO: This function needs to be tested |
417 | | * |
418 | | * @return none |
419 | | * |
420 | | ****************************************************************************** |
421 | | */ |
422 | | void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) |
423 | 0 | { |
424 | | /* Codec Context */ |
425 | 0 | isvce_codec_t *ps_codec = ps_proc->ps_codec; |
426 | | |
427 | | /* SAD(distortion metric) of an 8x8 block */ |
428 | 0 | WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, |
429 | 0 | i4_total_distortion = 0; |
430 | | |
431 | | /* lambda */ |
432 | 0 | UWORD32 u4_lambda = ps_proc->u4_lambda; |
433 | | |
434 | | /* cost = distortion + lambda*rate */ |
435 | 0 | WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda; |
436 | | |
437 | | /* mode signalling cost: one bit if the mode equals the estimated mode, four bits otherwise */ |
438 | 0 | UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; |
439 | | |
440 | | /* intra mode */ |
441 | 0 | UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode; |
442 | | |
443 | | /* neighbor pels for intra prediction */ |
444 | 0 | UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels; |
445 | | |
446 | | /* pointer to curr partition */ |
447 | 0 | UWORD8 *pu1_mb_curr; |
448 | | |
449 | | /* pointer to prediction macro block */ |
450 | 0 | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
451 | | |
452 | | /* strides */ |
453 | 0 | WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; |
454 | 0 | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
455 | | |
456 | | /* neighbors left, top, top left */ |
457 | 0 | UWORD8 *pu1_mb_a; |
458 | 0 | UWORD8 *pu1_mb_b; |
459 | 0 | UWORD8 *pu1_mb_d; |
460 | | |
461 | | /* neighbor availability */ |
462 | 0 | WORD32 i4_ngbr_avbl; |
463 | 0 | block_neighbors_t s_ngbr_avbl; |
464 | | |
465 | | /* temp vars */ |
466 | 0 | UWORD32 b8, u4_pix_x, u4_pix_y; |
467 | 0 | UWORD32 u4_constrained_intra_pred = |
468 | 0 | ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; |
469 | 0 | block_neighbors_t s_ngbr_avbl_MB; |
470 | | |
471 | | /* ngbr mb syntax information */ |
472 | 0 | UWORD8 *pu1_top_mb_intra_modes = |
473 | 0 | ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes; |
474 | 0 | isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; |
475 | 0 | isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1; |
476 | | /* valid intra modes map */ |
477 | 0 | UWORD32 u4_valid_intra_modes; |
478 | |
|
479 | 0 | if(ps_proc->ps_ngbr_avbl->u1_mb_c) |
480 | 0 | { |
481 | 0 | ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1; |
482 | 0 | } |
483 | | /* left pels */ |
484 | 0 | s_ngbr_avbl_MB.u1_mb_a = |
485 | 0 | ((ps_proc->ps_ngbr_avbl->u1_mb_a) && |
486 | 0 | (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && |
487 | 0 | !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) |
488 | 0 | : 1)); |
489 | | |
490 | | /* top pels */ |
491 | 0 | s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && |
492 | 0 | (u4_constrained_intra_pred ? (ps_top_mb_syn_ele->u1_is_intra && |
493 | 0 | !ps_top_mb_syn_ele->u1_base_mode_flag) |
494 | 0 | : 1)); |
495 | | |
496 | | /* topleft pels */ |
497 | 0 | s_ngbr_avbl_MB.u1_mb_d = |
498 | 0 | ((ps_proc->ps_ngbr_avbl->u1_mb_d) && |
499 | 0 | (u4_constrained_intra_pred |
500 | 0 | ? (ps_top_mb_syn_ele[-1].u1_is_intra && !ps_top_mb_syn_ele[-1].u1_base_mode_flag) |
501 | 0 | : 1)); |
502 | | |
503 | | /* top right */ |
504 | 0 | s_ngbr_avbl_MB.u1_mb_c = |
505 | 0 | ((ps_proc->ps_ngbr_avbl->u1_mb_c) && |
506 | 0 | (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra && |
507 | 0 | !ps_top_right_mb_syn_ele->u1_base_mode_flag) |
508 | 0 | : 1)); |
509 | |
|
510 | 0 | for(b8 = 0; b8 < 4; b8++) |
511 | 0 | { |
512 | 0 | u4_pix_x = (b8 & 0x01) << 3; |
513 | 0 | u4_pix_y = (b8 >> 1) << 3; |
514 | |
|
515 | 0 | pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + |
516 | 0 | u4_pix_x + (u4_pix_y * i4_src_strd); |
517 | | /* when rdopt is off, we use the input as reference for constructing |
518 | | * prediction buffer */ |
519 | | /* as opposed to using the recon pels. (open loop intra prediction) */ |
520 | 0 | pu1_mb_a = pu1_mb_curr - 1; /* left neighbor pel of the partition */ |
521 | 0 | pu1_mb_b = pu1_mb_curr - i4_src_strd; /* top neighbor row of the partition */ |
522 | 0 | pu1_mb_d = pu1_mb_b - 1; /* top left neighbor pel of the partition */ |
523 | | |
524 | | /* locating neighbors that are available for prediction */ |
525 | | /* TODO : update the neighbor availability information basing on constrained |
526 | | * intra pred information */ |
527 | | /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be |
528 | | * split in to distinct routines */ |
529 | | /* basing on neighbors available and hence evade the computation of neighbor |
530 | | * availability totally. */ |
531 | 0 | s_ngbr_avbl.u1_mb_a = isvce_derive_ngbr_avbl_of_mb_partitions( |
532 | 0 | &s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */ |
533 | 0 | s_ngbr_avbl.u1_mb_b = isvce_derive_ngbr_avbl_of_mb_partitions( |
534 | 0 | &s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */ |
535 | 0 | s_ngbr_avbl.u1_mb_c = isvce_derive_ngbr_avbl_of_mb_partitions( |
536 | 0 | &s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */ |
537 | 0 | s_ngbr_avbl.u1_mb_d = isvce_derive_ngbr_avbl_of_mb_partitions( |
538 | 0 | &s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */ |
539 | | |
540 | | /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * |
541 | | * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * |
542 | | * TOP_LEFT_MB_AVAILABLE_MASK. NOTE(review): the sum below also adds (u1_mb_a << 4), which this formula does not mention - confirm intent */ |
543 | 0 | i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + |
544 | 0 | (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) + |
545 | 0 | (s_ngbr_avbl.u1_mb_a << 4); |
546 | | /* if top partition is available and top right is not available for intra |
547 | | * prediction, then */ |
548 | | /* pad top right samples using top sample and make top right also available |
549 | | */ |
550 | | /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + |
551 | | * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | |
552 | | * s_ngbr_avbl.u1_mb_c) << 3); */ |
553 | 0 | ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl; |
554 | |
|
555 | 0 | ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8, |
556 | 0 | i4_src_strd, i4_ngbr_avbl); |
557 | |
|
558 | 0 | i4_partition_cost_least = INT_MAX; |
559 | | /* start with all nine modes valid, then drop those whose neighbors are missing */ |
560 | 0 | u4_valid_intra_modes = 0x1ff; |
561 | |
|
562 | 0 | if(!s_ngbr_avbl.u1_mb_b) |
563 | 0 | { |
564 | 0 | u4_valid_intra_modes &= ~(1 << VERT_I4x4); |
565 | 0 | u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4); |
566 | 0 | u4_valid_intra_modes &= ~(1 << VERT_L_I4x4); |
567 | 0 | } |
568 | 0 | if(!s_ngbr_avbl.u1_mb_a) |
569 | 0 | { |
570 | 0 | u4_valid_intra_modes &= ~(1 << HORZ_I4x4); |
571 | 0 | u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4); |
572 | 0 | } |
573 | 0 | if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d) |
574 | 0 | { |
575 | 0 | u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4); |
576 | 0 | u4_valid_intra_modes &= ~(1 << VERT_R_I4x4); |
577 | 0 | u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4); |
578 | 0 | } |
579 | | |
580 | | /* estimate the intra 8x8 mode for the current partition (for evaluating |
581 | | * cost) */ |
582 | 0 | if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) |
583 | 0 | { |
584 | 0 | u4_estimated_intra_8x8_mode = DC_I8x8; |
585 | 0 | } |
586 | 0 | else |
587 | 0 | { |
588 | 0 | UWORD32 u4_left_intra_8x8_mode = DC_I8x8; |
589 | 0 | UWORD32 u4_top_intra_8x8_mode = DC_I8x8; |
590 | |
|
591 | 0 | if(u4_pix_x == 0) |
592 | 0 | { |
593 | 0 | if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8) |
594 | 0 | { |
595 | 0 | u4_left_intra_8x8_mode = |
596 | 0 | ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1]; |
597 | 0 | } |
598 | 0 | else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4) |
599 | 0 | { |
600 | 0 | u4_left_intra_8x8_mode = ps_proc->s_nbr_info.ps_left_mb_intra_modes |
601 | 0 | ->au1_intra_modes[(b8 + 1) * 4 + 2]; |
602 | 0 | } |
603 | 0 | } |
604 | 0 | else |
605 | 0 | { |
606 | 0 | u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 1]; |
607 | 0 | } |
608 | |
|
609 | 0 | if(u4_pix_y == 0) |
610 | 0 | { |
611 | 0 | if(ps_top_mb_syn_ele->u2_mb_type == I8x8) |
612 | 0 | { |
613 | 0 | u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8 + 2]; |
614 | 0 | } |
615 | 0 | else if(ps_top_mb_syn_ele->u2_mb_type == I4x4) |
616 | 0 | { |
617 | 0 | u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8 + 2) * 4 + 2]; |
618 | 0 | } |
619 | 0 | } |
620 | 0 | else |
621 | 0 | { |
622 | 0 | u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 2]; |
623 | 0 | } |
624 | |
|
625 | 0 | u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode); |
626 | 0 | } |
627 | | |
628 | | /* perform intra mode 8x8 evaluation */ |
629 | 0 | for(u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; |
630 | 0 | u4_intra_mode++, u4_valid_intra_modes >>= 1) |
631 | 0 | { |
632 | 0 | if((u4_valid_intra_modes & 1) == 0) continue; |
633 | | |
634 | | /* intra prediction */ |
635 | 0 | (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, |
636 | 0 | i4_pred_strd, i4_ngbr_avbl); |
637 | | |
638 | | /* evaluate distortion between the actual blk and the estimated blk for |
639 | | * the given mode */ |
640 | 0 | ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, |
641 | 0 | i4_partition_cost_least, &i4_partition_distortion); |
642 | |
|
643 | 0 | i4_partition_cost = |
644 | 0 | i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode) |
645 | 0 | ? u4_cost_one_bit |
646 | 0 | : u4_cost_four_bits); |
647 | | |
648 | | /* update the least cost information if necessary */ |
649 | 0 | if(i4_partition_cost < i4_partition_cost_least) |
650 | 0 | { |
651 | 0 | i4_partition_cost_least = i4_partition_cost; |
652 | 0 | i4_partition_distortion_least = i4_partition_distortion; |
653 | 0 | u4_best_intra_8x8_mode = u4_intra_mode; |
654 | 0 | } |
655 | 0 | } |
656 | | /* accumulate macroblock cost and distortion */ |
657 | 0 | i4_total_cost += i4_partition_cost_least; |
658 | 0 | i4_total_distortion += i4_partition_distortion_least; |
659 | | /* mb partition mode */ |
660 | 0 | ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode; |
661 | 0 | } |
662 | | |
663 | | /* update the type of the mb if necessary */ |
664 | 0 | if(i4_total_cost < ps_proc->i4_mb_cost) |
665 | 0 | { |
666 | 0 | ps_proc->i4_mb_cost = i4_total_cost; |
667 | 0 | ps_proc->i4_mb_distortion = i4_total_distortion; |
668 | 0 | ps_proc->ps_mb_info->u2_mb_type = I8x8; |
669 | 0 | } |
670 | 0 | } |
671 | | |
672 | | /** |
673 | | ****************************************************************************** |
674 | | * |
675 | | * @brief |
676 | | * evaluate best intra 4x4 mode (rate distortion opt off) |
677 | | * |
678 | | * @par Description |
679 | | * This function evaluates all the possible intra 4x4 modes and finds the mode |
680 | | * that best represents the macro-block (least distortion) and occupies fewer |
681 | | * bits in the bit-stream. |
682 | | * |
683 | | * @param[in] ps_proc |
684 | | * pointer to process context (handle) |
685 | | * |
686 | | * @remarks |
687 | | * Ideally the cost of encoding a macroblock is calculated as |
688 | | * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the |
689 | | * input block and the reconstructed block and rate is the number of bits taken |
690 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
691 | | * exactly point to the total number of bits it takes, rather it points to |
692 | | * header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp |
693 | | * bits and residual bits fall in to texture bits, the number of bits taken to |
694 | | * encode mbtype is considered as rate when we compute cost. Further we will |
695 | | * approximate the distortion as the deviation b/w input and the predicted block |
696 | | * as opposed to input and reconstructed block. |
697 | | * |
698 | | * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, |
699 | | * 24*lambda is added to the SAD before comparison with the best SAD for |
700 | | * inter prediction. This is an empirical value to prevent using too many intra |
701 | | * blocks. |
702 | | * |
703 | | * @return none |
704 | | * |
705 | | ****************************************************************************** |
706 | | */ |
707 | | void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) |
708 | 872k | { |
709 | | /* codec context: supplies the mode evaluation routine and per-layer config */ |
710 | 872k | isvce_codec_t *ps_codec = ps_proc->ps_codec; |
711 | | |
712 | | /* distortion of the best mode of the current 4x4 block, and its sum over the MB */ |
713 | 872k | WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; |
714 | | |
715 | | /* lambda (weight of the rate term in the cost) */ |
716 | 872k | UWORD32 u4_lambda = ps_proc->u4_lambda; |
717 | | |
718 | | /* cost = distortion + lambda*rate; the MB total is seeded with (24 + 1) * lambda */ |
719 | 872k | WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; |
720 | | |
721 | | /* mode signalling cost: lambda if best mode == estimated mode, else 4 * lambda */ |
722 | 872k | UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; |
723 | | |
724 | | /* best intra mode of the current 4x4 block and the mode estimated from neighbors */ |
725 | 872k | UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; |
726 | | |
727 | | /* neighbor pels for intra prediction */ |
728 | 872k | UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; |
729 | | |
730 | | /* pointer to curr partition */ |
731 | 872k | UWORD8 *pu1_mb_curr; |
732 | | |
733 | | /* pointer to prediction macro block */ |
734 | 872k | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
735 | | |
736 | | /* strides of the luma source buffer and of the prediction buffer */ |
737 | 872k | WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; |
738 | 872k | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
739 | | |
740 | | /* neighbors left, top, top right, top left */ |
741 | 872k | UWORD8 *pu1_mb_a; |
742 | 872k | UWORD8 *pu1_mb_b; |
743 | 872k | UWORD8 *pu1_mb_c; |
744 | 872k | UWORD8 *pu1_mb_d; |
745 | | |
746 | | /* neighbor availability: bit 0 left, bit 1 top left, bit 2 top, bit 3 top right */ |
747 | 872k | WORD32 i4_ngbr_avbl; |
748 | 872k | block_neighbors_t s_ngbr_avbl; |
749 | | |
750 | | /* temp vars */ |
751 | 872k | UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; |
752 | | |
753 | | /* intra modes and mb info of the top and top right neighbor MBs */ |
754 | 872k | UWORD8 *pu1_top_mb_intra_modes = |
755 | 872k | ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes; |
756 | 872k | isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; |
757 | 872k | isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1; |
758 | | |
759 | | /* valid intra modes map, indexed by the low three availability bits */ |
760 | 872k | UWORD32 u4_valid_intra_modes; |
761 | 872k | UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; |
762 | | |
763 | 872k | UWORD32 u4_constrained_intra_pred = |
764 | 872k | ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; |
765 | 872k | UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d; |
766 | 872k | if(ps_proc->ps_ngbr_avbl->u1_mb_c) |
767 | 766k | { |
768 | 766k | ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1; |
769 | 766k | } |
770 | | /* left pels: with constrained intra pred, usable only if the MB is intra and not base mode */ |
771 | 872k | u1_mb_a = |
772 | 872k | ((ps_proc->ps_ngbr_avbl->u1_mb_a) && |
773 | 872k | (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && |
774 | 242k | !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) |
775 | 817k | : 1)); |
776 | | |
777 | | /* top pels (same rule as left) */ |
778 | 872k | u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && |
779 | 872k | (u4_constrained_intra_pred |
780 | 789k | ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag) |
781 | 789k | : 1)); |
782 | | |
783 | | /* topleft pels (same rule as left) */ |
784 | 872k | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) && |
785 | 872k | (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra && |
786 | 210k | !ps_top_mb_syn_ele[-1].u1_base_mode_flag) |
787 | 750k | : 1)); |
788 | | |
789 | | /* top right pels (same rule as left) */ |
790 | 872k | u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) && |
791 | 872k | (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra && |
792 | 216k | !ps_top_right_mb_syn_ele->u1_base_mode_flag) |
793 | 765k | : 1)); |
794 | | |
795 | 872k | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); |
796 | 872k | memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); |
797 | | |
798 | 4.18M | for(b8 = 0; b8 < 4; b8++) |
799 | 3.30M | { |
800 | 3.30M | u4_blk_x = (b8 & 0x01) << 3; |
801 | 3.30M | u4_blk_y = (b8 >> 1) << 3; |
802 | 15.1M | for(b4 = 0; b4 < 4; b4++) |
803 | 11.8M | { |
804 | 11.8M | u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); |
805 | 11.8M | u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); |
806 | | |
807 | 11.8M | pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + |
808 | 11.8M | u4_pix_x + (u4_pix_y * i4_src_strd); |
809 | | /* when rdopt is off, we use the input as reference for constructing |
810 | | * prediction buffer */ |
811 | | /* as opposed to using the recon pels. (open loop intra prediction) */ |
812 | 11.8M | pu1_mb_a = pu1_mb_curr - 1; /* left neighbor pels */ |
813 | 11.8M | pu1_mb_b = pu1_mb_curr - i4_src_strd; /* top neighbor pels */ |
814 | 11.8M | pu1_mb_c = pu1_mb_b + 4; /* top right neighbor pels */ |
815 | 11.8M | pu1_mb_d = pu1_mb_b - 1; /* top left neighbor pel */ |
816 | | |
817 | | /* locating neighbors that are available for prediction */ |
818 | | /* TODO : update the neighbor availability information based on |
819 | | * constrained intra pred information */ |
820 | | /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be |
821 | | * split in to distinct routines */ |
822 | | /* based on neighbors available and hence evade the computation of |
823 | | * neighbor availability totally. */ |
824 | | |
825 | 11.8M | i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; |
826 | 11.8M | s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); |
827 | 11.8M | s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; |
828 | 11.8M | s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; |
829 | 11.8M | s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; |
830 | | /* set valid intra modes for evaluation (indexed by left/top-left/top availability) */ |
831 | 11.8M | u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; |
832 | | |
833 | | /* if top partition is available and top right is not available for intra |
834 | | * prediction, then */ |
835 | | /* pad top right samples using top sample and make top right also |
836 | | * available */ |
837 | | /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + |
838 | | * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | |
839 | | * s_ngbr_avbl.u1_mb_c) << 3); */ |
840 | | |
841 | | /* gather prediction pels: [0..3] left (bottom to top), [4] top left, [5..8] top, [9..12] top right */ |
842 | 11.8M | if(s_ngbr_avbl.u1_mb_a) |
843 | 11.4M | { |
844 | 57.2M | for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_src_strd]; |
845 | 11.4M | } |
846 | 406k | else |
847 | 406k | { |
848 | 406k | memset(pu1_ngbr_pels_i4, 0, 4); |
849 | 406k | } |
850 | | |
851 | 11.8M | if(s_ngbr_avbl.u1_mb_b) |
852 | 11.5M | { |
853 | 11.5M | memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); |
854 | 11.5M | } |
855 | 315k | else |
856 | 315k | { |
857 | 315k | memset(pu1_ngbr_pels_i4 + 5, 0, 4); |
858 | 315k | } |
859 | | |
860 | 11.8M | if(s_ngbr_avbl.u1_mb_d) |
861 | 11.2M | pu1_ngbr_pels_i4[4] = *pu1_mb_d; |
862 | 631k | else |
863 | 631k | pu1_ngbr_pels_i4[4] = 0; |
864 | | |
865 | 11.8M | if(s_ngbr_avbl.u1_mb_c) |
866 | 8.11M | { |
867 | 8.11M | memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); |
868 | 8.11M | } |
869 | 3.73M | else if(s_ngbr_avbl.u1_mb_b) |
870 | 4.14M | { |
871 | 4.14M | memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); |
872 | 4.14M | s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; |
873 | 4.14M | } |
874 | | |
875 | 11.8M | i4_partition_cost_least = INT_MAX; |
876 | | |
877 | | /* predict the intra 4x4 mode for the current partition (for evaluating |
878 | | * cost) */ |
879 | 11.8M | if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) |
880 | 952k | { |
881 | 952k | u4_estimated_intra_4x4_mode = DC_I4x4; |
882 | 952k | } |
883 | 10.8M | else |
884 | 10.8M | { |
885 | 10.8M | UWORD32 u4_left_intra_4x4_mode = DC_I4x4; |
886 | 10.8M | UWORD32 u4_top_intra_4x4_mode = DC_I4x4; |
887 | | |
888 | 10.8M | if(u4_pix_x == 0) |
889 | 2.85M | { |
890 | 2.85M | if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4) |
891 | 2.05M | { |
892 | 2.05M | u4_left_intra_4x4_mode = |
893 | 2.05M | ps_proc->s_nbr_info.ps_left_mb_intra_modes |
894 | 2.05M | ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]]; |
895 | 2.05M | } |
896 | 796k | else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8) |
897 | 0 | { |
898 | 0 | u4_left_intra_4x4_mode = |
899 | 0 | ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1]; |
900 | 0 | } |
901 | 2.85M | } |
902 | 8.04M | else |
903 | 8.04M | { |
904 | 8.04M | u4_left_intra_4x4_mode = |
905 | 8.04M | ps_proc->au1_intra_luma_mb_4x4_modes |
906 | 8.04M | [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]]; |
907 | 8.04M | } |
908 | | |
909 | 10.8M | if(u4_pix_y == 0) |
910 | 2.77M | { |
911 | 2.77M | if(ps_top_mb_syn_ele->u2_mb_type == I4x4) |
912 | 1.99M | { |
913 | 1.99M | u4_top_intra_4x4_mode = |
914 | 1.99M | pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]]; |
915 | 1.99M | } |
916 | 778k | else if(ps_top_mb_syn_ele->u2_mb_type == I8x8) |
917 | 0 | { |
918 | 0 | u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; |
919 | 0 | } |
920 | 2.77M | } |
921 | 8.11M | else |
922 | 8.11M | { |
923 | 8.11M | u4_top_intra_4x4_mode = |
924 | 8.11M | ps_proc->au1_intra_luma_mb_4x4_modes |
925 | 8.11M | [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]]; |
926 | 8.11M | } |
927 | | |
928 | 10.8M | u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); |
929 | 10.8M | } |
930 | | |
931 | 11.8M | ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = |
932 | 11.8M | u4_estimated_intra_4x4_mode; |
933 | | |
934 | | /* mode evaluation and prediction */ |
935 | 11.8M | ps_codec->pf_ih264e_evaluate_intra_4x4_modes( |
936 | 11.8M | pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl, |
937 | 11.8M | &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda, |
938 | 11.8M | u4_estimated_intra_4x4_mode); |
939 | | |
940 | 11.8M | i4_partition_distortion_least = |
941 | 11.8M | i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) |
942 | 11.8M | ? u4_cost_one_bit |
943 | 11.8M | : u4_cost_four_bits); |
944 | | |
945 | 11.8M | DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, |
946 | 11.8M | u4_best_intra_4x4_mode); |
947 | | /* accumulate macroblock distortion and cost */ |
948 | 11.8M | i4_total_distortion += i4_partition_distortion_least; |
949 | 11.8M | i4_total_cost += i4_partition_cost_least; |
950 | | /* record the best mode of this 4x4 block; later blocks read it for mode estimation */ |
951 | 11.8M | ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; |
952 | 11.8M | } |
953 | 3.30M | } |
954 | | |
955 | | /* select I4x4 for this MB if its cost beats the best cost recorded so far */ |
956 | 872k | if(i4_total_cost < ps_proc->i4_mb_cost) |
957 | 582k | { |
958 | 582k | ps_proc->i4_mb_cost = i4_total_cost; |
959 | 582k | ps_proc->i4_mb_distortion = i4_total_distortion; |
960 | 582k | ps_proc->ps_mb_info->u2_mb_type = I4x4; |
961 | 582k | } |
962 | 872k | } |
963 | | |
964 | | /** |
965 | | ****************************************************************************** |
966 | | * |
967 | | * @brief evaluate best intra 4x4 mode (rate distortion opt on) |
968 | | * |
969 | | * @par Description |
970 | | * For each 4x4 block this function picks the least-cost intra 4x4 mode using |
971 | | * neighbor pels taken from reconstructed data, then transforms, quantizes and |
972 | | * reconstructs the block so that later blocks of the MB can predict from it. |
973 | | * |
974 | | * @param[in] ps_proc |
975 | | * pointer to the process context |
976 | | * |
977 | | * @remarks |
978 | | * Ideally the cost of encoding a macroblock is calculated as |
979 | | * (distortion + lambda*rate), where distortion is SAD/SATD,... between the |
980 | | * input block and the reconstructed block and rate is the number of bits taken |
981 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
982 | | * exactly point to the total number of bits it takes, rather it points to |
983 | | * header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp |
984 | | * bits and residual bits fall in to texture bits, the number of bits taken to |
985 | | * encode mbtype is considered as rate when we compute cost. Further we will |
986 | | * approximate the distortion as the deviation b/w input and the predicted block |
987 | | * as opposed to input and reconstructed block. |
988 | | * |
989 | | * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, |
990 | | * 24*lambda is added to the SAD before comparison with the best SAD for |
991 | | * inter prediction. This is an empirical value to prevent using too many intra |
992 | | * blocks. |
993 | | * |
994 | | * @return none |
995 | | * |
996 | | ****************************************************************************** |
997 | | */ |
998 | | void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc) |
999 | 329k | { |
1000 | 329k | block_neighbors_t s_ngbr_avbl; |
1001 | 329k | buffer_container_t s_src; |
1002 | 329k | buffer_container_t s_pred; |
1003 | 329k | buffer_container_t s_recon; |
1004 | 329k | buffer_container_t s_quant_coeffs; |
1005 | 329k | buffer_container_t s_res_pred; |
1006 | | |
1007 | | /* neighbors left, top, top right, top left */ |
1008 | 329k | UWORD8 *pu1_mb_a; |
1009 | 329k | UWORD8 *pu1_mb_b; |
1010 | 329k | UWORD8 *pu1_mb_c; |
1011 | 329k | UWORD8 *pu1_mb_d; |
1012 | 329k | UWORD8 *pu1_mb_curr; |
1013 | 329k | UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top; |
1014 | 329k | UWORD8 *pu1_ref_mb_intra_4x4; |
1015 | 329k | WORD32 i4_ref_strd_left, i4_ref_strd_top; |
1016 | 329k | WORD32 i4_ngbr_avbl; |
1017 | 329k | UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; |
1018 | | /* valid intra modes map, indexed by the low three availability bits */ |
1019 | 329k | UWORD32 u4_valid_intra_modes; |
1020 | | /* Dummy variable for 4x4 trans function */ |
1021 | 329k | WORD16 i2_dc_dummy; |
1022 | 329k | UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d; |
1023 | | |
1024 | 329k | isvce_codec_t *ps_codec = ps_proc->ps_codec; |
1025 | 329k | quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; |
1026 | 329k | isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; |
1027 | 329k | isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1; |
1028 | 329k | isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1; |
1029 | 329k | isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; |
1030 | 329k | enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; |
1031 | 329k | resi_trans_quant_constants_t s_resi_trans_quant_constants = { |
1032 | 329k | .pu2_scale_matrix = ps_qp_params->pu2_scale_mat, |
1033 | 329k | .pu2_threshold_matrix = ps_qp_params->pu2_thres_mat, |
1034 | 329k | .u4_qbits = ps_qp_params->u1_qbits, |
1035 | 329k | .u4_round_factor = ps_qp_params->u4_dead_zone}; |
1036 | 329k | iq_it_res_rec_constants_t s_iq_it_res_rec_constants = { |
1037 | 329k | .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat, |
1038 | 329k | .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat, |
1039 | 329k | .u4_qp_div_6 = ps_qp_params->u1_qp_div}; |
1040 | | |
1041 | 329k | const UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; |
1042 | 329k | WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; |
1043 | 329k | UWORD32 u4_lambda = ps_proc->u4_lambda; |
1044 | 329k | WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; |
1045 | | /* mode signalling cost: lambda if best mode == estimated mode, else 4 * lambda */ |
1046 | 329k | UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; |
1047 | 329k | UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; |
1048 | 329k | UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; |
1049 | 329k | WORD16 *pi2_quant_coeffs = ps_proc->pi2_res_buf_intra_4x4; |
1050 | 329k | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
1051 | 329k | WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; |
1052 | 329k | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
1053 | 329k | UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4; |
1054 | 329k | UWORD8 *pu1_top_mb_intra_modes = |
1055 | 329k | ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes; |
1056 | 329k | UWORD32 u4_constrained_intra_pred = |
1057 | 329k | ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; |
1058 | 329k | UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0); |
1059 | 329k | UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0); |
1060 | | |
1061 | 329k | s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y]; |
1062 | | |
1063 | | /* compute ngbr availability for sub blks */ |
1064 | 329k | if(ps_proc->ps_ngbr_avbl->u1_mb_c) |
1065 | 269k | { |
1066 | 269k | ps_top_right_mb = ps_top_mb + 1; |
1067 | 269k | } |
1068 | | |
1069 | | /* left pels: with constrained intra pred, usable only if the MB is intra and not base mode */ |
1070 | 329k | u1_mb_a = |
1071 | 329k | ((ps_proc->ps_ngbr_avbl->u1_mb_a) && |
1072 | 329k | (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && |
1073 | 207k | !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) |
1074 | 299k | : 1)); |
1075 | | |
1076 | | /* top pels (same rule as left) */ |
1077 | 329k | u1_mb_b = |
1078 | 329k | ((ps_proc->ps_ngbr_avbl->u1_mb_b) && |
1079 | 329k | (u4_constrained_intra_pred ? (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag) |
1080 | 278k | : 1)); |
1081 | | |
1082 | | /* topleft pels (same rule as left) */ |
1083 | 329k | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) && |
1084 | 329k | (u4_constrained_intra_pred |
1085 | 256k | ? (ps_top_left_mb->u1_is_intra && !ps_top_left_mb->u1_base_mode_flag) |
1086 | 256k | : 1)); |
1087 | | |
1088 | | /* top right pels (same rule as left) */ |
1089 | 329k | u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) && |
1090 | 329k | (u4_constrained_intra_pred |
1091 | 269k | ? (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag) |
1092 | 269k | : 1)); |
1093 | | |
1094 | 329k | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); |
1095 | 329k | memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); |
1096 | | |
1097 | 1.62M | for(b8 = 0; b8 < 4; b8++) |
1098 | 1.29M | { |
1099 | 1.29M | u4_blk_x = (b8 & 0x01) << 3; |
1100 | 1.29M | u4_blk_y = (b8 >> 1) << 3; |
1101 | 6.20M | for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_quant_coeffs += MB_SIZE) |
1102 | 4.91M | { |
1103 | 4.91M | u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); |
1104 | 4.91M | u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); |
1105 | | |
1106 | 4.91M | pu1_ref_mb_intra_4x4 = |
1107 | 4.91M | ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd); |
1108 | 4.91M | pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + |
1109 | 4.91M | u4_pix_x + (u4_pix_y * i4_src_strd); |
1110 | 4.91M | pu1_pred_mb = ps_proc->pu1_pred_mb + u4_pix_x + (u4_pix_y * i4_pred_strd); |
1111 | 4.91M | if(u4_pix_x == 0) |
1112 | 1.29M | { |
1113 | 1.29M | i4_ref_strd_left = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride; |
1114 | 1.29M | pu1_mb_ref_left = |
1115 | 1.29M | ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x + |
1116 | 1.29M | (u4_pix_y * i4_ref_strd_left); |
1117 | 1.29M | } |
1118 | 3.61M | else |
1119 | 3.61M | { |
1120 | 3.61M | i4_ref_strd_left = i4_pred_strd; |
1121 | 3.61M | pu1_mb_ref_left = pu1_ref_mb_intra_4x4; |
1122 | 3.61M | } |
1123 | 4.91M | if(u4_pix_y == 0) |
1124 | 1.29M | { |
1125 | 1.29M | i4_ref_strd_top = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride; |
1126 | 1.29M | pu1_mb_ref_top = |
1127 | 1.29M | ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x + |
1128 | 1.29M | (u4_pix_y * i4_ref_strd_top); |
1129 | 1.29M | } |
1130 | 3.61M | else |
1131 | 3.61M | { |
1132 | 3.61M | i4_ref_strd_top = i4_pred_strd; |
1133 | 3.61M | pu1_mb_ref_top = pu1_ref_mb_intra_4x4; |
1134 | 3.61M | } |
1135 | | |
1136 | 4.91M | pu1_mb_a = pu1_mb_ref_left - 1; /* left neighbor pels */ |
1137 | 4.91M | pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* top neighbor pels */ |
1138 | 4.91M | pu1_mb_c = pu1_mb_b + 4; /* top right neighbor pels */ |
1139 | 4.91M | if(u4_pix_y == 0) |
1140 | 1.29M | pu1_mb_d = pu1_mb_b - 1; |
1141 | 3.61M | else |
1142 | 3.61M | pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* top left neighbor pel */ |
1143 | | |
1144 | | /* locating neighbors that are available for prediction */ |
1145 | | /* TODO : update the neighbor availability information based on |
1146 | | * constrained intra pred information */ |
1147 | | /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be |
1148 | | * split in to distinct routines */ |
1149 | | /* based on neighbors available and hence evade the computation of |
1150 | | * neighbor availability totally. */ |
1151 | | |
1152 | 4.91M | i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; |
1153 | 4.91M | s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); |
1154 | 4.91M | s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; |
1155 | 4.91M | s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; |
1156 | 4.91M | s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; |
1157 | | /* set valid intra modes for evaluation (indexed by left/top-left/top availability) */ |
1158 | 4.91M | u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; |
1159 | | |
1160 | | /* if top partition is available and top right is not available for intra |
1161 | | * prediction, then */ |
1162 | | /* pad top right samples using top sample and make top right also |
1163 | | * available */ |
1164 | | /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + |
1165 | | * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | |
1166 | | * s_ngbr_avbl.u1_mb_c) << 3); */ |
1167 | | |
1168 | | /* gather prediction pels: [0..3] left (bottom to top), [4] top left, [5..8] top, [9..12] top right */ |
1169 | 4.91M | if(s_ngbr_avbl.u1_mb_a) |
1170 | 4.32M | { |
1171 | 21.6M | for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_ref_strd_left]; |
1172 | 4.32M | } |
1173 | 584k | else |
1174 | 584k | { |
1175 | 584k | memset(pu1_ngbr_pels_i4, 0, 4); |
1176 | 584k | } |
1177 | 4.91M | if(s_ngbr_avbl.u1_mb_b) |
1178 | 4.25M | { |
1179 | 4.25M | memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); |
1180 | 4.25M | } |
1181 | 657k | else |
1182 | 657k | { |
1183 | 657k | memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4); |
1184 | 657k | } |
1185 | 4.91M | if(s_ngbr_avbl.u1_mb_d) |
1186 | 3.78M | pu1_ngbr_pels_i4[4] = *pu1_mb_d; |
1187 | 1.12M | else |
1188 | 1.12M | pu1_ngbr_pels_i4[4] = 0; |
1189 | 4.91M | if(s_ngbr_avbl.u1_mb_c) |
1190 | 2.75M | { |
1191 | 2.75M | memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); |
1192 | 2.75M | } |
1193 | 2.15M | else if(s_ngbr_avbl.u1_mb_b) |
1194 | 1.61M | { |
1195 | 1.61M | memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); |
1196 | 1.61M | s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; |
1197 | 1.61M | } |
1198 | | |
1199 | 4.91M | i4_partition_cost_least = INT_MAX; |
1200 | | |
1201 | | /* predict the intra 4x4 mode for the current partition (for evaluating |
1202 | | * cost) */ |
1203 | 4.91M | if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) |
1204 | 1.24M | { |
1205 | 1.24M | u4_estimated_intra_4x4_mode = DC_I4x4; |
1206 | 1.24M | } |
1207 | 3.66M | else |
1208 | 3.66M | { |
1209 | 3.66M | UWORD32 u4_left_intra_4x4_mode = DC_I4x4; |
1210 | 3.66M | UWORD32 u4_top_intra_4x4_mode = DC_I4x4; |
1211 | | |
1212 | 3.66M | if(u4_pix_x == 0) |
1213 | 604k | { |
1214 | 604k | if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4) |
1215 | 204k | { |
1216 | 204k | u4_left_intra_4x4_mode = |
1217 | 204k | ps_proc->s_nbr_info.ps_left_mb_intra_modes |
1218 | 204k | ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]]; |
1219 | 204k | } |
1220 | 399k | else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8) |
1221 | 0 | { |
1222 | 0 | u4_left_intra_4x4_mode = |
1223 | 0 | ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1]; |
1224 | 0 | } |
1225 | 604k | } |
1226 | 3.05M | else |
1227 | 3.05M | { |
1228 | 3.05M | u4_left_intra_4x4_mode = |
1229 | 3.05M | ps_proc->au1_intra_luma_mb_4x4_modes |
1230 | 3.05M | [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]]; |
1231 | 3.05M | } |
1232 | | |
1233 | 3.66M | if(u4_pix_y == 0) |
1234 | 541k | { |
1235 | 541k | if(ps_top_mb->u2_mb_type == I4x4) |
1236 | 141k | { |
1237 | 141k | u4_top_intra_4x4_mode = |
1238 | 141k | pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]]; |
1239 | 141k | } |
1240 | 400k | else if(ps_top_mb->u2_mb_type == I8x8) |
1241 | 0 | { |
1242 | 0 | u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; |
1243 | 0 | } |
1244 | 541k | } |
1245 | 3.12M | else |
1246 | 3.12M | { |
1247 | 3.12M | u4_top_intra_4x4_mode = |
1248 | 3.12M | ps_proc->au1_intra_luma_mb_4x4_modes |
1249 | 3.12M | [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]]; |
1250 | 3.12M | } |
1251 | | |
1252 | 3.66M | u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); |
1253 | 3.66M | } |
1254 | | |
1255 | 4.91M | ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = |
1256 | 4.91M | u4_estimated_intra_4x4_mode; |
1257 | | |
1258 | | /* mode evaluation and prediction */ |
1259 | 4.91M | ps_codec->pf_ih264e_evaluate_intra_4x4_modes( |
1260 | 4.91M | pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl, |
1261 | 4.91M | &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda, |
1262 | 4.91M | u4_estimated_intra_4x4_mode); |
1263 | | |
1264 | 4.91M | i4_partition_distortion_least = |
1265 | 4.91M | i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) |
1266 | 4.91M | ? u4_cost_one_bit |
1267 | 4.91M | : u4_cost_four_bits); |
1268 | | |
1269 | 4.91M | DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, |
1270 | 4.91M | u4_best_intra_4x4_mode); |
1271 | | |
1272 | | /* accumulate macroblock distortion and cost */ |
1273 | 4.91M | i4_total_distortion += i4_partition_distortion_least; |
1274 | 4.91M | i4_total_cost += i4_partition_cost_least; |
1275 | | |
1276 | | /* record the best mode of this 4x4 block; later blocks read it for mode estimation */ |
1277 | 4.91M | ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; |
1278 | | |
1279 | | /********************************************************/ |
1280 | | /* error estimation, */ |
1281 | | /* transform */ |
1282 | | /* quantization */ |
1283 | | /********************************************************/ |
1284 | 4.91M | s_src.pv_data = pu1_mb_curr; |
1285 | 4.91M | s_src.i4_data_stride = i4_src_strd; |
1286 | | |
1287 | 4.91M | s_pred.pv_data = pu1_pred_mb; |
1288 | 4.91M | s_pred.i4_data_stride = i4_pred_strd; |
1289 | | |
1290 | 4.91M | s_quant_coeffs.pv_data = pi2_quant_coeffs; |
1291 | 4.91M | s_quant_coeffs.i4_data_stride = 4; |
1292 | | |
1293 | 4.91M | ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx]( |
1294 | 4.91M | &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, |
1295 | | /* No op stride, this implies a buff of length 1x16 */ |
1296 | 4.91M | &s_resi_trans_quant_constants, pu1_nnz, &i2_dc_dummy, 0); |
1297 | | |
1298 | | /********************************************************/ |
1299 | | /* ierror estimation, */ |
1300 | | /* itransform */ |
1301 | | /* iquantization */ |
1302 | | /********************************************************/ |
1303 | | |
1304 | | /* Tx blk coeffs are stored blk by blk */ |
1305 | | /* Hence, in order to access rows of each Tx blk, one needs a stride of |
1306 | | * the Tx size */ |
1307 | 4.91M | s_quant_coeffs.i4_data_stride = 4; |
1308 | | |
1309 | 4.91M | s_recon.pv_data = pu1_ref_mb_intra_4x4; |
1310 | 4.91M | s_recon.i4_data_stride = i4_pred_strd; |
1311 | | |
1312 | 4.91M | ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx]( |
1313 | 4.91M | &s_quant_coeffs, &s_pred, &s_res_pred, &s_res_pred, &s_recon, |
1314 | 4.91M | &s_iq_it_res_rec_constants, ps_proc->pv_scratch_buff, s_quant_coeffs.pv_data, 0, 0); |
1315 | 4.91M | } |
1316 | 1.29M | } |
1317 | | |
1318 | | /* select I4x4 for this MB if its cost beats the best cost recorded so far */ |
1319 | 329k | if(i4_total_cost < ps_proc->i4_mb_cost) |
1320 | 69.5k | { |
1321 | 69.5k | ps_proc->i4_mb_cost = i4_total_cost; |
1322 | 69.5k | ps_proc->i4_mb_distortion = i4_total_distortion; |
1323 | 69.5k | ps_proc->ps_mb_info->u2_mb_type = I4x4; |
1324 | 69.5k | } |
1325 | 329k | } |
1326 | | |
1327 | | /** |
1328 | | ****************************************************************************** |
1329 | | * |
1330 | | * @brief |
1331 | | * evaluate best chroma intra 8x8 mode (rate distortion opt off) |
1332 | | * |
1333 | | * @par Description |
1334 | | * This function evaluates all the possible chroma intra 8x8 modes and finds |
1335 | | * the mode that best represents the macroblock (least distortion) and occupies |
1336 | | * fewer bits in the bitstream. |
1337 | | * |
1338 | | * @param[in] ps_proc |
1339 | | * pointer to the process context |
1340 | | * |
1341 | | * @remarks |
1342 | | * For chroma best intra pred mode is calculated based only on SAD |
1343 | | * |
1344 | | * @returns none |
1345 | | * |
1346 | | ****************************************************************************** |
1347 | | */ |
1348 | | |
1349 | | void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) |
1350 | 5.79M | { |
1351 | | /* Codec Context */ |
1352 | 5.79M | isvce_codec_t *ps_codec = ps_proc->ps_codec; |
1353 | 5.79M | isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; |
1354 | 5.79M | mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; |
1355 | | |
1356 | | /* SAD(distortion metric) of an 8x8 block */ |
1357 | 5.79M | WORD32 i4_mb_distortion, i4_chroma_mb_distortion; |
1358 | | |
1359 | | /* intra mode */ |
1360 | 5.79M | UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8; |
1361 | | |
1362 | | /* neighbor pels for intra prediction */ |
1363 | 5.79M | UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels; |
1364 | | |
1365 | | /* pointer to curr macro block */ |
1366 | 5.79M | UWORD8 *pu1_curr_mb = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data); |
1367 | 5.79M | UWORD8 *pu1_ref_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data); |
1368 | | |
1369 | | /* pointer to prediction macro block */ |
1370 | 5.79M | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; |
1371 | 5.79M | UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane; |
1372 | | |
1373 | | /* strides */ |
1374 | 5.79M | WORD32 i4_src_strd_c = ps_proc->s_src_buf_props.as_component_bufs[1].i4_data_stride; |
1375 | 5.79M | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
1376 | 5.79M | WORD32 i4_rec_strd_c = ps_proc->s_rec_buf_props.as_component_bufs[1].i4_data_stride; |
1377 | | |
1378 | | /* neighbors left, top, top left */ |
1379 | 5.79M | UWORD8 *pu1_mb_a = pu1_ref_mb - 2; |
1380 | 5.79M | UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c; |
1381 | 5.79M | UWORD8 *pu1_mb_d = pu1_mb_b - 2; |
1382 | | |
1383 | | /* neighbor availability */ |
1384 | 5.79M | const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15}; |
1385 | 5.79M | WORD32 i4_ngbr_avbl; |
1386 | | |
1387 | | /* valid intra modes map */ |
1388 | 5.79M | UWORD32 u4_valid_intra_modes; |
1389 | 5.79M | isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; |
1390 | | |
1391 | | /* temp var */ |
1392 | 5.79M | UWORD8 i; |
1393 | 5.79M | UWORD32 u4_constrained_intra_pred = |
1394 | 5.79M | ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; |
1395 | 5.79M | UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; |
1396 | | /* locating neighbors that are available for prediction */ |
1397 | | |
1398 | | /* gather prediction pels from the neighbors */ |
1399 | | /* left pels */ |
1400 | 5.79M | u1_mb_a = |
1401 | 5.79M | ((ps_proc->ps_ngbr_avbl->u1_mb_a) && |
1402 | 5.79M | (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && |
1403 | 1.06M | !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) |
1404 | 5.62M | : 1)); |
1405 | 5.79M | if(u1_mb_a) |
1406 | 5.61M | { |
1407 | 50.5M | for(i = 0; i < 16; i += 2) |
1408 | 44.8M | { |
1409 | 44.8M | pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c]; |
1410 | 44.8M | pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1]; |
1411 | 44.8M | } |
1412 | 5.61M | } |
1413 | 179k | else |
1414 | 179k | { |
1415 | 179k | ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE); |
1416 | 179k | } |
1417 | | |
1418 | | /* top pels */ |
1419 | 5.79M | u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && |
1420 | 5.79M | (u4_constrained_intra_pred |
1421 | 5.56M | ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag) |
1422 | 5.56M | : 1)); |
1423 | 5.79M | if(u1_mb_b) |
1424 | 5.54M | { |
1425 | 5.54M | ps_mem_fxns->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16); |
1426 | 5.54M | } |
1427 | 241k | else |
1428 | 241k | { |
1429 | 241k | ps_mem_fxns->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE); |
1430 | 241k | } |
1431 | | |
1432 | | /* top left pels */ |
1433 | 5.79M | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) && |
1434 | 5.79M | (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra && |
1435 | 1.00M | !ps_top_mb_syn_ele[-1].u1_base_mode_flag) |
1436 | 5.42M | : 1)); |
1437 | 5.79M | if(u1_mb_d) |
1438 | 5.40M | { |
1439 | 5.40M | pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d; |
1440 | 5.40M | pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1); |
1441 | 5.40M | } |
1442 | 5.79M | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); |
1443 | 5.79M | ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl; |
1444 | | |
1445 | 5.79M | u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; |
1446 | | |
1447 | 5.79M | if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST || |
1448 | 5.79M | ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) |
1449 | 629k | u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8); |
1450 | | |
1451 | 5.79M | i4_chroma_mb_distortion = INT_MAX; |
1452 | | |
1453 | | /* perform intra mode chroma 8x8 evaluation */ |
1454 | | /* intra prediction */ |
1455 | 5.79M | ps_codec->pf_ih264e_evaluate_intra_chroma_modes( |
1456 | 5.79M | pu1_curr_mb, pu1_ngbr_pels_c_i8x8, pu1_pred_mb, i4_src_strd_c, i4_pred_strd, i4_ngbr_avbl, |
1457 | 5.79M | &u4_best_chroma_intra_8x8_mode, &i4_chroma_mb_distortion, u4_valid_intra_modes); |
1458 | | |
1459 | 5.79M | if(u4_valid_intra_modes & 8) /* if Chroma PLANE is valid*/ |
1460 | 4.79M | { |
1461 | 4.79M | (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, |
1462 | 4.79M | i4_pred_strd, i4_ngbr_avbl); |
1463 | | |
1464 | | /* evaluate distortion(sad) */ |
1465 | 4.79M | ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, |
1466 | 4.79M | i4_chroma_mb_distortion, &i4_mb_distortion); |
1467 | | |
1468 | | /* update the least distortion information if necessary */ |
1469 | 4.79M | if(i4_mb_distortion < i4_chroma_mb_distortion) |
1470 | 11.6k | { |
1471 | 11.6k | i4_chroma_mb_distortion = i4_mb_distortion; |
1472 | 11.6k | u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8; |
1473 | 11.6k | } |
1474 | 4.79M | } |
1475 | | |
1476 | 5.79M | DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, |
1477 | 5.79M | u4_best_chroma_intra_8x8_mode); |
1478 | | |
1479 | 5.79M | ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode; |
1480 | 5.79M | } |
1481 | | |
1482 | | /** |
1483 | | ****************************************************************************** |
1484 | | * |
1485 | | * @brief |
1486 | | * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the |
1487 | | * prediction. |
1488 | | * |
1489 | | * @par Description |
1490 | | * This function evaluates first three 16x16 modes and compute corresponding sad |
1491 | | * and return the buffer predicted with best mode. |
1492 | | * |
1493 | | * @param[in] pu1_src |
1494 | | * UWORD8 pointer to the source |
1495 | | * |
1496 | | * @param[in] pu1_ngbr_pels_i16 |
1497 | | * UWORD8 pointer to neighbouring pels |
1498 | | * |
1499 | | * @param[out] pu1_dst |
1500 | | * UWORD8 pointer to the destination |
1501 | | * |
1502 | | * @param[in] src_strd |
1503 | | * integer source stride |
1504 | | * |
1505 | | * @param[in] dst_strd |
1506 | | * integer destination stride |
1507 | | * |
1508 | | * @param[in] u4_n_avblty |
1509 | | * availability of neighbouring pixels |
1510 | | * |
1511 | | * @param[in] u4_intra_mode |
1512 | | * Pointer to the variable in which best mode is returned |
1513 | | * |
1514 | | * @param[in] pu4_sadmin |
1515 | | * Pointer to the variable in which minimum sad is returned |
1516 | | * |
1517 | | * @param[in] u4_valid_intra_modes |
1518 | | * Says what all modes are valid |
1519 | | * |
1520 | | * @returns none |
1521 | | * |
1522 | | ****************************************************************************** |
1523 | | */ |
1524 | | void isvce_evaluate_intra16x16_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst, |
1525 | | UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty, |
1526 | | UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin, |
1527 | | UWORD32 u4_valid_intra_modes) |
1528 | 0 | { |
1529 | 0 | UWORD8 *pu1_neighbour; |
1530 | 0 | UWORD8 *pu1_src_temp = pu1_src; |
1531 | 0 | UWORD8 left = 0, top = 0; |
1532 | 0 | WORD32 u4_dcval = 0; |
1533 | 0 | WORD32 i, j; |
1534 | 0 | WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX; |
1535 | 0 | UWORD8 val; |
1536 | |
|
1537 | 0 | left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); |
1538 | 0 | top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; |
1539 | | |
1540 | | /* left available */ |
1541 | 0 | if(left) |
1542 | 0 | { |
1543 | 0 | i4_sad_horz = 0; |
1544 | |
|
1545 | 0 | for(i = 0; i < 16; i++) |
1546 | 0 | { |
1547 | 0 | val = pu1_ngbr_pels_i16[15 - i]; |
1548 | |
|
1549 | 0 | u4_dcval += val; |
1550 | |
|
1551 | 0 | for(j = 0; j < 16; j++) |
1552 | 0 | { |
1553 | 0 | i4_sad_horz += ABS(val - pu1_src_temp[j]); |
1554 | 0 | } |
1555 | |
|
1556 | 0 | pu1_src_temp += src_strd; |
1557 | 0 | } |
1558 | 0 | u4_dcval += 8; |
1559 | 0 | } |
1560 | |
|
1561 | 0 | pu1_src_temp = pu1_src; |
1562 | | /* top available */ |
1563 | 0 | if(top) |
1564 | 0 | { |
1565 | 0 | i4_sad_vert = 0; |
1566 | |
|
1567 | 0 | for(i = 0; i < 16; i++) |
1568 | 0 | { |
1569 | 0 | u4_dcval += pu1_ngbr_pels_i16[17 + i]; |
1570 | |
|
1571 | 0 | for(j = 0; j < 16; j++) |
1572 | 0 | { |
1573 | 0 | i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]); |
1574 | 0 | } |
1575 | 0 | pu1_src_temp += src_strd; |
1576 | 0 | } |
1577 | 0 | u4_dcval += 8; |
1578 | 0 | } |
1579 | |
|
1580 | 0 | u4_dcval = (u4_dcval) >> (3 + left + top); |
1581 | |
|
1582 | 0 | pu1_src_temp = pu1_src; |
1583 | | |
1584 | | /* none available */ |
1585 | 0 | u4_dcval += (left == 0) * (top == 0) * 128; |
1586 | |
|
1587 | 0 | i4_sad_dc = 0; |
1588 | |
|
1589 | 0 | for(i = 0; i < 16; i++) |
1590 | 0 | { |
1591 | 0 | for(j = 0; j < 16; j++) |
1592 | 0 | { |
1593 | 0 | i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]); |
1594 | 0 | } |
1595 | 0 | pu1_src_temp += src_strd; |
1596 | 0 | } |
1597 | |
|
1598 | 0 | if((u4_valid_intra_modes & 04) == 0) /* If DC is disabled */ |
1599 | 0 | i4_sad_dc = INT_MAX; |
1600 | |
|
1601 | 0 | if((u4_valid_intra_modes & 01) == 0) /* If VERT is disabled */ |
1602 | 0 | i4_sad_vert = INT_MAX; |
1603 | |
|
1604 | 0 | if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled */ |
1605 | 0 | i4_sad_horz = INT_MAX; |
1606 | |
|
1607 | 0 | i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); |
1608 | | |
1609 | | /* Finding Minimum sad and doing corresponding prediction */ |
1610 | 0 | if(i4_min_sad < *pu4_sadmin) |
1611 | 0 | { |
1612 | 0 | *pu4_sadmin = i4_min_sad; |
1613 | 0 | if(i4_min_sad == i4_sad_vert) |
1614 | 0 | { |
1615 | 0 | *u4_intra_mode = VERT_I16x16; |
1616 | 0 | pu1_neighbour = pu1_ngbr_pels_i16 + 17; |
1617 | 0 | for(j = 0; j < 16; j++) |
1618 | 0 | { |
1619 | 0 | memcpy(pu1_dst, pu1_neighbour, MB_SIZE); |
1620 | 0 | pu1_dst += dst_strd; |
1621 | 0 | } |
1622 | 0 | } |
1623 | 0 | else if(i4_min_sad == i4_sad_horz) |
1624 | 0 | { |
1625 | 0 | *u4_intra_mode = HORZ_I16x16; |
1626 | 0 | for(j = 0; j < 16; j++) |
1627 | 0 | { |
1628 | 0 | val = pu1_ngbr_pels_i16[15 - j]; |
1629 | 0 | memset(pu1_dst, val, MB_SIZE); |
1630 | 0 | pu1_dst += dst_strd; |
1631 | 0 | } |
1632 | 0 | } |
1633 | 0 | else |
1634 | 0 | { |
1635 | 0 | *u4_intra_mode = DC_I16x16; |
1636 | 0 | for(j = 0; j < 16; j++) |
1637 | 0 | { |
1638 | 0 | memset(pu1_dst, u4_dcval, MB_SIZE); |
1639 | 0 | pu1_dst += dst_strd; |
1640 | 0 | } |
1641 | 0 | } |
1642 | 0 | } |
1643 | 0 | } |
1644 | | |
1645 | | /** |
1646 | | ****************************************************************************** |
1647 | | * |
1648 | | * @brief |
1649 | | * Evaluate best intra 4x4 mode and perform prediction. |
1650 | | * |
1651 | | * @par Description |
1652 | | * This function evaluates 4x4 modes and compute corresponding sad |
1653 | | * and return the buffer predicted with best mode. |
1654 | | * |
1655 | | * @param[in] pu1_src |
1656 | | * UWORD8 pointer to the source |
1657 | | * |
1658 | | * @param[in] pu1_ngbr_pels |
1659 | | * UWORD8 pointer to neighbouring pels |
1660 | | * |
1661 | | * @param[out] pu1_dst |
1662 | | * UWORD8 pointer to the destination |
1663 | | * |
1664 | | * @param[in] src_strd |
1665 | | * integer source stride |
1666 | | * |
1667 | | * @param[in] dst_strd |
1668 | | * integer destination stride |
1669 | | * |
1670 | | * @param[in] u4_n_avblty |
1671 | | * availability of neighbouring pixels |
1672 | | * |
1673 | | * @param[in] u4_intra_mode |
1674 | | * Pointer to the variable in which best mode is returned |
1675 | | * |
1676 | | * @param[in] pu4_sadmin |
1677 | | * Pointer to the variable in which minimum cost is returned |
1678 | | * |
1679 | | * @param[in] u4_valid_intra_modes |
1680 | | * Says what all modes are valid |
1681 | | * |
1682 | | * @param[in] u4_lambda |
1683 | | * Lamda value for computing cost from SAD |
1684 | | * |
1685 | | * @param[in] u4_predictd_mode |
1686 | | * Predicted mode for cost computation |
1687 | | * |
1688 | | * @returns none |
1689 | | * |
1690 | | ****************************************************************************** |
1691 | | */ |
1692 | | void isvce_evaluate_intra_4x4_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst, |
1693 | | UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty, |
1694 | | UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin, |
1695 | | UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda, |
1696 | | UWORD32 u4_predictd_mode) |
1697 | 0 | { |
1698 | 0 | UWORD8 *pu1_src_temp = pu1_src; |
1699 | 0 | UWORD8 *pu1_pred = pu1_ngbr_pels; |
1700 | 0 | UWORD8 left = 0, top = 0; |
1701 | 0 | UWORD8 u1_pred_val = 0; |
1702 | 0 | UWORD8 u1_pred_vals[4] = {0}; |
1703 | 0 | UWORD8 *pu1_pred_val = NULL; |
1704 | | /* To store FILT121 operated values*/ |
1705 | 0 | UWORD8 u1_pred_vals_diag_121[15] = {0}; |
1706 | | /* To store FILT11 operated values*/ |
1707 | 0 | UWORD8 u1_pred_vals_diag_11[15] = {0}; |
1708 | 0 | UWORD8 u1_pred_vals_vert_r[8] = {0}; |
1709 | 0 | UWORD8 u1_pred_vals_horz_d[10] = {0}; |
1710 | 0 | UWORD8 u1_pred_vals_horz_u[10] = {0}; |
1711 | 0 | WORD32 u4_dcval = 0; |
1712 | 0 | WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, |
1713 | 0 | INT_MAX, INT_MAX, INT_MAX, INT_MAX}; |
1714 | |
|
1715 | 0 | WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, |
1716 | 0 | INT_MAX, INT_MAX, INT_MAX, INT_MAX}; |
1717 | 0 | WORD32 i, i4_min_cost = INT_MAX; |
1718 | |
|
1719 | 0 | left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); |
1720 | 0 | top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; |
1721 | | |
1722 | | /* Computing SAD */ |
1723 | | |
1724 | | /* VERT mode valid */ |
1725 | 0 | if(u4_valid_intra_modes & 1) |
1726 | 0 | { |
1727 | 0 | pu1_pred = pu1_ngbr_pels + 5; |
1728 | 0 | i4_sad[VERT_I4x4] = 0; |
1729 | 0 | i4_cost[VERT_I4x4] = 0; |
1730 | |
|
1731 | 0 | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1732 | 0 | pu1_src_temp += src_strd; |
1733 | 0 | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1734 | 0 | pu1_src_temp += src_strd; |
1735 | 0 | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1736 | 0 | pu1_src_temp += src_strd; |
1737 | 0 | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1738 | |
|
1739 | 0 | i4_cost[VERT_I4x4] = |
1740 | 0 | i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda : 4 * u4_lambda); |
1741 | 0 | } |
1742 | | |
1743 | | /* HORZ mode valid */ |
1744 | 0 | if(u4_valid_intra_modes & 2) |
1745 | 0 | { |
1746 | 0 | i4_sad[HORZ_I4x4] = 0; |
1747 | 0 | i4_cost[HORZ_I4x4] = 0; |
1748 | 0 | pu1_src_temp = pu1_src; |
1749 | |
|
1750 | 0 | u1_pred_val = pu1_ngbr_pels[3]; |
1751 | |
|
1752 | 0 | i4_sad[HORZ_I4x4] += |
1753 | 0 | ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + |
1754 | 0 | ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); |
1755 | 0 | pu1_src_temp += src_strd; |
1756 | |
|
1757 | 0 | u1_pred_val = pu1_ngbr_pels[2]; |
1758 | |
|
1759 | 0 | i4_sad[HORZ_I4x4] += |
1760 | 0 | ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + |
1761 | 0 | ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); |
1762 | 0 | pu1_src_temp += src_strd; |
1763 | |
|
1764 | 0 | u1_pred_val = pu1_ngbr_pels[1]; |
1765 | |
|
1766 | 0 | i4_sad[HORZ_I4x4] += |
1767 | 0 | ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + |
1768 | 0 | ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); |
1769 | 0 | pu1_src_temp += src_strd; |
1770 | |
|
1771 | 0 | u1_pred_val = pu1_ngbr_pels[0]; |
1772 | |
|
1773 | 0 | i4_sad[HORZ_I4x4] += |
1774 | 0 | ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + |
1775 | 0 | ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); |
1776 | |
|
1777 | 0 | i4_cost[HORZ_I4x4] = |
1778 | 0 | i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda : 4 * u4_lambda); |
1779 | 0 | } |
1780 | | |
1781 | | /* DC mode valid */ |
1782 | 0 | if(u4_valid_intra_modes & 4) |
1783 | 0 | { |
1784 | 0 | i4_sad[DC_I4x4] = 0; |
1785 | 0 | i4_cost[DC_I4x4] = 0; |
1786 | 0 | pu1_src_temp = pu1_src; |
1787 | |
|
1788 | 0 | if(left) |
1789 | 0 | u4_dcval = |
1790 | 0 | pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] + pu1_ngbr_pels[3] + 2; |
1791 | 0 | if(top) |
1792 | 0 | u4_dcval += |
1793 | 0 | pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] + pu1_ngbr_pels[8] + 2; |
1794 | |
|
1795 | 0 | u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128; |
1796 | | |
1797 | | /* none available */ |
1798 | 0 | memset(u1_pred_vals, u4_dcval, 4); |
1799 | 0 | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1800 | 0 | pu1_src_temp += src_strd; |
1801 | 0 | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1802 | 0 | pu1_src_temp += src_strd; |
1803 | 0 | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1804 | 0 | pu1_src_temp += src_strd; |
1805 | 0 | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1806 | 0 | pu1_src_temp += src_strd; |
1807 | |
|
1808 | 0 | i4_cost[DC_I4x4] = |
1809 | 0 | i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda : 4 * u4_lambda); |
1810 | 0 | } |
1811 | | |
1812 | | /* if modes other than VERT, HORZ and DC are valid */ |
1813 | 0 | if(u4_valid_intra_modes > 7) |
1814 | 0 | { |
1815 | 0 | pu1_pred = pu1_ngbr_pels; |
1816 | 0 | pu1_pred[13] = pu1_pred[14] = pu1_pred[12]; |
1817 | | |
1818 | | /* Performing FILT121 and FILT11 operation for all neighbour values*/ |
1819 | 0 | for(i = 0; i < 13; i++) |
1820 | 0 | { |
1821 | 0 | u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]); |
1822 | 0 | u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]); |
1823 | |
|
1824 | 0 | pu1_pred++; |
1825 | 0 | } |
1826 | |
|
1827 | 0 | if(u4_valid_intra_modes & 8) /* DIAG_DL */ |
1828 | 0 | { |
1829 | 0 | i4_sad[DIAG_DL_I4x4] = 0; |
1830 | 0 | i4_cost[DIAG_DL_I4x4] = 0; |
1831 | 0 | pu1_src_temp = pu1_src; |
1832 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
1833 | |
|
1834 | 0 | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]); |
1835 | 0 | pu1_src_temp += src_strd; |
1836 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]); |
1837 | 0 | pu1_src_temp += src_strd; |
1838 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]); |
1839 | 0 | pu1_src_temp += src_strd; |
1840 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]); |
1841 | 0 | pu1_src_temp += src_strd; |
1842 | 0 | i4_cost[DIAG_DL_I4x4] = |
1843 | 0 | i4_sad[DIAG_DL_I4x4] + |
1844 | 0 | ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda : 4 * u4_lambda); |
1845 | 0 | } |
1846 | |
|
1847 | 0 | if(u4_valid_intra_modes & 16) /* DIAG_DR */ |
1848 | 0 | { |
1849 | 0 | i4_sad[DIAG_DR_I4x4] = 0; |
1850 | 0 | i4_cost[DIAG_DR_I4x4] = 0; |
1851 | 0 | pu1_src_temp = pu1_src; |
1852 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
1853 | |
|
1854 | 0 | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]); |
1855 | 0 | pu1_src_temp += src_strd; |
1856 | 0 | USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]); |
1857 | 0 | pu1_src_temp += src_strd; |
1858 | 0 | USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]); |
1859 | 0 | pu1_src_temp += src_strd; |
1860 | 0 | USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]); |
1861 | 0 | pu1_src_temp += src_strd; |
1862 | 0 | i4_cost[DIAG_DR_I4x4] = |
1863 | 0 | i4_sad[DIAG_DR_I4x4] + |
1864 | 0 | ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda : 4 * u4_lambda); |
1865 | 0 | } |
1866 | |
|
1867 | 0 | if(u4_valid_intra_modes & 32) /* VERT_R mode valid ????*/ |
1868 | 0 | { |
1869 | 0 | i4_sad[VERT_R_I4x4] = 0; |
1870 | |
|
1871 | 0 | pu1_src_temp = pu1_src; |
1872 | 0 | u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2]; |
1873 | 0 | memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3); |
1874 | 0 | u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1]; |
1875 | 0 | memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3); |
1876 | |
|
1877 | 0 | pu1_pred_val = u1_pred_vals_diag_11 + 4; |
1878 | 0 | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); |
1879 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
1880 | 0 | pu1_src_temp += src_strd; |
1881 | 0 | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); |
1882 | 0 | pu1_src_temp += src_strd; |
1883 | 0 | USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]); |
1884 | 0 | pu1_src_temp += src_strd; |
1885 | 0 | USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), i4_sad[VERT_R_I4x4]); |
1886 | |
|
1887 | 0 | i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + |
1888 | 0 | ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda : 4 * u4_lambda); |
1889 | 0 | } |
1890 | |
|
1891 | 0 | if(u4_valid_intra_modes & 64) /* HORZ_D mode valid ????*/ |
1892 | 0 | { |
1893 | 0 | i4_sad[HORZ_D_I4x4] = 0; |
1894 | |
|
1895 | 0 | pu1_src_temp = pu1_src; |
1896 | 0 | u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3]; |
1897 | 0 | memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3); |
1898 | 0 | u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0]; |
1899 | 0 | u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0]; |
1900 | 0 | u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1]; |
1901 | 0 | u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1]; |
1902 | 0 | u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2]; |
1903 | 0 | u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2]; |
1904 | |
|
1905 | 0 | pu1_pred_val = u1_pred_vals_horz_d; |
1906 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]); |
1907 | 0 | pu1_src_temp += src_strd; |
1908 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]); |
1909 | 0 | pu1_src_temp += src_strd; |
1910 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]); |
1911 | 0 | pu1_src_temp += src_strd; |
1912 | 0 | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]); |
1913 | |
|
1914 | 0 | i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + |
1915 | 0 | ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda : 4 * u4_lambda); |
1916 | 0 | } |
1917 | |
|
1918 | 0 | if(u4_valid_intra_modes & 128) /* VERT_L mode valid ????*/ |
1919 | 0 | { |
1920 | 0 | i4_sad[VERT_L_I4x4] = 0; |
1921 | 0 | pu1_src_temp = pu1_src; |
1922 | 0 | pu1_pred_val = u1_pred_vals_diag_11 + 5; |
1923 | 0 | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1924 | 0 | pu1_src_temp += src_strd; |
1925 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
1926 | 0 | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1927 | 0 | pu1_src_temp += src_strd; |
1928 | 0 | pu1_pred_val = u1_pred_vals_diag_11 + 6; |
1929 | 0 | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1930 | 0 | pu1_src_temp += src_strd; |
1931 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 6; |
1932 | 0 | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1933 | |
|
1934 | 0 | i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + |
1935 | 0 | ((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda : 4 * u4_lambda); |
1936 | 0 | } |
1937 | |
|
1938 | 0 | if(u4_valid_intra_modes & 256) /* HORZ_U mode valid ????*/ |
1939 | 0 | { |
1940 | 0 | i4_sad[HORZ_U_I4x4] = 0; |
1941 | 0 | pu1_src_temp = pu1_src; |
1942 | 0 | u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2]; |
1943 | 0 | u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1]; |
1944 | 0 | u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1]; |
1945 | 0 | u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0]; |
1946 | 0 | u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0]; |
1947 | 0 | u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]); |
1948 | |
|
1949 | 0 | memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4); |
1950 | |
|
1951 | 0 | pu1_pred_val = u1_pred_vals_horz_u; |
1952 | 0 | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]); |
1953 | 0 | pu1_src_temp += src_strd; |
1954 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]); |
1955 | 0 | pu1_src_temp += src_strd; |
1956 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]); |
1957 | 0 | pu1_src_temp += src_strd; |
1958 | 0 | USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]); |
1959 | |
|
1960 | 0 | i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + |
1961 | 0 | ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda : 4 * u4_lambda); |
1962 | 0 | } |
1963 | |
|
1964 | 0 | i4_min_cost = |
1965 | 0 | MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), MIN3(i4_cost[3], i4_cost[4], i4_cost[5]), |
1966 | 0 | MIN3(i4_cost[6], i4_cost[7], i4_cost[8])); |
1967 | 0 | } |
1968 | 0 | else |
1969 | 0 | { |
1970 | | /* Only first three modes valid */ |
1971 | 0 | i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]); |
1972 | 0 | } |
1973 | |
|
1974 | 0 | *pu4_sadmin = i4_min_cost; |
1975 | |
|
1976 | 0 | if(i4_min_cost == i4_cost[0]) |
1977 | 0 | { |
1978 | 0 | *u4_intra_mode = VERT_I4x4; |
1979 | 0 | pu1_pred_val = pu1_ngbr_pels + 5; |
1980 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
1981 | 0 | pu1_dst += dst_strd; |
1982 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
1983 | 0 | pu1_dst += dst_strd; |
1984 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
1985 | 0 | pu1_dst += dst_strd; |
1986 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
1987 | 0 | } |
1988 | 0 | else if(i4_min_cost == i4_cost[1]) |
1989 | 0 | { |
1990 | 0 | *u4_intra_mode = HORZ_I4x4; |
1991 | 0 | memset(pu1_dst, pu1_ngbr_pels[3], 4); |
1992 | 0 | pu1_dst += dst_strd; |
1993 | 0 | memset(pu1_dst, pu1_ngbr_pels[2], 4); |
1994 | 0 | pu1_dst += dst_strd; |
1995 | 0 | memset(pu1_dst, pu1_ngbr_pels[1], 4); |
1996 | 0 | pu1_dst += dst_strd; |
1997 | 0 | memset(pu1_dst, pu1_ngbr_pels[0], 4); |
1998 | 0 | } |
1999 | 0 | else if(i4_min_cost == i4_cost[2]) |
2000 | 0 | { |
2001 | 0 | *u4_intra_mode = DC_I4x4; |
2002 | 0 | memset(pu1_dst, u4_dcval, 4); |
2003 | 0 | pu1_dst += dst_strd; |
2004 | 0 | memset(pu1_dst, u4_dcval, 4); |
2005 | 0 | pu1_dst += dst_strd; |
2006 | 0 | memset(pu1_dst, u4_dcval, 4); |
2007 | 0 | pu1_dst += dst_strd; |
2008 | 0 | memset(pu1_dst, u4_dcval, 4); |
2009 | 0 | } |
2010 | | |
2011 | 0 | else if(i4_min_cost == i4_cost[3]) |
2012 | 0 | { |
2013 | 0 | *u4_intra_mode = DIAG_DL_I4x4; |
2014 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
2015 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2016 | 0 | pu1_dst += dst_strd; |
2017 | 0 | memcpy(pu1_dst, (pu1_pred_val + 1), 4); |
2018 | 0 | pu1_dst += dst_strd; |
2019 | 0 | memcpy(pu1_dst, (pu1_pred_val + 2), 4); |
2020 | 0 | pu1_dst += dst_strd; |
2021 | 0 | memcpy(pu1_dst, (pu1_pred_val + 3), 4); |
2022 | 0 | } |
2023 | 0 | else if(i4_min_cost == i4_cost[4]) |
2024 | 0 | { |
2025 | 0 | *u4_intra_mode = DIAG_DR_I4x4; |
2026 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
2027 | |
|
2028 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2029 | 0 | pu1_dst += dst_strd; |
2030 | 0 | memcpy(pu1_dst, (pu1_pred_val - 1), 4); |
2031 | 0 | pu1_dst += dst_strd; |
2032 | 0 | memcpy(pu1_dst, (pu1_pred_val - 2), 4); |
2033 | 0 | pu1_dst += dst_strd; |
2034 | 0 | memcpy(pu1_dst, (pu1_pred_val - 3), 4); |
2035 | 0 | } |
2036 | | |
2037 | 0 | else if(i4_min_cost == i4_cost[5]) |
2038 | 0 | { |
2039 | 0 | *u4_intra_mode = VERT_R_I4x4; |
2040 | 0 | pu1_pred_val = u1_pred_vals_diag_11 + 4; |
2041 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2042 | 0 | pu1_dst += dst_strd; |
2043 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
2044 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2045 | 0 | pu1_dst += dst_strd; |
2046 | 0 | memcpy(pu1_dst, (u1_pred_vals_vert_r), 4); |
2047 | 0 | pu1_dst += dst_strd; |
2048 | 0 | memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4); |
2049 | 0 | } |
2050 | 0 | else if(i4_min_cost == i4_cost[6]) |
2051 | 0 | { |
2052 | 0 | *u4_intra_mode = HORZ_D_I4x4; |
2053 | 0 | pu1_pred_val = u1_pred_vals_horz_d; |
2054 | 0 | memcpy(pu1_dst, (pu1_pred_val + 6), 4); |
2055 | 0 | pu1_dst += dst_strd; |
2056 | 0 | memcpy(pu1_dst, (pu1_pred_val + 4), 4); |
2057 | 0 | pu1_dst += dst_strd; |
2058 | 0 | memcpy(pu1_dst, (pu1_pred_val + 2), 4); |
2059 | 0 | pu1_dst += dst_strd; |
2060 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2061 | 0 | pu1_dst += dst_strd; |
2062 | 0 | } |
2063 | 0 | else if(i4_min_cost == i4_cost[7]) |
2064 | 0 | { |
2065 | 0 | *u4_intra_mode = VERT_L_I4x4; |
2066 | 0 | pu1_pred_val = u1_pred_vals_diag_11 + 5; |
2067 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2068 | 0 | pu1_dst += dst_strd; |
2069 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
2070 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2071 | 0 | pu1_dst += dst_strd; |
2072 | 0 | pu1_pred_val = u1_pred_vals_diag_11 + 6; |
2073 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2074 | 0 | pu1_dst += dst_strd; |
2075 | 0 | pu1_pred_val = u1_pred_vals_diag_121 + 6; |
2076 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2077 | 0 | } |
2078 | 0 | else if(i4_min_cost == i4_cost[8]) |
2079 | 0 | { |
2080 | 0 | *u4_intra_mode = HORZ_U_I4x4; |
2081 | 0 | pu1_pred_val = u1_pred_vals_horz_u; |
2082 | 0 | memcpy(pu1_dst, (pu1_pred_val), 4); |
2083 | 0 | pu1_dst += dst_strd; |
2084 | 0 | memcpy(pu1_dst, (pu1_pred_val + 2), 4); |
2085 | 0 | pu1_dst += dst_strd; |
2086 | 0 | memcpy(pu1_dst, (pu1_pred_val + 4), 4); |
2087 | 0 | pu1_dst += dst_strd; |
2088 | 0 | memcpy(pu1_dst, (pu1_pred_val + 6), 4); |
2089 | 0 | pu1_dst += dst_strd; |
2090 | 0 | } |
2091 | |
|
2092 | 0 | return; |
2093 | 0 | } |
2094 | | |
2095 | | /** |
2096 | | ****************************************************************************** |
2097 | | * |
2098 | | * @brief: |
2099 | | * Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the |
2100 | | * prediction. |
2101 | | * |
2102 | | * @par Description |
2103 | | * This function evaluates the first three intra chroma modes, computes the |
2104 | | * corresponding SAD and returns the buffer predicted with the best mode. |
2105 | | * |
2106 | | * @param[in] pu1_src |
2107 | | * UWORD8 pointer to the source |
2108 | | * |
2109 | | * @param[in] pu1_ngbr_pels |
2110 | | * UWORD8 pointer to neighbouring pels |
2111 | | * |
2112 | | * @param[out] pu1_dst |
2113 | | * UWORD8 pointer to the destination |
2114 | | * |
2115 | | * @param[in] src_strd |
2116 | | * integer source stride |
2117 | | * |
2118 | | * @param[in] dst_strd |
2119 | | * integer destination stride |
2120 | | * |
2121 | | * @param[in] u4_n_avblty |
2122 | | * availability of neighbouring pixels |
2123 | | * |
2124 | | * @param[in] u4_intra_mode |
2125 | | * Pointer to the variable in which best mode is returned |
2126 | | * |
2127 | | * @param[in] pu4_sadmin |
2128 | | * Pointer to the variable in which minimum sad is returned |
2129 | | * |
2130 | | * @param[in] u4_valid_intra_modes |
2131 | | * Says what all modes are valid |
2132 | | * |
2133 | | * @return none |
2134 | | * |
2135 | | ****************************************************************************** |
2136 | | */ |
2137 | | void isvce_evaluate_intra_chroma_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst, |
2138 | | UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty, |
2139 | | UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin, |
2140 | | UWORD32 u4_valid_intra_modes) |
2141 | 0 | { |
2142 | 0 | UWORD8 *pu1_neighbour; |
2143 | 0 | UWORD8 *pu1_src_temp = pu1_src; |
2144 | 0 | UWORD8 left = 0, top = 0; |
2145 | 0 | WORD32 u4_dcval_u_l[2] = {0, 0}, /*sum left neighbours for 'U' ,two separate sets - sum of |
2146 | | first four from top,and sum of four values from bottom */ |
2147 | 0 | u4_dcval_u_t[2] = {0, 0}; /*sum top neighbours for 'U'*/ |
2148 | |
|
2149 | 0 | WORD32 u4_dcval_v_l[2] = {0, 0}, /*sum left neighbours for 'V'*/ |
2150 | 0 | u4_dcval_v_t[2] = {0, 0}; /*sum top neighbours for 'V'*/ |
2151 | |
|
2152 | 0 | WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, |
2153 | 0 | i4_min_sad = INT_MAX; |
2154 | 0 | UWORD8 val_u, val_v; |
2155 | |
|
2156 | 0 | WORD32 u4_dc_val[2][2][2]; /* ----------- |
2157 | | | | | Chroma can have four |
2158 | | | 00 | 01 | separate dc value... |
2159 | | ----------- u4_dc_val corresponds to this dc |
2160 | | values | | | with u4_dc_val[2][2][U] and |
2161 | | u4_dc_val[2][2][V] | 10 | 11 | |
2162 | | ----------- */ |
2163 | 0 | left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); |
2164 | 0 | top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; |
2165 | | |
2166 | | /*Evaluating HORZ*/ |
2167 | 0 | if(left) /* Ifleft available*/ |
2168 | 0 | { |
2169 | 0 | i4_sad_horz = 0; |
2170 | |
|
2171 | 0 | for(i = 0; i < 8; i++) |
2172 | 0 | { |
2173 | 0 | val_v = pu1_ngbr_pels[15 - 2 * i]; |
2174 | 0 | val_u = pu1_ngbr_pels[15 - 2 * i - 1]; |
2175 | 0 | row = i / 4; |
2176 | 0 | u4_dcval_u_l[row] += val_u; |
2177 | 0 | u4_dcval_v_l[row] += val_v; |
2178 | 0 | for(j = 0; j < 8; j++) |
2179 | 0 | { |
2180 | 0 | i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for HORZ mode*/ |
2181 | 0 | i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]); |
2182 | 0 | } |
2183 | |
|
2184 | 0 | pu1_src_temp += src_strd; |
2185 | 0 | } |
2186 | 0 | u4_dcval_u_l[0] += 2; |
2187 | 0 | u4_dcval_u_l[1] += 2; |
2188 | 0 | u4_dcval_v_l[0] += 2; |
2189 | 0 | u4_dcval_v_l[1] += 2; |
2190 | 0 | } |
2191 | | |
2192 | | /*Evaluating VERT**/ |
2193 | 0 | pu1_src_temp = pu1_src; |
2194 | 0 | if(top) /* top available*/ |
2195 | 0 | { |
2196 | 0 | i4_sad_vert = 0; |
2197 | |
|
2198 | 0 | for(i = 0; i < 8; i++) |
2199 | 0 | { |
2200 | 0 | col = i / 4; |
2201 | |
|
2202 | 0 | val_u = pu1_ngbr_pels[18 + i * 2]; |
2203 | 0 | val_v = pu1_ngbr_pels[18 + i * 2 + 1]; |
2204 | 0 | u4_dcval_u_t[col] += val_u; |
2205 | 0 | u4_dcval_v_t[col] += val_v; |
2206 | |
|
2207 | 0 | for(j = 0; j < 16; j++) |
2208 | 0 | { |
2209 | 0 | i4_sad_vert += |
2210 | 0 | ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]); /* Finding SAD for VERT mode*/ |
2211 | 0 | } |
2212 | 0 | pu1_src_temp += src_strd; |
2213 | 0 | } |
2214 | 0 | u4_dcval_u_t[0] += 2; |
2215 | 0 | u4_dcval_u_t[1] += 2; |
2216 | 0 | u4_dcval_v_t[0] += 2; |
2217 | 0 | u4_dcval_v_t[1] += 2; |
2218 | 0 | } |
2219 | | |
2220 | | /* computing DC value*/ |
2221 | | /* Equation 8-128 in spec*/ |
2222 | 0 | u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top); |
2223 | 0 | u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top); |
2224 | 0 | u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top); |
2225 | 0 | u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top); |
2226 | |
|
2227 | 0 | if(top) |
2228 | 0 | { |
2229 | | /* Equation 8-132 in spec*/ |
2230 | 0 | u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top); |
2231 | 0 | u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top); |
2232 | 0 | } |
2233 | 0 | else |
2234 | 0 | { |
2235 | 0 | u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); |
2236 | 0 | u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left); |
2237 | 0 | } |
2238 | |
|
2239 | 0 | if(left) |
2240 | 0 | { |
2241 | 0 | u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left); |
2242 | 0 | u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left); |
2243 | 0 | } |
2244 | 0 | else |
2245 | 0 | { |
2246 | 0 | u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); |
2247 | 0 | u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top); |
2248 | 0 | } |
2249 | |
|
2250 | 0 | if(!(left || top)) |
2251 | 0 | { |
2252 | | /*none available*/ |
2253 | 0 | u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = u4_dc_val[0][1][0] = u4_dc_val[0][1][1] = |
2254 | 0 | u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128; |
2255 | 0 | } |
2256 | | |
2257 | | /* Evaluating DC */ |
2258 | 0 | pu1_src_temp = pu1_src; |
2259 | 0 | i4_sad_dc = 0; |
2260 | 0 | for(i = 0; i < 8; i++) |
2261 | 0 | { |
2262 | 0 | for(j = 0; j < 8; j++) |
2263 | 0 | { |
2264 | 0 | col = j / 4; |
2265 | 0 | row = i / 4; |
2266 | 0 | val_u = u4_dc_val[row][col][0]; |
2267 | 0 | val_v = u4_dc_val[row][col][1]; |
2268 | |
|
2269 | 0 | i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for DC mode*/ |
2270 | 0 | i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]); |
2271 | 0 | } |
2272 | 0 | pu1_src_temp += src_strd; |
2273 | 0 | } |
2274 | |
|
2275 | 0 | if((u4_valid_intra_modes & 01) == 0) /* If DC is disabled*/ |
2276 | 0 | i4_sad_dc = INT_MAX; |
2277 | 0 | if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled*/ |
2278 | 0 | i4_sad_horz = INT_MAX; |
2279 | 0 | if((u4_valid_intra_modes & 04) == 0) /* If VERT is disabled*/ |
2280 | 0 | i4_sad_vert = INT_MAX; |
2281 | |
|
2282 | 0 | i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); |
2283 | | |
2284 | | /* Finding Minimum sad and doing corresponding prediction*/ |
2285 | 0 | if(i4_min_sad < *pu4_sadmin) |
2286 | 0 | { |
2287 | 0 | *pu4_sadmin = i4_min_sad; |
2288 | |
|
2289 | 0 | if(i4_min_sad == i4_sad_dc) |
2290 | 0 | { |
2291 | 0 | *u4_intra_mode = DC_CH_I8x8; |
2292 | 0 | for(i = 0; i < 8; i++) |
2293 | 0 | { |
2294 | 0 | for(j = 0; j < 8; j++) |
2295 | 0 | { |
2296 | 0 | col = j / 4; |
2297 | 0 | row = i / 4; |
2298 | |
|
2299 | 0 | pu1_dst[2 * j] = u4_dc_val[row][col][0]; |
2300 | 0 | pu1_dst[2 * j + 1] = u4_dc_val[row][col][1]; |
2301 | 0 | } |
2302 | 0 | pu1_dst += dst_strd; |
2303 | 0 | } |
2304 | 0 | } |
2305 | 0 | else if(i4_min_sad == i4_sad_horz) |
2306 | 0 | { |
2307 | 0 | *u4_intra_mode = HORZ_CH_I8x8; |
2308 | 0 | for(j = 0; j < 8; j++) |
2309 | 0 | { |
2310 | 0 | val_v = pu1_ngbr_pels[15 - 2 * j]; |
2311 | 0 | val_u = pu1_ngbr_pels[15 - 2 * j - 1]; |
2312 | |
|
2313 | 0 | for(i = 0; i < 8; i++) |
2314 | 0 | { |
2315 | 0 | pu1_dst[2 * i] = val_u; |
2316 | 0 | pu1_dst[2 * i + 1] = val_v; |
2317 | 0 | } |
2318 | 0 | pu1_dst += dst_strd; |
2319 | 0 | } |
2320 | 0 | } |
2321 | 0 | else |
2322 | 0 | { |
2323 | 0 | *u4_intra_mode = VERT_CH_I8x8; |
2324 | 0 | pu1_neighbour = pu1_ngbr_pels + 18; |
2325 | 0 | for(j = 0; j < 8; j++) |
2326 | 0 | { |
2327 | 0 | memcpy(pu1_dst, pu1_neighbour, MB_SIZE); |
2328 | 0 | pu1_dst += dst_strd; |
2329 | 0 | } |
2330 | 0 | } |
2331 | 0 | } |
2332 | |
|
2333 | 0 | return; |
2334 | 0 | } |