/src/libavc/encoder/ih264e_intra_modes_eval.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ******************************************************************************* |
23 | | * @file |
24 | | * ih264e_intra_modes_eval.c |
25 | | * |
26 | | * @brief |
27 | | * This file contains definitions of routines that perform rate distortion |
28 | | * analysis on a macroblock if they are to be coded as intra. |
29 | | * |
30 | | * @author |
31 | | * ittiam |
32 | | * |
33 | | * @par List of Functions: |
34 | | * - ih264e_derive_nghbr_avbl_of_mbs |
35 | | * - ih264e_derive_ngbr_avbl_of_mb_partitions |
36 | | * - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff |
37 | | * - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff |
38 | | * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff |
39 | | * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton |
40 | | * - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff |
41 | | * - ih264e_evaluate_intra16x16_modes |
42 | | * - ih264e_evaluate_intra4x4_modes |
43 | | * - ih264e_evaluate_intra_chroma_modes |
44 | | * |
45 | | * @remarks |
46 | | * none |
47 | | * |
48 | | ******************************************************************************* |
49 | | */ |
50 | | |
51 | | /*****************************************************************************/ |
52 | | /* File Includes */ |
53 | | /*****************************************************************************/ |
54 | | |
55 | | /* System Include Files */ |
56 | | #include <stdio.h> |
57 | | #include <string.h> |
58 | | #include <limits.h> |
59 | | #include <assert.h> |
60 | | |
61 | | /* User Include Files */ |
62 | | #include "ih264e_config.h" |
63 | | #include "ih264_typedefs.h" |
64 | | #include "iv2.h" |
65 | | #include "ive2.h" |
66 | | |
67 | | #include "ih264_debug.h" |
68 | | #include "ih264_macros.h" |
69 | | #include "ih264_defs.h" |
70 | | #include "ih264_mem_fns.h" |
71 | | #include "ih264_padding.h" |
72 | | #include "ih264_structs.h" |
73 | | #include "ih264_trans_quant_itrans_iquant.h" |
74 | | #include "ih264_inter_pred_filters.h" |
75 | | #include "ih264_intra_pred_filters.h" |
76 | | #include "ih264_deblk_edge_filters.h" |
77 | | #include "ih264_common_tables.h" |
78 | | #include "ih264_cabac_tables.h" |
79 | | |
80 | | #include "ime_defs.h" |
81 | | #include "ime_distortion_metrics.h" |
82 | | #include "ime_structs.h" |
83 | | #include "ime_platform_macros.h" |
84 | | |
85 | | #include "irc_cntrl_param.h" |
86 | | #include "irc_frame_info_collector.h" |
87 | | |
88 | | #include "ih264e_error.h" |
89 | | #include "ih264e_defs.h" |
90 | | #include "ih264e_globals.h" |
91 | | #include "ih264e_rate_control.h" |
92 | | #include "ih264e_bitstream.h" |
93 | | #include "ih264e_cabac_structs.h" |
94 | | #include "ih264e_structs.h" |
95 | | #include "ih264e_intra_modes_eval.h" |
96 | | |
97 | | |
98 | | /*****************************************************************************/ |
99 | | /* Function Definitions */ |
100 | | /*****************************************************************************/ |
101 | | |
102 | | /** |
103 | | ****************************************************************************** |
104 | | * |
105 | | * @brief |
106 | | * derivation process for macroblock availability |
107 | | * |
108 | | * @par Description |
109 | | * Calculates the availability of the left, top, topright and topleft macroblocks. |
110 | | * |
111 | | * @param[in] ps_proc_ctxt |
112 | | * pointer to proc context (handle) |
113 | | * |
114 | | * @remarks Based on section 6.4.5 in H264 spec |
115 | | * |
116 | | * @return none |
117 | | * |
118 | | ****************************************************************************** |
119 | | */ |
120 | | void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc) |
121 | 6.00M | { |
122 | 6.00M | UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx; |
123 | 6.00M | UWORD8 *pu1_slice_idx_b; |
124 | 6.00M | UWORD8 *pu1_slice_idx_a; |
125 | 6.00M | UWORD8 *pu1_slice_idx_c; |
126 | 6.00M | UWORD8 *pu1_slice_idx_d; |
127 | 6.00M | block_neighbors_t *ps_ngbr_avbl; |
128 | 6.00M | WORD32 i4_mb_x, i4_mb_y; |
129 | 6.00M | WORD32 i4_wd_mbs; |
130 | | |
131 | 6.00M | i4_mb_x = ps_proc->i4_mb_x; |
132 | 6.00M | i4_mb_y = ps_proc->i4_mb_y; |
133 | | |
134 | 6.00M | i4_wd_mbs = ps_proc->i4_wd_mbs; |
135 | | |
136 | 6.00M | pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x; |
137 | 6.00M | pu1_slice_idx_a = pu1_slice_idx_curr - 1; |
138 | 6.00M | pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs; |
139 | 6.00M | pu1_slice_idx_c = pu1_slice_idx_b + 1; |
140 | 6.00M | pu1_slice_idx_d = pu1_slice_idx_b - 1; |
141 | 6.00M | ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; |
142 | | |
143 | | /**********************************************************************/ |
144 | | /* The macroblock is marked as available, unless one of the following */ |
145 | | /* conditions is true in which case the macroblock shall be marked as */ |
146 | | /* not available. */ |
147 | | /* 1. mbAddr < 0 */ |
148 | | /* 2 mbAddr > CurrMbAddr */ |
149 | | /* 3. the macroblock with address mbAddr belongs to a different slice */ |
150 | | /* than the macroblock with address CurrMbAddr */ |
151 | | /**********************************************************************/ |
152 | | |
153 | | /* left macroblock availability */ |
154 | 6.00M | if (i4_mb_x == 0) |
155 | 236k | { /* macroblocks along first column */ |
156 | 236k | ps_ngbr_avbl->u1_mb_a = 0; |
157 | 236k | } |
158 | 5.77M | else |
159 | 5.77M | { /* macroblocks belong to same slice? */ |
160 | 5.77M | if (*pu1_slice_idx_a != *pu1_slice_idx_curr) |
161 | 0 | ps_ngbr_avbl->u1_mb_a = 0; |
162 | 5.77M | else |
163 | 5.77M | ps_ngbr_avbl->u1_mb_a = 1; |
164 | 5.77M | } |
165 | | |
166 | | /* top macroblock availability */ |
167 | 6.00M | if (i4_mb_y == 0) |
168 | 261k | { /* macroblocks along first row */ |
169 | 261k | ps_ngbr_avbl->u1_mb_b = 0; |
170 | 261k | } |
171 | 5.74M | else |
172 | 5.74M | { /* macroblocks belong to same slice? */ |
173 | 5.74M | if (*pu1_slice_idx_b != *pu1_slice_idx_curr) |
174 | 0 | ps_ngbr_avbl->u1_mb_b = 0; |
175 | 5.74M | else |
176 | 5.74M | ps_ngbr_avbl->u1_mb_b = 1; |
177 | 5.74M | } |
178 | | |
179 | | /* top right macroblock availability */ |
180 | 6.00M | if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0) |
181 | 426k | { /* macroblocks along last column */ |
182 | 426k | ps_ngbr_avbl->u1_mb_c = 0; |
183 | 426k | } |
184 | 5.58M | else |
185 | 5.58M | { /* macroblocks belong to same slice? */ |
186 | 5.58M | if (*pu1_slice_idx_c != *pu1_slice_idx_curr) |
187 | 0 | ps_ngbr_avbl->u1_mb_c = 0; |
188 | 5.58M | else |
189 | 5.58M | ps_ngbr_avbl->u1_mb_c = 1; |
190 | 5.58M | } |
191 | | |
192 | | /* top left macroblock availability */ |
193 | 6.00M | if (i4_mb_x == 0 || i4_mb_y == 0) |
194 | 426k | { /* macroblocks along first column */ |
195 | 426k | ps_ngbr_avbl->u1_mb_d = 0; |
196 | 426k | } |
197 | 5.58M | else |
198 | 5.58M | { /* macroblocks belong to same slice? */ |
199 | 5.58M | if (*pu1_slice_idx_d != *pu1_slice_idx_curr) |
200 | 0 | ps_ngbr_avbl->u1_mb_d = 0; |
201 | 5.58M | else |
202 | 5.58M | ps_ngbr_avbl->u1_mb_d = 1; |
203 | 5.58M | } |
204 | 6.00M | } |
205 | | |
206 | | /** |
207 | | ****************************************************************************** |
208 | | * |
209 | | * @brief |
210 | | * derivation process for subblock/partition availability |
211 | | * |
212 | | * @par Description |
213 | | * Calculates the availability of the left, top, topright and topleft subblock |
214 | | * or partitions. |
215 | | * |
216 | | * @param[in] ps_proc_ctxt |
217 | | * pointer to macroblock context (handle) |
218 | | * |
219 | | * @param[in] i1_pel_pos_x |
220 | | * column position of the pel wrt the current block |
221 | | * |
222 | | * @param[in] i1_pel_pos_y |
223 | | * row position of the pel in wrt current block |
224 | | * |
225 | | * @remarks Assumptions: before calling this function it is assumed that |
226 | | * the neighbor availability of the current macroblock is already derived. |
227 | | * Based on table 6-3 of H264 specification |
228 | | * |
229 | | * @return availability status (yes or no) |
230 | | * |
231 | | ****************************************************************************** |
232 | | */ |
233 | | UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, |
234 | | WORD8 i1_pel_pos_x, |
235 | | WORD8 i1_pel_pos_y) |
236 | 0 | { |
237 | 0 | UWORD8 u1_neighbor_avail=0; |
238 | | |
239 | | /**********************************************************************/ |
240 | | /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */ |
241 | | /* various columns of a macroblock */ |
242 | | /* */ |
243 | | /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */ |
244 | | /* various rows of a macroblock */ |
245 | | /* */ |
246 | | /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */ |
247 | | /* outside the bound of an mb ie., represents its neighbors. */ |
248 | | /**********************************************************************/ |
249 | 0 | if (i1_pel_pos_x < 0) |
250 | 0 | { /* column(-1) */ |
251 | 0 | if (i1_pel_pos_y < 0) |
252 | 0 | { /* row(-1) */ |
253 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */ |
254 | 0 | } |
255 | 0 | else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) |
256 | 0 | { /* all rows of a macroblock */ |
257 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */ |
258 | 0 | } |
259 | 0 | else /* if (i1_pel_pos_y >= 16) */ |
260 | 0 | { /* rows(+16) */ |
261 | 0 | u1_neighbor_avail = 0; /* current mb bottom left availability */ |
262 | 0 | } |
263 | 0 | } |
264 | 0 | else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16) |
265 | 0 | { /* all columns of a macroblock */ |
266 | 0 | if (i1_pel_pos_y < 0) |
267 | 0 | { /* row(-1) */ |
268 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */ |
269 | 0 | } |
270 | 0 | else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) |
271 | 0 | { /* all rows of a macroblock */ |
272 | 0 | u1_neighbor_avail = 1; /* current mb availability */ |
273 | | /* availability of the partition is dependent on the position of the partition inside the mb */ |
274 | | /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */ |
275 | 0 | } |
276 | 0 | else /* if (i1_pel_pos_y >= 16) */ |
277 | 0 | { /* rows(+16) */ |
278 | 0 | u1_neighbor_avail = 0; /* current mb bottom availability */ |
279 | 0 | } |
280 | 0 | } |
281 | 0 | else if (i1_pel_pos_x >= 16) |
282 | 0 | { /* column(+16) */ |
283 | 0 | if (i1_pel_pos_y < 0) |
284 | 0 | { /* row(-1) */ |
285 | 0 | u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */ |
286 | 0 | } |
287 | 0 | else /* if (i1_pel_pos_y >= 0) */ |
288 | 0 | { /* all other rows */ |
289 | 0 | u1_neighbor_avail = 0; /* current mb right & bottom right availability */ |
290 | 0 | } |
291 | 0 | } |
292 | |
|
293 | 0 | return u1_neighbor_avail; |
294 | 0 | } |
295 | | |
296 | | /** |
297 | | ****************************************************************************** |
298 | | * |
299 | | * @brief |
300 | | * evaluate best intra 16x16 mode (rate distortion opt off) |
301 | | * |
302 | | * @par Description |
303 | | * This function evaluates all the possible intra 16x16 modes and finds the mode |
304 | | * that best represents the macro-block (least distortion) and occupies fewer |
305 | | * bits in the bit-stream. |
306 | | * |
307 | | * @param[in] ps_proc_ctxt |
308 | | * pointer to process context (handle) |
309 | | * |
310 | | * @remarks |
311 | | * Ideally the cost of encoding a macroblock is calculated as |
312 | | * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the |
313 | | * input block and the reconstructed block and rate is the number of bits taken |
314 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
315 | | * exactly point to the total number of bits it takes, rather it points to header |
316 | | * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits |
317 | | * and residual bits fall in to texture bits the number of bits taken to encoding |
318 | | * mbtype is considered as rate, we compute cost. Further we will approximate |
319 | | * the distortion as the deviation b/w input and the predicted block as opposed |
320 | | * to input and reconstructed block. |
321 | | * |
322 | | * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock, |
323 | | * the SAD and cost are one and the same. |
324 | | * |
325 | | * @return none |
326 | | * |
327 | | ****************************************************************************** |
328 | | */ |
329 | | void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) |
330 | 4.00M | { |
331 | | /* Codec Context */ |
332 | 4.00M | codec_t *ps_codec = ps_proc->ps_codec; |
333 | | |
334 | | /* SAD(distortion metric) of an 8x8 block */ |
335 | 4.00M | WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX; |
336 | | |
337 | | /* lambda */ |
338 | 4.00M | UWORD32 u4_lambda = ps_proc->u4_lambda; |
339 | | |
340 | | /* cost = distortion + lambda*rate */ |
341 | 4.00M | WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX; |
342 | | |
343 | | /* intra mode */ |
344 | 4.00M | UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16; |
345 | | |
346 | | /* neighbor pels for intra prediction */ |
347 | 4.00M | UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels; |
348 | | |
349 | | /* neighbor availability */ |
350 | 4.00M | WORD32 i4_ngbr_avbl; |
351 | | |
352 | | /* pointer to src macro block */ |
353 | 4.00M | UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; |
354 | 4.00M | UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; |
355 | | |
356 | | /* pointer to prediction macro block */ |
357 | 4.00M | UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16; |
358 | 4.00M | UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane; |
359 | | |
360 | | /* strides */ |
361 | 4.00M | WORD32 i4_src_strd = ps_proc->i4_src_strd; |
362 | 4.00M | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
363 | 4.00M | WORD32 i4_rec_strd = ps_proc->i4_rec_strd; |
364 | | |
365 | | /* pointer to neighbors left, top, topleft */ |
366 | 4.00M | UWORD8 *pu1_mb_a = pu1_ref_mb - 1; |
367 | 4.00M | UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd; |
368 | 4.00M | UWORD8 *pu1_mb_d = pu1_mb_b - 1; |
369 | 4.00M | UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; |
370 | | |
371 | | /* valid intra modes map */ |
372 | 4.00M | UWORD32 u4_valid_intra_modes; |
373 | | |
374 | | /* lut for valid intra modes */ |
375 | 4.00M | const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15}; |
376 | | |
377 | | /* temp var */ |
378 | 4.00M | UWORD32 i, u4_enable_fast_sad = 0, offset = 0; |
379 | 4.00M | mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
380 | 4.00M | UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; |
381 | | |
382 | | /* init temp var */ |
383 | 4.00M | if (ps_proc->i4_slice_type != ISLICE) |
384 | 113k | { |
385 | | /* Offset for MBtype */ |
386 | 113k | offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23; |
387 | 113k | u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad; |
388 | 113k | } |
389 | | |
390 | | /* locating neighbors that are available for prediction */ |
391 | | |
392 | | /* gather prediction pels from the neighbors, if particular set is not available |
393 | | * it is set to zero*/ |
394 | | /* left pels */ |
395 | 4.00M | u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) |
396 | 3.90M | && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); |
397 | 4.00M | if (u1_mb_a) |
398 | 3.88M | { |
399 | 65.4M | for(i = 0; i < 16; i++) |
400 | 61.6M | pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd]; |
401 | 3.88M | } |
402 | 125k | else |
403 | 125k | { |
404 | 125k | ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE); |
405 | 125k | } |
406 | | /* top pels */ |
407 | 4.00M | u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) |
408 | 3.88M | && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); |
409 | 4.00M | if (u1_mb_b) |
410 | 3.86M | { |
411 | 3.86M | ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16); |
412 | 3.86M | } |
413 | 142k | else |
414 | 142k | { |
415 | 142k | ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE); |
416 | 142k | } |
417 | | /* topleft pels */ |
418 | 4.00M | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) |
419 | 3.81M | && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); |
420 | 4.00M | if (u1_mb_d) |
421 | 3.80M | { |
422 | 3.80M | pu1_ngbr_pels_i16[16] = *pu1_mb_d; |
423 | 3.80M | } |
424 | 201k | else |
425 | 201k | { |
426 | 201k | pu1_ngbr_pels_i16[16] = 0; |
427 | 201k | } |
428 | | |
429 | 4.00M | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); |
430 | 4.00M | ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl; |
431 | | |
432 | | /* set valid intra modes for evaluation */ |
433 | 4.00M | u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; |
434 | | |
435 | 4.00M | if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST || |
436 | 3.72M | ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) |
437 | 362k | u4_valid_intra_modes &= ~(1 << PLANE_I16x16); |
438 | | |
439 | | /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */ |
440 | 4.00M | ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16, |
441 | 4.00M | i4_src_strd, i4_pred_strd, |
442 | 4.00M | i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least, |
443 | 4.00M | u4_valid_intra_modes); |
444 | | |
445 | | /* cost = distortion + lambda*rate */ |
446 | 4.00M | i4_mb_cost_least = i4_mb_distortion_least; |
447 | | |
448 | 4.00M | if (((u4_valid_intra_modes >> 3) & 1) != 0) |
449 | 3.47M | { |
450 | | /* intra prediction for PLANE mode*/ |
451 | 3.47M | (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl); |
452 | | |
453 | | /* evaluate distortion between the actual blk and the estimated blk for the given mode */ |
454 | 3.47M | ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion); |
455 | | |
456 | | /* cost = distortion + lambda*rate */ |
457 | 3.47M | i4_mb_cost = i4_mb_distortion; |
458 | | |
459 | | /* update the least cost information if necessary */ |
460 | 3.47M | if(i4_mb_cost < i4_mb_distortion_least) |
461 | 6.35k | { |
462 | 6.35k | u4_intra_mode = PLANE_I16x16; |
463 | | |
464 | 6.35k | i4_mb_cost_least = i4_mb_cost; |
465 | 6.35k | i4_mb_distortion_least = i4_mb_distortion; |
466 | 6.35k | } |
467 | 3.47M | } |
468 | | |
469 | 4.00M | u4_best_intra_16x16_mode = u4_intra_mode; |
470 | | |
471 | 4.00M | DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode); |
472 | | |
473 | 4.00M | ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode; |
474 | | |
475 | | /* cost = distortion + lambda*rate */ |
476 | 4.00M | i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode]; |
477 | | |
478 | | |
479 | | /* update the type of the mb if necessary */ |
480 | 4.00M | if (i4_mb_cost_least < ps_proc->i4_mb_cost) |
481 | 3.19M | { |
482 | 3.19M | ps_proc->i4_mb_cost = i4_mb_cost_least; |
483 | 3.19M | ps_proc->i4_mb_distortion = i4_mb_distortion_least; |
484 | 3.19M | ps_proc->u4_mb_type = I16x16; |
485 | 3.19M | } |
486 | 4.00M | if (i4_mb_cost_least < ps_proc->i4_mb_intra_cost) |
487 | 3.25M | { |
488 | 3.25M | ps_proc->i4_mb_intra_cost = i4_mb_cost_least; |
489 | 3.25M | } |
490 | | |
491 | 4.00M | return ; |
492 | 4.00M | } |
493 | | |
494 | | |
495 | | /** |
496 | | ****************************************************************************** |
497 | | * |
498 | | * @brief |
499 | | * evaluate best intra 8x8 mode (rate distortion opt on) |
500 | | * |
501 | | * @par Description |
502 | | * This function evaluates all the possible intra 8x8 modes and finds the mode |
503 | | * that best represents the macro-block (least distortion) and occupies fewer |
504 | | * bits in the bit-stream. |
505 | | * |
506 | | * @param[in] ps_proc_ctxt |
507 | | * pointer to proc ctxt |
508 | | * |
509 | | * @remarks Ideally the cost of encoding a macroblock is calculated as |
510 | | * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the |
511 | | * input block and the reconstructed block and rate is the number of bits taken |
512 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
513 | | * exactly point to the total number of bits it takes, rather it points to header |
514 | | * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits |
515 | | * and residual bits fall in to texture bits the number of bits taken to encoding |
516 | | * mbtype is considered as rate, we compute cost. Further we will approximate |
517 | | * the distortion as the deviation b/w input and the predicted block as opposed |
518 | | * to input and reconstructed block. |
519 | | * |
520 | | * NOTE: TODO: This function needs to be tested |
521 | | * |
522 | | * @return none |
523 | | * |
524 | | ****************************************************************************** |
525 | | */ |
526 | | void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) |
527 | 0 | { |
528 | | /* Codec Context */ |
529 | 0 | codec_t *ps_codec = ps_proc->ps_codec; |
530 | | |
531 | | /* SAD(distortion metric) of an 4x4 block */ |
532 | 0 | WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; |
533 | | |
534 | | /* lambda */ |
535 | 0 | UWORD32 u4_lambda = ps_proc->u4_lambda; |
536 | | |
537 | | /* cost = distortion + lambda*rate */ |
538 | 0 | WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda; |
539 | | |
540 | | /* cost due to mbtype */ |
541 | 0 | UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; |
542 | | |
543 | | /* intra mode */ |
544 | 0 | UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode; |
545 | | |
546 | | /* neighbor pels for intra prediction */ |
547 | 0 | UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels; |
548 | | |
549 | | /* pointer to curr partition */ |
550 | 0 | UWORD8 *pu1_mb_curr; |
551 | | |
552 | | /* pointer to prediction macro block */ |
553 | 0 | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
554 | | |
555 | | /* strides */ |
556 | 0 | WORD32 i4_src_strd = ps_proc->i4_src_strd; |
557 | 0 | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
558 | | |
559 | | /* neighbors left, top, top right, top left */ |
560 | 0 | UWORD8 *pu1_mb_a; |
561 | 0 | UWORD8 *pu1_mb_b; |
562 | 0 | UWORD8 *pu1_mb_d; |
563 | | |
564 | | /* neighbor availability */ |
565 | 0 | WORD32 i4_ngbr_avbl; |
566 | 0 | block_neighbors_t s_ngbr_avbl; |
567 | | |
568 | | /* temp vars */ |
569 | 0 | UWORD32 b8, u4_pix_x, u4_pix_y; |
570 | 0 | UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; |
571 | 0 | block_neighbors_t s_ngbr_avbl_MB; |
572 | | |
573 | | /* ngbr mb syntax information */ |
574 | 0 | UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); |
575 | 0 | mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
576 | 0 | mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
577 | | |
578 | | /* valid intra modes map */ |
579 | 0 | UWORD32 u4_valid_intra_modes; |
580 | |
|
581 | 0 | if (ps_proc->ps_ngbr_avbl->u1_mb_c) |
582 | 0 | { |
583 | 0 | ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1); |
584 | 0 | } |
585 | | /* left pels */ |
586 | 0 | s_ngbr_avbl_MB.u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) |
587 | 0 | && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); |
588 | | |
589 | | /* top pels */ |
590 | 0 | s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) |
591 | 0 | && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); |
592 | | |
593 | | /* topleft pels */ |
594 | 0 | s_ngbr_avbl_MB.u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) |
595 | 0 | && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); |
596 | | |
597 | | /* top right */ |
598 | 0 | s_ngbr_avbl_MB.u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) |
599 | 0 | && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1)); |
600 | | |
601 | |
|
602 | 0 | for (b8 = 0; b8 < 4; b8++) |
603 | 0 | { |
604 | 0 | u4_pix_x = (b8 & 0x01) << 3; |
605 | 0 | u4_pix_y = (b8 >> 1) << 3; |
606 | |
|
607 | 0 | pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); |
608 | | /* when rdopt is off, we use the input as reference for constructing prediction buffer */ |
609 | | /* as opposed to using the recon pels. (open loop intra prediction) */ |
610 | 0 | pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */ |
611 | 0 | pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */ |
612 | 0 | pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */ |
613 | | |
614 | | /* locating neighbors that are available for prediction */ |
615 | | /* TODO : update the neighbor availability information basing on constrained intra pred information */ |
616 | | /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ |
617 | | /* basing on neighbors available and hence evade the computation of neighbor availability totally. */ |
618 | 0 | s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */ |
619 | 0 | s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */ |
620 | 0 | s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */ |
621 | 0 | s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */ |
622 | | |
623 | | /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */ |
624 | 0 | i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) + |
625 | 0 | (s_ngbr_avbl.u1_mb_a << 4); |
626 | | /* if top partition is available and top right is not available for intra prediction, then */ |
627 | | /* padd top right samples using top sample and make top right also available */ |
628 | | /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ |
629 | 0 | ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl; |
630 | | |
631 | |
|
632 | 0 | ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8, |
633 | 0 | i4_src_strd, i4_ngbr_avbl); |
634 | |
|
635 | 0 | i4_partition_cost_least = INT_MAX; |
636 | | /* set valid intra modes for evaluation */ |
637 | 0 | u4_valid_intra_modes = 0x1ff; |
638 | |
|
639 | 0 | if (!s_ngbr_avbl.u1_mb_b) |
640 | 0 | { |
641 | 0 | u4_valid_intra_modes &= ~(1 << VERT_I4x4); |
642 | 0 | u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4); |
643 | 0 | u4_valid_intra_modes &= ~(1 << VERT_L_I4x4); |
644 | 0 | } |
645 | 0 | if (!s_ngbr_avbl.u1_mb_a) |
646 | 0 | { |
647 | 0 | u4_valid_intra_modes &= ~(1 << HORZ_I4x4); |
648 | 0 | u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4); |
649 | 0 | } |
650 | 0 | if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d) |
651 | 0 | { |
652 | 0 | u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4); |
653 | 0 | u4_valid_intra_modes &= ~(1 << VERT_R_I4x4); |
654 | 0 | u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4); |
655 | 0 | } |
656 | | |
657 | | /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */ |
658 | 0 | if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) |
659 | 0 | { |
660 | 0 | u4_estimated_intra_8x8_mode = DC_I8x8; |
661 | 0 | } |
662 | 0 | else |
663 | 0 | { |
664 | 0 | UWORD32 u4_left_intra_8x8_mode = DC_I8x8; |
665 | 0 | UWORD32 u4_top_intra_8x8_mode = DC_I8x8; |
666 | |
|
667 | 0 | if (u4_pix_x == 0) |
668 | 0 | { |
669 | 0 | if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) |
670 | 0 | { |
671 | 0 | u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1]; |
672 | 0 | } |
673 | 0 | else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) |
674 | 0 | { |
675 | 0 | u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2]; |
676 | 0 | } |
677 | 0 | } |
678 | 0 | else |
679 | 0 | { |
680 | 0 | u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1]; |
681 | 0 | } |
682 | |
|
683 | 0 | if (u4_pix_y == 0) |
684 | 0 | { |
685 | 0 | if (ps_top_mb_syn_ele->u2_mb_type == I8x8) |
686 | 0 | { |
687 | 0 | u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2]; |
688 | 0 | } |
689 | 0 | else if (ps_top_mb_syn_ele->u2_mb_type == I4x4) |
690 | 0 | { |
691 | 0 | u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2]; |
692 | 0 | } |
693 | 0 | } |
694 | 0 | else |
695 | 0 | { |
696 | 0 | u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2]; |
697 | 0 | } |
698 | |
|
699 | 0 | u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode); |
700 | 0 | } |
701 | | |
702 | | /* perform intra mode 8x8 evaluation */ |
703 | 0 | for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1) |
704 | 0 | { |
705 | 0 | if ( (u4_valid_intra_modes & 1) == 0) |
706 | 0 | continue; |
707 | | |
708 | | /* intra prediction */ |
709 | 0 | (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl); |
710 | | |
711 | | /* evaluate distortion between the actual blk and the estimated blk for the given mode */ |
712 | 0 | ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion); |
713 | |
|
714 | 0 | i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits); |
715 | | |
716 | | /* update the least cost information if necessary */ |
717 | 0 | if (i4_partition_cost < i4_partition_cost_least) |
718 | 0 | { |
719 | 0 | i4_partition_cost_least = i4_partition_cost; |
720 | 0 | i4_partition_distortion_least = i4_partition_distortion; |
721 | 0 | u4_best_intra_8x8_mode = u4_intra_mode; |
722 | 0 | } |
723 | 0 | } |
724 | | /* macroblock distortion */ |
725 | 0 | i4_total_cost += i4_partition_cost_least; |
726 | 0 | i4_total_distortion += i4_partition_distortion_least; |
727 | | /* mb partition mode */ |
728 | 0 | ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode; |
729 | |
|
730 | 0 | } |
731 | | |
732 | | /* update the type of the mb if necessary */ |
733 | 0 | if (i4_total_cost < ps_proc->i4_mb_cost) |
734 | 0 | { |
735 | 0 | ps_proc->i4_mb_cost = i4_total_cost; |
736 | 0 | ps_proc->i4_mb_distortion = i4_total_distortion; |
737 | 0 | ps_proc->u4_mb_type = I8x8; |
738 | 0 | } |
739 | 0 | if (i4_total_cost < ps_proc->i4_mb_intra_cost) |
740 | 0 | { |
741 | 0 | ps_proc->i4_mb_intra_cost = i4_total_cost; |
742 | 0 | } |
743 | |
|
744 | 0 | return ; |
745 | 0 | } |
746 | | |
747 | | |
748 | | /** |
749 | | ****************************************************************************** |
750 | | * |
751 | | * @brief |
752 | | * evaluate best intra 4x4 mode (rate distortion opt off) |
753 | | * |
754 | | * @par Description |
755 | | * This function evaluates all the possible intra 4x4 modes and finds the mode |
756 | | * that best represents the macro-block (least distortion) and occupies fewer |
757 | | * bits in the bit-stream. |
758 | | * |
759 | | * @param[in] ps_proc |
760 | | * pointer to the process context |
761 | | * |
762 | | * @remarks |
763 | | * Ideally the cost of encoding a macroblock is calculated as |
764 | | * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the |
765 | | * input block and the reconstructed block and rate is the number of bits taken |
766 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
767 | | * exactly point to the total number of bits it takes, rather it points to header |
768 | | * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits |
769 | | * and residual bits fall in to texture bits the number of bits taken to encoding |
770 | | * mbtype is considered as rate, we compute cost. Further we will approximate |
771 | | * the distortion as the deviation b/w input and the predicted block as opposed |
772 | | * to input and reconstructed block. |
773 | | * |
774 | | * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, |
775 | | * 24*lambda is added to the SAD before comparison with the best SAD for |
776 | | * inter prediction. This is an empirical value to prevent using too many intra |
777 | | * blocks. |
778 | | * |
779 | | * @return none |
780 | | * |
781 | | ****************************************************************************** |
782 | | */ |
783 | | void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) |
784 | 2.86M | { /* Evaluates the sixteen 4x4 luma blocks of the current MB using source pels as neighbors, sums their costs and records I4x4 in ps_proc when that total beats ps_proc->i4_mb_cost. */ |
785 | | /* Codec Context (provides the pf_ih264e_evaluate_intra_4x4_modes function pointer called below) */ |
786 | 2.86M | codec_t *ps_codec = ps_proc->ps_codec; |
787 | | |
788 | | /* SAD (distortion metric) of a 4x4 block for its best mode, and its running total over the macroblock */ |
789 | 2.86M | WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; |
790 | | |
791 | | /* lambda */ |
792 | 2.86M | UWORD32 u4_lambda = ps_proc->u4_lambda; |
793 | | |
794 | | /* cost = distortion + lambda*rate; the macroblock total starts at (24 + 1) * lambda, where 24 * lambda is the intra 4x4 bias from the JVT-O079 note in the function header (the extra lambda is presumably the mb type cost - TODO confirm) */ |
795 | 2.86M | WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; |
796 | | |
797 | | /* mode signalling cost: one bit when the chosen mode equals the estimated mode, four bits otherwise */ |
798 | 2.86M | UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; |
799 | | |
800 | | /* best intra mode found for the current 4x4 block, and the mode estimated from its left and top neighbors */ |
801 | 2.86M | UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; |
802 | | |
803 | | /* neighbor pels for intra prediction: [0..3] left (bottom to top), [4] top left, [5..8] top, [9..12] top right */ |
804 | 2.86M | UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; |
805 | | |
806 | | /* pointer to curr partition */ |
807 | 2.86M | UWORD8 *pu1_mb_curr; |
808 | | |
809 | | /* pointer to prediction macro block */ |
810 | 2.86M | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
811 | | |
812 | | /* strides */ |
813 | 2.86M | WORD32 i4_src_strd = ps_proc->i4_src_strd; |
814 | 2.86M | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
815 | | |
816 | | /* pointers to the left (a), top (b), top right (c) and top left (d) neighbor pels of the current 4x4 block */ |
817 | 2.86M | UWORD8 *pu1_mb_a; |
818 | 2.86M | UWORD8 *pu1_mb_b; |
819 | 2.86M | UWORD8 *pu1_mb_c; |
820 | 2.86M | UWORD8 *pu1_mb_d; |
821 | | |
822 | | /* neighbor availability: bit 0 = left (a), bit 1 = top left (d), bit 2 = top (b), bit 3 = top right (c) */ |
823 | 2.86M | WORD32 i4_ngbr_avbl; |
824 | 2.86M | block_neighbors_t s_ngbr_avbl; |
825 | | |
826 | | /* temp vars */ |
827 | 2.86M | UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; |
828 | | |
829 | | /* maps the raster index of a 4x4 block inside the MB (4 * row + column) to its (b8 << 2) + b4 index */ |
830 | 2.86M | const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; |
831 | | |
832 | | /* ngbr sub mb modes: the top row mode buffer is indexed with i4_mb_x << 4; the top right syntax element defaults to the top MB until top right availability is checked below */ |
833 | 2.86M | UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); |
834 | 2.86M | mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
835 | 2.86M | mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
836 | | |
837 | | /* valid intra modes map: bitmask of the modes to evaluate, looked up with the low three availability bits (left, top left, top) */ |
838 | 2.86M | UWORD32 u4_valid_intra_modes; |
839 | 2.86M | UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; |
840 | | |
841 | 2.86M | UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; |
842 | 2.86M | UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d; |
843 | | |
844 | 2.86M | if (ps_proc->ps_ngbr_avbl->u1_mb_c) /* top right MB is available: point at its syntax element */ |
845 | 2.75M | { |
846 | 2.75M | ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x + 1; |
847 | 2.75M | } |
848 | | /* left pels: usable when the MB is available and, if constrained intra pred is on, intra coded (same rule for the three flags that follow) */ |
849 | 2.86M | u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) |
850 | 2.79M | && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); |
851 | | |
852 | | /* top pels */ |
853 | 2.86M | u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) |
854 | 2.80M | && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); |
855 | | |
856 | | /* topleft pels */ |
857 | 2.86M | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) |
858 | 2.75M | && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); |
859 | | |
860 | | /* top right */ |
861 | 2.86M | u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) |
862 | 2.75M | && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1)); |
863 | | |
864 | 2.86M | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); |
865 | 2.86M | memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); /* copy the 16 per sub-block availability values selected by the MB level availability */ |
866 | | |
867 | 14.2M | for (b8 = 0; b8 < 4; b8++) |
868 | 11.4M | { |
869 | 11.4M | u4_blk_x = (b8 & 0x01) << 3; /* pel offset of the 8x8 block inside the MB */ |
870 | 11.4M | u4_blk_y = (b8 >> 1) << 3; |
871 | 56.0M | for (b4 = 0; b4 < 4; b4++) |
872 | 44.6M | { |
873 | 44.6M | u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); /* pel offset of the 4x4 block inside the MB */ |
874 | 44.6M | u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); |
875 | | |
876 | 44.6M | pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); |
877 | | /* when rdopt is off, we use the input as reference for constructing prediction buffer */ |
878 | | /* as opposed to using the recon pels. (open loop intra prediction) */ |
879 | 44.6M | pu1_mb_a = pu1_mb_curr - 1; /* pointer to left neighbor pels */ |
880 | 44.6M | pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top neighbor pels */ |
881 | 44.6M | pu1_mb_c = pu1_mb_b + 4; /* pointer to top right neighbor pels */ |
882 | 44.6M | pu1_mb_d = pu1_mb_b - 1; /* pointer to top left neighbor pel */ |
883 | | |
884 | | /* locating neighbors that are available for prediction */ |
885 | | /* TODO : update the neighbor availability information based on constrained intra pred information */ |
886 | | /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ |
887 | | /* based on neighbors available and hence evade the computation of neighbor availability totally. */ |
888 | | |
889 | 44.6M | i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; |
890 | 44.6M | s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); |
891 | 44.6M | s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; |
892 | 44.6M | s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; |
893 | 44.6M | s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; |
894 | | /* set valid intra modes for evaluation */ |
895 | 44.6M | u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; |
896 | | |
897 | | /* if top partition is available and top right is not available for intra prediction, then */ |
898 | | /* pad top right samples using top sample and make top right also available */ |
899 | | /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ |
900 | | |
901 | | /* gather prediction pels from the neighbors; unavailable left, top and top left pels are written as 0 */ |
902 | 44.6M | if (s_ngbr_avbl.u1_mb_a) |
903 | 44.4M | { |
904 | 219M | for(i = 0; i < 4; i++) |
905 | 175M | pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd]; /* left column is stored bottom to top */ |
906 | 44.4M | } |
907 | 177k | else |
908 | 177k | { |
909 | 177k | memset(pu1_ngbr_pels_i4, 0, 4); |
910 | 177k | } |
911 | | |
912 | 44.6M | if (s_ngbr_avbl.u1_mb_b) |
913 | 44.1M | { |
914 | 44.1M | memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); |
915 | 44.1M | } |
916 | 549k | else |
917 | 549k | { |
918 | 549k | memset(pu1_ngbr_pels_i4 + 5, 0, 4); |
919 | 549k | } |
920 | | |
921 | 44.6M | if (s_ngbr_avbl.u1_mb_d) |
922 | 43.7M | pu1_ngbr_pels_i4[4] = *pu1_mb_d; |
923 | 906k | else |
924 | 906k | pu1_ngbr_pels_i4[4] = 0; |
925 | | |
926 | 44.6M | if (s_ngbr_avbl.u1_mb_c) |
927 | 30.3M | { |
928 | 30.3M | memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); |
929 | 30.3M | } |
930 | 14.2M | else if (s_ngbr_avbl.u1_mb_b) |
931 | 14.2M | { |
932 | 14.2M | memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); /* replicate the last top pel into the four top right positions */ |
933 | 14.2M | s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; |
934 | 14.2M | } |
935 | | |
936 | 44.6M | i4_partition_cost_least = INT_MAX; |
937 | | |
938 | | /* predict the intra 4x4 mode for the current partition (for evaluating cost): DC when left or top is unavailable, otherwise the minimum of the left and top block modes */ |
939 | 44.6M | if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) |
940 | 509k | { |
941 | 509k | u4_estimated_intra_4x4_mode = DC_I4x4; |
942 | 509k | } |
943 | 44.1M | else |
944 | 44.1M | { |
945 | 44.1M | UWORD32 u4_left_intra_4x4_mode = DC_I4x4; /* stays DC when the neighboring MB is neither I4x4 nor I8x8 */ |
946 | 44.1M | UWORD32 u4_top_intra_4x4_mode = DC_I4x4; |
947 | | |
948 | 44.1M | if (u4_pix_x == 0) /* left block belongs to the left MB */ |
949 | 11.0M | { |
950 | 11.0M | if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) |
951 | 7.78M | { |
952 | 7.78M | u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; /* raster index 3 + u4_pix_y is column 3 of the same block row */ |
953 | 7.78M | } |
954 | 3.21M | else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) |
955 | 0 | { |
956 | 0 | u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1]; |
957 | 0 | } |
958 | 11.0M | } |
959 | 33.1M | else /* left block belongs to the current MB and has already been decided */ |
960 | 33.1M | { |
961 | 33.1M | u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; |
962 | 33.1M | } |
963 | | |
964 | 44.1M | if (u4_pix_y == 0) /* top block belongs to the top MB */ |
965 | 10.9M | { |
966 | 10.9M | if (ps_top_mb_syn_ele->u2_mb_type == I4x4) |
967 | 7.76M | { |
968 | 7.76M | u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; /* raster index 12 + column is the bottom block row */ |
969 | 7.76M | } |
970 | 3.20M | else if (ps_top_mb_syn_ele->u2_mb_type == I8x8) |
971 | 0 | { |
972 | 0 | u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; |
973 | 0 | } |
974 | 10.9M | } |
975 | 33.1M | else /* top block belongs to the current MB: raster index minus 4 is one block row up */ |
976 | 33.1M | { |
977 | 33.1M | u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; |
978 | 33.1M | } |
979 | | |
980 | 44.1M | u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); |
981 | 44.1M | } |
982 | | |
983 | 44.6M | ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode; |
984 | | |
985 | | /* mode evaluation and prediction: writes the best mode and its cost through the two pointer arguments */ |
986 | 44.6M | ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr, |
987 | 44.6M | pu1_ngbr_pels_i4, |
988 | 44.6M | pu1_pred_mb, i4_src_strd, |
989 | 44.6M | i4_pred_strd, i4_ngbr_avbl, |
990 | 44.6M | &u4_best_intra_4x4_mode, |
991 | 44.6M | &i4_partition_cost_least, |
992 | 44.6M | u4_valid_intra_modes, |
993 | 44.6M | u4_lambda, |
994 | 44.6M | u4_estimated_intra_4x4_mode); |
995 | | |
996 | | |
997 | 44.6M | i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits); /* subtract the mode signalling cost; assumes the returned cost includes it - TODO confirm */ |
998 | | |
999 | 44.6M | DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode); |
1000 | | /* macroblock distortion */ |
1001 | 44.6M | i4_total_distortion += i4_partition_distortion_least; |
1002 | 44.6M | i4_total_cost += i4_partition_cost_least; |
1003 | | /* mb partition mode */ |
1004 | 44.6M | ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; |
1005 | 44.6M | } |
1006 | 11.4M | } |
1007 | | |
1008 | | /* update the type of the mb if necessary */ |
1009 | 2.86M | if (i4_total_cost < ps_proc->i4_mb_cost) |
1010 | 2.87M | { |
1011 | 2.87M | ps_proc->i4_mb_cost = i4_total_cost; |
1012 | 2.87M | ps_proc->i4_mb_distortion = i4_total_distortion; |
1013 | 2.87M | ps_proc->u4_mb_type = I4x4; |
1014 | 2.87M | } |
1015 | 2.86M | if (i4_total_cost < ps_proc->i4_mb_intra_cost) /* track the least intra cost separately from the overall mb cost */ |
1016 | 2.86M | { |
1017 | 2.86M | ps_proc->i4_mb_intra_cost = i4_total_cost; |
1018 | 2.86M | } |
1019 | | |
1020 | 2.86M | return ; |
1021 | 2.86M | } |
1022 | | |
1023 | | /** |
1024 | | ****************************************************************************** |
1025 | | * |
1026 | | * @brief evaluate best intra 4x4 mode (rate distortion opt on) |
1027 | | * |
1028 | | * @par Description |
1029 | | * This function evaluates all the possible intra 4x4 modes and finds the mode |
1030 | | * that best represents the macro-block (least distortion) and occupies fewer |
1031 | | * bits in the bit-stream. |
1032 | | * |
1033 | | * @param[in] ps_proc |
1034 | | * pointer to the process context |
1035 | | * |
1036 | | * @remarks |
1037 | | * Ideally the cost of encoding a macroblock is calculated as |
1038 | | * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the |
1039 | | * input block and the reconstructed block and rate is the number of bits taken |
1040 | | * to place the macroblock in the bit-stream. In this routine the rate does not |
1041 | | * exactly point to the total number of bits it takes, rather it points to header |
1042 | | * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits |
1043 | | * and residual bits fall in to texture bits the number of bits taken to encoding |
1044 | | * mbtype is considered as rate, we compute cost. Further we will approximate |
1045 | | * the distortion as the deviation b/w input and the predicted block as opposed |
1046 | | * to input and reconstructed block. |
1047 | | * |
1048 | | * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, |
1049 | | * 24*lambda is added to the SAD before comparison with the best SAD for |
1050 | | * inter prediction. This is an empirical value to prevent using too many intra |
1051 | | * blocks. |
1052 | | * |
1053 | | * @return none |
1054 | | * |
1055 | | ****************************************************************************** |
1056 | | */ |
1057 | | void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc) |
1058 | 1.69M | { /* Evaluates the sixteen 4x4 luma blocks of the current MB using reconstructed pels as neighbors, transforms and reconstructs each block after its mode is chosen, and records I4x4 in ps_proc when the total cost beats ps_proc->i4_mb_cost. */ |
1059 | | /* Codec Context (provides the mode evaluation, transform and reconstruction function pointers called below) */ |
1060 | 1.69M | codec_t *ps_codec = ps_proc->ps_codec; |
1061 | | |
1062 | | /* SAD (distortion metric) of a 4x4 block for its best mode, and its running total over the macroblock */ |
1063 | 1.69M | WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; |
1064 | | |
1065 | | /* lambda */ |
1066 | 1.69M | UWORD32 u4_lambda = ps_proc->u4_lambda; |
1067 | | |
1068 | | /* cost = distortion + lambda*rate; the macroblock total starts at (24 + 1) * lambda, where 24 * lambda is the intra 4x4 bias from the JVT-O079 note in the function header (the extra lambda is presumably the mb type cost - TODO confirm) */ |
1069 | 1.69M | WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; |
1070 | | |
1071 | | /* mode signalling cost: one bit when the chosen mode equals the estimated mode, four bits otherwise */ |
1072 | 1.69M | UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; |
1073 | | |
1074 | | /* best intra mode found for the current 4x4 block, and the mode estimated from its left and top neighbors */ |
1075 | 1.69M | UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; |
1076 | | |
1077 | | /* neighbor pels for intra prediction: [0..3] left (bottom to top), [4] top left, [5..8] top, [9..12] top right */ |
1078 | 1.69M | UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; |
1079 | | |
1080 | | /* pointer to curr partition, to the buffers the left and top neighbors are read from, and to the reconstructed 4x4 block */ |
1081 | 1.69M | UWORD8 *pu1_mb_curr; |
1082 | 1.69M | UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top; |
1083 | 1.69M | UWORD8 *pu1_ref_mb_intra_4x4; |
1084 | | |
1085 | | /* pointer to residual macro block */ |
1086 | 1.69M | WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; |
1087 | | |
1088 | | /* pointer to prediction macro block */ |
1089 | 1.69M | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
1090 | | |
1091 | | /* strides; the left and top reference strides depend on which buffer the neighbor is read from */ |
1092 | 1.69M | WORD32 i4_src_strd = ps_proc->i4_src_strd; |
1093 | 1.69M | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
1094 | 1.69M | WORD32 i4_ref_strd_left, i4_ref_strd_top; |
1095 | | |
1096 | | /* pointers to the left (a), top (b), top right (c) and top left (d) neighbor pels of the current 4x4 block */ |
1097 | 1.69M | UWORD8 *pu1_mb_a; |
1098 | 1.69M | UWORD8 *pu1_mb_b; |
1099 | 1.69M | UWORD8 *pu1_mb_c; |
1100 | 1.69M | UWORD8 *pu1_mb_d; |
1101 | | |
1102 | | /* number of non zero coeffs, accessed one byte per 4x4 block */ |
1103 | 1.69M | UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4; |
1104 | | |
1105 | | /* quantization parameters */ |
1106 | 1.69M | quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; |
1107 | | |
1108 | | /* neighbor availability: bit 0 = left (a), bit 1 = top left (d), bit 2 = top (b), bit 3 = top right (c) */ |
1109 | 1.69M | WORD32 i4_ngbr_avbl; |
1110 | 1.69M | block_neighbors_t s_ngbr_avbl; |
1111 | | |
1112 | | /* temp vars */ |
1113 | 1.69M | UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; |
1114 | | |
1115 | | /* maps the raster index of a 4x4 block inside the MB (4 * row + column) to its (b8 << 2) + b4 index */ |
1116 | 1.69M | const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; |
1117 | | |
1118 | | /* ngbr sub mb modes: the top row mode buffer is indexed with i4_mb_x << 4; the top right syntax element defaults to the top MB until top right availability is checked below */ |
1119 | 1.69M | UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); |
1120 | 1.69M | mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
1121 | 1.69M | mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
1122 | | |
1123 | | /* valid intra modes map: bitmask of the modes to evaluate, looked up with the low three availability bits (left, top left, top) */ |
1124 | 1.69M | UWORD32 u4_valid_intra_modes; |
1125 | 1.69M | UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; |
1126 | | |
1127 | | /* Dummy variable for 4x4 trans function */ |
1128 | 1.69M | WORD16 i2_dc_dummy; |
1129 | 1.69M | UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d; |
1130 | 1.69M | UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; |
1131 | | |
1132 | | /* compute ngbr availability for sub blks; when the top right MB is available, point at its syntax element first */ |
1133 | 1.69M | if (ps_proc->ps_ngbr_avbl->u1_mb_c) |
1134 | 1.62M | { |
1135 | 1.62M | ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1); |
1136 | 1.62M | } |
1137 | | |
1138 | | /* left pels: usable when the MB is available and, if constrained intra pred is on, intra coded (same rule for the three flags that follow) */ |
1139 | 1.69M | u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) |
1140 | 1.66M | && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); |
1141 | | |
1142 | | /* top pels */ |
1143 | 1.69M | u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) |
1144 | 1.65M | && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); |
1145 | | |
1146 | | /* topleft pels */ |
1147 | 1.69M | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) |
1148 | 1.62M | && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); |
1149 | | |
1150 | | /* top right pels */ |
1151 | 1.69M | u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) |
1152 | 1.62M | && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1)); |
1153 | | |
1154 | 1.69M | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); |
1155 | 1.69M | memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); /* copy the 16 per sub-block availability values selected by the MB level availability */ |
1156 | | |
1157 | 8.48M | for (b8 = 0; b8 < 4; b8++) |
1158 | 6.78M | { |
1159 | 6.78M | u4_blk_x = (b8 & 0x01) << 3; /* pel offset of the 8x8 block inside the MB */ |
1160 | 6.78M | u4_blk_y = (b8 >> 1) << 3; |
1161 | 33.5M | for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) /* each 4x4 block advances one nnz byte and MB_SIZE residual entries */ |
1162 | 26.8M | { |
1163 | 26.8M | u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); /* pel offset of the 4x4 block inside the MB */ |
1164 | 26.8M | u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); |
1165 | | |
1166 | 26.8M | pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd); |
1167 | 26.8M | pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); |
1168 | 26.8M | if (u4_pix_x == 0) /* left neighbor is outside the MB: read it from the recon buffer */ |
1169 | 6.78M | { |
1170 | 6.78M | i4_ref_strd_left = ps_proc->i4_rec_strd; |
1171 | 6.78M | pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left); |
1172 | 6.78M | } |
1173 | 20.0M | else /* left neighbor is inside the MB: read it from the intra 4x4 reference buffer written at the end of each iteration */ |
1174 | 20.0M | { |
1175 | 20.0M | i4_ref_strd_left = i4_pred_strd; |
1176 | 20.0M | pu1_mb_ref_left = pu1_ref_mb_intra_4x4; |
1177 | 20.0M | } |
1178 | 26.8M | if (u4_pix_y == 0) /* top neighbor is outside the MB: read it from the recon buffer */ |
1179 | 6.77M | { |
1180 | 6.77M | i4_ref_strd_top = ps_proc->i4_rec_strd; |
1181 | 6.77M | pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top); |
1182 | 6.77M | } |
1183 | 20.0M | else /* top neighbor is inside the MB: read it from the intra 4x4 reference buffer */ |
1184 | 20.0M | { |
1185 | 20.0M | i4_ref_strd_top = i4_pred_strd; |
1186 | 20.0M | pu1_mb_ref_top = pu1_ref_mb_intra_4x4; |
1187 | 20.0M | } |
1188 | | |
1189 | 26.8M | pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left neighbor pels */ |
1190 | 26.8M | pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top neighbor pels */ |
1191 | 26.8M | pu1_mb_c = pu1_mb_b + 4; /* pointer to top right neighbor pels */ |
1192 | 26.8M | if (u4_pix_y == 0) |
1193 | 6.77M | pu1_mb_d = pu1_mb_b - 1; |
1194 | 20.0M | else |
1195 | 20.0M | pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left neighbor pel: one row above the left pels in the left reference buffer */ |
1196 | | |
1197 | | /* locating neighbors that are available for prediction */ |
1198 | | /* TODO : update the neighbor availability information based on constrained intra pred information */ |
1199 | | /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ |
1200 | | /* based on neighbors available and hence evade the computation of neighbor availability totally. */ |
1201 | | |
1202 | 26.8M | i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; |
1203 | 26.8M | s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); |
1204 | 26.8M | s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; |
1205 | 26.8M | s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; |
1206 | 26.8M | s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; |
1207 | | /* set valid intra modes for evaluation */ |
1208 | 26.8M | u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; |
1209 | | |
1210 | | /* if top partition is available and top right is not available for intra prediction, then */ |
1211 | | /* pad top right samples using top sample and make top right also available */ |
1212 | | /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ |
1213 | | |
1214 | | /* gather prediction pels from the neighbors; unavailable left, top and top left pels are written as 0 */ |
1215 | 26.8M | if (s_ngbr_avbl.u1_mb_a) |
1216 | 26.6M | { |
1217 | 132M | for(i = 0; i < 4; i++) |
1218 | 106M | pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left]; /* left column is stored bottom to top */ |
1219 | 26.6M | } |
1220 | 113k | else |
1221 | 113k | { |
1222 | 113k | memset(pu1_ngbr_pels_i4,0,4); |
1223 | 113k | } |
1224 | 26.8M | if(s_ngbr_avbl.u1_mb_b) |
1225 | 26.5M | { |
1226 | 26.5M | memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); |
1227 | 26.5M | } |
1228 | 226k | else |
1229 | 226k | { |
1230 | 226k | memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4); |
1231 | 226k | } |
1232 | 26.8M | if (s_ngbr_avbl.u1_mb_d) |
1233 | 26.4M | pu1_ngbr_pels_i4[4] = *pu1_mb_d; |
1234 | 388k | else |
1235 | 388k | pu1_ngbr_pels_i4[4] = 0; |
1236 | 26.8M | if (s_ngbr_avbl.u1_mb_c) |
1237 | 18.2M | { |
1238 | 18.2M | memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); |
1239 | 18.2M | } |
1240 | 8.55M | else if (s_ngbr_avbl.u1_mb_b) |
1241 | 8.48M | { |
1242 | 8.48M | memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); /* replicate the last top pel into the four top right positions */ |
1243 | 8.48M | s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; |
1244 | 8.48M | } |
1245 | | |
1246 | 26.8M | i4_partition_cost_least = INT_MAX; |
1247 | | |
1248 | | /* predict the intra 4x4 mode for the current partition (for evaluating cost): DC when left or top is unavailable, otherwise the minimum of the left and top block modes */ |
1249 | 26.8M | if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) |
1250 | 345k | { |
1251 | 345k | u4_estimated_intra_4x4_mode = DC_I4x4; |
1252 | 345k | } |
1253 | 26.4M | else |
1254 | 26.4M | { |
1255 | 26.4M | UWORD32 u4_left_intra_4x4_mode = DC_I4x4; /* stays DC when the neighboring MB is neither I4x4 nor I8x8 */ |
1256 | 26.4M | UWORD32 u4_top_intra_4x4_mode = DC_I4x4; |
1257 | | |
1258 | 26.4M | if (u4_pix_x == 0) /* left block belongs to the left MB */ |
1259 | 6.58M | { |
1260 | 6.58M | if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) |
1261 | 1.73M | { |
1262 | 1.73M | u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; /* raster index 3 + u4_pix_y is column 3 of the same block row */ |
1263 | 1.73M | } |
1264 | 4.85M | else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) |
1265 | 0 | { |
1266 | 0 | u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1]; |
1267 | 0 | } |
1268 | 6.58M | } |
1269 | 19.8M | else /* left block belongs to the current MB and has already been decided */ |
1270 | 19.8M | { |
1271 | 19.8M | u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; |
1272 | 19.8M | } |
1273 | | |
1274 | 26.4M | if (u4_pix_y == 0) /* top block belongs to the top MB */ |
1275 | 6.54M | { |
1276 | 6.54M | if (ps_top_mb_syn_ele->u2_mb_type == I4x4) |
1277 | 1.71M | { |
1278 | 1.71M | u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; /* raster index 12 + column is the bottom block row */ |
1279 | 1.71M | } |
1280 | 4.83M | else if (ps_top_mb_syn_ele->u2_mb_type == I8x8) |
1281 | 0 | { |
1282 | 0 | u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; |
1283 | 0 | } |
1284 | 6.54M | } |
1285 | 19.9M | else /* top block belongs to the current MB: raster index minus 4 is one block row up */ |
1286 | 19.9M | { |
1287 | 19.9M | u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; |
1288 | 19.9M | } |
1289 | | |
1290 | 26.4M | u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); |
1291 | 26.4M | } |
1292 | | |
1293 | 26.8M | ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode; |
1294 | | |
1295 | | /* mode evaluation and prediction: writes the best mode and its cost through the two pointer arguments */ |
1296 | 26.8M | ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr, |
1297 | 26.8M | pu1_ngbr_pels_i4, |
1298 | 26.8M | pu1_pred_mb, i4_src_strd, |
1299 | 26.8M | i4_pred_strd, i4_ngbr_avbl, |
1300 | 26.8M | &u4_best_intra_4x4_mode, |
1301 | 26.8M | &i4_partition_cost_least, |
1302 | 26.8M | u4_valid_intra_modes, |
1303 | 26.8M | u4_lambda, |
1304 | 26.8M | u4_estimated_intra_4x4_mode); |
1305 | | |
1306 | | |
1307 | 26.8M | i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits); /* subtract the mode signalling cost; assumes the returned cost includes it - TODO confirm */ |
1308 | | |
1309 | 26.8M | DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode); |
1310 | | |
1311 | | /* macroblock distortion */ |
1312 | 26.8M | i4_total_distortion += i4_partition_distortion_least; |
1313 | 26.8M | i4_total_cost += i4_partition_cost_least; |
1314 | | |
1315 | | /* mb partition mode */ |
1316 | 26.8M | ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; |
1317 | | |
1318 | | |
1319 | | /********************************************************/ |
1320 | | /* error estimation, */ |
1321 | | /* transform */ |
1322 | | /* quantization */ |
1323 | | /********************************************************/ |
1324 | 26.8M | ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb, |
1325 | 26.8M | pi2_res_mb, i4_src_strd, |
1326 | 26.8M | i4_pred_strd, |
1327 | | /* No op stride, this implies a buff of length 1x16 */ |
1328 | 26.8M | ps_qp_params->pu2_scale_mat, |
1329 | 26.8M | ps_qp_params->pu2_thres_mat, |
1330 | 26.8M | ps_qp_params->u1_qbits, |
1331 | 26.8M | ps_qp_params->u4_dead_zone, |
1332 | 26.8M | pu1_nnz, &i2_dc_dummy); |
1333 | | |
1334 | | /********************************************************/ |
1335 | | /* inverse error estimation, */ |
1336 | | /* inverse transform */ |
1337 | | /* inverse quantization; the output goes to pu1_ref_mb_intra_4x4, which later blocks of this MB read as neighbor pels */ |
1338 | | /********************************************************/ |
1339 | 26.8M | ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb, |
1340 | 26.8M | pu1_ref_mb_intra_4x4, |
1341 | 26.8M | i4_pred_strd, i4_pred_strd, |
1342 | 26.8M | ps_qp_params->pu2_iscale_mat, |
1343 | 26.8M | ps_qp_params->pu2_weigh_mat, |
1344 | 26.8M | ps_qp_params->u1_qp_div, |
1345 | 26.8M | ps_proc->pv_scratch_buff, 0, |
1346 | 26.8M | NULL); |
1347 | 26.8M | } |
1348 | 6.78M | } |
1349 | | |
1350 | | /* update the type of the mb if necessary */ |
1351 | 1.69M | if (i4_total_cost < ps_proc->i4_mb_cost) |
1352 | 1.67M | { |
1353 | 1.67M | ps_proc->i4_mb_cost = i4_total_cost; |
1354 | 1.67M | ps_proc->i4_mb_distortion = i4_total_distortion; |
1355 | 1.67M | ps_proc->u4_mb_type = I4x4; |
1356 | 1.67M | } |
1357 | 1.69M | if (i4_total_cost < ps_proc->i4_mb_intra_cost) /* track the least intra cost separately from the overall mb cost */ |
1358 | 1.70M | { |
1359 | 1.70M | ps_proc->i4_mb_intra_cost = i4_total_cost; |
1360 | 1.70M | } |
1361 | | |
1362 | 1.69M | return ; |
1363 | 1.69M | } |
1364 | | |
1365 | | /** |
1366 | | ****************************************************************************** |
1367 | | * |
1368 | | * @brief |
1369 | | * evaluate best chroma intra 8x8 mode (rate distortion opt off) |
1370 | | * |
1371 | | * @par Description |
1372 | | * This function evaluates all the possible chroma intra 8x8 modes and finds |
1373 | | * the mode that best represents the macroblock (least distortion) and occupies |
1374 | | * fewer bits in the bitstream. |
1375 | | * |
1376 | | * @param[in] ps_proc |
1377 | | * pointer to the process context (handle) |
1378 | | * |
1379 | | * @remarks |
1380 | | * For chroma best intra pred mode is calculated based only on SAD |
1381 | | * |
1382 | | * @returns none |
1383 | | * |
1384 | | ****************************************************************************** |
1385 | | */ |
1386 | | void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) |
1387 | 5.67M | { |
1388 | | /* Codec Context */ |
1389 | 5.67M | codec_t *ps_codec = ps_proc->ps_codec; |
1390 | | |
1391 | | /* SAD(distortion metric) of an 8x8 block */ |
1392 | 5.67M | WORD32 i4_mb_distortion, i4_chroma_mb_distortion; |
1393 | | |
1394 | | /* intra mode */ |
1395 | 5.67M | UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8; |
1396 | | |
1397 | | /* neighbor pels for intra prediction */ |
1398 | 5.67M | UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels; |
1399 | | |
1400 | | /* pointer to curr macro block */ |
1401 | 5.67M | UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; |
1402 | 5.67M | UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma; |
1403 | | |
1404 | | /* pointer to prediction macro block */ |
1405 | 5.67M | UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; |
1406 | 5.67M | UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane; |
1407 | | |
1408 | | /* strides */ |
1409 | 5.67M | WORD32 i4_src_strd_c = ps_proc->i4_src_chroma_strd; |
1410 | 5.67M | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
1411 | 5.67M | WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd; |
1412 | | |
1413 | | /* neighbors left, top, top left */ |
1414 | 5.67M | UWORD8 *pu1_mb_a = pu1_ref_mb - 2; |
1415 | 5.67M | UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c; |
1416 | 5.67M | UWORD8 *pu1_mb_d = pu1_mb_b - 2; |
1417 | | |
1418 | | /* neighbor availability */ |
1419 | 5.67M | const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15}; |
1420 | 5.67M | WORD32 i4_ngbr_avbl; |
1421 | | |
1422 | | /* valid intra modes map */ |
1423 | 5.67M | UWORD32 u4_valid_intra_modes; |
1424 | 5.67M | mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; |
1425 | | |
1426 | | /* temp var */ |
1427 | 5.67M | UWORD8 i; |
1428 | 5.67M | UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; |
1429 | 5.67M | UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; |
1430 | | |
1431 | | /* locating neighbors that are available for prediction */ |
1432 | | /* gather prediction pels from the neighbors */ |
1433 | | /* left pels */ |
1434 | 5.67M | u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) |
1435 | 5.55M | && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); |
1436 | 5.67M | if (u1_mb_a) |
1437 | 5.54M | { |
1438 | 49.6M | for (i = 0; i < 16; i += 2) |
1439 | 44.1M | { |
1440 | 44.1M | pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c]; |
1441 | 44.1M | pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1]; |
1442 | 44.1M | } |
1443 | 5.54M | } |
1444 | 134k | else |
1445 | 134k | { |
1446 | 134k | ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE); |
1447 | 134k | } |
1448 | | |
1449 | | /* top pels */ |
1450 | 5.67M | u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) |
1451 | 5.56M | && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); |
1452 | 5.67M | if (u1_mb_b) |
1453 | 5.55M | { |
1454 | 5.55M | ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16); |
1455 | 5.55M | } |
1456 | 124k | else |
1457 | 124k | { |
1458 | 124k | ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE); |
1459 | 124k | } |
1460 | | |
1461 | | /* top left pels */ |
1462 | 5.67M | u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) |
1463 | 5.46M | && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); |
1464 | 5.67M | if (u1_mb_d) |
1465 | 5.45M | { |
1466 | 5.45M | pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d; |
1467 | 5.45M | pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1); |
1468 | 5.45M | } |
1469 | 5.67M | i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); |
1470 | 5.67M | ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl; |
1471 | | |
1472 | 5.67M | u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; |
1473 | | |
1474 | 5.67M | if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST || |
1475 | 5.29M | ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) |
1476 | 662k | u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8); |
1477 | | |
1478 | 5.67M | i4_chroma_mb_distortion = INT_MAX; |
1479 | | |
1480 | | /* perform intra mode chroma 8x8 evaluation */ |
1481 | | /* intra prediction */ |
1482 | 5.67M | ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb, |
1483 | 5.67M | pu1_ngbr_pels_c_i8x8, |
1484 | 5.67M | pu1_pred_mb, |
1485 | 5.67M | i4_src_strd_c, |
1486 | 5.67M | i4_pred_strd, |
1487 | 5.67M | i4_ngbr_avbl, |
1488 | 5.67M | &u4_best_chroma_intra_8x8_mode, |
1489 | 5.67M | &i4_chroma_mb_distortion, |
1490 | 5.67M | u4_valid_intra_modes); |
1491 | | |
1492 | 5.67M | if (u4_valid_intra_modes & 8)/* if Chroma PLANE is valid*/ |
1493 | 4.82M | { |
1494 | 4.82M | (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl); |
1495 | | |
1496 | | /* evaluate distortion(sad) */ |
1497 | 4.82M | ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion); |
1498 | | |
1499 | | /* update the least distortion information if necessary */ |
1500 | 4.82M | if(i4_mb_distortion < i4_chroma_mb_distortion) |
1501 | 6.27k | { |
1502 | 6.27k | i4_chroma_mb_distortion = i4_mb_distortion; |
1503 | 6.27k | u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8; |
1504 | 6.27k | } |
1505 | 4.82M | } |
1506 | | |
1507 | 5.67M | DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode); |
1508 | | |
1509 | 5.67M | ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode; |
1510 | | |
1511 | 5.67M | return ; |
1512 | 5.67M | } |
1513 | | |
1514 | | |
1515 | | /** |
1516 | | ****************************************************************************** |
1517 | | * |
1518 | | * @brief |
1519 | | * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the |
1520 | | * prediction. |
1521 | | * |
1522 | | * @par Description |
1523 | | * This function evaluates first three 16x16 modes and compute corresponding sad |
1524 | | * and return the buffer predicted with best mode. |
1525 | | * |
1526 | | * @param[in] pu1_src |
1527 | | * UWORD8 pointer to the source |
1528 | | * |
1529 | | * @param[in] pu1_ngbr_pels_i16 |
1530 | | * UWORD8 pointer to neighbouring pels |
1531 | | * |
1532 | | * @param[out] pu1_dst |
1533 | | * UWORD8 pointer to the destination |
1534 | | * |
1535 | | * @param[in] src_strd |
1536 | | * integer source stride |
1537 | | * |
1538 | | * @param[in] dst_strd |
1539 | | * integer destination stride |
1540 | | * |
1541 | | * @param[in] u4_n_avblty |
1542 | | * availability of neighbouring pixels |
1543 | | * |
1544 | | * @param[out] u4_intra_mode |
1545 | | * Pointer to the variable in which best mode is returned |
1546 | | * |
1547 | | * @param[in,out] pu4_sadmin |
1548 | | * Pointer to the current minimum sad; updated only when a lower sad is found |
1549 | | * |
1550 | | * @param[in] u4_valid_intra_modes |
1551 | | * Says what all modes are valid |
1552 | | * |
1553 | | * @returns none |
1554 | | * |
1555 | | ****************************************************************************** |
1556 | | */ |
1557 | | void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src, |
1558 | | UWORD8 *pu1_ngbr_pels_i16, |
1559 | | UWORD8 *pu1_dst, |
1560 | | UWORD32 src_strd, |
1561 | | UWORD32 dst_strd, |
1562 | | WORD32 u4_n_avblty, |
1563 | | UWORD32 *u4_intra_mode, |
1564 | | WORD32 *pu4_sadmin, |
1565 | | UWORD32 u4_valid_intra_modes) |
1566 | 6.27M | { |
1567 | 6.27M | UWORD8 *pu1_neighbour; |
1568 | 6.27M | UWORD8 *pu1_src_temp = pu1_src; |
1569 | 6.27M | UWORD8 left = 0, top = 0; |
1570 | 6.27M | WORD32 u4_dcval = 0; |
1571 | 6.27M | WORD32 i, j; |
1572 | 6.27M | WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, |
1573 | 6.27M | i4_min_sad = INT_MAX; |
1574 | 6.27M | UWORD8 val; |
1575 | | |
1576 | 6.27M | left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); |
1577 | 6.27M | top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; |
1578 | | |
1579 | | /* left available */ |
1580 | 6.27M | if (left) |
1581 | 5.43M | { |
1582 | 5.43M | i4_sad_horz = 0; |
1583 | | |
1584 | 89.9M | for (i = 0; i < 16; i++) |
1585 | 84.5M | { |
1586 | 84.5M | val = pu1_ngbr_pels_i16[15 - i]; |
1587 | | |
1588 | 84.5M | u4_dcval += val; |
1589 | | |
1590 | 1.43G | for (j = 0; j < 16; j++) |
1591 | 1.35G | { |
1592 | 1.35G | i4_sad_horz += ABS(val - pu1_src_temp[j]); |
1593 | 1.35G | } |
1594 | | |
1595 | 84.5M | pu1_src_temp += src_strd; |
1596 | 84.5M | } |
1597 | 5.43M | u4_dcval += 8; |
1598 | 5.43M | } |
1599 | | |
1600 | 6.27M | pu1_src_temp = pu1_src; |
1601 | | /* top available */ |
1602 | 6.27M | if (top) |
1603 | 5.26M | { |
1604 | 5.26M | i4_sad_vert = 0; |
1605 | | |
1606 | 87.7M | for (i = 0; i < 16; i++) |
1607 | 82.4M | { |
1608 | 82.4M | u4_dcval += pu1_ngbr_pels_i16[17 + i]; |
1609 | | |
1610 | 1.39G | for (j = 0; j < 16; j++) |
1611 | 1.31G | { |
1612 | 1.31G | i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]); |
1613 | 1.31G | } |
1614 | 82.4M | pu1_src_temp += src_strd; |
1615 | | |
1616 | 82.4M | } |
1617 | 5.26M | u4_dcval += 8; |
1618 | 5.26M | } |
1619 | | |
1620 | 6.27M | u4_dcval = (u4_dcval) >> (3 + left + top); |
1621 | | |
1622 | 6.27M | pu1_src_temp = pu1_src; |
1623 | | |
1624 | | /* none available */ |
1625 | 6.27M | u4_dcval += (left == 0) * (top == 0) * 128; |
1626 | | |
1627 | 6.27M | i4_sad_dc = 0; |
1628 | | |
1629 | 104M | for (i = 0; i < 16; i++) |
1630 | 98.5M | { |
1631 | 1.67G | for (j = 0; j < 16; j++) |
1632 | 1.57G | { |
1633 | 1.57G | i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]); |
1634 | 1.57G | } |
1635 | 98.5M | pu1_src_temp += src_strd; |
1636 | 98.5M | } |
1637 | | |
1638 | 6.27M | if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */ |
1639 | 0 | i4_sad_dc = INT_MAX; |
1640 | | |
1641 | 6.27M | if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */ |
1642 | 1.01M | i4_sad_vert = INT_MAX; |
1643 | | |
1644 | 6.27M | if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */ |
1645 | 836k | i4_sad_horz = INT_MAX; |
1646 | | |
1647 | 6.27M | i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); |
1648 | | |
1649 | | /* Finding Minimum sad and doing corresponding prediction */ |
1650 | 6.27M | if (i4_min_sad < *pu4_sadmin) |
1651 | 6.29M | { |
1652 | 6.29M | *pu4_sadmin = i4_min_sad; |
1653 | 6.29M | if (i4_min_sad == i4_sad_vert) |
1654 | 5.07M | { |
1655 | 5.07M | *u4_intra_mode = VERT_I16x16; |
1656 | 5.07M | pu1_neighbour = pu1_ngbr_pels_i16 + 17; |
1657 | 85.8M | for (j = 0; j < 16; j++) |
1658 | 80.7M | { |
1659 | 80.7M | memcpy(pu1_dst, pu1_neighbour, MB_SIZE); |
1660 | 80.7M | pu1_dst += dst_strd; |
1661 | 80.7M | } |
1662 | 5.07M | } |
1663 | 1.22M | else if (i4_min_sad == i4_sad_horz) |
1664 | 419k | { |
1665 | 419k | *u4_intra_mode = HORZ_I16x16; |
1666 | 7.12M | for (j = 0; j < 16; j++) |
1667 | 6.70M | { |
1668 | 6.70M | val = pu1_ngbr_pels_i16[15 - j]; |
1669 | 6.70M | memset(pu1_dst, val, MB_SIZE); |
1670 | 6.70M | pu1_dst += dst_strd; |
1671 | 6.70M | } |
1672 | 419k | } |
1673 | 803k | else |
1674 | 803k | { |
1675 | 803k | *u4_intra_mode = DC_I16x16; |
1676 | 13.5M | for (j = 0; j < 16; j++) |
1677 | 12.7M | { |
1678 | 12.7M | memset(pu1_dst, u4_dcval, MB_SIZE); |
1679 | 12.7M | pu1_dst += dst_strd; |
1680 | 12.7M | } |
1681 | 803k | } |
1682 | 6.29M | } |
1683 | 6.27M | return; |
1684 | 6.27M | } |
1685 | | |
1686 | | /** |
1687 | | ****************************************************************************** |
1688 | | * |
1689 | | * @brief |
1690 | | * Evaluate best intra 4x4 mode and perform prediction. |
1691 | | * |
1692 | | * @par Description |
1693 | | * This function evaluates 4x4 modes and compute corresponding sad |
1694 | | * and return the buffer predicted with best mode. |
1695 | | * |
1696 | | * @param[in] pu1_src |
1697 | | * UWORD8 pointer to the source |
1698 | | * |
1699 | | * @param[in] pu1_ngbr_pels |
1700 | | * UWORD8 pointer to neighbouring pels |
1701 | | * |
1702 | | * @param[out] pu1_dst |
1703 | | * UWORD8 pointer to the destination |
1704 | | * |
1705 | | * @param[in] src_strd |
1706 | | * integer source stride |
1707 | | * |
1708 | | * @param[in] dst_strd |
1709 | | * integer destination stride |
1710 | | * |
1711 | | * @param[in] u4_n_avblty |
1712 | | * availability of neighbouring pixels |
1713 | | * |
1714 | | * @param[out] u4_intra_mode |
1715 | | * Pointer to the variable in which best mode is returned |
1716 | | * |
1717 | | * @param[out] pu4_sadmin |
1718 | | * Pointer to the variable in which minimum cost is returned |
1719 | | * |
1720 | | * @param[in] u4_valid_intra_modes |
1721 | | * Says what all modes are valid |
1722 | | * |
1723 | | * @param[in] u4_lambda |
1724 | | * Lambda value for computing cost from SAD |
1725 | | * |
1726 | | * @param[in] u4_predictd_mode |
1727 | | * Predicted mode for cost computation |
1728 | | * |
1729 | | * @returns none |
1730 | | * |
1731 | | ****************************************************************************** |
1732 | | */ |
1733 | | void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src, |
1734 | | UWORD8 *pu1_ngbr_pels, |
1735 | | UWORD8 *pu1_dst, |
1736 | | UWORD32 src_strd, |
1737 | | UWORD32 dst_strd, |
1738 | | WORD32 u4_n_avblty, |
1739 | | UWORD32 *u4_intra_mode, |
1740 | | WORD32 *pu4_sadmin, |
1741 | | UWORD32 u4_valid_intra_modes, |
1742 | | UWORD32 u4_lambda, |
1743 | | UWORD32 u4_predictd_mode) |
1744 | 10.3M | { |
1745 | 10.3M | UWORD8 *pu1_src_temp = pu1_src; |
1746 | 10.3M | UWORD8 *pu1_pred = pu1_ngbr_pels; |
1747 | 10.3M | UWORD8 left = 0, top = 0; |
1748 | 10.3M | UWORD8 u1_pred_val = 0; |
1749 | 10.3M | UWORD8 u1_pred_vals[4] = {0}; |
1750 | 10.3M | UWORD8 *pu1_pred_val = NULL; |
1751 | | /* To store FILT121 operated values*/ |
1752 | 10.3M | UWORD8 u1_pred_vals_diag_121[15] = {0}; |
1753 | | /* To store FILT11 operated values*/ |
1754 | 10.3M | UWORD8 u1_pred_vals_diag_11[15] = {0}; |
1755 | 10.3M | UWORD8 u1_pred_vals_vert_r[8] = {0}; |
1756 | 10.3M | UWORD8 u1_pred_vals_horz_d[10] = {0}; |
1757 | 10.3M | UWORD8 u1_pred_vals_horz_u[10] = {0}; |
1758 | 10.3M | WORD32 u4_dcval = 0; |
1759 | 10.3M | WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, |
1760 | 10.3M | INT_MAX, INT_MAX, INT_MAX, INT_MAX}; |
1761 | | |
1762 | 10.3M | WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, |
1763 | 10.3M | INT_MAX, INT_MAX, INT_MAX, INT_MAX}; |
1764 | 10.3M | WORD32 i, i4_min_cost = INT_MAX; |
1765 | | |
1766 | 10.3M | left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); |
1767 | 10.3M | top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; |
1768 | | |
1769 | | /* Computing SAD */ |
1770 | | |
1771 | | /* VERT mode valid */ |
1772 | 10.3M | if (u4_valid_intra_modes & 1) |
1773 | 9.56M | { |
1774 | 9.56M | pu1_pred = pu1_ngbr_pels + 5; |
1775 | 9.56M | i4_sad[VERT_I4x4] = 0; |
1776 | 9.56M | i4_cost[VERT_I4x4] = 0; |
1777 | | |
1778 | 9.56M | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1779 | 9.56M | pu1_src_temp += src_strd; |
1780 | 9.56M | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1781 | 9.56M | pu1_src_temp += src_strd; |
1782 | 9.56M | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1783 | 9.56M | pu1_src_temp += src_strd; |
1784 | 9.56M | USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); |
1785 | | |
1786 | 9.56M | i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? |
1787 | 8.20M | u4_lambda : 4 * u4_lambda); |
1788 | 9.56M | } |
1789 | | |
1790 | | /* HORZ mode valid */ |
1791 | 10.3M | if (u4_valid_intra_modes & 2) |
1792 | 9.70M | { |
1793 | 9.70M | i4_sad[HORZ_I4x4] = 0; |
1794 | 9.70M | i4_cost[HORZ_I4x4] =0; |
1795 | 9.70M | pu1_src_temp = pu1_src; |
1796 | | |
1797 | 9.70M | u1_pred_val = pu1_ngbr_pels[3]; |
1798 | | |
1799 | 9.70M | i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) |
1800 | 9.70M | + ABS(pu1_src_temp[1] - u1_pred_val) |
1801 | 9.70M | + ABS(pu1_src_temp[2] - u1_pred_val) |
1802 | 9.70M | + ABS(pu1_src_temp[3] - u1_pred_val); |
1803 | 9.70M | pu1_src_temp += src_strd; |
1804 | | |
1805 | 9.70M | u1_pred_val = pu1_ngbr_pels[2]; |
1806 | | |
1807 | 9.70M | i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) |
1808 | 9.70M | + ABS(pu1_src_temp[1] - u1_pred_val) |
1809 | 9.70M | + ABS(pu1_src_temp[2] - u1_pred_val) |
1810 | 9.70M | + ABS(pu1_src_temp[3] - u1_pred_val); |
1811 | 9.70M | pu1_src_temp += src_strd; |
1812 | | |
1813 | 9.70M | u1_pred_val = pu1_ngbr_pels[1]; |
1814 | | |
1815 | 9.70M | i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) |
1816 | 9.70M | + ABS(pu1_src_temp[1] - u1_pred_val) |
1817 | 9.70M | + ABS(pu1_src_temp[2] - u1_pred_val) |
1818 | 9.70M | + ABS(pu1_src_temp[3] - u1_pred_val); |
1819 | 9.70M | pu1_src_temp += src_strd; |
1820 | | |
1821 | 9.70M | u1_pred_val = pu1_ngbr_pels[0]; |
1822 | | |
1823 | 9.70M | i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) |
1824 | 9.70M | + ABS(pu1_src_temp[1] - u1_pred_val) |
1825 | 9.70M | + ABS(pu1_src_temp[2] - u1_pred_val) |
1826 | 9.70M | + ABS(pu1_src_temp[3] - u1_pred_val); |
1827 | | |
1828 | 9.70M | i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? |
1829 | 6.85M | u4_lambda : 4 * u4_lambda); |
1830 | 9.70M | } |
1831 | | |
1832 | | /* DC mode valid */ |
1833 | 10.3M | if (u4_valid_intra_modes & 4) |
1834 | 10.3M | { |
1835 | 10.3M | i4_sad[DC_I4x4] = 0; |
1836 | 10.3M | i4_cost[DC_I4x4] = 0; |
1837 | 10.3M | pu1_src_temp = pu1_src; |
1838 | | |
1839 | 10.3M | if (left) |
1840 | 9.69M | u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] |
1841 | 9.69M | + pu1_ngbr_pels[3] + 2; |
1842 | 10.3M | if (top) |
1843 | 9.59M | u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] |
1844 | 9.59M | + pu1_ngbr_pels[8] + 2; |
1845 | | |
1846 | 10.3M | u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128; |
1847 | | |
1848 | | /* none available */ |
1849 | 10.3M | memset(u1_pred_vals, u4_dcval, 4); |
1850 | 10.3M | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1851 | 10.3M | pu1_src_temp += src_strd; |
1852 | 10.3M | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1853 | 10.3M | pu1_src_temp += src_strd; |
1854 | 10.3M | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1855 | 10.3M | pu1_src_temp += src_strd; |
1856 | 10.3M | USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); |
1857 | 10.3M | pu1_src_temp += src_strd; |
1858 | | |
1859 | 10.3M | i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? |
1860 | 6.14M | u4_lambda : 4 * u4_lambda); |
1861 | 10.3M | } |
1862 | | |
1863 | | /* if modes other than VERT, HORZ and DC are valid */ |
1864 | 10.3M | if (u4_valid_intra_modes > 7) |
1865 | 10.2M | { |
1866 | 10.2M | pu1_pred = pu1_ngbr_pels; |
1867 | 10.2M | pu1_pred[13] = pu1_pred[14] = pu1_pred[12]; |
1868 | | |
1869 | | /* Performing FILT121 and FILT11 operation for all neighbour values*/ |
1870 | 140M | for (i = 0; i < 13; i++) |
1871 | 130M | { |
1872 | 130M | u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]); |
1873 | 130M | u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]); |
1874 | | |
1875 | 130M | pu1_pred++; |
1876 | 130M | } |
1877 | | |
1878 | 10.2M | if (u4_valid_intra_modes & 8)/* DIAG_DL */ |
1879 | 9.41M | { |
1880 | 9.41M | i4_sad[DIAG_DL_I4x4] = 0; |
1881 | 9.41M | i4_cost[DIAG_DL_I4x4] = 0; |
1882 | 9.41M | pu1_src_temp = pu1_src; |
1883 | 9.41M | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
1884 | | |
1885 | 9.41M | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]); |
1886 | 9.41M | pu1_src_temp += src_strd; |
1887 | 9.41M | USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]); |
1888 | 9.41M | pu1_src_temp += src_strd; |
1889 | 9.41M | USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]); |
1890 | 9.41M | pu1_src_temp += src_strd; |
1891 | 9.41M | USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]); |
1892 | 9.41M | pu1_src_temp += src_strd; |
1893 | 9.41M | i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ? |
1894 | 9.40M | u4_lambda : 4 * u4_lambda); |
1895 | 9.41M | } |
1896 | | |
1897 | 10.2M | if (u4_valid_intra_modes & 16)/* DIAG_DR */ |
1898 | 9.01M | { |
1899 | 9.01M | i4_sad[DIAG_DR_I4x4] = 0; |
1900 | 9.01M | i4_cost[DIAG_DR_I4x4] = 0; |
1901 | 9.01M | pu1_src_temp = pu1_src; |
1902 | 9.01M | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
1903 | | |
1904 | 9.01M | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]); |
1905 | 9.01M | pu1_src_temp += src_strd; |
1906 | 9.01M | USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]); |
1907 | 9.01M | pu1_src_temp += src_strd; |
1908 | 9.01M | USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]); |
1909 | 9.01M | pu1_src_temp += src_strd; |
1910 | 9.01M | USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]); |
1911 | 9.01M | pu1_src_temp += src_strd; |
1912 | 9.01M | i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ? |
1913 | 9.01M | u4_lambda : 4 * u4_lambda); |
1914 | | |
1915 | 9.01M | } |
1916 | | |
1917 | 10.2M | if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/ |
1918 | 9.02M | { |
1919 | 9.02M | i4_sad[VERT_R_I4x4] = 0; |
1920 | | |
1921 | 9.02M | pu1_src_temp = pu1_src; |
1922 | 9.02M | u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2]; |
1923 | 9.02M | memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3); |
1924 | 9.02M | u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1]; |
1925 | 9.02M | memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3); |
1926 | | |
1927 | 9.02M | pu1_pred_val = u1_pred_vals_diag_11 + 4; |
1928 | 9.02M | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); |
1929 | 9.02M | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
1930 | 9.02M | pu1_src_temp += src_strd; |
1931 | 9.02M | USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); |
1932 | 9.02M | pu1_src_temp += src_strd; |
1933 | 9.02M | USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]); |
1934 | 9.02M | pu1_src_temp += src_strd; |
1935 | 9.02M | USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), |
1936 | 9.02M | i4_sad[VERT_R_I4x4]); |
1937 | | |
1938 | 9.02M | i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ? |
1939 | 9.01M | u4_lambda : 4 * u4_lambda); |
1940 | 9.02M | } |
1941 | | |
1942 | 10.2M | if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/ |
1943 | 9.00M | { |
1944 | 9.00M | i4_sad[HORZ_D_I4x4] = 0; |
1945 | | |
1946 | 9.00M | pu1_src_temp = pu1_src; |
1947 | 9.00M | u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3]; |
1948 | 9.00M | memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3); |
1949 | 9.00M | u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0]; |
1950 | 9.00M | u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0]; |
1951 | 9.00M | u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1]; |
1952 | 9.00M | u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1]; |
1953 | 9.00M | u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2]; |
1954 | 9.00M | u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2]; |
1955 | | |
1956 | 9.00M | pu1_pred_val = u1_pred_vals_horz_d; |
1957 | 9.00M | USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]); |
1958 | 9.00M | pu1_src_temp += src_strd; |
1959 | 9.00M | USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]); |
1960 | 9.00M | pu1_src_temp += src_strd; |
1961 | 9.00M | USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]); |
1962 | 9.00M | pu1_src_temp += src_strd; |
1963 | 9.00M | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]); |
1964 | | |
1965 | 9.00M | i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ? |
1966 | 9.00M | u4_lambda : 4 * u4_lambda); |
1967 | 9.00M | } |
1968 | | |
1969 | 10.2M | if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/ |
1970 | 9.56M | { |
1971 | 9.56M | i4_sad[VERT_L_I4x4] = 0; |
1972 | 9.56M | pu1_src_temp = pu1_src; |
1973 | 9.56M | pu1_pred_val = u1_pred_vals_diag_11 + 5; |
1974 | 9.56M | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1975 | 9.56M | pu1_src_temp += src_strd; |
1976 | 9.56M | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
1977 | 9.56M | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1978 | 9.56M | pu1_src_temp += src_strd; |
1979 | 9.56M | pu1_pred_val = u1_pred_vals_diag_11 + 6; |
1980 | 9.56M | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1981 | 9.56M | pu1_src_temp += src_strd; |
1982 | 9.56M | pu1_pred_val = u1_pred_vals_diag_121 + 6; |
1983 | 9.56M | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); |
1984 | | |
1985 | 9.56M | i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ? |
1986 | 9.56M | u4_lambda : 4 * u4_lambda); |
1987 | 9.56M | } |
1988 | | |
1989 | 10.2M | if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/ |
1990 | 9.72M | { |
1991 | 9.72M | i4_sad[HORZ_U_I4x4] = 0; |
1992 | 9.72M | pu1_src_temp = pu1_src; |
1993 | 9.72M | u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2]; |
1994 | 9.72M | u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1]; |
1995 | 9.72M | u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1]; |
1996 | 9.72M | u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0]; |
1997 | 9.72M | u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0]; |
1998 | 9.72M | u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]); |
1999 | | |
2000 | 9.72M | memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4); |
2001 | | |
2002 | 9.72M | pu1_pred_val = u1_pred_vals_horz_u; |
2003 | 9.72M | USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]); |
2004 | 9.72M | pu1_src_temp += src_strd; |
2005 | 9.72M | USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]); |
2006 | 9.72M | pu1_src_temp += src_strd; |
2007 | 9.72M | USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]); |
2008 | 9.72M | pu1_src_temp += src_strd; |
2009 | 9.72M | USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]); |
2010 | | |
2011 | 9.72M | i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ? |
2012 | 9.72M | u4_lambda : 4 * u4_lambda); |
2013 | 9.72M | } |
2014 | | |
2015 | 10.2M | i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), |
2016 | 10.2M | MIN3(i4_cost[3], i4_cost[4], i4_cost[5]), |
2017 | 10.2M | MIN3(i4_cost[6], i4_cost[7], i4_cost[8])); |
2018 | | |
2019 | 10.2M | } |
2020 | 127k | else |
2021 | 127k | { |
2022 | | /* Only first three modes valid */ |
2023 | 127k | i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]); |
2024 | 127k | } |
2025 | | |
2026 | 10.3M | *pu4_sadmin = i4_min_cost; |
2027 | | |
2028 | 10.3M | if (i4_min_cost == i4_cost[0]) |
2029 | 1.30M | { |
2030 | 1.30M | *u4_intra_mode = VERT_I4x4; |
2031 | 1.30M | pu1_pred_val = pu1_ngbr_pels + 5; |
2032 | 1.30M | memcpy(pu1_dst, (pu1_pred_val), 4); |
2033 | 1.30M | pu1_dst += dst_strd; |
2034 | 1.30M | memcpy(pu1_dst, (pu1_pred_val), 4); |
2035 | 1.30M | pu1_dst += dst_strd; |
2036 | 1.30M | memcpy(pu1_dst, (pu1_pred_val), 4); |
2037 | 1.30M | pu1_dst += dst_strd; |
2038 | 1.30M | memcpy(pu1_dst, (pu1_pred_val), 4); |
2039 | 1.30M | } |
2040 | 9.05M | else if (i4_min_cost == i4_cost[1]) |
2041 | 2.98M | { |
2042 | 2.98M | *u4_intra_mode = HORZ_I4x4; |
2043 | 2.98M | memset(pu1_dst, pu1_ngbr_pels[3], 4); |
2044 | 2.98M | pu1_dst += dst_strd; |
2045 | 2.98M | memset(pu1_dst, pu1_ngbr_pels[2], 4); |
2046 | 2.98M | pu1_dst += dst_strd; |
2047 | 2.98M | memset(pu1_dst, pu1_ngbr_pels[1], 4); |
2048 | 2.98M | pu1_dst += dst_strd; |
2049 | 2.98M | memset(pu1_dst, pu1_ngbr_pels[0], 4); |
2050 | 2.98M | } |
2051 | 6.07M | else if (i4_min_cost == i4_cost[2]) |
2052 | 5.99M | { |
2053 | 5.99M | *u4_intra_mode = DC_I4x4; |
2054 | 5.99M | memset(pu1_dst, u4_dcval, 4); |
2055 | 5.99M | pu1_dst += dst_strd; |
2056 | 5.99M | memset(pu1_dst, u4_dcval, 4); |
2057 | 5.99M | pu1_dst += dst_strd; |
2058 | 5.99M | memset(pu1_dst, u4_dcval, 4); |
2059 | 5.99M | pu1_dst += dst_strd; |
2060 | 5.99M | memset(pu1_dst, u4_dcval, 4); |
2061 | 5.99M | } |
2062 | 84.4k | else if (i4_min_cost == i4_cost[3]) |
2063 | 22.4k | { |
2064 | 22.4k | *u4_intra_mode = DIAG_DL_I4x4; |
2065 | 22.4k | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
2066 | 22.4k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2067 | 22.4k | pu1_dst += dst_strd; |
2068 | 22.4k | memcpy(pu1_dst, (pu1_pred_val + 1), 4); |
2069 | 22.4k | pu1_dst += dst_strd; |
2070 | 22.4k | memcpy(pu1_dst, (pu1_pred_val + 2), 4); |
2071 | 22.4k | pu1_dst += dst_strd; |
2072 | 22.4k | memcpy(pu1_dst, (pu1_pred_val + 3), 4); |
2073 | 22.4k | } |
2074 | 61.9k | else if (i4_min_cost == i4_cost[4]) |
2075 | 11.3k | { |
2076 | 11.3k | *u4_intra_mode = DIAG_DR_I4x4; |
2077 | 11.3k | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
2078 | | |
2079 | 11.3k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2080 | 11.3k | pu1_dst += dst_strd; |
2081 | 11.3k | memcpy(pu1_dst, (pu1_pred_val - 1), 4); |
2082 | 11.3k | pu1_dst += dst_strd; |
2083 | 11.3k | memcpy(pu1_dst, (pu1_pred_val - 2), 4); |
2084 | 11.3k | pu1_dst += dst_strd; |
2085 | 11.3k | memcpy(pu1_dst, (pu1_pred_val - 3), 4); |
2086 | 11.3k | } |
2087 | 50.6k | else if (i4_min_cost == i4_cost[5]) |
2088 | 12.8k | { |
2089 | 12.8k | *u4_intra_mode = VERT_R_I4x4; |
2090 | 12.8k | pu1_pred_val = u1_pred_vals_diag_11 + 4; |
2091 | 12.8k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2092 | 12.8k | pu1_dst += dst_strd; |
2093 | 12.8k | pu1_pred_val = u1_pred_vals_diag_121 + 3; |
2094 | 12.8k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2095 | 12.8k | pu1_dst += dst_strd; |
2096 | 12.8k | memcpy(pu1_dst, (u1_pred_vals_vert_r), 4); |
2097 | 12.8k | pu1_dst += dst_strd; |
2098 | 12.8k | memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4); |
2099 | 12.8k | } |
2100 | 37.7k | else if (i4_min_cost == i4_cost[6]) |
2101 | 11.8k | { |
2102 | 11.8k | *u4_intra_mode = HORZ_D_I4x4; |
2103 | 11.8k | pu1_pred_val = u1_pred_vals_horz_d; |
2104 | 11.8k | memcpy(pu1_dst, (pu1_pred_val + 6), 4); |
2105 | 11.8k | pu1_dst += dst_strd; |
2106 | 11.8k | memcpy(pu1_dst, (pu1_pred_val + 4), 4); |
2107 | 11.8k | pu1_dst += dst_strd; |
2108 | 11.8k | memcpy(pu1_dst, (pu1_pred_val + 2), 4); |
2109 | 11.8k | pu1_dst += dst_strd; |
2110 | 11.8k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2111 | 11.8k | pu1_dst += dst_strd; |
2112 | 11.8k | } |
2113 | 25.8k | else if (i4_min_cost == i4_cost[7]) |
2114 | 13.0k | { |
2115 | 13.0k | *u4_intra_mode = VERT_L_I4x4; |
2116 | 13.0k | pu1_pred_val = u1_pred_vals_diag_11 + 5; |
2117 | 13.0k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2118 | 13.0k | pu1_dst += dst_strd; |
2119 | 13.0k | pu1_pred_val = u1_pred_vals_diag_121 + 5; |
2120 | 13.0k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2121 | 13.0k | pu1_dst += dst_strd; |
2122 | 13.0k | pu1_pred_val = u1_pred_vals_diag_11 + 6; |
2123 | 13.0k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2124 | 13.0k | pu1_dst += dst_strd; |
2125 | 13.0k | pu1_pred_val = u1_pred_vals_diag_121 + 6; |
2126 | 13.0k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2127 | 13.0k | } |
2128 | 12.7k | else if (i4_min_cost == i4_cost[8]) |
2129 | 24.1k | { |
2130 | 24.1k | *u4_intra_mode = HORZ_U_I4x4; |
2131 | 24.1k | pu1_pred_val = u1_pred_vals_horz_u; |
2132 | 24.1k | memcpy(pu1_dst, (pu1_pred_val), 4); |
2133 | 24.1k | pu1_dst += dst_strd; |
2134 | 24.1k | memcpy(pu1_dst, (pu1_pred_val + 2), 4); |
2135 | 24.1k | pu1_dst += dst_strd; |
2136 | 24.1k | memcpy(pu1_dst, (pu1_pred_val + 4), 4); |
2137 | 24.1k | pu1_dst += dst_strd; |
2138 | 24.1k | memcpy(pu1_dst, (pu1_pred_val + 6), 4); |
2139 | 24.1k | pu1_dst += dst_strd; |
2140 | 24.1k | } |
2141 | | |
2142 | 10.3M | return; |
2143 | 10.3M | } |
2144 | | |
2145 | | /** |
2146 | | ****************************************************************************** |
2147 | | * |
2148 | | * @brief: |
2149 | | * Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction. |
2150 | | * |
2151 | | * @par Description |
2152 | | * This function evaluates first three intra chroma modes and compute corresponding sad |
2153 | | * and return the buffer predicted with best mode. |
2154 | | * |
2155 | | * @param[in] pu1_src |
2156 | | * UWORD8 pointer to the source |
2157 | | * |
2158 | | * @param[in] pu1_ngbr_pels |
2159 | | * UWORD8 pointer to neighbouring pels |
2160 | | * |
2161 | | * @param[out] pu1_dst |
2162 | | * UWORD8 pointer to the destination |
2163 | | * |
2164 | | * @param[in] src_strd |
2165 | | * integer source stride |
2166 | | * |
2167 | | * @param[in] dst_strd |
2168 | | * integer destination stride |
2169 | | * |
2170 | | * @param[in] u4_n_avblty |
2171 | | * availability of neighbouring pixels |
2172 | | * |
2173 | | * @param[in] u4_intra_mode |
2174 | | * Pointer to the variable in which best mode is returned |
2175 | | * |
2176 | | * @param[in] pu4_sadmin |
2177 | | * Pointer to the variable in which minimum sad is returned |
2178 | | * |
2179 | | * @param[in] u4_valid_intra_modes |
2180 | | * Says what all modes are valid |
2181 | | * |
2182 | | * @return none |
2183 | | * |
2184 | | ****************************************************************************** |
2185 | | */ |
2186 | | void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src, |
2187 | | UWORD8 *pu1_ngbr_pels, |
2188 | | UWORD8 *pu1_dst, |
2189 | | UWORD32 src_strd, |
2190 | | UWORD32 dst_strd, |
2191 | | WORD32 u4_n_avblty, |
2192 | | UWORD32 *u4_intra_mode, |
2193 | | WORD32 *pu4_sadmin, |
2194 | | UWORD32 u4_valid_intra_modes) |
2195 | 3.71M | { |
2196 | 3.71M | UWORD8 *pu1_neighbour; |
2197 | 3.71M | UWORD8 *pu1_src_temp = pu1_src; |
2198 | 3.71M | UWORD8 left = 0, top = 0; |
2199 | 3.71M | WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */ |
2200 | 3.71M | u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U'*/ |
2201 | | |
2202 | 3.71M | WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/ |
2203 | 3.71M | u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/ |
2204 | | |
2205 | 3.71M | WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, |
2206 | 3.71M | i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX; |
2207 | 3.71M | UWORD8 val_u, val_v; |
2208 | | |
2209 | 3.71M | WORD32 u4_dc_val[2][2][2];/* ----------- |
2210 | | | | | Chroma can have four |
2211 | | | 00 | 01 | separate dc value... |
2212 | | ----------- u4_dc_val corresponds to this dc values |
2213 | | | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V] |
2214 | | | 10 | 11 | |
2215 | | ----------- */ |
2216 | 3.71M | left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); |
2217 | 3.71M | top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; |
2218 | | |
2219 | | /*Evaluating HORZ*/ |
2220 | 3.71M | if (left)/* Ifleft available*/ |
2221 | 3.60M | { |
2222 | 3.60M | i4_sad_horz = 0; |
2223 | | |
2224 | 32.1M | for (i = 0; i < 8; i++) |
2225 | 28.5M | { |
2226 | 28.5M | val_v = pu1_ngbr_pels[15 - 2 * i]; |
2227 | 28.5M | val_u = pu1_ngbr_pels[15 - 2 * i - 1]; |
2228 | 28.5M | row = i / 4; |
2229 | 28.5M | u4_dcval_u_l[row] += val_u; |
2230 | 28.5M | u4_dcval_v_l[row] += val_v; |
2231 | 256M | for (j = 0; j < 8; j++) |
2232 | 227M | { |
2233 | 227M | i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/ |
2234 | 227M | i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]); |
2235 | 227M | } |
2236 | | |
2237 | 28.5M | pu1_src_temp += src_strd; |
2238 | 28.5M | } |
2239 | 3.60M | u4_dcval_u_l[0] += 2; |
2240 | 3.60M | u4_dcval_u_l[1] += 2; |
2241 | 3.60M | u4_dcval_v_l[0] += 2; |
2242 | 3.60M | u4_dcval_v_l[1] += 2; |
2243 | 3.60M | } |
2244 | | |
2245 | | /*Evaluating VERT**/ |
2246 | 3.71M | pu1_src_temp = pu1_src; |
2247 | 3.71M | if (top) /* top available*/ |
2248 | 3.53M | { |
2249 | 3.53M | i4_sad_vert = 0; |
2250 | | |
2251 | 31.5M | for (i = 0; i < 8; i++) |
2252 | 28.0M | { |
2253 | 28.0M | col = i / 4; |
2254 | | |
2255 | 28.0M | val_u = pu1_ngbr_pels[18 + i * 2]; |
2256 | 28.0M | val_v = pu1_ngbr_pels[18 + i * 2 + 1]; |
2257 | 28.0M | u4_dcval_u_t[col] += val_u; |
2258 | 28.0M | u4_dcval_v_t[col] += val_v; |
2259 | | |
2260 | 475M | for (j = 0; j < 16; j++) |
2261 | 447M | { |
2262 | 447M | i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/ |
2263 | 447M | } |
2264 | 28.0M | pu1_src_temp += src_strd; |
2265 | | |
2266 | 28.0M | } |
2267 | 3.53M | u4_dcval_u_t[0] += 2; |
2268 | 3.53M | u4_dcval_u_t[1] += 2; |
2269 | 3.53M | u4_dcval_v_t[0] += 2; |
2270 | 3.53M | u4_dcval_v_t[1] += 2; |
2271 | 3.53M | } |
2272 | | |
2273 | | /* computing DC value*/ |
2274 | | /* Equation 8-128 in spec*/ |
2275 | 3.71M | u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top); |
2276 | 3.71M | u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top); |
2277 | 3.71M | u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top); |
2278 | 3.71M | u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top); |
2279 | | |
2280 | 3.71M | if (top) |
2281 | 3.53M | { |
2282 | | /* Equation 8-132 in spec*/ |
2283 | 3.53M | u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top); |
2284 | 3.53M | u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top); |
2285 | 3.53M | } |
2286 | 176k | else |
2287 | 176k | { |
2288 | 176k | u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); |
2289 | 176k | u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left); |
2290 | 176k | } |
2291 | | |
2292 | 3.71M | if (left) |
2293 | 3.60M | { |
2294 | 3.60M | u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left); |
2295 | 3.60M | u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left); |
2296 | 3.60M | } |
2297 | 108k | else |
2298 | 108k | { |
2299 | 108k | u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); |
2300 | 108k | u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top); |
2301 | 108k | } |
2302 | | |
2303 | 3.71M | if (!(left || top)) |
2304 | 26.1k | { |
2305 | | /*none available*/ |
2306 | 26.1k | u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = |
2307 | 26.1k | u4_dc_val[0][1][0] = u4_dc_val[0][1][1] = |
2308 | 26.1k | u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = |
2309 | 26.1k | u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128; |
2310 | 26.1k | } |
2311 | | |
2312 | | /* Evaluating DC */ |
2313 | 3.71M | pu1_src_temp = pu1_src; |
2314 | 3.71M | i4_sad_dc = 0; |
2315 | 33.1M | for (i = 0; i < 8; i++) |
2316 | 29.3M | { |
2317 | 264M | for (j = 0; j < 8; j++) |
2318 | 235M | { |
2319 | 235M | col = j / 4; |
2320 | 235M | row = i / 4; |
2321 | 235M | val_u = u4_dc_val[row][col][0]; |
2322 | 235M | val_v = u4_dc_val[row][col][1]; |
2323 | | |
2324 | 235M | i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/ |
2325 | 235M | i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]); |
2326 | 235M | } |
2327 | 29.3M | pu1_src_temp += src_strd; |
2328 | 29.3M | } |
2329 | | |
2330 | 3.71M | if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/ |
2331 | 0 | i4_sad_dc = INT_MAX; |
2332 | 3.71M | if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/ |
2333 | 110k | i4_sad_horz = INT_MAX; |
2334 | 3.71M | if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/ |
2335 | 178k | i4_sad_vert = INT_MAX; |
2336 | | |
2337 | 3.71M | i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); |
2338 | | |
2339 | | /* Finding Minimum sad and doing corresponding prediction*/ |
2340 | 3.71M | if (i4_min_sad < *pu4_sadmin) |
2341 | 3.72M | { |
2342 | 3.72M | *pu4_sadmin = i4_min_sad; |
2343 | | |
2344 | 3.72M | if (i4_min_sad == i4_sad_dc) |
2345 | 3.52M | { |
2346 | 3.52M | *u4_intra_mode = DC_CH_I8x8; |
2347 | 31.4M | for (i = 0; i < 8; i++) |
2348 | 27.9M | { |
2349 | 250M | for (j = 0; j < 8; j++) |
2350 | 222M | { |
2351 | 222M | col = j / 4; |
2352 | 222M | row = i / 4; |
2353 | | |
2354 | 222M | pu1_dst[2 * j] = u4_dc_val[row][col][0]; |
2355 | 222M | pu1_dst[2 * j + 1] = u4_dc_val[row][col][1]; |
2356 | 222M | } |
2357 | 27.9M | pu1_dst += dst_strd; |
2358 | 27.9M | } |
2359 | 3.52M | } |
2360 | 199k | else if (i4_min_sad == i4_sad_horz) |
2361 | 59.6k | { |
2362 | 59.6k | *u4_intra_mode = HORZ_CH_I8x8; |
2363 | 536k | for (j = 0; j < 8; j++) |
2364 | 476k | { |
2365 | 476k | val_v = pu1_ngbr_pels[15 - 2 * j]; |
2366 | 476k | val_u = pu1_ngbr_pels[15 - 2 * j - 1]; |
2367 | | |
2368 | 4.29M | for (i = 0; i < 8; i++) |
2369 | 3.81M | { |
2370 | 3.81M | pu1_dst[2 * i] = val_u; |
2371 | 3.81M | pu1_dst[2 * i + 1] = val_v; |
2372 | | |
2373 | 3.81M | } |
2374 | 476k | pu1_dst += dst_strd; |
2375 | 476k | } |
2376 | 59.6k | } |
2377 | 139k | else |
2378 | 139k | { |
2379 | 139k | *u4_intra_mode = VERT_CH_I8x8; |
2380 | 139k | pu1_neighbour = pu1_ngbr_pels + 18; |
2381 | 1.25M | for (j = 0; j < 8; j++) |
2382 | 1.11M | { |
2383 | 1.11M | memcpy(pu1_dst, pu1_neighbour, MB_SIZE); |
2384 | 1.11M | pu1_dst += dst_strd; |
2385 | 1.11M | } |
2386 | 139k | } |
2387 | 3.72M | } |
2388 | | |
2389 | 3.71M | return; |
2390 | 3.71M | } |