Coverage Report

Created: 2026-03-07 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavc/encoder/ih264e_intra_modes_eval.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
*******************************************************************************
23
* @file
24
*  ih264e_intra_modes_eval.c
25
*
26
* @brief
27
*  This file contains definitions of routines that perform rate distortion
28
*  analysis on a macroblock if they are to be coded as intra.
29
*
30
* @author
31
*  ittiam
32
*
33
* @par List of Functions:
34
*  - ih264e_derive_nghbr_avbl_of_mbs
35
*  - ih264e_derive_ngbr_avbl_of_mb_partitions
36
*  - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff
37
*  - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff
38
*  - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff
39
*  - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton
40
*  - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff
41
*  - ih264e_evaluate_intra16x16_modes
42
*  - ih264e_evaluate_intra4x4_modes
43
*  - ih264e_evaluate_intra_chroma_modes
44
*
45
* @remarks
46
*  none
47
*
48
*******************************************************************************
49
*/
50
51
/*****************************************************************************/
52
/* File Includes                                                             */
53
/*****************************************************************************/
54
55
/* System Include Files */
56
#include <stdio.h>
57
#include <string.h>
58
#include <limits.h>
59
#include <assert.h>
60
61
/* User Include Files */
62
#include "ih264e_config.h"
63
#include "ih264_typedefs.h"
64
#include "iv2.h"
65
#include "ive2.h"
66
67
#include "ih264_debug.h"
68
#include "ih264_macros.h"
69
#include "ih264_defs.h"
70
#include "ih264_mem_fns.h"
71
#include "ih264_padding.h"
72
#include "ih264_structs.h"
73
#include "ih264_trans_quant_itrans_iquant.h"
74
#include "ih264_inter_pred_filters.h"
75
#include "ih264_intra_pred_filters.h"
76
#include "ih264_deblk_edge_filters.h"
77
#include "ih264_common_tables.h"
78
#include "ih264_cabac_tables.h"
79
80
#include "ime_defs.h"
81
#include "ime_distortion_metrics.h"
82
#include "ime_structs.h"
83
#include "ime_platform_macros.h"
84
85
#include "irc_cntrl_param.h"
86
#include "irc_frame_info_collector.h"
87
88
#include "ih264e_error.h"
89
#include "ih264e_defs.h"
90
#include "ih264e_globals.h"
91
#include "ih264e_rate_control.h"
92
#include "ih264e_bitstream.h"
93
#include "ih264e_cabac_structs.h"
94
#include "ih264e_structs.h"
95
#include "ih264e_intra_modes_eval.h"
96
97
98
/*****************************************************************************/
99
/* Function Definitions                                                      */
100
/*****************************************************************************/
101
102
/**
103
******************************************************************************
104
*
105
* @brief
106
*  derivation process for macroblock availability
107
*
108
* @par   Description
109
*  Calculates the availability of the left, top, topright and topleft macroblocks.
110
*
111
* @param[in] ps_proc_ctxt
112
*  pointer to proc context (handle)
113
*
114
* @remarks Based on section 6.4.5 in H264 spec
115
*
116
* @return  none
117
*
118
******************************************************************************
119
*/
120
void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
121
6.00M
{
122
6.00M
    UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx;
123
6.00M
    UWORD8 *pu1_slice_idx_b;
124
6.00M
    UWORD8 *pu1_slice_idx_a;
125
6.00M
    UWORD8 *pu1_slice_idx_c;
126
6.00M
    UWORD8 *pu1_slice_idx_d;
127
6.00M
    block_neighbors_t *ps_ngbr_avbl;
128
6.00M
    WORD32 i4_mb_x, i4_mb_y;
129
6.00M
    WORD32 i4_wd_mbs;
130
131
6.00M
    i4_mb_x = ps_proc->i4_mb_x;
132
6.00M
    i4_mb_y = ps_proc->i4_mb_y;
133
134
6.00M
    i4_wd_mbs = ps_proc->i4_wd_mbs;
135
136
6.00M
    pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x;
137
6.00M
    pu1_slice_idx_a = pu1_slice_idx_curr - 1;
138
6.00M
    pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs;
139
6.00M
    pu1_slice_idx_c = pu1_slice_idx_b + 1;
140
6.00M
    pu1_slice_idx_d = pu1_slice_idx_b - 1;
141
6.00M
    ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
142
143
    /**********************************************************************/
144
    /* The macroblock is marked as available, unless one of the following */
145
    /* conditions is true in which case the macroblock shall be marked as */
146
    /* not available.                                                     */
147
    /* 1. mbAddr < 0                                                      */
148
    /* 2  mbAddr > CurrMbAddr                                             */
149
    /* 3. the macroblock with address mbAddr belongs to a different slice */
150
    /* than the macroblock with address CurrMbAddr                        */
151
    /**********************************************************************/
152
153
    /* left macroblock availability */
154
6.00M
    if (i4_mb_x == 0)
155
236k
    { /* macroblocks along first column */
156
236k
        ps_ngbr_avbl->u1_mb_a = 0;
157
236k
    }
158
5.77M
    else
159
5.77M
    { /* macroblocks belong to same slice? */
160
5.77M
        if (*pu1_slice_idx_a != *pu1_slice_idx_curr)
161
0
            ps_ngbr_avbl->u1_mb_a = 0;
162
5.77M
        else
163
5.77M
            ps_ngbr_avbl->u1_mb_a = 1;
164
5.77M
    }
165
166
    /* top macroblock availability */
167
6.00M
    if (i4_mb_y == 0)
168
261k
    { /* macroblocks along first row */
169
261k
        ps_ngbr_avbl->u1_mb_b = 0;
170
261k
    }
171
5.74M
    else
172
5.74M
    { /* macroblocks belong to same slice? */
173
5.74M
        if (*pu1_slice_idx_b != *pu1_slice_idx_curr)
174
0
            ps_ngbr_avbl->u1_mb_b = 0;
175
5.74M
        else
176
5.74M
            ps_ngbr_avbl->u1_mb_b = 1;
177
5.74M
    }
178
179
    /* top right macroblock availability */
180
6.00M
    if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0)
181
426k
    { /* macroblocks along last column */
182
426k
        ps_ngbr_avbl->u1_mb_c = 0;
183
426k
    }
184
5.58M
    else
185
5.58M
    { /* macroblocks belong to same slice? */
186
5.58M
        if (*pu1_slice_idx_c != *pu1_slice_idx_curr)
187
0
            ps_ngbr_avbl->u1_mb_c = 0;
188
5.58M
        else
189
5.58M
            ps_ngbr_avbl->u1_mb_c = 1;
190
5.58M
    }
191
192
    /* top left macroblock availability */
193
6.00M
    if (i4_mb_x == 0 || i4_mb_y == 0)
194
426k
    { /* macroblocks along first column */
195
426k
        ps_ngbr_avbl->u1_mb_d = 0;
196
426k
    }
197
5.58M
    else
198
5.58M
    { /* macroblocks belong to same slice? */
199
5.58M
        if (*pu1_slice_idx_d != *pu1_slice_idx_curr)
200
0
            ps_ngbr_avbl->u1_mb_d = 0;
201
5.58M
        else
202
5.58M
            ps_ngbr_avbl->u1_mb_d = 1;
203
5.58M
    }
204
6.00M
}
205
206
/**
207
******************************************************************************
208
*
209
* @brief
210
*  derivation process for subblock/partition availability
211
*
212
* @par   Description
213
*  Calculates the availability of the left, top, topright and topleft subblock
214
*  or partitions.
215
*
216
* @param[in]    ps_proc_ctxt
217
*  pointer to macroblock context (handle)
218
*
219
* @param[in]    i1_pel_pos_x
220
*  column position of the pel wrt the current block
221
*
222
* @param[in]    i1_pel_pos_y
223
*  row position of the pel in wrt current block
224
*
225
* @remarks     Assumptions: before calling this function it is assumed that
226
*   the neighbor availability of the current macroblock is already derived.
227
*   Based on table 6-3 of H264 specification
228
*
229
* @return      availability status (yes or no)
230
*
231
******************************************************************************
232
*/
233
UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
234
                                                WORD8 i1_pel_pos_x,
235
                                                WORD8 i1_pel_pos_y)
236
0
{
237
0
    UWORD8 u1_neighbor_avail=0;
238
239
    /**********************************************************************/
240
    /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to   */
241
    /* various columns of a macroblock                                    */
242
    /*                                                                    */
243
    /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to   */
244
    /* various rows of a macroblock                                       */
245
    /*                                                                    */
246
    /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements    */
247
    /* outside the bound of an mb ie., represents its neighbors.          */
248
    /**********************************************************************/
249
0
    if (i1_pel_pos_x < 0)
250
0
    { /* column(-1) */
251
0
        if (i1_pel_pos_y < 0)
252
0
        { /* row(-1) */
253
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
254
0
        }
255
0
        else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
256
0
        { /* all rows of a macroblock */
257
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
258
0
        }
259
0
        else /* if (i1_pel_pos_y >= 16) */
260
0
        { /* rows(+16) */
261
0
            u1_neighbor_avail = 0;  /* current mb bottom left availability */
262
0
        }
263
0
    }
264
0
    else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
265
0
    { /* all columns of a macroblock */
266
0
        if (i1_pel_pos_y < 0)
267
0
        { /* row(-1) */
268
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
269
0
        }
270
0
        else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
271
0
        { /* all rows of a macroblock */
272
0
            u1_neighbor_avail = 1; /* current mb availability */
273
            /* availability of the partition is dependent on the position of the partition inside the mb */
274
            /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */
275
0
        }
276
0
        else /* if (i1_pel_pos_y >= 16) */
277
0
        { /* rows(+16) */
278
0
            u1_neighbor_avail = 0;  /* current mb bottom availability */
279
0
        }
280
0
    }
281
0
    else if (i1_pel_pos_x >= 16)
282
0
    { /* column(+16) */
283
0
        if (i1_pel_pos_y < 0)
284
0
        { /* row(-1) */
285
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
286
0
        }
287
0
        else /* if (i1_pel_pos_y >= 0) */
288
0
        { /* all other rows */
289
0
            u1_neighbor_avail = 0;  /* current mb right & bottom right availability */
290
0
        }
291
0
    }
292
293
0
    return u1_neighbor_avail;
294
0
}
295
296
/**
297
******************************************************************************
298
*
299
* @brief
300
*  evaluate best intra 16x16 mode (rate distortion opt off)
301
*
302
* @par Description
303
*  This function evaluates all the possible intra 16x16 modes and finds the mode
304
*  that best represents the macro-block (least distortion) and occupies fewer
305
*  bits in the bit-stream.
306
*
307
* @param[in]   ps_proc_ctxt
308
*  pointer to process context (handle)
309
*
310
* @remarks
311
*  Ideally the cost of encoding a macroblock is calculated as
312
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
313
*  input block and the reconstructed block and rate is the number of bits taken
314
*  to place the macroblock in the bit-stream. In this routine the rate does not
315
*  exactly point to the total number of bits it takes, rather it points to header
316
*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
317
*  and residual bits fall in to texture bits the number of bits taken to encoding
318
*  mbtype is considered as rate, we compute cost. Further we will approximate
319
*  the distortion as the deviation b/w input and the predicted block as opposed
320
*  to input and reconstructed block.
321
*
322
*  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
323
*  the SAD and cost are one and the same.
324
*
325
* @return     none
326
*
327
******************************************************************************
328
*/
329
void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
330
4.00M
{
331
    /* Codec Context */
332
4.00M
    codec_t *ps_codec = ps_proc->ps_codec;
333
334
    /* SAD(distortion metric) of an 8x8 block */
335
4.00M
    WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
336
337
    /* lambda */
338
4.00M
    UWORD32 u4_lambda = ps_proc->u4_lambda;
339
340
    /* cost = distortion + lambda*rate */
341
4.00M
    WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
342
343
    /* intra mode */
344
4.00M
    UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
345
346
    /* neighbor pels for intra prediction */
347
4.00M
    UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
348
349
    /* neighbor availability */
350
4.00M
    WORD32 i4_ngbr_avbl;
351
352
    /* pointer to src macro block */
353
4.00M
    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
354
4.00M
    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
355
356
    /* pointer to prediction macro block */
357
4.00M
    UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
358
4.00M
    UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
359
360
    /* strides */
361
4.00M
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
362
4.00M
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
363
4.00M
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
364
365
    /* pointer to neighbors left, top, topleft */
366
4.00M
    UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
367
4.00M
    UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
368
4.00M
    UWORD8 *pu1_mb_d = pu1_mb_b - 1;
369
4.00M
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
370
371
    /* valid intra modes map */
372
4.00M
    UWORD32 u4_valid_intra_modes;
373
374
    /* lut for valid intra modes */
375
4.00M
    const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15};
376
377
    /* temp var */
378
4.00M
    UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
379
4.00M
    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
380
4.00M
    UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
381
382
    /* init temp var */
383
4.00M
    if (ps_proc->i4_slice_type != ISLICE)
384
113k
    {
385
        /* Offset for MBtype */
386
113k
        offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
387
113k
        u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
388
113k
    }
389
390
    /* locating neighbors that are available for prediction */
391
392
    /* gather prediction pels from the neighbors, if particular set is not available
393
     * it is set to zero*/
394
    /* left pels */
395
4.00M
    u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
396
3.90M
                    && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
397
4.00M
    if (u1_mb_a)
398
3.88M
    {
399
65.4M
        for(i = 0; i < 16; i++)
400
61.6M
            pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd];
401
3.88M
    }
402
125k
    else
403
125k
    {
404
125k
        ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
405
125k
    }
406
    /* top pels */
407
4.00M
    u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
408
3.88M
                    && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
409
4.00M
    if (u1_mb_b)
410
3.86M
    {
411
3.86M
        ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
412
3.86M
    }
413
142k
    else
414
142k
    {
415
142k
        ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
416
142k
    }
417
    /* topleft pels */
418
4.00M
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
419
3.81M
                    && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
420
4.00M
    if (u1_mb_d)
421
3.80M
    {
422
3.80M
        pu1_ngbr_pels_i16[16] = *pu1_mb_d;
423
3.80M
    }
424
201k
    else
425
201k
    {
426
201k
        pu1_ngbr_pels_i16[16] = 0;
427
201k
    }
428
429
4.00M
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
430
4.00M
    ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
431
432
    /* set valid intra modes for evaluation */
433
4.00M
    u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
434
435
4.00M
    if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
436
3.72M
                    ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
437
362k
        u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
438
439
    /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
440
4.00M
    ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
441
4.00M
                                                  i4_src_strd, i4_pred_strd,
442
4.00M
                                                  i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
443
4.00M
                                                  u4_valid_intra_modes);
444
445
    /* cost = distortion + lambda*rate */
446
4.00M
    i4_mb_cost_least = i4_mb_distortion_least;
447
448
4.00M
    if (((u4_valid_intra_modes >> 3) & 1) != 0)
449
3.47M
    {
450
        /* intra prediction for PLANE mode*/
451
3.47M
        (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
452
453
        /* evaluate distortion between the actual blk and the estimated blk for the given mode */
454
3.47M
        ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
455
456
        /* cost = distortion + lambda*rate */
457
3.47M
        i4_mb_cost = i4_mb_distortion;
458
459
        /* update the least cost information if necessary */
460
3.47M
        if(i4_mb_cost < i4_mb_distortion_least)
461
6.35k
        {
462
6.35k
            u4_intra_mode = PLANE_I16x16;
463
464
6.35k
            i4_mb_cost_least = i4_mb_cost;
465
6.35k
            i4_mb_distortion_least = i4_mb_distortion;
466
6.35k
        }
467
3.47M
    }
468
469
4.00M
    u4_best_intra_16x16_mode = u4_intra_mode;
470
471
4.00M
    DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
472
473
4.00M
    ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
474
475
    /* cost = distortion + lambda*rate */
476
4.00M
    i4_mb_cost_least    = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
477
478
479
    /* update the type of the mb if necessary */
480
4.00M
    if (i4_mb_cost_least < ps_proc->i4_mb_cost)
481
3.19M
    {
482
3.19M
        ps_proc->i4_mb_cost = i4_mb_cost_least;
483
3.19M
        ps_proc->i4_mb_distortion = i4_mb_distortion_least;
484
3.19M
        ps_proc->u4_mb_type = I16x16;
485
3.19M
    }
486
4.00M
    if (i4_mb_cost_least < ps_proc->i4_mb_intra_cost)
487
3.25M
    {
488
3.25M
        ps_proc->i4_mb_intra_cost = i4_mb_cost_least;
489
3.25M
    }
490
491
4.00M
    return ;
492
4.00M
}
493
494
495
/**
496
******************************************************************************
497
*
498
* @brief
499
*  evaluate best intra 8x8 mode (rate distortion opt on)
500
*
501
* @par Description
502
*  This function evaluates all the possible intra 8x8 modes and finds the mode
503
*  that best represents the macro-block (least distortion) and occupies fewer
504
*  bits in the bit-stream.
505
*
506
* @param[in]    ps_proc_ctxt
507
*  pointer to proc ctxt
508
*
509
* @remarks Ideally the cost of encoding a macroblock is calculated as
510
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
511
*  input block and the reconstructed block and rate is the number of bits taken
512
*  to place the macroblock in the bit-stream. In this routine the rate does not
513
*  exactly point to the total number of bits it takes, rather it points to header
514
*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
515
*  and residual bits fall in to texture bits the number of bits taken to encoding
516
*  mbtype is considered as rate, we compute cost. Further we will approximate
517
*  the distortion as the deviation b/w input and the predicted block as opposed
518
*  to input and reconstructed block.
519
*
520
*  NOTE: TODO: This function needs to be tested
521
*
522
*  @return      none
523
*
524
******************************************************************************
525
*/
526
void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
527
0
{
528
    /* Codec Context */
529
0
    codec_t *ps_codec = ps_proc->ps_codec;
530
531
    /* SAD(distortion metric) of an 4x4 block */
532
0
    WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
533
534
    /* lambda */
535
0
    UWORD32 u4_lambda = ps_proc->u4_lambda;
536
537
    /* cost = distortion + lambda*rate */
538
0
    WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
539
540
    /* cost due to mbtype */
541
0
    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
542
543
    /* intra mode */
544
0
    UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
545
546
    /* neighbor pels for intra prediction */
547
0
    UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
548
549
    /* pointer to curr partition */
550
0
    UWORD8 *pu1_mb_curr;
551
552
    /* pointer to prediction macro block */
553
0
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
554
555
    /* strides */
556
0
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
557
0
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
558
559
    /* neighbors left, top, top right, top left */
560
0
    UWORD8 *pu1_mb_a;
561
0
    UWORD8 *pu1_mb_b;
562
0
    UWORD8 *pu1_mb_d;
563
564
    /* neighbor availability */
565
0
    WORD32 i4_ngbr_avbl;
566
0
    block_neighbors_t s_ngbr_avbl;
567
568
    /* temp vars */
569
0
    UWORD32  b8, u4_pix_x, u4_pix_y;
570
0
    UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
571
0
    block_neighbors_t s_ngbr_avbl_MB;
572
573
    /* ngbr mb syntax information */
574
0
    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
575
0
    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
576
0
    mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
577
578
    /* valid intra modes map */
579
0
    UWORD32 u4_valid_intra_modes;
580
581
0
    if (ps_proc->ps_ngbr_avbl->u1_mb_c)
582
0
    {
583
0
        ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1);
584
0
    }
585
    /* left pels */
586
0
    s_ngbr_avbl_MB.u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
587
0
                                  && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
588
589
    /* top pels */
590
0
    s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
591
0
                                  && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
592
593
    /* topleft pels */
594
0
    s_ngbr_avbl_MB.u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
595
0
                                  && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
596
597
    /* top right */
598
0
    s_ngbr_avbl_MB.u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
599
0
                                  && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
600
601
602
0
    for (b8 = 0; b8 < 4; b8++)
603
0
    {
604
0
        u4_pix_x = (b8 & 0x01) << 3;
605
0
        u4_pix_y = (b8 >> 1) << 3;
606
607
0
        pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
608
        /* when rdopt is off, we use the input as reference for constructing prediction buffer */
609
        /* as opposed to using the recon pels. (open loop intra prediction) */
610
0
        pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
611
0
        pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
612
0
        pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
613
614
        /* locating neighbors that are available for prediction */
615
        /* TODO : update the neighbor availability information basing on constrained intra pred information */
616
        /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
617
        /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
618
0
        s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
619
0
        s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
620
0
        s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
621
0
        s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
622
623
        /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
624
0
        i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) +  (s_ngbr_avbl.u1_mb_c << 3) +
625
0
                        (s_ngbr_avbl.u1_mb_a << 4);
626
        /* if top partition is available and top right is not available for intra prediction, then */
627
        /* padd top right samples using top sample and make top right also available */
628
        /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) +  ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
629
0
        ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
630
631
632
0
        ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
633
0
                                                     i4_src_strd, i4_ngbr_avbl);
634
635
0
        i4_partition_cost_least = INT_MAX;
636
        /* set valid intra modes for evaluation */
637
0
        u4_valid_intra_modes = 0x1ff;
638
639
0
        if (!s_ngbr_avbl.u1_mb_b)
640
0
        {
641
0
            u4_valid_intra_modes &= ~(1 << VERT_I4x4);
642
0
            u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
643
0
            u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
644
0
        }
645
0
        if (!s_ngbr_avbl.u1_mb_a)
646
0
        {
647
0
            u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
648
0
            u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
649
0
        }
650
0
        if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
651
0
        {
652
0
            u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
653
0
            u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
654
0
            u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
655
0
        }
656
657
        /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */
658
0
        if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
659
0
        {
660
0
            u4_estimated_intra_8x8_mode = DC_I8x8;
661
0
        }
662
0
        else
663
0
        {
664
0
            UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
665
0
            UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
666
667
0
            if (u4_pix_x == 0)
668
0
            {
669
0
                if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
670
0
                {
671
0
                    u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
672
0
                }
673
0
                else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
674
0
                {
675
0
                    u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2];
676
0
                }
677
0
            }
678
0
            else
679
0
            {
680
0
                u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
681
0
            }
682
683
0
            if (u4_pix_y == 0)
684
0
            {
685
0
                if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
686
0
                {
687
0
                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
688
0
                }
689
0
                else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
690
0
                {
691
0
                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2];
692
0
                }
693
0
            }
694
0
            else
695
0
            {
696
0
                u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
697
0
            }
698
699
0
            u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
700
0
        }
701
702
        /* perform intra mode 8x8 evaluation */
703
0
        for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
704
0
        {
705
0
            if ( (u4_valid_intra_modes & 1) == 0)
706
0
                continue;
707
708
            /* intra prediction */
709
0
            (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
710
711
            /* evaluate distortion between the actual blk and the estimated blk for the given mode */
712
0
            ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
713
714
0
            i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
715
716
            /* update the least cost information if necessary */
717
0
            if (i4_partition_cost < i4_partition_cost_least)
718
0
            {
719
0
                i4_partition_cost_least = i4_partition_cost;
720
0
                i4_partition_distortion_least = i4_partition_distortion;
721
0
                u4_best_intra_8x8_mode = u4_intra_mode;
722
0
            }
723
0
        }
724
        /* macroblock distortion */
725
0
        i4_total_cost += i4_partition_cost_least;
726
0
        i4_total_distortion += i4_partition_distortion_least;
727
        /* mb partition mode */
728
0
        ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
729
730
0
    }
731
732
    /* update the type of the mb if necessary */
733
0
    if (i4_total_cost < ps_proc->i4_mb_cost)
734
0
    {
735
0
        ps_proc->i4_mb_cost = i4_total_cost;
736
0
        ps_proc->i4_mb_distortion = i4_total_distortion;
737
0
        ps_proc->u4_mb_type = I8x8;
738
0
    }
739
0
    if (i4_total_cost < ps_proc->i4_mb_intra_cost)
740
0
    {
741
0
        ps_proc->i4_mb_intra_cost = i4_total_cost;
742
0
    }
743
744
0
    return ;
745
0
}
746
747
748
/**
749
******************************************************************************
750
*
751
* @brief
752
*  evaluate best intra 4x4 mode (rate distortion opt off)
753
*
754
* @par Description
755
*  This function evaluates all the possible intra 4x4 modes and finds the mode
756
*  that best represents the macro-block (least distortion) and occupies fewer
757
*  bits in the bit-stream.
758
*
759
* @param[in]    ps_proc_ctxt
760
*  pointer to proc ctxt
761
*
762
* @remarks
763
*  Ideally the cost of encoding a macroblock is calculated as
764
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
765
*  input block and the reconstructed block and rate is the number of bits taken
766
*  to place the macroblock in the bit-stream. In this routine the rate does not
767
*  exactly point to the total number of bits it takes, rather it points to header
768
*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
769
*  and residual bits fall in to texture bits the number of bits taken to encoding
770
*  mbtype is considered as rate, we compute cost. Further we will approximate
771
*  the distortion as the deviation b/w input and the predicted block as opposed
772
*  to input and reconstructed block.
773
*
774
*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
775
*  24*lambda is added to the SAD before comparison with the best SAD for
776
*  inter prediction. This is an empirical value to prevent using too many intra
777
*  blocks.
778
*
779
* @return      none
780
*
781
******************************************************************************
782
*/
783
void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
784
2.86M
{
785
    /* Codec Context */
786
2.86M
    codec_t *ps_codec = ps_proc->ps_codec;
787
788
    /* SAD(distortion metric) of an 4x4 block */
789
2.86M
    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
790
791
    /* lambda */
792
2.86M
    UWORD32 u4_lambda = ps_proc->u4_lambda;
793
794
    /* cost = distortion + lambda*rate */
795
2.86M
    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
796
797
    /* cost due to mbtype */
798
2.86M
    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
799
800
    /* intra mode */
801
2.86M
    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
802
803
    /* neighbor pels for intra prediction */
804
2.86M
    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
805
806
    /* pointer to curr partition */
807
2.86M
    UWORD8 *pu1_mb_curr;
808
809
    /* pointer to prediction macro block */
810
2.86M
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
811
812
    /* strides */
813
2.86M
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
814
2.86M
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
815
816
    /* neighbors left, top, top right, top left */
817
2.86M
    UWORD8 *pu1_mb_a;
818
2.86M
    UWORD8 *pu1_mb_b;
819
2.86M
    UWORD8 *pu1_mb_c;
820
2.86M
    UWORD8 *pu1_mb_d;
821
822
    /* neighbor availability */
823
2.86M
    WORD32 i4_ngbr_avbl;
824
2.86M
    block_neighbors_t s_ngbr_avbl;
825
826
    /* temp vars */
827
2.86M
    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
828
829
    /* scan order inside 4x4 block */
830
2.86M
    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
831
832
    /* ngbr sub mb modes */
833
2.86M
    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
834
2.86M
    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
835
2.86M
    mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
836
837
    /* valid intra modes map */
838
2.86M
    UWORD32 u4_valid_intra_modes;
839
2.86M
    UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
840
841
2.86M
    UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
842
2.86M
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
843
844
2.86M
    if (ps_proc->ps_ngbr_avbl->u1_mb_c)
845
2.75M
    {
846
2.75M
        ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x + 1;
847
2.75M
    }
848
    /* left pels */
849
2.86M
    u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
850
2.79M
                    && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
851
852
    /* top pels */
853
2.86M
    u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
854
2.80M
                    && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
855
856
    /* topleft pels */
857
2.86M
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
858
2.75M
                    && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
859
860
    /* top right */
861
2.86M
    u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
862
2.75M
                    && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
863
864
2.86M
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
865
2.86M
    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
866
867
14.2M
    for (b8 = 0; b8 < 4; b8++)
868
11.4M
    {
869
11.4M
        u4_blk_x = (b8 & 0x01) << 3;
870
11.4M
        u4_blk_y = (b8 >> 1) << 3;
871
56.0M
        for (b4 = 0; b4 < 4; b4++)
872
44.6M
        {
873
44.6M
            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
874
44.6M
            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
875
876
44.6M
            pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
877
            /* when rdopt is off, we use the input as reference for constructing prediction buffer */
878
            /* as opposed to using the recon pels. (open loop intra prediction) */
879
44.6M
            pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
880
44.6M
            pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
881
44.6M
            pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */
882
44.6M
            pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
883
884
            /* locating neighbors that are available for prediction */
885
            /* TODO : update the neighbor availability information basing on constrained intra pred information */
886
            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
887
            /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
888
889
44.6M
            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
890
44.6M
            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
891
44.6M
            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
892
44.6M
            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
893
44.6M
            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
894
            /* set valid intra modes for evaluation */
895
44.6M
            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
896
897
            /* if top partition is available and top right is not available for intra prediction, then */
898
            /* padd top right samples using top sample and make top right also available */
899
            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
900
901
            /* gather prediction pels from the neighbors */
902
44.6M
            if (s_ngbr_avbl.u1_mb_a)
903
44.4M
            {
904
219M
                for(i = 0; i < 4; i++)
905
175M
                    pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
906
44.4M
            }
907
177k
            else
908
177k
            {
909
177k
                memset(pu1_ngbr_pels_i4, 0, 4);
910
177k
            }
911
912
44.6M
            if (s_ngbr_avbl.u1_mb_b)
913
44.1M
            {
914
44.1M
                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
915
44.1M
            }
916
549k
            else
917
549k
            {
918
549k
                memset(pu1_ngbr_pels_i4 + 5, 0, 4);
919
549k
            }
920
921
44.6M
            if (s_ngbr_avbl.u1_mb_d)
922
43.7M
                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
923
906k
            else
924
906k
                pu1_ngbr_pels_i4[4] = 0;
925
926
44.6M
            if (s_ngbr_avbl.u1_mb_c)
927
30.3M
            {
928
30.3M
                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
929
30.3M
            }
930
14.2M
            else if (s_ngbr_avbl.u1_mb_b)
931
14.2M
            {
932
14.2M
                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
933
14.2M
                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
934
14.2M
            }
935
936
44.6M
            i4_partition_cost_least = INT_MAX;
937
938
            /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
939
44.6M
            if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
940
509k
            {
941
509k
                u4_estimated_intra_4x4_mode = DC_I4x4;
942
509k
            }
943
44.1M
            else
944
44.1M
            {
945
44.1M
                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
946
44.1M
                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
947
948
44.1M
                if (u4_pix_x == 0)
949
11.0M
                {
950
11.0M
                    if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
951
7.78M
                    {
952
7.78M
                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
953
7.78M
                    }
954
3.21M
                    else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
955
0
                    {
956
0
                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
957
0
                    }
958
11.0M
                }
959
33.1M
                else
960
33.1M
                {
961
33.1M
                    u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
962
33.1M
                }
963
964
44.1M
                if (u4_pix_y == 0)
965
10.9M
                {
966
10.9M
                    if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
967
7.76M
                    {
968
7.76M
                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
969
7.76M
                    }
970
3.20M
                    else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
971
0
                    {
972
0
                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
973
0
                    }
974
10.9M
                }
975
33.1M
                else
976
33.1M
                {
977
33.1M
                    u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
978
33.1M
                }
979
980
44.1M
                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
981
44.1M
            }
982
983
44.6M
            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
984
985
            /* mode evaluation and prediction */
986
44.6M
            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
987
44.6M
                                                         pu1_ngbr_pels_i4,
988
44.6M
                                                         pu1_pred_mb, i4_src_strd,
989
44.6M
                                                         i4_pred_strd, i4_ngbr_avbl,
990
44.6M
                                                         &u4_best_intra_4x4_mode,
991
44.6M
                                                         &i4_partition_cost_least,
992
44.6M
                                                         u4_valid_intra_modes,
993
44.6M
                                                         u4_lambda,
994
44.6M
                                                         u4_estimated_intra_4x4_mode);
995
996
997
44.6M
            i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
998
999
44.6M
            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
1000
            /* macroblock distortion */
1001
44.6M
            i4_total_distortion += i4_partition_distortion_least;
1002
44.6M
            i4_total_cost += i4_partition_cost_least;
1003
            /* mb partition mode */
1004
44.6M
            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1005
44.6M
        }
1006
11.4M
    }
1007
1008
    /* update the type of the mb if necessary */
1009
2.86M
    if (i4_total_cost < ps_proc->i4_mb_cost)
1010
2.87M
    {
1011
2.87M
        ps_proc->i4_mb_cost = i4_total_cost;
1012
2.87M
        ps_proc->i4_mb_distortion = i4_total_distortion;
1013
2.87M
        ps_proc->u4_mb_type = I4x4;
1014
2.87M
    }
1015
2.86M
    if (i4_total_cost < ps_proc->i4_mb_intra_cost)
1016
2.86M
    {
1017
2.86M
        ps_proc->i4_mb_intra_cost = i4_total_cost;
1018
2.86M
    }
1019
1020
2.86M
    return ;
1021
2.86M
}
1022
1023
/**
1024
******************************************************************************
1025
*
1026
* @brief evaluate best intra 4x4 mode (rate distortion opt on)
1027
*
1028
* @par Description
1029
*  This function evaluates all the possible intra 4x4 modes and finds the mode
1030
*  that best represents the macro-block (least distortion) and occupies fewer
1031
*  bits in the bit-stream.
1032
*
1033
* @param[in]    ps_proc_ctxt
1034
*  pointer to proc ctxt
1035
*
1036
* @remarks
1037
*  Ideally the cost of encoding a macroblock is calculated as
1038
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
1039
*  input block and the reconstructed block and rate is the number of bits taken
1040
*  to place the macroblock in the bit-stream. In this routine the rate does not
1041
*  exactly point to the total number of bits it takes, rather it points to header
1042
*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
1043
*  and residual bits fall in to texture bits the number of bits taken to encoding
1044
*  mbtype is considered as rate, we compute cost. Further we will approximate
1045
*  the distortion as the deviation b/w input and the predicted block as opposed
1046
*  to input and reconstructed block.
1047
*
1048
*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
1049
*  24*lambda is added to the SAD before comparison with the best SAD for
1050
*  inter prediction. This is an empirical value to prevent using too many intra
1051
*  blocks.
1052
*
1053
* @return      none
1054
*
1055
******************************************************************************
1056
*/
1057
void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
1058
1.69M
{
1059
    /* Codec Context */
1060
1.69M
    codec_t *ps_codec = ps_proc->ps_codec;
1061
1062
    /* SAD(distortion metric) of an 4x4 block */
1063
1.69M
    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
1064
1065
    /* lambda */
1066
1.69M
    UWORD32 u4_lambda = ps_proc->u4_lambda;
1067
1068
    /* cost = distortion + lambda*rate */
1069
1.69M
    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
1070
1071
    /* cost due to mbtype */
1072
1.69M
    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
1073
1074
    /* intra mode */
1075
1.69M
    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
1076
1077
    /* neighbor pels for intra prediction */
1078
1.69M
    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1079
1080
    /* pointer to curr partition */
1081
1.69M
    UWORD8 *pu1_mb_curr;
1082
1.69M
    UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
1083
1.69M
    UWORD8 *pu1_ref_mb_intra_4x4;
1084
1085
    /* pointer to residual macro block */
1086
1.69M
    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1087
1088
    /* pointer to prediction macro block */
1089
1.69M
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1090
1091
    /* strides */
1092
1.69M
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
1093
1.69M
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1094
1.69M
    WORD32 i4_ref_strd_left, i4_ref_strd_top;
1095
1096
    /* neighbors left, top, top right, top left */
1097
1.69M
    UWORD8 *pu1_mb_a;
1098
1.69M
    UWORD8 *pu1_mb_b;
1099
1.69M
    UWORD8 *pu1_mb_c;
1100
1.69M
    UWORD8 *pu1_mb_d;
1101
1102
    /* number of non zero coeffs*/
1103
1.69M
    UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
1104
1105
    /* quantization parameters */
1106
1.69M
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1107
1108
    /* neighbor availability */
1109
1.69M
    WORD32 i4_ngbr_avbl;
1110
1.69M
    block_neighbors_t s_ngbr_avbl;
1111
1112
    /* temp vars */
1113
1.69M
    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
1114
1115
    /* scan order inside 4x4 block */
1116
1.69M
    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
1117
1118
    /* ngbr sub mb modes */
1119
1.69M
    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
1120
1.69M
    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1121
1.69M
    mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1122
1123
    /* valid intra modes map */
1124
1.69M
    UWORD32 u4_valid_intra_modes;
1125
1.69M
    UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
1126
1127
    /* Dummy variable for 4x4 trans function */
1128
1.69M
    WORD16 i2_dc_dummy;
1129
1.69M
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
1130
1.69M
    UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
1131
1132
    /* compute ngbr availability for sub blks */
1133
1.69M
    if (ps_proc->ps_ngbr_avbl->u1_mb_c)
1134
1.62M
    {
1135
1.62M
        ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1);
1136
1.62M
    }
1137
1138
    /* left pels */
1139
1.69M
    u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
1140
1.66M
                    && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
1141
1142
       /* top pels */
1143
1.69M
    u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
1144
1.65M
                    && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
1145
1146
       /* topleft pels */
1147
1.69M
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
1148
1.62M
                    && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
1149
1150
       /* top right pels */
1151
1.69M
    u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
1152
1.62M
                    && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
1153
1154
1.69M
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
1155
1.69M
    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
1156
1157
8.48M
    for (b8 = 0; b8 < 4; b8++)
1158
6.78M
    {
1159
6.78M
        u4_blk_x = (b8 & 0x01) << 3;
1160
6.78M
        u4_blk_y = (b8 >> 1) << 3;
1161
33.5M
        for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1162
26.8M
        {
1163
26.8M
            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
1164
26.8M
            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
1165
1166
26.8M
            pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
1167
26.8M
            pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
1168
26.8M
            if (u4_pix_x == 0)
1169
6.78M
            {
1170
6.78M
                i4_ref_strd_left = ps_proc->i4_rec_strd;
1171
6.78M
                pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left);
1172
6.78M
            }
1173
20.0M
            else
1174
20.0M
            {
1175
20.0M
                i4_ref_strd_left = i4_pred_strd;
1176
20.0M
                pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
1177
20.0M
            }
1178
26.8M
            if (u4_pix_y == 0)
1179
6.77M
            {
1180
6.77M
                i4_ref_strd_top = ps_proc->i4_rec_strd;
1181
6.77M
                pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top);
1182
6.77M
            }
1183
20.0M
            else
1184
20.0M
            {
1185
20.0M
                i4_ref_strd_top = i4_pred_strd;
1186
20.0M
                pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
1187
20.0M
            }
1188
1189
26.8M
            pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */
1190
26.8M
            pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
1191
26.8M
            pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */
1192
26.8M
            if (u4_pix_y == 0)
1193
6.77M
                pu1_mb_d = pu1_mb_b - 1;
1194
20.0M
            else
1195
20.0M
                pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
1196
1197
            /* locating neighbors that are available for prediction */
1198
            /* TODO : update the neighbor availability information basing on constrained intra pred information */
1199
            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
1200
            /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
1201
1202
26.8M
            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1203
26.8M
            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
1204
26.8M
            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
1205
26.8M
            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
1206
26.8M
            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
1207
            /* set valid intra modes for evaluation */
1208
26.8M
            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
1209
1210
            /* if top partition is available and top right is not available for intra prediction, then */
1211
            /* padd top right samples using top sample and make top right also available */
1212
            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
1213
1214
            /* gather prediction pels from the neighbors */
1215
26.8M
            if (s_ngbr_avbl.u1_mb_a)
1216
26.6M
            {
1217
132M
                for(i = 0; i < 4; i++)
1218
106M
                    pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left];
1219
26.6M
            }
1220
113k
            else
1221
113k
            {
1222
113k
                memset(pu1_ngbr_pels_i4,0,4);
1223
113k
            }
1224
26.8M
            if(s_ngbr_avbl.u1_mb_b)
1225
26.5M
            {
1226
26.5M
                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1227
26.5M
            }
1228
226k
            else
1229
226k
            {
1230
226k
                memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
1231
226k
            }
1232
26.8M
            if (s_ngbr_avbl.u1_mb_d)
1233
26.4M
                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1234
388k
            else
1235
388k
                pu1_ngbr_pels_i4[4] = 0;
1236
26.8M
            if (s_ngbr_avbl.u1_mb_c)
1237
18.2M
            {
1238
18.2M
                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
1239
18.2M
            }
1240
8.55M
            else if (s_ngbr_avbl.u1_mb_b)
1241
8.48M
            {
1242
8.48M
                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
1243
8.48M
                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
1244
8.48M
            }
1245
1246
26.8M
            i4_partition_cost_least = INT_MAX;
1247
1248
            /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
1249
26.8M
            if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
1250
345k
            {
1251
345k
                u4_estimated_intra_4x4_mode = DC_I4x4;
1252
345k
            }
1253
26.4M
            else
1254
26.4M
            {
1255
26.4M
                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
1256
26.4M
                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
1257
1258
26.4M
                if (u4_pix_x == 0)
1259
6.58M
                {
1260
6.58M
                    if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
1261
1.73M
                    {
1262
1.73M
                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
1263
1.73M
                    }
1264
4.85M
                    else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
1265
0
                    {
1266
0
                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
1267
0
                    }
1268
6.58M
                }
1269
19.8M
                else
1270
19.8M
                {
1271
19.8M
                    u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
1272
19.8M
                }
1273
1274
26.4M
                if (u4_pix_y == 0)
1275
6.54M
                {
1276
6.54M
                    if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
1277
1.71M
                    {
1278
1.71M
                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
1279
1.71M
                    }
1280
4.83M
                    else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
1281
0
                    {
1282
0
                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
1283
0
                    }
1284
6.54M
                }
1285
19.9M
                else
1286
19.9M
                {
1287
19.9M
                    u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
1288
19.9M
                }
1289
1290
26.4M
                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
1291
26.4M
            }
1292
1293
26.8M
            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
1294
1295
            /*mode evaluation and prediction*/
1296
26.8M
            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
1297
26.8M
                                                         pu1_ngbr_pels_i4,
1298
26.8M
                                                         pu1_pred_mb, i4_src_strd,
1299
26.8M
                                                         i4_pred_strd, i4_ngbr_avbl,
1300
26.8M
                                                         &u4_best_intra_4x4_mode,
1301
26.8M
                                                         &i4_partition_cost_least,
1302
26.8M
                                                         u4_valid_intra_modes,
1303
26.8M
                                                         u4_lambda,
1304
26.8M
                                                         u4_estimated_intra_4x4_mode);
1305
1306
1307
26.8M
            i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits);
1308
1309
26.8M
            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
1310
1311
            /* macroblock distortion */
1312
26.8M
            i4_total_distortion += i4_partition_distortion_least;
1313
26.8M
            i4_total_cost += i4_partition_cost_least;
1314
1315
            /* mb partition mode */
1316
26.8M
            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1317
1318
1319
            /********************************************************/
1320
            /*  error estimation,                                   */
1321
            /*  transform                                           */
1322
            /*  quantization                                        */
1323
            /********************************************************/
1324
26.8M
            ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb,
1325
26.8M
                                              pi2_res_mb, i4_src_strd,
1326
26.8M
                                              i4_pred_strd,
1327
                                              /* No op stride, this implies a buff of lenght 1x16 */
1328
26.8M
                                              ps_qp_params->pu2_scale_mat,
1329
26.8M
                                              ps_qp_params->pu2_thres_mat,
1330
26.8M
                                              ps_qp_params->u1_qbits,
1331
26.8M
                                              ps_qp_params->u4_dead_zone,
1332
26.8M
                                              pu1_nnz, &i2_dc_dummy);
1333
1334
            /********************************************************/
1335
            /*  ierror estimation,                                  */
1336
            /*  itransform                                          */
1337
            /*  iquantization                                       */
1338
            /********************************************************/
1339
26.8M
            ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb,
1340
26.8M
                                                 pu1_ref_mb_intra_4x4,
1341
26.8M
                                                 i4_pred_strd, i4_pred_strd,
1342
26.8M
                                                 ps_qp_params->pu2_iscale_mat,
1343
26.8M
                                                 ps_qp_params->pu2_weigh_mat,
1344
26.8M
                                                 ps_qp_params->u1_qp_div,
1345
26.8M
                                                 ps_proc->pv_scratch_buff, 0,
1346
26.8M
                                                 NULL);
1347
26.8M
        }
1348
6.78M
    }
1349
1350
    /* update the type of the mb if necessary */
1351
1.69M
    if (i4_total_cost < ps_proc->i4_mb_cost)
1352
1.67M
    {
1353
1.67M
        ps_proc->i4_mb_cost = i4_total_cost;
1354
1.67M
        ps_proc->i4_mb_distortion = i4_total_distortion;
1355
1.67M
        ps_proc->u4_mb_type = I4x4;
1356
1.67M
    }
1357
1.69M
    if (i4_total_cost < ps_proc->i4_mb_intra_cost)
1358
1.70M
    {
1359
1.70M
        ps_proc->i4_mb_intra_cost = i4_total_cost;
1360
1.70M
    }
1361
1362
1.69M
    return ;
1363
1.69M
}
1364
1365
/**
1366
******************************************************************************
1367
*
1368
* @brief
1369
*  evaluate best chroma intra 8x8 mode (rate distortion opt off)
1370
*
1371
* @par Description
1372
*  This function evaluates all the possible chroma intra 8x8 modes and finds
1373
*  the mode that best represents the macroblock (least distortion) and occupies
1374
*  fewer bits in the bitstream.
1375
*
1376
* @param[in] ps_proc
1377
*  pointer to macroblock context (handle)
1378
*
1379
* @remarks
1380
*  For chroma best intra pred mode is calculated based only on SAD
1381
*
1382
* @returns none
1383
*
1384
******************************************************************************
1385
*/
1386
void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
{
    /*
     * Bitmap of usable chroma modes, indexed by the neighbour availability
     * word built below (bit 0: left, bit 1: top left, bit 2: top).
     * Only the entry for "all three available" has the value-8 bit set,
     * which is the bit tested before the PLANE mode is evaluated.
     */
    const UWORD8 au1_modes_for_avail[8] = {1, 3, 1, 3, 5, 7, 5, 15};

    /* codec handle and the constrained intra prediction switch */
    codec_t *ps_codec = ps_proc->ps_codec;
    UWORD32 u4_cip = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;

    /* scratch array that receives the gathered neighbour pels */
    UWORD8 *pu1_ngbr = ps_proc->au1_ngbr_pels;

    /* source, reconstruction and prediction buffers with their strides */
    UWORD8 *pu1_src = ps_proc->pu1_src_buf_chroma;
    UWORD8 *pu1_rec = ps_proc->pu1_rec_buf_chroma;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb_intra_chroma;
    UWORD8 *pu1_pred_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
    WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* left, top and top left neighbours inside the reconstruction buffer */
    UWORD8 *pu1_left = pu1_rec - 2;
    UWORD8 *pu1_top = pu1_rec - i4_rec_strd;
    UWORD8 *pu1_top_left = pu1_top - 2;

    /* syntax element of the mb on top of the current one */
    mb_info_t *ps_top_mb = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;

    /* neighbour usable for prediction: 1 / 0 */
    UWORD8 u1_left_ok = 0, u1_top_ok = 0, u1_top_left_ok = 0;

    WORD32 i4_avail;
    WORD32 i4_row;
    UWORD32 u4_modes;

    /* running best mode / least sad, and the sad of the plane candidate */
    UWORD32 u4_best_mode = DC_CH_I8x8;
    WORD32 i4_best_sad = INT_MAX;
    WORD32 i4_plane_sad;

    /*
     * A neighbour is used only when it is available and, with constrained
     * intra prediction enabled, was itself coded as intra.
     */

    /* left pels : two bytes per row, stored bottom row first in [0..15] */
    if (ps_proc->ps_ngbr_avbl->u1_mb_a)
    {
        u1_left_ok = (!u4_cip || ps_proc->s_left_mb_syntax_ele.u2_is_intra);
    }
    if (!u1_left_ok)
    {
        ps_codec->pf_mem_set_mul8(pu1_ngbr, 0, MB_SIZE);
    }
    else
    {
        for (i4_row = 0; i4_row < 8; i4_row++)
        {
            pu1_ngbr[14 - 2 * i4_row] = pu1_left[i4_row * i4_rec_strd];
            pu1_ngbr[15 - 2 * i4_row] = pu1_left[i4_row * i4_rec_strd + 1];
        }
    }

    /* top pels : 16 bytes stored from index 18 onwards */
    if (ps_proc->ps_ngbr_avbl->u1_mb_b)
    {
        u1_top_ok = (!u4_cip || ps_top_mb->u2_is_intra);
    }
    if (!u1_top_ok)
    {
        ps_codec->pf_mem_set_mul8(pu1_ngbr + 18, 0, MB_SIZE);
    }
    else
    {
        ps_codec->pf_mem_cpy_mul8(pu1_ngbr + 18, pu1_top, 16);
    }

    /* top left pels : indices 16 and 17, left untouched when unusable */
    if (ps_proc->ps_ngbr_avbl->u1_mb_d)
    {
        u1_top_left_ok = (!u4_cip || ps_proc->s_top_left_mb_syntax_ele.u2_is_intra);
    }
    if (u1_top_left_ok)
    {
        pu1_ngbr[16] = pu1_top_left[0];
        pu1_ngbr[17] = pu1_top_left[1];
    }

    /* pack the flags and remember them in the process context */
    i4_avail = (u1_left_ok) + (u1_top_left_ok << 1) + (u1_top_ok << 2);
    ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_avail;

    u4_modes = au1_modes_for_avail[i4_avail];

    /* the fast presets never evaluate the plane mode */
    if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
                    ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
    {
        u4_modes &= ~(1 << PLANE_CH_I8x8);
    }

    /* evaluate the remaining modes; best mode and its sad come back by pointer */
    ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_src, pu1_ngbr, pu1_pred,
                                                    i4_src_strd, i4_pred_strd,
                                                    i4_avail, &u4_best_mode,
                                                    &i4_best_sad, u4_modes);

    /* plane mode is predicted into its own buffer and compared separately */
    if (u4_modes & 8)
    {
        (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr, pu1_pred_plane, 0,
                                                    i4_pred_strd, i4_avail);

        /* evaluate distortion (sad); the current best is handed over as well */
        ps_codec->pf_compute_sad_16x8(pu1_src, pu1_pred_plane, i4_src_strd,
                                      i4_pred_strd, i4_best_sad, &i4_plane_sad);

        if (i4_plane_sad < i4_best_sad)
        {
            i4_best_sad = i4_plane_sad;
            u4_best_mode = PLANE_CH_I8x8;
        }
    }

    DEBUG("%d partition cost, %d intra mode\n", i4_best_sad, u4_best_mode);

    /* publish the chosen chroma mode */
    ps_proc->u1_c_i8_mode = u4_best_mode;
}
1513
1514
1515
/**
1516
******************************************************************************
1517
*
1518
* @brief
1519
*  Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
1520
*  prediction.
1521
*
1522
* @par Description
1523
*  This function evaluates first three 16x16 modes and compute corresponding sad
1524
*  and return the buffer predicted with best mode.
1525
*
1526
* @param[in] pu1_src
1527
*  UWORD8 pointer to the source
1528
*
1529
* @param[in] pu1_ngbr_pels_i16
1530
*  UWORD8 pointer to neighbouring pels
1531
*
1532
* @param[out] pu1_dst
1533
*  UWORD8 pointer to the destination
1534
*
1535
* @param[in] src_strd
1536
*  integer source stride
1537
*
1538
* @param[in] dst_strd
1539
*  integer destination stride
1540
*
1541
* @param[in] u4_n_avblty
1542
*  availability of neighbouring pixels
1543
*
1544
* @param[out] u4_intra_mode
1545
*  Pointer to the variable in which best mode is returned
1546
*
1547
* @param[in,out] pu4_sadmin
1548
*  Pointer to the variable in which minimum sad is returned
1549
*
1550
* @param[in] u4_valid_intra_modes
1551
*  Says what all modes are valid
1552
*
1553
* @returns      none
1554
*
1555
******************************************************************************
1556
*/
1557
void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
1558
                                      UWORD8 *pu1_ngbr_pels_i16,
1559
                                      UWORD8 *pu1_dst,
1560
                                      UWORD32 src_strd,
1561
                                      UWORD32 dst_strd,
1562
                                      WORD32 u4_n_avblty,
1563
                                      UWORD32 *u4_intra_mode,
1564
                                      WORD32 *pu4_sadmin,
1565
                                      UWORD32 u4_valid_intra_modes)
1566
6.27M
{
1567
6.27M
    UWORD8 *pu1_neighbour;
1568
6.27M
    UWORD8 *pu1_src_temp = pu1_src;
1569
6.27M
    UWORD8 left = 0, top = 0;
1570
6.27M
    WORD32 u4_dcval = 0;
1571
6.27M
    WORD32 i, j;
1572
6.27M
    WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
1573
6.27M
                    i4_min_sad = INT_MAX;
1574
6.27M
    UWORD8 val;
1575
1576
6.27M
    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1577
6.27M
    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1578
1579
    /* left available */
1580
6.27M
    if (left)
1581
5.43M
    {
1582
5.43M
        i4_sad_horz = 0;
1583
1584
89.9M
        for (i = 0; i < 16; i++)
1585
84.5M
        {
1586
84.5M
            val = pu1_ngbr_pels_i16[15 - i];
1587
1588
84.5M
            u4_dcval += val;
1589
1590
1.43G
            for (j = 0; j < 16; j++)
1591
1.35G
            {
1592
1.35G
                i4_sad_horz += ABS(val - pu1_src_temp[j]);
1593
1.35G
            }
1594
1595
84.5M
            pu1_src_temp += src_strd;
1596
84.5M
        }
1597
5.43M
        u4_dcval += 8;
1598
5.43M
    }
1599
1600
6.27M
    pu1_src_temp = pu1_src;
1601
    /* top available */
1602
6.27M
    if (top)
1603
5.26M
    {
1604
5.26M
        i4_sad_vert = 0;
1605
1606
87.7M
        for (i = 0; i < 16; i++)
1607
82.4M
        {
1608
82.4M
            u4_dcval += pu1_ngbr_pels_i16[17 + i];
1609
1610
1.39G
            for (j = 0; j < 16; j++)
1611
1.31G
            {
1612
1.31G
                i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
1613
1.31G
            }
1614
82.4M
            pu1_src_temp += src_strd;
1615
1616
82.4M
        }
1617
5.26M
        u4_dcval += 8;
1618
5.26M
    }
1619
1620
6.27M
    u4_dcval = (u4_dcval) >> (3 + left + top);
1621
1622
6.27M
    pu1_src_temp = pu1_src;
1623
1624
    /* none available */
1625
6.27M
    u4_dcval += (left == 0) * (top == 0) * 128;
1626
1627
6.27M
    i4_sad_dc = 0;
1628
1629
104M
    for (i = 0; i < 16; i++)
1630
98.5M
    {
1631
1.67G
        for (j = 0; j < 16; j++)
1632
1.57G
        {
1633
1.57G
            i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
1634
1.57G
        }
1635
98.5M
        pu1_src_temp += src_strd;
1636
98.5M
    }
1637
1638
6.27M
    if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */
1639
0
        i4_sad_dc = INT_MAX;
1640
1641
6.27M
    if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */
1642
1.01M
        i4_sad_vert = INT_MAX;
1643
1644
6.27M
    if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */
1645
836k
        i4_sad_horz = INT_MAX;
1646
1647
6.27M
    i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
1648
1649
    /* Finding Minimum sad and doing corresponding prediction */
1650
6.27M
    if (i4_min_sad < *pu4_sadmin)
1651
6.29M
    {
1652
6.29M
        *pu4_sadmin = i4_min_sad;
1653
6.29M
        if (i4_min_sad == i4_sad_vert)
1654
5.07M
        {
1655
5.07M
            *u4_intra_mode = VERT_I16x16;
1656
5.07M
            pu1_neighbour = pu1_ngbr_pels_i16 + 17;
1657
85.8M
            for (j = 0; j < 16; j++)
1658
80.7M
            {
1659
80.7M
                memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
1660
80.7M
                pu1_dst += dst_strd;
1661
80.7M
            }
1662
5.07M
        }
1663
1.22M
        else if (i4_min_sad == i4_sad_horz)
1664
419k
        {
1665
419k
            *u4_intra_mode = HORZ_I16x16;
1666
7.12M
            for (j = 0; j < 16; j++)
1667
6.70M
            {
1668
6.70M
                val = pu1_ngbr_pels_i16[15 - j];
1669
6.70M
                memset(pu1_dst, val, MB_SIZE);
1670
6.70M
                pu1_dst += dst_strd;
1671
6.70M
            }
1672
419k
        }
1673
803k
        else
1674
803k
        {
1675
803k
            *u4_intra_mode = DC_I16x16;
1676
13.5M
            for (j = 0; j < 16; j++)
1677
12.7M
            {
1678
12.7M
                memset(pu1_dst, u4_dcval, MB_SIZE);
1679
12.7M
                pu1_dst += dst_strd;
1680
12.7M
            }
1681
803k
        }
1682
6.29M
    }
1683
6.27M
    return;
1684
6.27M
}
1685
1686
/**
1687
******************************************************************************
1688
*
1689
* @brief
1690
*  Evaluate best intra 4x4 mode and perform prediction.
1691
*
1692
* @par Description
1693
*  This function evaluates  4x4 modes and compute corresponding sad
1694
*  and return the buffer predicted with best mode.
1695
*
1696
* @param[in] pu1_src
1697
*  UWORD8 pointer to the source
1698
*
1699
* @param[in] pu1_ngbr_pels
1700
*  UWORD8 pointer to neighbouring pels
1701
*
1702
* @param[out] pu1_dst
1703
*  UWORD8 pointer to the destination
1704
*
1705
* @param[in] src_strd
1706
*  integer source stride
1707
*
1708
* @param[in] dst_strd
1709
*  integer destination stride
1710
*
1711
* @param[in] u4_n_avblty
1712
*  availability of neighbouring pixels
1713
*
1714
* @param[out] u4_intra_mode
1715
*  Pointer to the variable in which best mode is returned
1716
*
1717
* @param[out] pu4_sadmin
1718
*  Pointer to the variable in which minimum cost is returned
1719
*
1720
* @param[in] u4_valid_intra_modes
1721
*  Says what all modes are valid
1722
*
1723
* @param[in] u4_lambda
1724
*  Lambda value for computing cost from SAD
1725
*
1726
* @param[in] u4_predictd_mode
1727
*  Predicted mode for cost computation
1728
*
1729
* @returns      none
1730
*
1731
******************************************************************************
1732
*/
1733
void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
1734
                                     UWORD8 *pu1_ngbr_pels,
1735
                                     UWORD8 *pu1_dst,
1736
                                     UWORD32 src_strd,
1737
                                     UWORD32 dst_strd,
1738
                                     WORD32 u4_n_avblty,
1739
                                     UWORD32 *u4_intra_mode,
1740
                                     WORD32 *pu4_sadmin,
1741
                                     UWORD32 u4_valid_intra_modes,
1742
                                     UWORD32  u4_lambda,
1743
                                     UWORD32 u4_predictd_mode)
1744
10.3M
{
1745
10.3M
    UWORD8 *pu1_src_temp = pu1_src;
1746
10.3M
    UWORD8 *pu1_pred = pu1_ngbr_pels;
1747
10.3M
    UWORD8 left = 0, top = 0;
1748
10.3M
    UWORD8 u1_pred_val = 0;
1749
10.3M
    UWORD8 u1_pred_vals[4] = {0};
1750
10.3M
    UWORD8 *pu1_pred_val = NULL;
1751
    /* To store FILT121 operated values*/
1752
10.3M
    UWORD8 u1_pred_vals_diag_121[15] = {0};
1753
    /* To store FILT11 operated values*/
1754
10.3M
    UWORD8 u1_pred_vals_diag_11[15] = {0};
1755
10.3M
    UWORD8 u1_pred_vals_vert_r[8] = {0};
1756
10.3M
    UWORD8 u1_pred_vals_horz_d[10] = {0};
1757
10.3M
    UWORD8 u1_pred_vals_horz_u[10] = {0};
1758
10.3M
    WORD32 u4_dcval = 0;
1759
10.3M
    WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1760
10.3M
                               INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1761
1762
10.3M
    WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1763
10.3M
                                INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1764
10.3M
    WORD32 i, i4_min_cost = INT_MAX;
1765
1766
10.3M
    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1767
10.3M
    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1768
1769
    /* Computing SAD */
1770
1771
    /* VERT mode valid */
1772
10.3M
    if (u4_valid_intra_modes & 1)
1773
9.56M
    {
1774
9.56M
        pu1_pred = pu1_ngbr_pels + 5;
1775
9.56M
        i4_sad[VERT_I4x4] = 0;
1776
9.56M
        i4_cost[VERT_I4x4] = 0;
1777
1778
9.56M
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1779
9.56M
        pu1_src_temp += src_strd;
1780
9.56M
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1781
9.56M
        pu1_src_temp += src_strd;
1782
9.56M
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1783
9.56M
        pu1_src_temp += src_strd;
1784
9.56M
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1785
1786
9.56M
        i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
1787
8.20M
                                        u4_lambda : 4 * u4_lambda);
1788
9.56M
    }
1789
1790
    /* HORZ mode valid */
1791
10.3M
    if (u4_valid_intra_modes & 2)
1792
9.70M
    {
1793
9.70M
        i4_sad[HORZ_I4x4] = 0;
1794
9.70M
        i4_cost[HORZ_I4x4] =0;
1795
9.70M
        pu1_src_temp = pu1_src;
1796
1797
9.70M
        u1_pred_val = pu1_ngbr_pels[3];
1798
1799
9.70M
        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1800
9.70M
                        + ABS(pu1_src_temp[1] - u1_pred_val)
1801
9.70M
                        + ABS(pu1_src_temp[2] - u1_pred_val)
1802
9.70M
                        + ABS(pu1_src_temp[3] - u1_pred_val);
1803
9.70M
        pu1_src_temp += src_strd;
1804
1805
9.70M
        u1_pred_val = pu1_ngbr_pels[2];
1806
1807
9.70M
        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1808
9.70M
                        + ABS(pu1_src_temp[1] - u1_pred_val)
1809
9.70M
                        + ABS(pu1_src_temp[2] - u1_pred_val)
1810
9.70M
                        + ABS(pu1_src_temp[3] - u1_pred_val);
1811
9.70M
        pu1_src_temp += src_strd;
1812
1813
9.70M
        u1_pred_val = pu1_ngbr_pels[1];
1814
1815
9.70M
        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1816
9.70M
                        + ABS(pu1_src_temp[1] - u1_pred_val)
1817
9.70M
                        + ABS(pu1_src_temp[2] - u1_pred_val)
1818
9.70M
                        + ABS(pu1_src_temp[3] - u1_pred_val);
1819
9.70M
        pu1_src_temp += src_strd;
1820
1821
9.70M
        u1_pred_val = pu1_ngbr_pels[0];
1822
1823
9.70M
        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1824
9.70M
                        + ABS(pu1_src_temp[1] - u1_pred_val)
1825
9.70M
                        + ABS(pu1_src_temp[2] - u1_pred_val)
1826
9.70M
                        + ABS(pu1_src_temp[3] - u1_pred_val);
1827
1828
9.70M
        i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
1829
6.85M
                                        u4_lambda : 4 * u4_lambda);
1830
9.70M
    }
1831
1832
    /* DC mode valid */
1833
10.3M
    if (u4_valid_intra_modes & 4)
1834
10.3M
    {
1835
10.3M
        i4_sad[DC_I4x4] = 0;
1836
10.3M
        i4_cost[DC_I4x4] = 0;
1837
10.3M
        pu1_src_temp = pu1_src;
1838
1839
10.3M
        if (left)
1840
9.69M
            u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
1841
9.69M
                            + pu1_ngbr_pels[3] + 2;
1842
10.3M
        if (top)
1843
9.59M
            u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
1844
9.59M
                            + pu1_ngbr_pels[8] + 2;
1845
1846
10.3M
        u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1847
1848
        /* none available */
1849
10.3M
        memset(u1_pred_vals, u4_dcval, 4);
1850
10.3M
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1851
10.3M
        pu1_src_temp += src_strd;
1852
10.3M
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1853
10.3M
        pu1_src_temp += src_strd;
1854
10.3M
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1855
10.3M
        pu1_src_temp += src_strd;
1856
10.3M
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1857
10.3M
        pu1_src_temp += src_strd;
1858
1859
10.3M
        i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
1860
6.14M
                                        u4_lambda : 4 * u4_lambda);
1861
10.3M
    }
1862
1863
    /* if modes other than VERT, HORZ and DC are  valid */
1864
10.3M
    if (u4_valid_intra_modes > 7)
1865
10.2M
    {
1866
10.2M
        pu1_pred = pu1_ngbr_pels;
1867
10.2M
        pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1868
1869
        /* Performing FILT121 and FILT11 operation for all neighbour values*/
1870
140M
        for (i = 0; i < 13; i++)
1871
130M
        {
1872
130M
            u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1873
130M
            u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1874
1875
130M
            pu1_pred++;
1876
130M
        }
1877
1878
10.2M
        if (u4_valid_intra_modes & 8)/* DIAG_DL */
1879
9.41M
        {
1880
9.41M
            i4_sad[DIAG_DL_I4x4] = 0;
1881
9.41M
            i4_cost[DIAG_DL_I4x4] = 0;
1882
9.41M
            pu1_src_temp = pu1_src;
1883
9.41M
            pu1_pred_val = u1_pred_vals_diag_121 + 5;
1884
1885
9.41M
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1886
9.41M
            pu1_src_temp += src_strd;
1887
9.41M
            USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1888
9.41M
            pu1_src_temp += src_strd;
1889
9.41M
            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1890
9.41M
            pu1_src_temp += src_strd;
1891
9.41M
            USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1892
9.41M
            pu1_src_temp += src_strd;
1893
9.41M
            i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
1894
9.40M
                                            u4_lambda : 4 * u4_lambda);
1895
9.41M
        }
1896
1897
10.2M
        if (u4_valid_intra_modes & 16)/* DIAG_DR */
1898
9.01M
        {
1899
9.01M
            i4_sad[DIAG_DR_I4x4] = 0;
1900
9.01M
            i4_cost[DIAG_DR_I4x4] = 0;
1901
9.01M
            pu1_src_temp = pu1_src;
1902
9.01M
            pu1_pred_val = u1_pred_vals_diag_121 + 3;
1903
1904
9.01M
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1905
9.01M
            pu1_src_temp += src_strd;
1906
9.01M
            USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1907
9.01M
            pu1_src_temp += src_strd;
1908
9.01M
            USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1909
9.01M
            pu1_src_temp += src_strd;
1910
9.01M
            USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1911
9.01M
            pu1_src_temp += src_strd;
1912
9.01M
            i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
1913
9.01M
                                            u4_lambda : 4 * u4_lambda);
1914
1915
9.01M
        }
1916
1917
10.2M
        if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
1918
9.02M
        {
1919
9.02M
            i4_sad[VERT_R_I4x4] = 0;
1920
1921
9.02M
            pu1_src_temp = pu1_src;
1922
9.02M
            u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1923
9.02M
            memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1924
9.02M
            u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1925
9.02M
            memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1926
1927
9.02M
            pu1_pred_val = u1_pred_vals_diag_11 + 4;
1928
9.02M
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1929
9.02M
            pu1_pred_val = u1_pred_vals_diag_121 + 3;
1930
9.02M
            pu1_src_temp += src_strd;
1931
9.02M
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1932
9.02M
            pu1_src_temp += src_strd;
1933
9.02M
            USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1934
9.02M
            pu1_src_temp += src_strd;
1935
9.02M
            USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
1936
9.02M
                   i4_sad[VERT_R_I4x4]);
1937
1938
9.02M
            i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
1939
9.01M
                                            u4_lambda : 4 * u4_lambda);
1940
9.02M
        }
1941
1942
10.2M
        if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
1943
9.00M
        {
1944
9.00M
            i4_sad[HORZ_D_I4x4] = 0;
1945
1946
9.00M
            pu1_src_temp = pu1_src;
1947
9.00M
            u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1948
9.00M
            memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1949
9.00M
            u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1950
9.00M
            u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1951
9.00M
            u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1952
9.00M
            u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1953
9.00M
            u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1954
9.00M
            u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1955
1956
9.00M
            pu1_pred_val = u1_pred_vals_horz_d;
1957
9.00M
            USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1958
9.00M
            pu1_src_temp += src_strd;
1959
9.00M
            USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1960
9.00M
            pu1_src_temp += src_strd;
1961
9.00M
            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1962
9.00M
            pu1_src_temp += src_strd;
1963
9.00M
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1964
1965
9.00M
            i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
1966
9.00M
                                            u4_lambda : 4 * u4_lambda);
1967
9.00M
        }
1968
1969
10.2M
        if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
1970
9.56M
        {
1971
9.56M
            i4_sad[VERT_L_I4x4] = 0;
1972
9.56M
            pu1_src_temp = pu1_src;
1973
9.56M
            pu1_pred_val = u1_pred_vals_diag_11 + 5;
1974
9.56M
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1975
9.56M
            pu1_src_temp += src_strd;
1976
9.56M
            pu1_pred_val = u1_pred_vals_diag_121 + 5;
1977
9.56M
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1978
9.56M
            pu1_src_temp += src_strd;
1979
9.56M
            pu1_pred_val = u1_pred_vals_diag_11 + 6;
1980
9.56M
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1981
9.56M
            pu1_src_temp += src_strd;
1982
9.56M
            pu1_pred_val = u1_pred_vals_diag_121 + 6;
1983
9.56M
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1984
1985
9.56M
            i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
1986
9.56M
                                            u4_lambda : 4 * u4_lambda);
1987
9.56M
        }
1988
1989
10.2M
        if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
1990
9.72M
        {
1991
9.72M
            i4_sad[HORZ_U_I4x4] = 0;
1992
9.72M
            pu1_src_temp = pu1_src;
1993
9.72M
            u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1994
9.72M
            u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1995
9.72M
            u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1996
9.72M
            u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1997
9.72M
            u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1998
9.72M
            u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1999
2000
9.72M
            memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
2001
2002
9.72M
            pu1_pred_val = u1_pred_vals_horz_u;
2003
9.72M
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
2004
9.72M
            pu1_src_temp += src_strd;
2005
9.72M
            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
2006
9.72M
            pu1_src_temp += src_strd;
2007
9.72M
            USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
2008
9.72M
            pu1_src_temp += src_strd;
2009
9.72M
            USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
2010
2011
9.72M
            i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
2012
9.72M
                                            u4_lambda : 4 * u4_lambda);
2013
9.72M
        }
2014
2015
10.2M
        i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
2016
10.2M
                        MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
2017
10.2M
                        MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
2018
2019
10.2M
    }
2020
127k
    else
2021
127k
    {
2022
        /* Only first three modes valid */
2023
127k
        i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
2024
127k
    }
2025
2026
10.3M
    *pu4_sadmin = i4_min_cost;
2027
2028
10.3M
    if (i4_min_cost == i4_cost[0])
2029
1.30M
    {
2030
1.30M
        *u4_intra_mode = VERT_I4x4;
2031
1.30M
        pu1_pred_val = pu1_ngbr_pels + 5;
2032
1.30M
        memcpy(pu1_dst, (pu1_pred_val), 4);
2033
1.30M
        pu1_dst += dst_strd;
2034
1.30M
        memcpy(pu1_dst, (pu1_pred_val), 4);
2035
1.30M
        pu1_dst += dst_strd;
2036
1.30M
        memcpy(pu1_dst, (pu1_pred_val), 4);
2037
1.30M
        pu1_dst += dst_strd;
2038
1.30M
        memcpy(pu1_dst, (pu1_pred_val), 4);
2039
1.30M
    }
2040
9.05M
    else if (i4_min_cost == i4_cost[1])
2041
2.98M
    {
2042
2.98M
        *u4_intra_mode = HORZ_I4x4;
2043
2.98M
        memset(pu1_dst, pu1_ngbr_pels[3], 4);
2044
2.98M
        pu1_dst += dst_strd;
2045
2.98M
        memset(pu1_dst, pu1_ngbr_pels[2], 4);
2046
2.98M
        pu1_dst += dst_strd;
2047
2.98M
        memset(pu1_dst, pu1_ngbr_pels[1], 4);
2048
2.98M
        pu1_dst += dst_strd;
2049
2.98M
        memset(pu1_dst, pu1_ngbr_pels[0], 4);
2050
2.98M
    }
2051
6.07M
    else if (i4_min_cost == i4_cost[2])
2052
5.99M
    {
2053
5.99M
        *u4_intra_mode = DC_I4x4;
2054
5.99M
        memset(pu1_dst, u4_dcval, 4);
2055
5.99M
        pu1_dst += dst_strd;
2056
5.99M
        memset(pu1_dst, u4_dcval, 4);
2057
5.99M
        pu1_dst += dst_strd;
2058
5.99M
        memset(pu1_dst, u4_dcval, 4);
2059
5.99M
        pu1_dst += dst_strd;
2060
5.99M
        memset(pu1_dst, u4_dcval, 4);
2061
5.99M
    }
2062
84.4k
    else if (i4_min_cost == i4_cost[3])
2063
22.4k
    {
2064
22.4k
        *u4_intra_mode = DIAG_DL_I4x4;
2065
22.4k
        pu1_pred_val = u1_pred_vals_diag_121 + 5;
2066
22.4k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2067
22.4k
        pu1_dst += dst_strd;
2068
22.4k
        memcpy(pu1_dst, (pu1_pred_val + 1), 4);
2069
22.4k
        pu1_dst += dst_strd;
2070
22.4k
        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2071
22.4k
        pu1_dst += dst_strd;
2072
22.4k
        memcpy(pu1_dst, (pu1_pred_val + 3), 4);
2073
22.4k
    }
2074
61.9k
    else if (i4_min_cost == i4_cost[4])
2075
11.3k
    {
2076
11.3k
        *u4_intra_mode = DIAG_DR_I4x4;
2077
11.3k
        pu1_pred_val = u1_pred_vals_diag_121 + 3;
2078
2079
11.3k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2080
11.3k
        pu1_dst += dst_strd;
2081
11.3k
        memcpy(pu1_dst, (pu1_pred_val - 1), 4);
2082
11.3k
        pu1_dst += dst_strd;
2083
11.3k
        memcpy(pu1_dst, (pu1_pred_val - 2), 4);
2084
11.3k
        pu1_dst += dst_strd;
2085
11.3k
        memcpy(pu1_dst, (pu1_pred_val - 3), 4);
2086
11.3k
    }
2087
50.6k
    else if (i4_min_cost == i4_cost[5])
2088
12.8k
    {
2089
12.8k
        *u4_intra_mode = VERT_R_I4x4;
2090
12.8k
        pu1_pred_val = u1_pred_vals_diag_11 + 4;
2091
12.8k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2092
12.8k
        pu1_dst += dst_strd;
2093
12.8k
        pu1_pred_val = u1_pred_vals_diag_121 + 3;
2094
12.8k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2095
12.8k
        pu1_dst += dst_strd;
2096
12.8k
        memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2097
12.8k
        pu1_dst += dst_strd;
2098
12.8k
        memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2099
12.8k
    }
2100
37.7k
    else if (i4_min_cost == i4_cost[6])
2101
11.8k
    {
2102
11.8k
        *u4_intra_mode = HORZ_D_I4x4;
2103
11.8k
        pu1_pred_val = u1_pred_vals_horz_d;
2104
11.8k
        memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2105
11.8k
        pu1_dst += dst_strd;
2106
11.8k
        memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2107
11.8k
        pu1_dst += dst_strd;
2108
11.8k
        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2109
11.8k
        pu1_dst += dst_strd;
2110
11.8k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2111
11.8k
        pu1_dst += dst_strd;
2112
11.8k
    }
2113
25.8k
    else if (i4_min_cost == i4_cost[7])
2114
13.0k
    {
2115
13.0k
        *u4_intra_mode = VERT_L_I4x4;
2116
13.0k
        pu1_pred_val = u1_pred_vals_diag_11 + 5;
2117
13.0k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2118
13.0k
        pu1_dst += dst_strd;
2119
13.0k
        pu1_pred_val = u1_pred_vals_diag_121 + 5;
2120
13.0k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2121
13.0k
        pu1_dst += dst_strd;
2122
13.0k
        pu1_pred_val = u1_pred_vals_diag_11 + 6;
2123
13.0k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2124
13.0k
        pu1_dst += dst_strd;
2125
13.0k
        pu1_pred_val = u1_pred_vals_diag_121 + 6;
2126
13.0k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2127
13.0k
    }
2128
12.7k
    else if (i4_min_cost == i4_cost[8])
2129
24.1k
    {
2130
24.1k
        *u4_intra_mode = HORZ_U_I4x4;
2131
24.1k
        pu1_pred_val = u1_pred_vals_horz_u;
2132
24.1k
        memcpy(pu1_dst, (pu1_pred_val), 4);
2133
24.1k
        pu1_dst += dst_strd;
2134
24.1k
        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2135
24.1k
        pu1_dst += dst_strd;
2136
24.1k
        memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2137
24.1k
        pu1_dst += dst_strd;
2138
24.1k
        memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2139
24.1k
        pu1_dst += dst_strd;
2140
24.1k
    }
2141
2142
10.3M
    return;
2143
10.3M
}
2144
2145
/**
2146
******************************************************************************
2147
*
2148
* @brief:
2149
*  Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction.
2150
*
2151
* @par Description
2152
*  This function evaluates  first three intra chroma modes and compute corresponding sad
2153
*  and return the buffer predicted with best mode.
2154
*
2155
* @param[in] pu1_src
2156
*  UWORD8 pointer to the source
2157
*
2158
* @param[in] pu1_ngbr_pels
2159
*  UWORD8 pointer to neighbouring pels
2160
*
2161
* @param[out] pu1_dst
2162
*  UWORD8 pointer to the destination
2163
*
2164
* @param[in] src_strd
2165
*  integer source stride
2166
*
2167
* @param[in] dst_strd
2168
*  integer destination stride
2169
*
2170
* @param[in] u4_n_avblty
2171
*  availability of neighbouring pixels
2172
*
2173
* @param[in] u4_intra_mode
2174
*  Pointer to the variable in which best mode is returned
2175
*
2176
* @param[in] pu4_sadmin
2177
*  Pointer to the variable in which minimum sad is returned
2178
*
2179
* @param[in] u4_valid_intra_modes
2180
*  Says what all modes are valid
2181
*
2182
* @return      none
2183
*
2184
******************************************************************************
2185
*/
2186
void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
2187
                                        UWORD8 *pu1_ngbr_pels,
2188
                                        UWORD8 *pu1_dst,
2189
                                        UWORD32 src_strd,
2190
                                        UWORD32 dst_strd,
2191
                                        WORD32 u4_n_avblty,
2192
                                        UWORD32 *u4_intra_mode,
2193
                                        WORD32 *pu4_sadmin,
2194
                                        UWORD32 u4_valid_intra_modes)
2195
3.71M
{
2196
3.71M
    UWORD8 *pu1_neighbour;
2197
3.71M
    UWORD8 *pu1_src_temp = pu1_src;
2198
3.71M
    UWORD8 left = 0, top = 0;
2199
3.71M
    WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */
2200
3.71M
           u4_dcval_u_t[2] = { 0, 0 };  /*sum top neighbours for 'U'*/
2201
2202
3.71M
    WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/
2203
3.71M
           u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/
2204
2205
3.71M
    WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX,
2206
3.71M
                    i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
2207
3.71M
    UWORD8 val_u, val_v;
2208
2209
3.71M
    WORD32 u4_dc_val[2][2][2];/*  -----------
2210
                                  |    |    |  Chroma can have four
2211
                                  | 00 | 01 |  separate dc value...
2212
                                  -----------  u4_dc_val corresponds to this dc values
2213
                                  |    |    |  with u4_dc_val[2][2][U] and u4_dc_val[2][2][V]
2214
                                  | 10 | 11 |
2215
                                  -----------                */
2216
3.71M
    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2217
3.71M
    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2218
2219
    /*Evaluating HORZ*/
2220
3.71M
    if (left)/* Ifleft available*/
2221
3.60M
    {
2222
3.60M
        i4_sad_horz = 0;
2223
2224
32.1M
        for (i = 0; i < 8; i++)
2225
28.5M
        {
2226
28.5M
            val_v = pu1_ngbr_pels[15 - 2 * i];
2227
28.5M
            val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2228
28.5M
            row = i / 4;
2229
28.5M
            u4_dcval_u_l[row] += val_u;
2230
28.5M
            u4_dcval_v_l[row] += val_v;
2231
256M
            for (j = 0; j < 8; j++)
2232
227M
            {
2233
227M
                i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/
2234
227M
                i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2235
227M
            }
2236
2237
28.5M
            pu1_src_temp += src_strd;
2238
28.5M
        }
2239
3.60M
        u4_dcval_u_l[0] += 2;
2240
3.60M
        u4_dcval_u_l[1] += 2;
2241
3.60M
        u4_dcval_v_l[0] += 2;
2242
3.60M
        u4_dcval_v_l[1] += 2;
2243
3.60M
    }
2244
2245
    /*Evaluating VERT**/
2246
3.71M
    pu1_src_temp = pu1_src;
2247
3.71M
    if (top) /* top available*/
2248
3.53M
    {
2249
3.53M
        i4_sad_vert = 0;
2250
2251
31.5M
        for (i = 0; i < 8; i++)
2252
28.0M
        {
2253
28.0M
            col = i / 4;
2254
2255
28.0M
            val_u = pu1_ngbr_pels[18 + i * 2];
2256
28.0M
            val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2257
28.0M
            u4_dcval_u_t[col] += val_u;
2258
28.0M
            u4_dcval_v_t[col] += val_v;
2259
2260
475M
            for (j = 0; j < 16; j++)
2261
447M
            {
2262
447M
                i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/
2263
447M
            }
2264
28.0M
            pu1_src_temp += src_strd;
2265
2266
28.0M
        }
2267
3.53M
        u4_dcval_u_t[0] += 2;
2268
3.53M
        u4_dcval_u_t[1] += 2;
2269
3.53M
        u4_dcval_v_t[0] += 2;
2270
3.53M
        u4_dcval_v_t[1] += 2;
2271
3.53M
    }
2272
2273
    /* computing DC value*/
2274
    /* Equation  8-128 in spec*/
2275
3.71M
    u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2276
3.71M
    u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2277
3.71M
    u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2278
3.71M
    u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2279
2280
3.71M
    if (top)
2281
3.53M
    {
2282
        /* Equation  8-132 in spec*/
2283
3.53M
        u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2284
3.53M
        u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2285
3.53M
    }
2286
176k
    else
2287
176k
    {
2288
176k
        u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2289
176k
        u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2290
176k
    }
2291
2292
3.71M
    if (left)
2293
3.60M
    {
2294
3.60M
        u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2295
3.60M
        u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2296
3.60M
    }
2297
108k
    else
2298
108k
    {
2299
108k
        u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2300
108k
        u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2301
108k
    }
2302
2303
3.71M
    if (!(left || top))
2304
26.1k
    {
2305
        /*none available*/
2306
26.1k
        u4_dc_val[0][0][0] = u4_dc_val[0][0][1] =
2307
26.1k
        u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2308
26.1k
        u4_dc_val[1][0][0] = u4_dc_val[1][0][1] =
2309
26.1k
        u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2310
26.1k
    }
2311
2312
    /* Evaluating DC */
2313
3.71M
    pu1_src_temp = pu1_src;
2314
3.71M
    i4_sad_dc = 0;
2315
33.1M
    for (i = 0; i < 8; i++)
2316
29.3M
    {
2317
264M
        for (j = 0; j < 8; j++)
2318
235M
        {
2319
235M
            col = j / 4;
2320
235M
            row = i / 4;
2321
235M
            val_u = u4_dc_val[row][col][0];
2322
235M
            val_v = u4_dc_val[row][col][1];
2323
2324
235M
            i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/
2325
235M
            i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2326
235M
        }
2327
29.3M
        pu1_src_temp += src_strd;
2328
29.3M
    }
2329
2330
3.71M
    if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/
2331
0
        i4_sad_dc = INT_MAX;
2332
3.71M
    if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/
2333
110k
        i4_sad_horz = INT_MAX;
2334
3.71M
    if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/
2335
178k
        i4_sad_vert = INT_MAX;
2336
2337
3.71M
    i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2338
2339
    /* Finding Minimum sad and doing corresponding prediction*/
2340
3.71M
    if (i4_min_sad < *pu4_sadmin)
2341
3.72M
    {
2342
3.72M
        *pu4_sadmin = i4_min_sad;
2343
2344
3.72M
        if (i4_min_sad == i4_sad_dc)
2345
3.52M
        {
2346
3.52M
            *u4_intra_mode = DC_CH_I8x8;
2347
31.4M
            for (i = 0; i < 8; i++)
2348
27.9M
            {
2349
250M
                for (j = 0; j < 8; j++)
2350
222M
                {
2351
222M
                    col = j / 4;
2352
222M
                    row = i / 4;
2353
2354
222M
                    pu1_dst[2 * j] = u4_dc_val[row][col][0];
2355
222M
                    pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2356
222M
                }
2357
27.9M
                pu1_dst += dst_strd;
2358
27.9M
            }
2359
3.52M
        }
2360
199k
        else if (i4_min_sad == i4_sad_horz)
2361
59.6k
        {
2362
59.6k
            *u4_intra_mode = HORZ_CH_I8x8;
2363
536k
            for (j = 0; j < 8; j++)
2364
476k
            {
2365
476k
                val_v = pu1_ngbr_pels[15 - 2 * j];
2366
476k
                val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2367
2368
4.29M
                for (i = 0; i < 8; i++)
2369
3.81M
                {
2370
3.81M
                    pu1_dst[2 * i] = val_u;
2371
3.81M
                    pu1_dst[2 * i + 1] = val_v;
2372
2373
3.81M
                }
2374
476k
                pu1_dst += dst_strd;
2375
476k
            }
2376
59.6k
        }
2377
139k
        else
2378
139k
        {
2379
139k
            *u4_intra_mode = VERT_CH_I8x8;
2380
139k
            pu1_neighbour = pu1_ngbr_pels + 18;
2381
1.25M
            for (j = 0; j < 8; j++)
2382
1.11M
            {
2383
1.11M
                memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2384
1.11M
                pu1_dst += dst_strd;
2385
1.11M
            }
2386
139k
        }
2387
3.72M
    }
2388
2389
3.71M
    return;
2390
3.71M
}