Coverage Report

Created: 2024-07-27 06:35

/src/libavc/encoder/svc/isvce_intra_modes_eval.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2022 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
21
/**
22
*******************************************************************************
23
* @file
24
*  isvce_intra_modes_eval.c
25
*
26
* @brief
27
*  This file contains definitions of routines that perform rate distortion
28
*  analysis on a macroblock if they are to be coded as intra.
29
*
30
* @author
31
*  ittiam
32
*
33
* @par List of Functions:
34
*  - isvce_derive_neighbor_availability_of_mbs()
35
*  - isvce_derive_ngbr_avbl_of_mb_partitions()
36
*  - isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
37
*  - isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
38
*  - isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
39
*  - isvce_evaluate_intra4x4_modes_for_least_cost_rdopton()
40
*  - isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
41
*  - isvce_evaluate_intra16x16_modes()
42
*  - isvce_evaluate_intra4x4_modes()
43
*  - isvce_evaluate_intra_chroma_modes()
44
*
45
* @remarks
46
*  None
47
*
48
*******************************************************************************
49
*/
50
51
/*****************************************************************************/
52
/* File Includes                                                             */
53
/*****************************************************************************/
54
55
/* System include files */
56
#include <stdio.h>
57
#include <string.h>
58
#include <limits.h>
59
#include <assert.h>
60
61
/* User include files */
62
#include "ih264e_config.h"
63
#include "ih264_typedefs.h"
64
#include "iv2.h"
65
#include "ive2.h"
66
#include "ih264_debug.h"
67
#include "isvc_defs.h"
68
#include "isvc_macros.h"
69
#include "ih264_intra_pred_filters.h"
70
#include "isvc_structs.h"
71
#include "isvc_common_tables.h"
72
#include "isvc_trans_quant_itrans_iquant.h"
73
#include "isvc_inter_pred_filters.h"
74
#include "isvc_mem_fns.h"
75
#include "ih264_padding.h"
76
#include "ih264_size_defs.h"
77
#include "ih264_deblk_edge_filters.h"
78
#include "isvc_cabac_tables.h"
79
#include "isvce_defs.h"
80
#include "ime_distortion_metrics.h"
81
#include "ih264e_error.h"
82
#include "ih264e_bitstream.h"
83
#include "ime_defs.h"
84
#include "ime_structs.h"
85
#include "irc_cntrl_param.h"
86
#include "irc_frame_info_collector.h"
87
#include "isvce_rate_control.h"
88
#include "isvce_cabac_structs.h"
89
#include "isvce_structs.h"
90
#include "ih264e_intra_modes_eval.h"
91
#include "isvce_globals.h"
92
#include "ime_platform_macros.h"
93
94
/*****************************************************************************/
95
/* Function Definitions                                                      */
96
/*****************************************************************************/
97
98
/**
99
******************************************************************************
100
*
101
* @brief
102
*  derivation process for subblock/partition availability
103
*
104
* @par   Description
105
*  Calculates the availability of the left, top, topright and topleft subblock
106
*  or partitions.
107
*
108
* @param[in]    ps_ngbr_avbl
109
*  pointer to the neighbor availability flags of the current macroblock
110
*
111
* @param[in]    i1_pel_pos_x
112
*  column position of the pel wrt the current block
113
*
114
* @param[in]    i1_pel_pos_y
115
*  row position of the pel wrt the current block
116
*
117
* @remarks     Assumptions: before calling this function it is assumed that
118
*   the neighbor availability of the current macroblock is already derived.
119
*   Based on table 6-3 of H264 specification
120
*
121
* @return      availability status (yes or no)
122
*
123
******************************************************************************
124
*/
125
UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, WORD8 i1_pel_pos_x,
126
                                               WORD8 i1_pel_pos_y)
127
0
{
128
0
    UWORD8 u1_neighbor_avail = 0;
129
130
    /**********************************************************************/
131
    /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to   */
132
    /* various columns of a macroblock                                    */
133
    /*                                                                    */
134
    /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to   */
135
    /* various rows of a macroblock                                       */
136
    /*                                                                    */
137
    /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements    */
138
    /* outside the bound of an mb ie., represents its neighbors.          */
139
    /**********************************************************************/
140
0
    if(i1_pel_pos_x < 0)
141
0
    { /* column(-1) */
142
0
        if(i1_pel_pos_y < 0)
143
0
        {                                              /* row(-1) */
144
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
145
0
        }
146
0
        else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
147
0
        {                                              /* all rows of a macroblock */
148
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
149
0
        }
150
0
        else                       /* if (i1_pel_pos_y >= 16) */
151
0
        {                          /* rows(+16) */
152
0
            u1_neighbor_avail = 0; /* current mb bottom left availability */
153
0
        }
154
0
    }
155
0
    else if(i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
156
0
    { /* all columns of a macroblock */
157
0
        if(i1_pel_pos_y < 0)
158
0
        {                                              /* row(-1) */
159
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
160
0
        }
161
0
        else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
162
0
        {                          /* all rows of a macroblock */
163
0
            u1_neighbor_avail = 1; /* current mb availability */
164
            /* availability of the partition is dependent on the position of the
165
             * partition inside the mb */
166
            /* although the availability is declared as 1 in all cases, this needs to
166
             * be corrected elsewhere; it is not done here */
168
0
        }
169
0
        else                       /* if (i1_pel_pos_y >= 16) */
170
0
        {                          /* rows(+16) */
171
0
            u1_neighbor_avail = 0; /* current mb bottom availability */
172
0
        }
173
0
    }
174
0
    else if(i1_pel_pos_x >= 16)
175
0
    { /* column(+16) */
176
0
        if(i1_pel_pos_y < 0)
177
0
        {                                              /* row(-1) */
178
0
            u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
179
0
        }
180
0
        else                       /* if (i1_pel_pos_y >= 0) */
181
0
        {                          /* all other rows */
182
0
            u1_neighbor_avail = 0; /* current mb right & bottom right availability */
183
0
        }
184
0
    }
185
186
0
    return u1_neighbor_avail;
187
0
}
188
189
/**
190
******************************************************************************
191
*
192
* @brief
193
*  evaluate best intra 16x16 mode (rate distortion opt off)
194
*
195
* @par Description
196
*  This function evaluates all the possible intra 16x16 modes and finds the mode
197
*  that best represents the macro-block (least distortion) and occupies fewer
198
*  bits in the bit-stream.
199
*
200
* @param[in]   ps_proc
201
*  pointer to process context (handle)
202
*
203
* @remarks
204
*  Ideally the cost of encoding a macroblock is calculated as
205
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
206
*  input block and the reconstructed block and rate is the number of bits taken
207
*  to place the macroblock in the bit-stream. In this routine the rate does not
208
*  exactly point to the total number of bits it takes, rather it points to
209
*  header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
210
*  bits and residual bits fall into texture bits, the number of bits taken to
211
*  encode mbtype is considered as rate when computing cost. Further, we
212
*  approximate the distortion as the deviation b/w input and the predicted block
213
*  as opposed to input and reconstructed block.
214
*
215
*  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
216
*  the SAD and cost are one and the same.
217
*
218
* @return     none
219
*
220
******************************************************************************
221
*/
222
223
void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
224
9.61M
{
225
    /* Codec Context */
226
9.61M
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
227
9.61M
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
228
9.61M
    mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
229
230
    /* SAD(distortion metric) of the 16x16 macroblock */
231
9.61M
    WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
232
233
    /* lambda */
234
9.61M
    UWORD32 u4_lambda = ps_proc->u4_lambda;
235
236
    /* cost = distortion + lambda*rate */
237
9.61M
    WORD32 i4_mb_cost = INT_MAX, i4_mb_cost_least = INT_MAX;
238
239
    /* intra mode */
240
9.61M
    UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
241
242
    /* neighbor pels for intra prediction */
243
9.61M
    UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
244
245
    /* neighbor availability */
246
9.61M
    WORD32 i4_ngbr_avbl;
247
248
    /* pointer to src macro block */
249
9.61M
    UWORD8 *pu1_curr_mb = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
250
9.61M
    UWORD8 *pu1_ref_mb = ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data;
251
252
    /* pointer to prediction macro block */
253
9.61M
    UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
254
9.61M
    UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
255
256
    /* strides */
257
9.61M
    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
258
9.61M
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
259
9.61M
    WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
260
261
    /* pointer to neighbors left, top, topleft */
262
9.61M
    UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
263
9.61M
    UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
264
9.61M
    UWORD8 *pu1_mb_d = pu1_mb_b - 1;
265
9.61M
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
266
    /* valid intra modes map */
267
9.61M
    UWORD32 u4_valid_intra_modes;
268
269
    /* lut for valid intra modes */
270
9.61M
    const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15};
271
272
9.61M
    UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
273
9.61M
    isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
274
9.61M
    UWORD32 u4_constrained_intra_pred =
275
9.61M
        ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
276
277
9.61M
    if(ps_proc->i4_slice_type != ISLICE)
278
1.00M
    {
279
        /* Offset for MBtype */
280
18.4E
        offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
281
1.00M
        u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
282
1.00M
    }
283
284
    /* locating neighbors that are available for prediction */
285
286
    /* gather prediction pels from the neighbors, if particular set is not
287
     * available it is set to zero*/
288
    /* left pels */
289
9.61M
    u1_mb_a =
290
9.61M
        ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
291
9.61M
         (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
292
1.97M
                                       !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
293
9.31M
                                    : 1));
294
9.61M
    if(u1_mb_a)
295
8.38M
    {
296
142M
        for(i = 0; i < 16; i++) pu1_ngbr_pels_i16[16 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
297
8.38M
    }
298
1.22M
    else
299
1.22M
    {
300
1.22M
        ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16, 0, MB_SIZE);
301
1.22M
    }
302
    /* top pels */
303
9.61M
    u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
304
9.61M
               (u4_constrained_intra_pred
305
9.07M
                    ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
306
9.07M
                    : 1));
307
9.61M
    if(u1_mb_b)
308
8.23M
    {
309
8.23M
        ps_mem_fxns->pf_mem_cpy_mul8(pu1_ngbr_pels_i16 + 16 + 1, pu1_mb_b, 16);
310
8.23M
    }
311
1.38M
    else
312
1.38M
    {
313
1.38M
        ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16 + 16 + 1, 0, MB_SIZE);
314
1.38M
    }
315
    /* topleft pels */
316
9.61M
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
317
9.61M
               (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
318
1.77M
                                             !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
319
8.79M
                                          : 1));
320
9.61M
    if(u1_mb_d)
321
8.00M
    {
322
8.00M
        pu1_ngbr_pels_i16[16] = *pu1_mb_d;
323
8.00M
    }
324
1.60M
    else
325
1.60M
    {
326
1.60M
        pu1_ngbr_pels_i16[16] = 0;
327
1.60M
    }
328
329
9.61M
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
330
9.61M
    ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
331
332
    /* set valid intra modes for evaluation */
333
9.61M
    u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
334
335
9.61M
    if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
336
9.61M
       ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
337
1.04M
        u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
338
339
    /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
340
9.61M
    ps_codec->pf_ih264e_evaluate_intra16x16_modes(
341
9.61M
        pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16, i4_src_strd, i4_pred_strd,
342
9.61M
        i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least, u4_valid_intra_modes);
343
344
    /* cost = distortion + lambda*rate */
345
9.61M
    i4_mb_cost_least = i4_mb_distortion_least;
346
347
9.61M
    if(((u4_valid_intra_modes >> 3) & 1) != 0)
348
7.11M
    {
349
        /* intra prediction for PLANE mode*/
350
7.11M
        (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](
351
7.11M
            pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
352
353
        /* evaluate distortion between the actual blk and the estimated blk for the
354
         * given mode */
355
7.11M
        ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](
356
7.11M
            pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least,
357
7.11M
            &i4_mb_distortion);
358
359
        /* cost = distortion + lambda*rate */
360
7.11M
        i4_mb_cost = i4_mb_distortion;
361
362
        /* update the least cost information if necessary */
363
7.11M
        if(i4_mb_cost < i4_mb_distortion_least)
364
295k
        {
365
295k
            u4_intra_mode = PLANE_I16x16;
366
367
295k
            i4_mb_cost_least = i4_mb_cost;
368
295k
            i4_mb_distortion_least = i4_mb_distortion;
369
295k
        }
370
7.11M
    }
371
372
9.61M
    u4_best_intra_16x16_mode = u4_intra_mode;
373
374
9.61M
    DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
375
376
9.61M
    ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
377
378
    /* cost = distortion + lambda*rate */
379
9.61M
    i4_mb_cost_least =
380
9.61M
        i4_mb_distortion_least + u4_lambda * u1_uev_codelength[offset + u4_best_intra_16x16_mode];
381
382
    /* update the type of the mb if necessary */
383
9.61M
    if(i4_mb_cost_least < ps_proc->i4_mb_cost)
384
5.61M
    {
385
5.61M
        ps_proc->i4_mb_cost = i4_mb_cost_least;
386
5.61M
        ps_proc->i4_mb_distortion = i4_mb_distortion_least;
387
5.61M
        ps_proc->ps_mb_info->u2_mb_type = I16x16;
388
5.61M
    }
389
9.61M
}
390
391
/**
392
******************************************************************************
393
*
394
* @brief
395
*  evaluate best intra 8x8 mode (rate distortion opt off)
396
*
397
* @par Description
398
*  This function evaluates all the possible intra 8x8 modes and finds the mode
399
*  that best represents the macro-block (least distortion) and occupies fewer
400
*  bits in the bit-stream.
401
*
402
* @param[in]    ps_proc
403
*  pointer to proc ctxt
404
*
405
* @remarks Ideally the cost of encoding a macroblock is calculated as
406
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
407
*  input block and the reconstructed block and rate is the number of bits taken
408
*  to place the macroblock in the bit-stream. In this routine the rate does not
409
*  exactly point to the total number of bits it takes, rather it points to
410
*  header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
410
*  bits and residual bits fall into texture bits, the number of bits taken to
411
*  encode mbtype is considered as rate when computing cost. Further, we
412
*  approximate the distortion as the deviation b/w input and the predicted block
413
*  as opposed to input and reconstructed block.
415
*
416
*  NOTE: TODO: This function needs to be tested
417
*
418
*  @return      none
419
*
420
******************************************************************************
421
*/
422
void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
423
0
{
424
    /* Codec Context */
425
0
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
426
427
    /* SAD(distortion metric) of an 8x8 block */
428
0
    WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX,
429
0
                                    i4_total_distortion = 0;
430
431
    /* lambda */
432
0
    UWORD32 u4_lambda = ps_proc->u4_lambda;
433
434
    /* cost = distortion + lambda*rate */
435
0
    WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
436
437
    /* cost of signalling the partition's intra mode: one bit when it matches the estimated mode, four bits otherwise */
438
0
    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
439
440
    /* intra mode */
441
0
    UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
442
443
    /* neighbor pels for intra prediction */
444
0
    UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
445
446
    /* pointer to curr partition */
447
0
    UWORD8 *pu1_mb_curr;
448
449
    /* pointer to prediction macro block */
450
0
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
451
452
    /* strides */
453
0
    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
454
0
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
455
456
    /* neighbors left, top, top right, top left */
457
0
    UWORD8 *pu1_mb_a;
458
0
    UWORD8 *pu1_mb_b;
459
0
    UWORD8 *pu1_mb_d;
460
461
    /* neighbor availability */
462
0
    WORD32 i4_ngbr_avbl;
463
0
    block_neighbors_t s_ngbr_avbl;
464
465
    /* temp vars */
466
0
    UWORD32 b8, u4_pix_x, u4_pix_y;
467
0
    UWORD32 u4_constrained_intra_pred =
468
0
        ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
469
0
    block_neighbors_t s_ngbr_avbl_MB;
470
471
    /* ngbr mb syntax information */
472
0
    UWORD8 *pu1_top_mb_intra_modes =
473
0
        ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
474
0
    isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
475
0
    isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
476
    /* valid intra modes map */
477
0
    UWORD32 u4_valid_intra_modes;
478
479
0
    if(ps_proc->ps_ngbr_avbl->u1_mb_c)
480
0
    {
481
0
        ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
482
0
    }
483
    /* left pels */
484
0
    s_ngbr_avbl_MB.u1_mb_a =
485
0
        ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
486
0
         (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
487
0
                                       !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
488
0
                                    : 1));
489
490
    /* top pels */
491
0
    s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
492
0
                              (u4_constrained_intra_pred ? (ps_top_mb_syn_ele->u1_is_intra &&
493
0
                                                            !ps_top_mb_syn_ele->u1_base_mode_flag)
494
0
                                                         : 1));
495
496
    /* topleft pels */
497
0
    s_ngbr_avbl_MB.u1_mb_d =
498
0
        ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
499
0
         (u4_constrained_intra_pred
500
0
              ? (ps_top_mb_syn_ele[-1].u1_is_intra && !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
501
0
              : 1));
502
503
    /* top right */
504
0
    s_ngbr_avbl_MB.u1_mb_c =
505
0
        ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
506
0
         (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra &&
507
0
                                       !ps_top_right_mb_syn_ele->u1_base_mode_flag)
508
0
                                    : 1));
509
510
0
    for(b8 = 0; b8 < 4; b8++)
511
0
    {
512
0
        u4_pix_x = (b8 & 0x01) << 3;
513
0
        u4_pix_y = (b8 >> 1) << 3;
514
515
0
        pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
516
0
                      u4_pix_x + (u4_pix_y * i4_src_strd);
517
        /* when rdopt is off, we use the input as reference for constructing
518
         * prediction buffer */
519
        /* as opposed to using the recon pels. (open loop intra prediction) */
520
0
        pu1_mb_a = pu1_mb_curr - 1;           /* pointer to left macro block */
521
0
        pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
522
0
        pu1_mb_d = pu1_mb_b - 1;              /* pointer to top left macro block */
523
524
        /* locating neighbors that are available for prediction */
525
        /* TODO : update the neighbor availability information basing on constrained
526
         * intra pred information */
527
        /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
528
         * split in to distinct routines */
529
        /* basing on neighbors available and hence evade the computation of neighbor
530
         * availability totally. */
531
0
        s_ngbr_avbl.u1_mb_a = isvce_derive_ngbr_avbl_of_mb_partitions(
532
0
            &s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
533
0
        s_ngbr_avbl.u1_mb_b = isvce_derive_ngbr_avbl_of_mb_partitions(
534
0
            &s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
535
0
        s_ngbr_avbl.u1_mb_c = isvce_derive_ngbr_avbl_of_mb_partitions(
536
0
            &s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
537
0
        s_ngbr_avbl.u1_mb_d = isvce_derive_ngbr_avbl_of_mb_partitions(
538
0
            &s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
539
540
        /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b *
541
         * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d *
542
         * TOP_LEFT_MB_AVAILABLE_MASK */
543
0
        i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
544
0
                       (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
545
0
                       (s_ngbr_avbl.u1_mb_a << 4);
546
        /* if top partition is available and top right is not available for intra
547
         * prediction, then */
548
        /* pad top right samples using top sample and make top right also available
549
         */
550
        /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
551
         * (s_ngbr_avbl.u1_mb_b << 2) +  ((s_ngbr_avbl.u1_mb_b |
552
         * s_ngbr_avbl.u1_mb_c) << 3); */
553
0
        ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
554
555
0
        ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
556
0
                                                     i4_src_strd, i4_ngbr_avbl);
557
558
0
        i4_partition_cost_least = INT_MAX;
559
        /* set valid intra modes for evaluation */
560
0
        u4_valid_intra_modes = 0x1ff;
561
562
0
        if(!s_ngbr_avbl.u1_mb_b)
563
0
        {
564
0
            u4_valid_intra_modes &= ~(1 << VERT_I4x4);
565
0
            u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
566
0
            u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
567
0
        }
568
0
        if(!s_ngbr_avbl.u1_mb_a)
569
0
        {
570
0
            u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
571
0
            u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
572
0
        }
573
0
        if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
574
0
        {
575
0
            u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
576
0
            u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
577
0
            u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
578
0
        }
579
580
        /* estimate the intra 8x8 mode for the current partition (for evaluating
581
         * cost) */
582
0
        if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
583
0
        {
584
0
            u4_estimated_intra_8x8_mode = DC_I8x8;
585
0
        }
586
0
        else
587
0
        {
588
0
            UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
589
0
            UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
590
591
0
            if(u4_pix_x == 0)
592
0
            {
593
0
                if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
594
0
                {
595
0
                    u4_left_intra_8x8_mode =
596
0
                        ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
597
0
                }
598
0
                else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
599
0
                {
600
0
                    u4_left_intra_8x8_mode = ps_proc->s_nbr_info.ps_left_mb_intra_modes
601
0
                                                 ->au1_intra_modes[(b8 + 1) * 4 + 2];
602
0
                }
603
0
            }
604
0
            else
605
0
            {
606
0
                u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 1];
607
0
            }
608
609
0
            if(u4_pix_y == 0)
610
0
            {
611
0
                if(ps_top_mb_syn_ele->u2_mb_type == I8x8)
612
0
                {
613
0
                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8 + 2];
614
0
                }
615
0
                else if(ps_top_mb_syn_ele->u2_mb_type == I4x4)
616
0
                {
617
0
                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8 + 2) * 4 + 2];
618
0
                }
619
0
            }
620
0
            else
621
0
            {
622
0
                u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 2];
623
0
            }
624
625
0
            u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
626
0
        }
627
628
        /* perform intra mode 8x8 evaluation */
629
0
        for(u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0;
630
0
            u4_intra_mode++, u4_valid_intra_modes >>= 1)
631
0
        {
632
0
            if((u4_valid_intra_modes & 1) == 0) continue;
633
634
            /* intra prediction */
635
0
            (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0,
636
0
                                                          i4_pred_strd, i4_ngbr_avbl);
637
638
            /* evaluate distortion between the actual blk and the estimated blk for
639
             * the given mode */
640
0
            ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd,
641
0
                                i4_partition_cost_least, &i4_partition_distortion);
642
643
0
            i4_partition_cost =
644
0
                i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)
645
0
                                               ? u4_cost_one_bit
646
0
                                               : u4_cost_four_bits);
647
648
            /* update the least cost information if necessary */
649
0
            if(i4_partition_cost < i4_partition_cost_least)
650
0
            {
651
0
                i4_partition_cost_least = i4_partition_cost;
652
0
                i4_partition_distortion_least = i4_partition_distortion;
653
0
                u4_best_intra_8x8_mode = u4_intra_mode;
654
0
            }
655
0
        }
656
        /* macroblock distortion */
657
0
        i4_total_cost += i4_partition_cost_least;
658
0
        i4_total_distortion += i4_partition_distortion_least;
659
        /* mb partition mode */
660
0
        ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
661
0
    }
662
663
    /* update the type of the mb if necessary */
664
0
    if(i4_total_cost < ps_proc->i4_mb_cost)
665
0
    {
666
0
        ps_proc->i4_mb_cost = i4_total_cost;
667
0
        ps_proc->i4_mb_distortion = i4_total_distortion;
668
0
        ps_proc->ps_mb_info->u2_mb_type = I8x8;
669
0
    }
670
0
}
671
672
/**
673
******************************************************************************
674
*
675
* @brief
676
*  evaluate best intra 4x4 mode (rate distortion opt off)
677
*
678
* @par Description
679
*  This function evaluates all the possible intra 4x4 modes and finds the mode
680
*  that best represents the macro-block (least distortion) and occupies fewer
681
*  bits in the bit-stream.
682
*
683
* @param[in]    ps_proc
684
*  pointer to proc ctxt
685
*
686
* @remarks
687
*  Ideally the cost of encoding a macroblock is calculated as
688
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
689
*  input block and the reconstructed block and rate is the number of bits taken
690
*  to place the macroblock in the bit-stream. In this routine the rate does not
691
*  exactly point to the total number of bits it takes, rather it points to
692
*  header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
692
*  bits and residual bits fall into texture bits, the number of bits taken to
693
*  encode mbtype is considered as rate when computing cost. Further, we
694
*  approximate the distortion as the deviation b/w input and the predicted block
695
*  as opposed to input and reconstructed block.
697
*
698
*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
699
*  24*lambda is added to the SAD before comparison with the best SAD for
700
*  inter prediction. This is an empirical value to prevent using too many intra
701
*  blocks.
702
*
703
* @return      none
704
*
705
******************************************************************************
706
*/
707
void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
708
872k
{
709
    /* Codec Context */
710
872k
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
711
712
    /* SAD(distortion metric) of a 4x4 block */
713
872k
    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
714
715
    /* lambda */
716
872k
    UWORD32 u4_lambda = ps_proc->u4_lambda;
717
718
    /* cost = distortion + lambda*rate */
719
872k
    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
720
721
    /* cost due to mbtype */
722
872k
    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
723
724
    /* intra mode */
725
872k
    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
726
727
    /* neighbor pels for intra prediction */
728
872k
    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
729
730
    /* pointer to curr partition */
731
872k
    UWORD8 *pu1_mb_curr;
732
733
    /* pointer to prediction macro block */
734
872k
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
735
736
    /* strides */
737
872k
    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
738
872k
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
739
740
    /* neighbors left, top, top right, top left */
741
872k
    UWORD8 *pu1_mb_a;
742
872k
    UWORD8 *pu1_mb_b;
743
872k
    UWORD8 *pu1_mb_c;
744
872k
    UWORD8 *pu1_mb_d;
745
746
    /* neighbor availability */
747
872k
    WORD32 i4_ngbr_avbl;
748
872k
    block_neighbors_t s_ngbr_avbl;
749
750
    /* temp vars */
751
872k
    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
752
753
    /* ngbr sub mb modes */
754
872k
    UWORD8 *pu1_top_mb_intra_modes =
755
872k
        ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
756
872k
    isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
757
872k
    isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
758
759
    /* valid intra modes map */
760
872k
    UWORD32 u4_valid_intra_modes;
761
872k
    UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
762
763
872k
    UWORD32 u4_constrained_intra_pred =
764
872k
        ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
765
872k
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
766
872k
    if(ps_proc->ps_ngbr_avbl->u1_mb_c)
767
766k
    {
768
766k
        ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
769
766k
    }
770
    /* left pels */
771
872k
    u1_mb_a =
772
872k
        ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
773
872k
         (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
774
242k
                                       !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
775
817k
                                    : 1));
776
777
    /* top pels */
778
872k
    u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
779
872k
               (u4_constrained_intra_pred
780
789k
                    ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
781
789k
                    : 1));
782
783
    /* topleft pels */
784
872k
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
785
872k
               (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
786
210k
                                             !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
787
750k
                                          : 1));
788
789
    /* top right */
790
872k
    u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
791
872k
               (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra &&
792
216k
                                             !ps_top_right_mb_syn_ele->u1_base_mode_flag)
793
765k
                                          : 1));
794
795
872k
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
796
872k
    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
797
798
4.18M
    for(b8 = 0; b8 < 4; b8++)
799
3.30M
    {
800
3.30M
        u4_blk_x = (b8 & 0x01) << 3;
801
3.30M
        u4_blk_y = (b8 >> 1) << 3;
802
15.1M
        for(b4 = 0; b4 < 4; b4++)
803
11.8M
        {
804
11.8M
            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
805
11.8M
            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
806
807
11.8M
            pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
808
11.8M
                          u4_pix_x + (u4_pix_y * i4_src_strd);
809
            /* when rdopt is off, we use the input as reference for constructing
810
             * prediction buffer */
811
            /* as opposed to using the recon pels. (open loop intra prediction) */
812
11.8M
            pu1_mb_a = pu1_mb_curr - 1;           /* pointer to left macro block */
813
11.8M
            pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
814
11.8M
            pu1_mb_c = pu1_mb_b + 4;              /* pointer to top right macro block */
815
11.8M
            pu1_mb_d = pu1_mb_b - 1;              /* pointer to top left macro block */
816
817
            /* locating neighbors that are available for prediction */
818
            /* TODO : update the neighbor availability information basing on
819
             * constrained intra pred information */
820
            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
821
             * split in to distinct routines */
822
            /* basing on neighbors available and hence evade the computation of
823
             * neighbor availability totally. */
824
825
11.8M
            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
826
11.8M
            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
827
11.8M
            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
828
11.8M
            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
829
11.8M
            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
830
            /* set valid intra modes for evaluation */
831
11.8M
            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
832
833
            /* if top partition is available and top right is not available for intra
834
             * prediction, then */
835
            /* pad top right samples using top sample and make top right also
836
             * available */
837
            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
838
             * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
839
             * s_ngbr_avbl.u1_mb_c) << 3); */
840
841
            /* gather prediction pels from the neighbors */
842
11.8M
            if(s_ngbr_avbl.u1_mb_a)
843
11.4M
            {
844
57.2M
                for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_src_strd];
845
11.4M
            }
846
406k
            else
847
406k
            {
848
406k
                memset(pu1_ngbr_pels_i4, 0, 4);
849
406k
            }
850
851
11.8M
            if(s_ngbr_avbl.u1_mb_b)
852
11.5M
            {
853
11.5M
                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
854
11.5M
            }
855
315k
            else
856
315k
            {
857
315k
                memset(pu1_ngbr_pels_i4 + 5, 0, 4);
858
315k
            }
859
860
11.8M
            if(s_ngbr_avbl.u1_mb_d)
861
11.2M
                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
862
631k
            else
863
631k
                pu1_ngbr_pels_i4[4] = 0;
864
865
11.8M
            if(s_ngbr_avbl.u1_mb_c)
866
8.11M
            {
867
8.11M
                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
868
8.11M
            }
869
3.73M
            else if(s_ngbr_avbl.u1_mb_b)
870
4.14M
            {
871
4.14M
                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
872
4.14M
                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
873
4.14M
            }
874
875
11.8M
            i4_partition_cost_least = INT_MAX;
876
877
            /* predict the intra 4x4 mode for the current partition (for evaluating
878
             * cost) */
879
11.8M
            if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
880
952k
            {
881
952k
                u4_estimated_intra_4x4_mode = DC_I4x4;
882
952k
            }
883
10.8M
            else
884
10.8M
            {
885
10.8M
                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
886
10.8M
                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
887
888
10.8M
                if(u4_pix_x == 0)
889
2.85M
                {
890
2.85M
                    if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
891
2.05M
                    {
892
2.05M
                        u4_left_intra_4x4_mode =
893
2.05M
                            ps_proc->s_nbr_info.ps_left_mb_intra_modes
894
2.05M
                                ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
895
2.05M
                    }
896
796k
                    else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
897
0
                    {
898
0
                        u4_left_intra_4x4_mode =
899
0
                            ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
900
0
                    }
901
2.85M
                }
902
8.04M
                else
903
8.04M
                {
904
8.04M
                    u4_left_intra_4x4_mode =
905
8.04M
                        ps_proc->au1_intra_luma_mb_4x4_modes
906
8.04M
                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
907
8.04M
                }
908
909
10.8M
                if(u4_pix_y == 0)
910
2.77M
                {
911
2.77M
                    if(ps_top_mb_syn_ele->u2_mb_type == I4x4)
912
1.99M
                    {
913
1.99M
                        u4_top_intra_4x4_mode =
914
1.99M
                            pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
915
1.99M
                    }
916
778k
                    else if(ps_top_mb_syn_ele->u2_mb_type == I8x8)
917
0
                    {
918
0
                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
919
0
                    }
920
2.77M
                }
921
8.11M
                else
922
8.11M
                {
923
8.11M
                    u4_top_intra_4x4_mode =
924
8.11M
                        ps_proc->au1_intra_luma_mb_4x4_modes
925
8.11M
                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
926
8.11M
                }
927
928
10.8M
                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
929
10.8M
            }
930
931
11.8M
            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
932
11.8M
                u4_estimated_intra_4x4_mode;
933
934
            /* mode evaluation and prediction */
935
11.8M
            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
936
11.8M
                pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
937
11.8M
                &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
938
11.8M
                u4_estimated_intra_4x4_mode);
939
940
11.8M
            i4_partition_distortion_least =
941
11.8M
                i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
942
11.8M
                                               ? u4_cost_one_bit
943
11.8M
                                               : u4_cost_four_bits);
944
945
11.8M
            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
946
11.8M
                  u4_best_intra_4x4_mode);
947
            /* macroblock distortion */
948
11.8M
            i4_total_distortion += i4_partition_distortion_least;
949
11.8M
            i4_total_cost += i4_partition_cost_least;
950
            /* mb partition mode */
951
11.8M
            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
952
11.8M
        }
953
3.30M
    }
954
955
    /* update the type of the mb if necessary */
956
872k
    if(i4_total_cost < ps_proc->i4_mb_cost)
957
582k
    {
958
582k
        ps_proc->i4_mb_cost = i4_total_cost;
959
582k
        ps_proc->i4_mb_distortion = i4_total_distortion;
960
582k
        ps_proc->ps_mb_info->u2_mb_type = I4x4;
961
582k
    }
962
872k
}
963
964
/**
965
******************************************************************************
966
*
967
* @brief evaluate best intra 4x4 mode (rate distortion opt on)
968
*
969
* @par Description
970
*  This function evaluates all the possible intra 4x4 modes and finds the mode
971
*  that best represents the macro-block (least distortion) and occupies fewer
972
*  bits in the bit-stream.
973
*
974
* @param[in]    ps_proc
975
*  pointer to proc ctxt
976
*
977
* @remarks
978
*  Ideally the cost of encoding a macroblock is calculated as
979
*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
980
*  input block and the reconstructed block and rate is the number of bits taken
981
*  to place the macroblock in the bit-stream. In this routine the rate does not
982
*  exactly point to the total number of bits it takes, rather it points to
983
*  header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
984
*  bits and residual bits fall into texture bits, the number of bits taken to
985
*  encode the mbtype is considered as rate, and we compute cost. Further we will
986
*  approximate the distortion as the deviation b/w input and the predicted block
987
*  as opposed to input and reconstructed block.
988
*
989
*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
990
*  24*lambda is added to the SAD before comparison with the best SAD for
991
*  inter prediction. This is an empirical value to prevent using too many intra
992
*  blocks.
993
*
994
* @return      none
995
*
996
******************************************************************************
997
*/
998
void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc)
999
329k
{
1000
329k
    block_neighbors_t s_ngbr_avbl;
1001
329k
    buffer_container_t s_src;
1002
329k
    buffer_container_t s_pred;
1003
329k
    buffer_container_t s_recon;
1004
329k
    buffer_container_t s_quant_coeffs;
1005
329k
    buffer_container_t s_res_pred;
1006
1007
    /* neighbors left, top, top right, top left */
1008
329k
    UWORD8 *pu1_mb_a;
1009
329k
    UWORD8 *pu1_mb_b;
1010
329k
    UWORD8 *pu1_mb_c;
1011
329k
    UWORD8 *pu1_mb_d;
1012
329k
    UWORD8 *pu1_mb_curr;
1013
329k
    UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
1014
329k
    UWORD8 *pu1_ref_mb_intra_4x4;
1015
329k
    WORD32 i4_ref_strd_left, i4_ref_strd_top;
1016
329k
    WORD32 i4_ngbr_avbl;
1017
329k
    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
1018
    /* valid intra modes map */
1019
329k
    UWORD32 u4_valid_intra_modes;
1020
    /* Dummy variable for 4x4 trans function */
1021
329k
    WORD16 i2_dc_dummy;
1022
329k
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
1023
1024
329k
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
1025
329k
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1026
329k
    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1027
329k
    isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1028
329k
    isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1029
329k
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1030
329k
    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
1031
329k
    resi_trans_quant_constants_t s_resi_trans_quant_constants = {
1032
329k
        .pu2_scale_matrix = ps_qp_params->pu2_scale_mat,
1033
329k
        .pu2_threshold_matrix = ps_qp_params->pu2_thres_mat,
1034
329k
        .u4_qbits = ps_qp_params->u1_qbits,
1035
329k
        .u4_round_factor = ps_qp_params->u4_dead_zone};
1036
329k
    iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
1037
329k
        .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
1038
329k
        .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
1039
329k
        .u4_qp_div_6 = ps_qp_params->u1_qp_div};
1040
1041
329k
    const UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
1042
329k
    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
1043
329k
    UWORD32 u4_lambda = ps_proc->u4_lambda;
1044
329k
    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
1045
    /* cost due to mbtype */
1046
329k
    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
1047
329k
    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
1048
329k
    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1049
329k
    WORD16 *pi2_quant_coeffs = ps_proc->pi2_res_buf_intra_4x4;
1050
329k
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1051
329k
    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
1052
329k
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1053
329k
    UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4;
1054
329k
    UWORD8 *pu1_top_mb_intra_modes =
1055
329k
        ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
1056
329k
    UWORD32 u4_constrained_intra_pred =
1057
329k
        ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
1058
329k
    UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0);
1059
329k
    UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0);
1060
1061
329k
    s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y];
1062
1063
    /* compute ngbr availability for sub blks */
1064
329k
    if(ps_proc->ps_ngbr_avbl->u1_mb_c)
1065
269k
    {
1066
269k
        ps_top_right_mb = ps_top_mb + 1;
1067
269k
    }
1068
1069
    /* left pels */
1070
329k
    u1_mb_a =
1071
329k
        ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
1072
329k
         (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
1073
207k
                                       !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
1074
299k
                                    : 1));
1075
1076
    /* top pels */
1077
329k
    u1_mb_b =
1078
329k
        ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
1079
329k
         (u4_constrained_intra_pred ? (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag)
1080
278k
                                    : 1));
1081
1082
    /* topleft pels */
1083
329k
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
1084
329k
               (u4_constrained_intra_pred
1085
256k
                    ? (ps_top_left_mb->u1_is_intra && !ps_top_left_mb->u1_base_mode_flag)
1086
256k
                    : 1));
1087
1088
    /* top right pels */
1089
329k
    u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
1090
329k
               (u4_constrained_intra_pred
1091
269k
                    ? (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag)
1092
269k
                    : 1));
1093
1094
329k
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
1095
329k
    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
1096
1097
1.62M
    for(b8 = 0; b8 < 4; b8++)
1098
1.29M
    {
1099
1.29M
        u4_blk_x = (b8 & 0x01) << 3;
1100
1.29M
        u4_blk_y = (b8 >> 1) << 3;
1101
6.20M
        for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_quant_coeffs += MB_SIZE)
1102
4.91M
        {
1103
4.91M
            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
1104
4.91M
            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
1105
1106
4.91M
            pu1_ref_mb_intra_4x4 =
1107
4.91M
                ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
1108
4.91M
            pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
1109
4.91M
                          u4_pix_x + (u4_pix_y * i4_src_strd);
1110
4.91M
            pu1_pred_mb = ps_proc->pu1_pred_mb + u4_pix_x + (u4_pix_y * i4_pred_strd);
1111
4.91M
            if(u4_pix_x == 0)
1112
1.29M
            {
1113
1.29M
                i4_ref_strd_left = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
1114
1.29M
                pu1_mb_ref_left =
1115
1.29M
                    ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
1116
1.29M
                    (u4_pix_y * i4_ref_strd_left);
1117
1.29M
            }
1118
3.61M
            else
1119
3.61M
            {
1120
3.61M
                i4_ref_strd_left = i4_pred_strd;
1121
3.61M
                pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
1122
3.61M
            }
1123
4.91M
            if(u4_pix_y == 0)
1124
1.29M
            {
1125
1.29M
                i4_ref_strd_top = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
1126
1.29M
                pu1_mb_ref_top =
1127
1.29M
                    ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
1128
1.29M
                    (u4_pix_y * i4_ref_strd_top);
1129
1.29M
            }
1130
3.61M
            else
1131
3.61M
            {
1132
3.61M
                i4_ref_strd_top = i4_pred_strd;
1133
3.61M
                pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
1134
3.61M
            }
1135
1136
4.91M
            pu1_mb_a = pu1_mb_ref_left - 1;              /* pointer to left macro block */
1137
4.91M
            pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
1138
4.91M
            pu1_mb_c = pu1_mb_b + 4;                     /* pointer to top right macro block */
1139
4.91M
            if(u4_pix_y == 0)
1140
1.29M
                pu1_mb_d = pu1_mb_b - 1;
1141
3.61M
            else
1142
3.61M
                pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
1143
1144
            /* locating neighbors that are available for prediction */
1145
            /* TODO : update the neighbor availability information basing on
1146
             * constrained intra pred information */
1147
            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
1148
             * split in to distinct routines */
1149
            /* basing on neighbors available and hence evade the computation of
1150
             * neighbor availability totally. */
1151
1152
4.91M
            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1153
4.91M
            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
1154
4.91M
            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
1155
4.91M
            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
1156
4.91M
            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
1157
            /* set valid intra modes for evaluation */
1158
4.91M
            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
1159
1160
            /* if top partition is available and top right is not available for intra
1161
             * prediction, then */
1162
            /* pad top right samples using top sample and make top right also
1163
             * available */
1164
            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
1165
             * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
1166
             * s_ngbr_avbl.u1_mb_c) << 3); */
1167
1168
            /* gather prediction pels from the neighbors */
1169
4.91M
            if(s_ngbr_avbl.u1_mb_a)
1170
4.32M
            {
1171
21.6M
                for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_ref_strd_left];
1172
4.32M
            }
1173
584k
            else
1174
584k
            {
1175
584k
                memset(pu1_ngbr_pels_i4, 0, 4);
1176
584k
            }
1177
4.91M
            if(s_ngbr_avbl.u1_mb_b)
1178
4.25M
            {
1179
4.25M
                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1180
4.25M
            }
1181
657k
            else
1182
657k
            {
1183
657k
                memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
1184
657k
            }
1185
4.91M
            if(s_ngbr_avbl.u1_mb_d)
1186
3.78M
                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1187
1.12M
            else
1188
1.12M
                pu1_ngbr_pels_i4[4] = 0;
1189
4.91M
            if(s_ngbr_avbl.u1_mb_c)
1190
2.75M
            {
1191
2.75M
                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
1192
2.75M
            }
1193
2.15M
            else if(s_ngbr_avbl.u1_mb_b)
1194
1.61M
            {
1195
1.61M
                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
1196
1.61M
                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
1197
1.61M
            }
1198
1199
4.91M
            i4_partition_cost_least = INT_MAX;
1200
1201
            /* predict the intra 4x4 mode for the current partition (for evaluating
1202
             * cost) */
1203
4.91M
            if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
1204
1.24M
            {
1205
1.24M
                u4_estimated_intra_4x4_mode = DC_I4x4;
1206
1.24M
            }
1207
3.66M
            else
1208
3.66M
            {
1209
3.66M
                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
1210
3.66M
                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
1211
1212
3.66M
                if(u4_pix_x == 0)
1213
604k
                {
1214
604k
                    if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
1215
204k
                    {
1216
204k
                        u4_left_intra_4x4_mode =
1217
204k
                            ps_proc->s_nbr_info.ps_left_mb_intra_modes
1218
204k
                                ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
1219
204k
                    }
1220
399k
                    else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
1221
0
                    {
1222
0
                        u4_left_intra_4x4_mode =
1223
0
                            ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
1224
0
                    }
1225
604k
                }
1226
3.05M
                else
1227
3.05M
                {
1228
3.05M
                    u4_left_intra_4x4_mode =
1229
3.05M
                        ps_proc->au1_intra_luma_mb_4x4_modes
1230
3.05M
                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
1231
3.05M
                }
1232
1233
3.66M
                if(u4_pix_y == 0)
1234
541k
                {
1235
541k
                    if(ps_top_mb->u2_mb_type == I4x4)
1236
141k
                    {
1237
141k
                        u4_top_intra_4x4_mode =
1238
141k
                            pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
1239
141k
                    }
1240
400k
                    else if(ps_top_mb->u2_mb_type == I8x8)
1241
0
                    {
1242
0
                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
1243
0
                    }
1244
541k
                }
1245
3.12M
                else
1246
3.12M
                {
1247
3.12M
                    u4_top_intra_4x4_mode =
1248
3.12M
                        ps_proc->au1_intra_luma_mb_4x4_modes
1249
3.12M
                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
1250
3.12M
                }
1251
1252
3.66M
                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
1253
3.66M
            }
1254
1255
4.91M
            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
1256
4.91M
                u4_estimated_intra_4x4_mode;
1257
1258
            /*mode evaluation and prediction*/
1259
4.91M
            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
1260
4.91M
                pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
1261
4.91M
                &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
1262
4.91M
                u4_estimated_intra_4x4_mode);
1263
1264
4.91M
            i4_partition_distortion_least =
1265
4.91M
                i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
1266
4.91M
                                               ? u4_cost_one_bit
1267
4.91M
                                               : u4_cost_four_bits);
1268
1269
4.91M
            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
1270
4.91M
                  u4_best_intra_4x4_mode);
1271
1272
            /* macroblock distortion */
1273
4.91M
            i4_total_distortion += i4_partition_distortion_least;
1274
4.91M
            i4_total_cost += i4_partition_cost_least;
1275
1276
            /* mb partition mode */
1277
4.91M
            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1278
1279
            /********************************************************/
1280
            /*  error estimation,                                   */
1281
            /*  transform                                           */
1282
            /*  quantization                                        */
1283
            /********************************************************/
1284
4.91M
            s_src.pv_data = pu1_mb_curr;
1285
4.91M
            s_src.i4_data_stride = i4_src_strd;
1286
1287
4.91M
            s_pred.pv_data = pu1_pred_mb;
1288
4.91M
            s_pred.i4_data_stride = i4_pred_strd;
1289
1290
4.91M
            s_quant_coeffs.pv_data = pi2_quant_coeffs;
1291
4.91M
            s_quant_coeffs.i4_data_stride = 4;
1292
1293
4.91M
            ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx](
1294
4.91M
                &s_src, &s_pred, &s_quant_coeffs, &s_res_pred,
1295
                /* No op stride, this implies a buff of length 1x16 */
1296
4.91M
                &s_resi_trans_quant_constants, pu1_nnz, &i2_dc_dummy, 0);
1297
1298
            /********************************************************/
1299
            /*  ierror estimation,                                  */
1300
            /*  itransform                                          */
1301
            /*  iquantization                                       */
1302
            /********************************************************/
1303
1304
            /* Tx blk coeffs are stored blk by blk */
1305
            /* Hence, in order to access rows of each Tx blk, one needs a stride of
1306
             * TxxSize */
1307
4.91M
            s_quant_coeffs.i4_data_stride = 4;
1308
1309
4.91M
            s_recon.pv_data = pu1_ref_mb_intra_4x4;
1310
4.91M
            s_recon.i4_data_stride = i4_pred_strd;
1311
1312
4.91M
            ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
1313
4.91M
                &s_quant_coeffs, &s_pred, &s_res_pred, &s_res_pred, &s_recon,
1314
4.91M
                &s_iq_it_res_rec_constants, ps_proc->pv_scratch_buff, s_quant_coeffs.pv_data, 0, 0);
1315
4.91M
        }
1316
1.29M
    }
1317
1318
    /* update the type of the mb if necessary */
1319
329k
    if(i4_total_cost < ps_proc->i4_mb_cost)
1320
69.5k
    {
1321
69.5k
        ps_proc->i4_mb_cost = i4_total_cost;
1322
69.5k
        ps_proc->i4_mb_distortion = i4_total_distortion;
1323
69.5k
        ps_proc->ps_mb_info->u2_mb_type = I4x4;
1324
69.5k
    }
1325
329k
}
1326
1327
/**
1328
******************************************************************************
1329
*
1330
* @brief
1331
*  evaluate best chroma intra 8x8 mode (rate distortion opt off)
1332
*
1333
* @par Description
1334
*  This function evaluates all the possible chroma intra 8x8 modes and finds
1335
*  the mode that best represents the macroblock (least distortion) and occupies
1336
*  fewer bits in the bitstream.
1337
*
1338
* @param[in] ps_proc
1339
*  pointer to macroblock context (handle)
1340
*
1341
* @remarks
1342
*  For chroma best intra pred mode is calculated based only on SAD
1343
*
1344
* @returns none
1345
*
1346
******************************************************************************
1347
*/
1348
1349
void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
1350
5.79M
{
1351
    /* Codec Context */
1352
5.79M
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
1353
5.79M
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1354
5.79M
    mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
1355
1356
    /* SAD(distortion metric) of an 8x8 block */
1357
5.79M
    WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
1358
1359
    /* intra mode */
1360
5.79M
    UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
1361
1362
    /* neighbor pels for intra prediction */
1363
5.79M
    UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
1364
1365
    /* pointer to curr macro block */
1366
5.79M
    UWORD8 *pu1_curr_mb = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data);
1367
5.79M
    UWORD8 *pu1_ref_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data);
1368
1369
    /* pointer to prediction macro block */
1370
5.79M
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
1371
5.79M
    UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
1372
1373
    /* strides */
1374
5.79M
    WORD32 i4_src_strd_c = ps_proc->s_src_buf_props.as_component_bufs[1].i4_data_stride;
1375
5.79M
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1376
5.79M
    WORD32 i4_rec_strd_c = ps_proc->s_rec_buf_props.as_component_bufs[1].i4_data_stride;
1377
1378
    /* neighbors left, top, top left */
1379
5.79M
    UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
1380
5.79M
    UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
1381
5.79M
    UWORD8 *pu1_mb_d = pu1_mb_b - 2;
1382
1383
    /* neighbor availability */
1384
5.79M
    const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15};
1385
5.79M
    WORD32 i4_ngbr_avbl;
1386
1387
    /* valid intra modes map */
1388
5.79M
    UWORD32 u4_valid_intra_modes;
1389
5.79M
    isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1390
1391
    /* temp var */
1392
5.79M
    UWORD8 i;
1393
5.79M
    UWORD32 u4_constrained_intra_pred =
1394
5.79M
        ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
1395
5.79M
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
1396
    /* locating neighbors that are available for prediction */
1397
1398
    /* gather prediction pels from the neighbors */
1399
    /* left pels */
1400
5.79M
    u1_mb_a =
1401
5.79M
        ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
1402
5.79M
         (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
1403
1.06M
                                       !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
1404
5.62M
                                    : 1));
1405
5.79M
    if(u1_mb_a)
1406
5.61M
    {
1407
50.5M
        for(i = 0; i < 16; i += 2)
1408
44.8M
        {
1409
44.8M
            pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
1410
44.8M
            pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
1411
44.8M
        }
1412
5.61M
    }
1413
179k
    else
1414
179k
    {
1415
179k
        ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
1416
179k
    }
1417
1418
    /* top pels */
1419
5.79M
    u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
1420
5.79M
               (u4_constrained_intra_pred
1421
5.56M
                    ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
1422
5.56M
                    : 1));
1423
5.79M
    if(u1_mb_b)
1424
5.54M
    {
1425
5.54M
        ps_mem_fxns->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
1426
5.54M
    }
1427
241k
    else
1428
241k
    {
1429
241k
        ps_mem_fxns->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
1430
241k
    }
1431
1432
    /* top left pels */
1433
5.79M
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
1434
5.79M
               (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
1435
1.00M
                                             !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
1436
5.42M
                                          : 1));
1437
5.79M
    if(u1_mb_d)
1438
5.40M
    {
1439
5.40M
        pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
1440
5.40M
        pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
1441
5.40M
    }
1442
5.79M
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
1443
5.79M
    ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
1444
1445
5.79M
    u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
1446
1447
5.79M
    if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
1448
5.79M
       ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
1449
629k
        u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
1450
1451
5.79M
    i4_chroma_mb_distortion = INT_MAX;
1452
1453
    /* perform intra mode chroma  8x8 evaluation */
1454
    /* intra prediction */
1455
5.79M
    ps_codec->pf_ih264e_evaluate_intra_chroma_modes(
1456
5.79M
        pu1_curr_mb, pu1_ngbr_pels_c_i8x8, pu1_pred_mb, i4_src_strd_c, i4_pred_strd, i4_ngbr_avbl,
1457
5.79M
        &u4_best_chroma_intra_8x8_mode, &i4_chroma_mb_distortion, u4_valid_intra_modes);
1458
1459
5.79M
    if(u4_valid_intra_modes & 8) /* if Chroma PLANE is valid*/
1460
4.79M
    {
1461
4.79M
        (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0,
1462
4.79M
                                                    i4_pred_strd, i4_ngbr_avbl);
1463
1464
        /* evaluate distortion(sad) */
1465
4.79M
        ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd,
1466
4.79M
                                      i4_chroma_mb_distortion, &i4_mb_distortion);
1467
1468
        /* update the least distortion information if necessary */
1469
4.79M
        if(i4_mb_distortion < i4_chroma_mb_distortion)
1470
11.6k
        {
1471
11.6k
            i4_chroma_mb_distortion = i4_mb_distortion;
1472
11.6k
            u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
1473
11.6k
        }
1474
4.79M
    }
1475
1476
5.79M
    DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion,
1477
5.79M
          u4_best_chroma_intra_8x8_mode);
1478
1479
5.79M
    ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
1480
5.79M
}
1481
1482
/**
1483
******************************************************************************
1484
*
1485
* @brief
1486
*  Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
1487
*  prediction.
1488
*
1489
* @par Description
1490
*  This function evaluates first three 16x16 modes and compute corresponding sad
1491
*  and return the buffer predicted with best mode.
1492
*
1493
* @param[in] pu1_src
1494
*  UWORD8 pointer to the source
1495
*
1496
* @param[in] pu1_ngbr_pels_i16
1497
*  UWORD8 pointer to neighbouring pels
1498
*
1499
* @param[out] pu1_dst
1500
*  UWORD8 pointer to the destination
1501
*
1502
* @param[in] src_strd
1503
*  integer source stride
1504
*
1505
* @param[in] dst_strd
1506
*  integer destination stride
1507
*
1508
* @param[in] u4_n_avblty
1509
*  availability of neighbouring pixels
1510
*
1511
* @param[in] u4_intra_mode
1512
*  Pointer to the variable in which best mode is returned
1513
*
1514
* @param[in] pu4_sadmin
1515
*  Pointer to the variable in which minimum sad is returned
1516
*
1517
* @param[in] u4_valid_intra_modes
1518
*  Says what all modes are valid
1519
*
1520
* @returns      none
1521
*
1522
******************************************************************************
1523
*/
1524
void isvce_evaluate_intra16x16_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst,
1525
                                     UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
1526
                                     UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
1527
                                     UWORD32 u4_valid_intra_modes)
1528
0
{
1529
0
    UWORD8 *pu1_neighbour;
1530
0
    UWORD8 *pu1_src_temp = pu1_src;
1531
0
    UWORD8 left = 0, top = 0;
1532
0
    WORD32 u4_dcval = 0;
1533
0
    WORD32 i, j;
1534
0
    WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
1535
0
    UWORD8 val;
1536
1537
0
    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1538
0
    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1539
1540
    /* left available */
1541
0
    if(left)
1542
0
    {
1543
0
        i4_sad_horz = 0;
1544
1545
0
        for(i = 0; i < 16; i++)
1546
0
        {
1547
0
            val = pu1_ngbr_pels_i16[15 - i];
1548
1549
0
            u4_dcval += val;
1550
1551
0
            for(j = 0; j < 16; j++)
1552
0
            {
1553
0
                i4_sad_horz += ABS(val - pu1_src_temp[j]);
1554
0
            }
1555
1556
0
            pu1_src_temp += src_strd;
1557
0
        }
1558
0
        u4_dcval += 8;
1559
0
    }
1560
1561
0
    pu1_src_temp = pu1_src;
1562
    /* top available */
1563
0
    if(top)
1564
0
    {
1565
0
        i4_sad_vert = 0;
1566
1567
0
        for(i = 0; i < 16; i++)
1568
0
        {
1569
0
            u4_dcval += pu1_ngbr_pels_i16[17 + i];
1570
1571
0
            for(j = 0; j < 16; j++)
1572
0
            {
1573
0
                i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
1574
0
            }
1575
0
            pu1_src_temp += src_strd;
1576
0
        }
1577
0
        u4_dcval += 8;
1578
0
    }
1579
1580
0
    u4_dcval = (u4_dcval) >> (3 + left + top);
1581
1582
0
    pu1_src_temp = pu1_src;
1583
1584
    /* none available */
1585
0
    u4_dcval += (left == 0) * (top == 0) * 128;
1586
1587
0
    i4_sad_dc = 0;
1588
1589
0
    for(i = 0; i < 16; i++)
1590
0
    {
1591
0
        for(j = 0; j < 16; j++)
1592
0
        {
1593
0
            i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
1594
0
        }
1595
0
        pu1_src_temp += src_strd;
1596
0
    }
1597
1598
0
    if((u4_valid_intra_modes & 04) == 0) /* If DC is disabled */
1599
0
        i4_sad_dc = INT_MAX;
1600
1601
0
    if((u4_valid_intra_modes & 01) == 0) /* If VERT is disabled */
1602
0
        i4_sad_vert = INT_MAX;
1603
1604
0
    if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled */
1605
0
        i4_sad_horz = INT_MAX;
1606
1607
0
    i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
1608
1609
    /* Finding Minimum sad and doing corresponding prediction */
1610
0
    if(i4_min_sad < *pu4_sadmin)
1611
0
    {
1612
0
        *pu4_sadmin = i4_min_sad;
1613
0
        if(i4_min_sad == i4_sad_vert)
1614
0
        {
1615
0
            *u4_intra_mode = VERT_I16x16;
1616
0
            pu1_neighbour = pu1_ngbr_pels_i16 + 17;
1617
0
            for(j = 0; j < 16; j++)
1618
0
            {
1619
0
                memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
1620
0
                pu1_dst += dst_strd;
1621
0
            }
1622
0
        }
1623
0
        else if(i4_min_sad == i4_sad_horz)
1624
0
        {
1625
0
            *u4_intra_mode = HORZ_I16x16;
1626
0
            for(j = 0; j < 16; j++)
1627
0
            {
1628
0
                val = pu1_ngbr_pels_i16[15 - j];
1629
0
                memset(pu1_dst, val, MB_SIZE);
1630
0
                pu1_dst += dst_strd;
1631
0
            }
1632
0
        }
1633
0
        else
1634
0
        {
1635
0
            *u4_intra_mode = DC_I16x16;
1636
0
            for(j = 0; j < 16; j++)
1637
0
            {
1638
0
                memset(pu1_dst, u4_dcval, MB_SIZE);
1639
0
                pu1_dst += dst_strd;
1640
0
            }
1641
0
        }
1642
0
    }
1643
0
}
1644
1645
/**
1646
******************************************************************************
1647
*
1648
* @brief
1649
*  Evaluate best intra 4x4 mode and perform prediction.
1650
*
1651
* @par Description
1652
*  This function evaluates  4x4 modes and compute corresponding sad
1653
*  and return the buffer predicted with best mode.
1654
*
1655
* @param[in] pu1_src
1656
*  UWORD8 pointer to the source
1657
*
1658
* @param[in] pu1_ngbr_pels
1659
*  UWORD8 pointer to neighbouring pels
1660
*
1661
* @param[out] pu1_dst
1662
*  UWORD8 pointer to the destination
1663
*
1664
* @param[in] src_strd
1665
*  integer source stride
1666
*
1667
* @param[in] dst_strd
1668
*  integer destination stride
1669
*
1670
* @param[in] u4_n_avblty
1671
*  availability of neighbouring pixels
1672
*
1673
* @param[in] u4_intra_mode
1674
*  Pointer to the variable in which best mode is returned
1675
*
1676
* @param[in] pu4_sadmin
1677
*  Pointer to the variable in which minimum cost is returned
1678
*
1679
* @param[in] u4_valid_intra_modes
1680
*  Says what all modes are valid
1681
*
1682
* @param[in] u4_lambda
1683
*  Lamda value for computing cost from SAD
1684
*
1685
* @param[in] u4_predictd_mode
1686
*  Predicted mode for cost computation
1687
*
1688
* @returns      none
1689
*
1690
******************************************************************************
1691
*/
1692
void isvce_evaluate_intra_4x4_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
1693
                                    UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
1694
                                    UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
1695
                                    UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda,
1696
                                    UWORD32 u4_predictd_mode)
1697
0
{
1698
0
    UWORD8 *pu1_src_temp = pu1_src;
1699
0
    UWORD8 *pu1_pred = pu1_ngbr_pels;
1700
0
    UWORD8 left = 0, top = 0;
1701
0
    UWORD8 u1_pred_val = 0;
1702
0
    UWORD8 u1_pred_vals[4] = {0};
1703
0
    UWORD8 *pu1_pred_val = NULL;
1704
    /* To store FILT121 operated values*/
1705
0
    UWORD8 u1_pred_vals_diag_121[15] = {0};
1706
    /* To store FILT11 operated values*/
1707
0
    UWORD8 u1_pred_vals_diag_11[15] = {0};
1708
0
    UWORD8 u1_pred_vals_vert_r[8] = {0};
1709
0
    UWORD8 u1_pred_vals_horz_d[10] = {0};
1710
0
    UWORD8 u1_pred_vals_horz_u[10] = {0};
1711
0
    WORD32 u4_dcval = 0;
1712
0
    WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1713
0
                               INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1714
1715
0
    WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1716
0
                                INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1717
0
    WORD32 i, i4_min_cost = INT_MAX;
1718
1719
0
    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1720
0
    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1721
1722
    /* Computing SAD */
1723
1724
    /* VERT mode valid */
1725
0
    if(u4_valid_intra_modes & 1)
1726
0
    {
1727
0
        pu1_pred = pu1_ngbr_pels + 5;
1728
0
        i4_sad[VERT_I4x4] = 0;
1729
0
        i4_cost[VERT_I4x4] = 0;
1730
1731
0
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1732
0
        pu1_src_temp += src_strd;
1733
0
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1734
0
        pu1_src_temp += src_strd;
1735
0
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1736
0
        pu1_src_temp += src_strd;
1737
0
        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1738
1739
0
        i4_cost[VERT_I4x4] =
1740
0
            i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda : 4 * u4_lambda);
1741
0
    }
1742
1743
    /* HORZ mode valid */
1744
0
    if(u4_valid_intra_modes & 2)
1745
0
    {
1746
0
        i4_sad[HORZ_I4x4] = 0;
1747
0
        i4_cost[HORZ_I4x4] = 0;
1748
0
        pu1_src_temp = pu1_src;
1749
1750
0
        u1_pred_val = pu1_ngbr_pels[3];
1751
1752
0
        i4_sad[HORZ_I4x4] +=
1753
0
            ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1754
0
            ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1755
0
        pu1_src_temp += src_strd;
1756
1757
0
        u1_pred_val = pu1_ngbr_pels[2];
1758
1759
0
        i4_sad[HORZ_I4x4] +=
1760
0
            ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1761
0
            ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1762
0
        pu1_src_temp += src_strd;
1763
1764
0
        u1_pred_val = pu1_ngbr_pels[1];
1765
1766
0
        i4_sad[HORZ_I4x4] +=
1767
0
            ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1768
0
            ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1769
0
        pu1_src_temp += src_strd;
1770
1771
0
        u1_pred_val = pu1_ngbr_pels[0];
1772
1773
0
        i4_sad[HORZ_I4x4] +=
1774
0
            ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1775
0
            ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1776
1777
0
        i4_cost[HORZ_I4x4] =
1778
0
            i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda : 4 * u4_lambda);
1779
0
    }
1780
1781
    /* DC mode valid */
1782
0
    if(u4_valid_intra_modes & 4)
1783
0
    {
1784
0
        i4_sad[DC_I4x4] = 0;
1785
0
        i4_cost[DC_I4x4] = 0;
1786
0
        pu1_src_temp = pu1_src;
1787
1788
0
        if(left)
1789
0
            u4_dcval =
1790
0
                pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] + pu1_ngbr_pels[3] + 2;
1791
0
        if(top)
1792
0
            u4_dcval +=
1793
0
                pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] + pu1_ngbr_pels[8] + 2;
1794
1795
0
        u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1796
1797
        /* none available */
1798
0
        memset(u1_pred_vals, u4_dcval, 4);
1799
0
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1800
0
        pu1_src_temp += src_strd;
1801
0
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1802
0
        pu1_src_temp += src_strd;
1803
0
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1804
0
        pu1_src_temp += src_strd;
1805
0
        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1806
0
        pu1_src_temp += src_strd;
1807
1808
0
        i4_cost[DC_I4x4] =
1809
0
            i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda : 4 * u4_lambda);
1810
0
    }
1811
1812
    /* if modes other than VERT, HORZ and DC are  valid */
1813
0
    if(u4_valid_intra_modes > 7)
1814
0
    {
1815
0
        pu1_pred = pu1_ngbr_pels;
1816
0
        pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1817
1818
        /* Performing FILT121 and FILT11 operation for all neighbour values*/
1819
0
        for(i = 0; i < 13; i++)
1820
0
        {
1821
0
            u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1822
0
            u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1823
1824
0
            pu1_pred++;
1825
0
        }
1826
1827
0
        if(u4_valid_intra_modes & 8) /* DIAG_DL */
1828
0
        {
1829
0
            i4_sad[DIAG_DL_I4x4] = 0;
1830
0
            i4_cost[DIAG_DL_I4x4] = 0;
1831
0
            pu1_src_temp = pu1_src;
1832
0
            pu1_pred_val = u1_pred_vals_diag_121 + 5;
1833
1834
0
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1835
0
            pu1_src_temp += src_strd;
1836
0
            USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1837
0
            pu1_src_temp += src_strd;
1838
0
            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1839
0
            pu1_src_temp += src_strd;
1840
0
            USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1841
0
            pu1_src_temp += src_strd;
1842
0
            i4_cost[DIAG_DL_I4x4] =
1843
0
                i4_sad[DIAG_DL_I4x4] +
1844
0
                ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda : 4 * u4_lambda);
1845
0
        }
1846
1847
0
        if(u4_valid_intra_modes & 16) /* DIAG_DR */
1848
0
        {
1849
0
            i4_sad[DIAG_DR_I4x4] = 0;
1850
0
            i4_cost[DIAG_DR_I4x4] = 0;
1851
0
            pu1_src_temp = pu1_src;
1852
0
            pu1_pred_val = u1_pred_vals_diag_121 + 3;
1853
1854
0
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1855
0
            pu1_src_temp += src_strd;
1856
0
            USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1857
0
            pu1_src_temp += src_strd;
1858
0
            USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1859
0
            pu1_src_temp += src_strd;
1860
0
            USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1861
0
            pu1_src_temp += src_strd;
1862
0
            i4_cost[DIAG_DR_I4x4] =
1863
0
                i4_sad[DIAG_DR_I4x4] +
1864
0
                ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda : 4 * u4_lambda);
1865
0
        }
1866
1867
0
        if(u4_valid_intra_modes & 32) /* VERT_R mode valid ????*/
1868
0
        {
1869
0
            i4_sad[VERT_R_I4x4] = 0;
1870
1871
0
            pu1_src_temp = pu1_src;
1872
0
            u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1873
0
            memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1874
0
            u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1875
0
            memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1876
1877
0
            pu1_pred_val = u1_pred_vals_diag_11 + 4;
1878
0
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1879
0
            pu1_pred_val = u1_pred_vals_diag_121 + 3;
1880
0
            pu1_src_temp += src_strd;
1881
0
            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1882
0
            pu1_src_temp += src_strd;
1883
0
            USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1884
0
            pu1_src_temp += src_strd;
1885
0
            USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), i4_sad[VERT_R_I4x4]);
1886
1887
0
            i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] +
1888
0
                                   ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda : 4 * u4_lambda);
1889
0
        }
1890
1891
0
        if(u4_valid_intra_modes & 64) /* HORZ_D mode valid ????*/
1892
0
        {
1893
0
            i4_sad[HORZ_D_I4x4] = 0;
1894
1895
0
            pu1_src_temp = pu1_src;
1896
0
            u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1897
0
            memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1898
0
            u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1899
0
            u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1900
0
            u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1901
0
            u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1902
0
            u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1903
0
            u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1904
1905
0
            pu1_pred_val = u1_pred_vals_horz_d;
1906
0
            USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1907
0
            pu1_src_temp += src_strd;
1908
0
            USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1909
0
            pu1_src_temp += src_strd;
1910
0
            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1911
0
            pu1_src_temp += src_strd;
1912
0
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1913
1914
0
            i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] +
1915
0
                                   ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda : 4 * u4_lambda);
1916
0
        }
1917
1918
0
        if(u4_valid_intra_modes & 128) /* VERT_L mode valid ????*/
1919
0
        {
1920
0
            i4_sad[VERT_L_I4x4] = 0;
1921
0
            pu1_src_temp = pu1_src;
1922
0
            pu1_pred_val = u1_pred_vals_diag_11 + 5;
1923
0
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1924
0
            pu1_src_temp += src_strd;
1925
0
            pu1_pred_val = u1_pred_vals_diag_121 + 5;
1926
0
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1927
0
            pu1_src_temp += src_strd;
1928
0
            pu1_pred_val = u1_pred_vals_diag_11 + 6;
1929
0
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1930
0
            pu1_src_temp += src_strd;
1931
0
            pu1_pred_val = u1_pred_vals_diag_121 + 6;
1932
0
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1933
1934
0
            i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] +
1935
0
                                   ((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda : 4 * u4_lambda);
1936
0
        }
1937
1938
0
        if(u4_valid_intra_modes & 256) /* HORZ_U mode valid ????*/
1939
0
        {
1940
0
            i4_sad[HORZ_U_I4x4] = 0;
1941
0
            pu1_src_temp = pu1_src;
1942
0
            u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1943
0
            u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1944
0
            u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1945
0
            u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1946
0
            u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1947
0
            u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1948
1949
0
            memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
1950
1951
0
            pu1_pred_val = u1_pred_vals_horz_u;
1952
0
            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
1953
0
            pu1_src_temp += src_strd;
1954
0
            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
1955
0
            pu1_src_temp += src_strd;
1956
0
            USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
1957
0
            pu1_src_temp += src_strd;
1958
0
            USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
1959
1960
0
            i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] +
1961
0
                                   ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda : 4 * u4_lambda);
1962
0
        }
1963
1964
0
        i4_min_cost =
1965
0
            MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
1966
0
                 MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
1967
0
    }
1968
0
    else
1969
0
    {
1970
        /* Only first three modes valid */
1971
0
        i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
1972
0
    }
1973
1974
0
    *pu4_sadmin = i4_min_cost;
1975
1976
0
    if(i4_min_cost == i4_cost[0])
1977
0
    {
1978
0
        *u4_intra_mode = VERT_I4x4;
1979
0
        pu1_pred_val = pu1_ngbr_pels + 5;
1980
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
1981
0
        pu1_dst += dst_strd;
1982
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
1983
0
        pu1_dst += dst_strd;
1984
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
1985
0
        pu1_dst += dst_strd;
1986
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
1987
0
    }
1988
0
    else if(i4_min_cost == i4_cost[1])
1989
0
    {
1990
0
        *u4_intra_mode = HORZ_I4x4;
1991
0
        memset(pu1_dst, pu1_ngbr_pels[3], 4);
1992
0
        pu1_dst += dst_strd;
1993
0
        memset(pu1_dst, pu1_ngbr_pels[2], 4);
1994
0
        pu1_dst += dst_strd;
1995
0
        memset(pu1_dst, pu1_ngbr_pels[1], 4);
1996
0
        pu1_dst += dst_strd;
1997
0
        memset(pu1_dst, pu1_ngbr_pels[0], 4);
1998
0
    }
1999
0
    else if(i4_min_cost == i4_cost[2])
2000
0
    {
2001
0
        *u4_intra_mode = DC_I4x4;
2002
0
        memset(pu1_dst, u4_dcval, 4);
2003
0
        pu1_dst += dst_strd;
2004
0
        memset(pu1_dst, u4_dcval, 4);
2005
0
        pu1_dst += dst_strd;
2006
0
        memset(pu1_dst, u4_dcval, 4);
2007
0
        pu1_dst += dst_strd;
2008
0
        memset(pu1_dst, u4_dcval, 4);
2009
0
    }
2010
2011
0
    else if(i4_min_cost == i4_cost[3])
2012
0
    {
2013
0
        *u4_intra_mode = DIAG_DL_I4x4;
2014
0
        pu1_pred_val = u1_pred_vals_diag_121 + 5;
2015
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2016
0
        pu1_dst += dst_strd;
2017
0
        memcpy(pu1_dst, (pu1_pred_val + 1), 4);
2018
0
        pu1_dst += dst_strd;
2019
0
        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2020
0
        pu1_dst += dst_strd;
2021
0
        memcpy(pu1_dst, (pu1_pred_val + 3), 4);
2022
0
    }
2023
0
    else if(i4_min_cost == i4_cost[4])
2024
0
    {
2025
0
        *u4_intra_mode = DIAG_DR_I4x4;
2026
0
        pu1_pred_val = u1_pred_vals_diag_121 + 3;
2027
2028
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2029
0
        pu1_dst += dst_strd;
2030
0
        memcpy(pu1_dst, (pu1_pred_val - 1), 4);
2031
0
        pu1_dst += dst_strd;
2032
0
        memcpy(pu1_dst, (pu1_pred_val - 2), 4);
2033
0
        pu1_dst += dst_strd;
2034
0
        memcpy(pu1_dst, (pu1_pred_val - 3), 4);
2035
0
    }
2036
2037
0
    else if(i4_min_cost == i4_cost[5])
2038
0
    {
2039
0
        *u4_intra_mode = VERT_R_I4x4;
2040
0
        pu1_pred_val = u1_pred_vals_diag_11 + 4;
2041
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2042
0
        pu1_dst += dst_strd;
2043
0
        pu1_pred_val = u1_pred_vals_diag_121 + 3;
2044
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2045
0
        pu1_dst += dst_strd;
2046
0
        memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2047
0
        pu1_dst += dst_strd;
2048
0
        memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2049
0
    }
2050
0
    else if(i4_min_cost == i4_cost[6])
2051
0
    {
2052
0
        *u4_intra_mode = HORZ_D_I4x4;
2053
0
        pu1_pred_val = u1_pred_vals_horz_d;
2054
0
        memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2055
0
        pu1_dst += dst_strd;
2056
0
        memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2057
0
        pu1_dst += dst_strd;
2058
0
        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2059
0
        pu1_dst += dst_strd;
2060
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2061
0
        pu1_dst += dst_strd;
2062
0
    }
2063
0
    else if(i4_min_cost == i4_cost[7])
2064
0
    {
2065
0
        *u4_intra_mode = VERT_L_I4x4;
2066
0
        pu1_pred_val = u1_pred_vals_diag_11 + 5;
2067
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2068
0
        pu1_dst += dst_strd;
2069
0
        pu1_pred_val = u1_pred_vals_diag_121 + 5;
2070
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2071
0
        pu1_dst += dst_strd;
2072
0
        pu1_pred_val = u1_pred_vals_diag_11 + 6;
2073
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2074
0
        pu1_dst += dst_strd;
2075
0
        pu1_pred_val = u1_pred_vals_diag_121 + 6;
2076
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2077
0
    }
2078
0
    else if(i4_min_cost == i4_cost[8])
2079
0
    {
2080
0
        *u4_intra_mode = HORZ_U_I4x4;
2081
0
        pu1_pred_val = u1_pred_vals_horz_u;
2082
0
        memcpy(pu1_dst, (pu1_pred_val), 4);
2083
0
        pu1_dst += dst_strd;
2084
0
        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2085
0
        pu1_dst += dst_strd;
2086
0
        memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2087
0
        pu1_dst += dst_strd;
2088
0
        memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2089
0
        pu1_dst += dst_strd;
2090
0
    }
2091
2092
0
    return;
2093
0
}
2094
2095
/**
2096
******************************************************************************
2097
*
2098
* @brief:
2099
*  Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the
2100
*  prediction.
2101
*
2102
* @par Description
2103
*  This function evaluates the first three intra chroma modes, computes the
2104
*  corresponding SAD and returns the buffer predicted with the best mode.
2105
*
2106
* @param[in] pu1_src
2107
*  UWORD8 pointer to the source
2108
*
2109
* @param[in] pu1_ngbr_pels
2110
*  UWORD8 pointer to neighbouring pels
2111
*
2112
* @param[out] pu1_dst
2113
*  UWORD8 pointer to the destination
2114
*
2115
* @param[in] src_strd
2116
*  integer source stride
2117
*
2118
* @param[in] dst_strd
2119
*  integer destination stride
2120
*
2121
* @param[in] u4_n_avblty
2122
*  availability of neighbouring pixels
2123
*
2124
* @param[in] u4_intra_mode
2125
*  Pointer to the variable in which best mode is returned
2126
*
2127
* @param[in] pu4_sadmin
2128
*  Pointer to the variable in which minimum sad is returned
2129
*
2130
* @param[in] u4_valid_intra_modes
2131
*  Says what all modes are valid
2132
*
2133
* @return      none
2134
*
2135
******************************************************************************
2136
*/
2137
void isvce_evaluate_intra_chroma_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
2138
                                       UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
2139
                                       UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
2140
                                       UWORD32 u4_valid_intra_modes)
2141
0
{
2142
0
    UWORD8 *pu1_neighbour;
2143
0
    UWORD8 *pu1_src_temp = pu1_src;
2144
0
    UWORD8 left = 0, top = 0;
2145
0
    WORD32 u4_dcval_u_l[2] = {0, 0}, /*sum left neighbours for 'U' ,two separate sets - sum of
2146
                                        first four from top,and sum of four values from bottom */
2147
0
        u4_dcval_u_t[2] = {0, 0};    /*sum top neighbours for 'U'*/
2148
2149
0
    WORD32 u4_dcval_v_l[2] = {0, 0}, /*sum left neighbours for 'V'*/
2150
0
        u4_dcval_v_t[2] = {0, 0};    /*sum top neighbours for 'V'*/
2151
2152
0
    WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
2153
0
                           i4_min_sad = INT_MAX;
2154
0
    UWORD8 val_u, val_v;
2155
2156
0
    WORD32 u4_dc_val[2][2][2]; /*  -----------
2157
                                   |    |    |  Chroma can have four
2158
                                   | 00 | 01 |  separate dc value...
2159
                                   -----------  u4_dc_val corresponds to this dc
2160
                                  values |    |    |  with u4_dc_val[2][2][U] and
2161
                                  u4_dc_val[2][2][V] | 10 | 11 |
2162
                                   -----------                */
2163
0
    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2164
0
    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2165
2166
    /*Evaluating HORZ*/
2167
0
    if(left) /* Ifleft available*/
2168
0
    {
2169
0
        i4_sad_horz = 0;
2170
2171
0
        for(i = 0; i < 8; i++)
2172
0
        {
2173
0
            val_v = pu1_ngbr_pels[15 - 2 * i];
2174
0
            val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2175
0
            row = i / 4;
2176
0
            u4_dcval_u_l[row] += val_u;
2177
0
            u4_dcval_v_l[row] += val_v;
2178
0
            for(j = 0; j < 8; j++)
2179
0
            {
2180
0
                i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for HORZ mode*/
2181
0
                i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2182
0
            }
2183
2184
0
            pu1_src_temp += src_strd;
2185
0
        }
2186
0
        u4_dcval_u_l[0] += 2;
2187
0
        u4_dcval_u_l[1] += 2;
2188
0
        u4_dcval_v_l[0] += 2;
2189
0
        u4_dcval_v_l[1] += 2;
2190
0
    }
2191
2192
    /*Evaluating VERT**/
2193
0
    pu1_src_temp = pu1_src;
2194
0
    if(top) /* top available*/
2195
0
    {
2196
0
        i4_sad_vert = 0;
2197
2198
0
        for(i = 0; i < 8; i++)
2199
0
        {
2200
0
            col = i / 4;
2201
2202
0
            val_u = pu1_ngbr_pels[18 + i * 2];
2203
0
            val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2204
0
            u4_dcval_u_t[col] += val_u;
2205
0
            u4_dcval_v_t[col] += val_v;
2206
2207
0
            for(j = 0; j < 16; j++)
2208
0
            {
2209
0
                i4_sad_vert +=
2210
0
                    ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]); /* Finding SAD for VERT mode*/
2211
0
            }
2212
0
            pu1_src_temp += src_strd;
2213
0
        }
2214
0
        u4_dcval_u_t[0] += 2;
2215
0
        u4_dcval_u_t[1] += 2;
2216
0
        u4_dcval_v_t[0] += 2;
2217
0
        u4_dcval_v_t[1] += 2;
2218
0
    }
2219
2220
    /* computing DC value*/
2221
    /* Equation  8-128 in spec*/
2222
0
    u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2223
0
    u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2224
0
    u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2225
0
    u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2226
2227
0
    if(top)
2228
0
    {
2229
        /* Equation  8-132 in spec*/
2230
0
        u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2231
0
        u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2232
0
    }
2233
0
    else
2234
0
    {
2235
0
        u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2236
0
        u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2237
0
    }
2238
2239
0
    if(left)
2240
0
    {
2241
0
        u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2242
0
        u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2243
0
    }
2244
0
    else
2245
0
    {
2246
0
        u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2247
0
        u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2248
0
    }
2249
2250
0
    if(!(left || top))
2251
0
    {
2252
        /*none available*/
2253
0
        u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2254
0
            u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2255
0
    }
2256
2257
    /* Evaluating DC */
2258
0
    pu1_src_temp = pu1_src;
2259
0
    i4_sad_dc = 0;
2260
0
    for(i = 0; i < 8; i++)
2261
0
    {
2262
0
        for(j = 0; j < 8; j++)
2263
0
        {
2264
0
            col = j / 4;
2265
0
            row = i / 4;
2266
0
            val_u = u4_dc_val[row][col][0];
2267
0
            val_v = u4_dc_val[row][col][1];
2268
2269
0
            i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for DC mode*/
2270
0
            i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2271
0
        }
2272
0
        pu1_src_temp += src_strd;
2273
0
    }
2274
2275
0
    if((u4_valid_intra_modes & 01) == 0) /* If DC is disabled*/
2276
0
        i4_sad_dc = INT_MAX;
2277
0
    if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled*/
2278
0
        i4_sad_horz = INT_MAX;
2279
0
    if((u4_valid_intra_modes & 04) == 0) /* If VERT is disabled*/
2280
0
        i4_sad_vert = INT_MAX;
2281
2282
0
    i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2283
2284
    /* Finding Minimum sad and doing corresponding prediction*/
2285
0
    if(i4_min_sad < *pu4_sadmin)
2286
0
    {
2287
0
        *pu4_sadmin = i4_min_sad;
2288
2289
0
        if(i4_min_sad == i4_sad_dc)
2290
0
        {
2291
0
            *u4_intra_mode = DC_CH_I8x8;
2292
0
            for(i = 0; i < 8; i++)
2293
0
            {
2294
0
                for(j = 0; j < 8; j++)
2295
0
                {
2296
0
                    col = j / 4;
2297
0
                    row = i / 4;
2298
2299
0
                    pu1_dst[2 * j] = u4_dc_val[row][col][0];
2300
0
                    pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2301
0
                }
2302
0
                pu1_dst += dst_strd;
2303
0
            }
2304
0
        }
2305
0
        else if(i4_min_sad == i4_sad_horz)
2306
0
        {
2307
0
            *u4_intra_mode = HORZ_CH_I8x8;
2308
0
            for(j = 0; j < 8; j++)
2309
0
            {
2310
0
                val_v = pu1_ngbr_pels[15 - 2 * j];
2311
0
                val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2312
2313
0
                for(i = 0; i < 8; i++)
2314
0
                {
2315
0
                    pu1_dst[2 * i] = val_u;
2316
0
                    pu1_dst[2 * i + 1] = val_v;
2317
0
                }
2318
0
                pu1_dst += dst_strd;
2319
0
            }
2320
0
        }
2321
0
        else
2322
0
        {
2323
0
            *u4_intra_mode = VERT_CH_I8x8;
2324
0
            pu1_neighbour = pu1_ngbr_pels + 18;
2325
0
            for(j = 0; j < 8; j++)
2326
0
            {
2327
0
                memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2328
0
                pu1_dst += dst_strd;
2329
0
            }
2330
0
        }
2331
0
    }
2332
2333
0
    return;
2334
0
}