Coverage Report

Created: 2025-08-26 06:37

/src/libavc/encoder/ih264e_process.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
*******************************************************************************
23
* @file
24
*  ih264e_process.c
25
*
26
* @brief
27
*  Contains functions for codec thread
28
*
29
* @author
30
*  ittiam
31
*
32
* @par List of Functions:
33
* - ih264e_generate_sps_pps
34
* - ih264e_init_entropy_ctxt
35
* - ih264e_entropy
36
* - ih264e_pack_header_data
37
* - ih264e_update_proc_ctxt
38
* - ih264e_init_proc_ctxt
39
* - ih264e_pad_recon_buffer
40
* - ih264e_dblk_pad_hpel_processing_n_mbs
41
* - ih264e_process
42
* - ih264e_update_rc_post_enc
43
* - ih264e_process_thread
44
*
45
* @remarks
46
*  none
47
*
48
*******************************************************************************
49
*/
50
51
/*****************************************************************************/
52
/* File Includes                                                             */
53
/*****************************************************************************/
54
55
/* System Include Files */
56
#include <stdio.h>
57
#include <stddef.h>
58
#include <stdlib.h>
59
#include <string.h>
60
#include <limits.h>
61
#include <assert.h>
62
63
/* User Include Files */
64
#include "ih264e_config.h"
65
#include "ih264_typedefs.h"
66
#include "iv2.h"
67
#include "ive2.h"
68
#include "ithread.h"
69
70
#include "ih264_debug.h"
71
#include "ih264_macros.h"
72
#include "ih264_error.h"
73
#include "ih264_defs.h"
74
#include "ih264_mem_fns.h"
75
#include "ih264_padding.h"
76
#include "ih264_structs.h"
77
#include "ih264_trans_quant_itrans_iquant.h"
78
#include "ih264_inter_pred_filters.h"
79
#include "ih264_intra_pred_filters.h"
80
#include "ih264_deblk_edge_filters.h"
81
#include "ih264_common_tables.h"
82
#include "ih264_cavlc_tables.h"
83
#include "ih264_cabac_tables.h"
84
#include "ih264_buf_mgr.h"
85
#include "ih264_list.h"
86
#include "ih264_platform_macros.h"
87
88
#include "ime_defs.h"
89
#include "ime_distortion_metrics.h"
90
#include "ime_structs.h"
91
#include "ime_statistics.h"
92
93
#include "irc_mem_req_and_acq.h"
94
#include "irc_cntrl_param.h"
95
#include "irc_frame_info_collector.h"
96
#include "irc_rate_control_api.h"
97
98
#include "ih264e_error.h"
99
#include "ih264e_defs.h"
100
#include "ih264e_globals.h"
101
#include "ih264e_rate_control.h"
102
#include "ih264e_bitstream.h"
103
#include "ih264e_cabac_structs.h"
104
#include "ih264e_structs.h"
105
#include "ih264e_deblk.h"
106
#include "ih264e_encode_header.h"
107
#include "ih264e_utils.h"
108
#include "ih264e_me.h"
109
#include "ih264e_intra_modes_eval.h"
110
#include "ih264e_cavlc.h"
111
#include "ih264e_cabac.h"
112
#include "ih264e_master.h"
113
#include "ih264e_process.h"
114
#include "ih264e_trace.h"
115
#include "ih264e_statistics.h"
116
#include "ih264e_platform_macros.h"
117
118
119
/*****************************************************************************/
120
/* Function Definitions                                                      */
121
/*****************************************************************************/
122
123
/**
124
******************************************************************************
125
*
126
* @brief This function generates sps, pps set on request
127
*
128
* @par   Description
129
*  When the encoder is set in header generation mode, the following function
130
*  is called. This generates sps and pps headers and returns the control back
131
*  to caller.
132
*
133
* @param[in]    ps_codec
134
*  pointer to codec context
135
*
136
* @return      success or failure error code
137
*
138
******************************************************************************
139
*/
140
IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
141
4.22k
{
142
    /* choose between ping-pong process buffer set */
143
4.22k
    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
144
145
    /* entropy ctxt */
146
4.22k
    entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
147
148
    /* Bitstream structure */
149
4.22k
    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
150
151
    /* sps */
152
4.22k
    sps_t *ps_sps = NULL;
153
154
    /* pps */
155
4.22k
    pps_t *ps_pps = NULL;
156
157
    /* output buff */
158
4.22k
    out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
159
160
161
    /********************************************************************/
162
    /*      initialize the bit stream buffer                            */
163
    /********************************************************************/
164
4.22k
    ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
165
166
    /********************************************************************/
167
    /*                    BEGIN HEADER GENERATION                       */
168
    /********************************************************************/
169
    /*ps_codec->i4_pps_id ++;*/
170
4.22k
    ps_codec->i4_pps_id %= MAX_PPS_CNT;
171
172
    /*ps_codec->i4_sps_id ++;*/
173
4.22k
    ps_codec->i4_sps_id %= MAX_SPS_CNT;
174
175
    /* populate sps header */
176
4.22k
    ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
177
4.22k
    ih264e_populate_sps(ps_codec, ps_sps);
178
179
    /* populate pps header */
180
4.22k
    ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
181
4.22k
    ih264e_populate_pps(ps_codec, ps_pps);
182
183
4.22k
    ps_entropy->i4_error_code = IH264E_SUCCESS;
184
185
    /* generate sps */
186
4.22k
    ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
187
4.22k
                                                     &ps_codec->s_cfg.s_vui);
188
4.22k
    if(ps_entropy->i4_error_code != IH264E_SUCCESS)
189
0
    {
190
0
        return ps_entropy->i4_error_code;
191
0
    }
192
    /* generate pps */
193
4.22k
    ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
194
195
    /* queue output buffer */
196
4.22k
    ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
197
198
4.22k
    return ps_entropy->i4_error_code;
199
4.22k
}
200
201
/**
202
*******************************************************************************
203
*
204
* @brief   initialize entropy context.
205
*
206
* @par Description:
207
*  Before invoking the call to perform to entropy coding the entropy context
208
*  associated with the job needs to be initialized. This involves the start
209
*  mb address, end mb address, slice index and the pointer to location at
210
*  which the mb residue info and mb header info are packed.
211
*
212
* @param[in] ps_proc
213
*  Pointer to the current process context
214
*
215
* @returns error status
216
*
217
* @remarks none
218
*
219
*******************************************************************************
220
*/
221
IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
222
173k
{
223
    /* codec context */
224
173k
    codec_t *ps_codec = ps_proc->ps_codec;
225
226
    /* entropy ctxt */
227
173k
    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
228
229
    /* start address */
230
173k
    ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
231
232
    /* end address */
233
173k
    ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
234
235
    /* slice index */
236
173k
    ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
237
238
    /* sof */
239
    /* @ start of frame or start of a new slice, set sof flag */
240
173k
    if (ps_entropy->i4_mb_start_add == 0)
241
40.9k
    {
242
40.9k
        ps_entropy->i4_sof = 1;
243
40.9k
    }
244
245
173k
    if (ps_entropy->i4_mb_x == 0)
246
173k
    {
247
        /* packed mb coeff data */
248
173k
        ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
249
173k
                        ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
250
251
        /* packed mb header data */
252
173k
        ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
253
173k
                        ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
254
173k
    }
255
256
173k
    return IH264E_SUCCESS;
257
173k
}
258
259
/**
260
*******************************************************************************
261
*
262
* @brief entry point for entropy coding
263
*
264
* @par Description
265
*  This function calls lower level functions to perform entropy coding for a
266
*  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
267
*  back the control, updates the ctxt and calls lower level functions again.
268
*  This process is repeated till all the rows or group of mb's (which ever is
269
*  minimum) are coded
270
*
271
* @param[in] ps_proc
272
*  process context
273
*
274
* @returns  error status
275
*
276
* @remarks
277
*
278
*******************************************************************************
279
*/
280
IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
281
173k
{
282
    /* codec context */
283
173k
    codec_t *ps_codec = ps_proc->ps_codec;
284
285
    /* entropy context */
286
173k
    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
287
288
    /* cabac context */
289
173k
    cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
290
291
    /* sps */
292
173k
    sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
293
294
    /* pps */
295
173k
    pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
296
297
    /* slice header */
298
173k
    slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
299
300
    /* slice type */
301
173k
    WORD32 i4_slice_type = ps_proc->i4_slice_type;
302
303
    /* Bitstream structure */
304
173k
    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
305
306
    /* output buff */
307
173k
    out_buf_t s_out_buf;
308
309
    /* sei params */
310
173k
    sei_params_t s_sei;
311
312
    /* proc map */
313
173k
    UWORD8  *pu1_proc_map;
314
315
    /* entropy map */
316
173k
    UWORD8  *pu1_entropy_map_curr;
317
318
    /* proc base idx */
319
173k
    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
320
321
    /* temp var */
322
173k
    WORD32 i4_wd_mbs, i4_ht_mbs;
323
173k
    UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx, u4_insert_per_idr;
324
173k
    WORD32 bitstream_start_offset, bitstream_end_offset;
325
    /********************************************************************/
326
    /*                            BEGIN INIT                            */
327
    /********************************************************************/
328
329
    /* entropy encode start address */
330
173k
    u4_mb_idx = ps_entropy->i4_mb_start_add;
331
332
    /* entropy encode end address */
333
173k
    u4_mb_end_idx = ps_entropy->i4_mb_end_add;
334
335
    /* width in mbs */
336
173k
    i4_wd_mbs = ps_entropy->i4_wd_mbs;
337
338
    /* height in mbs */
339
173k
    i4_ht_mbs = ps_entropy->i4_ht_mbs;
340
341
    /* total mb cnt */
342
173k
    u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
343
344
    /* proc map */
345
173k
    pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
346
347
    /* entropy map */
348
173k
    pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
349
350
    /********************************************************************/
351
    /* @ start of frame / slice,                                        */
352
    /*      initialize the output buffer,                               */
353
    /*      initialize the bit stream buffer,                           */
354
    /*      check if sps and pps headers have to be generated,          */
355
    /*      populate and generate slice header                          */
356
    /********************************************************************/
357
173k
    if (ps_entropy->i4_sof)
358
40.9k
    {
359
        /********************************************************************/
360
        /*      initialize the output buffer                                */
361
        /********************************************************************/
362
40.9k
        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
363
364
        /* is last frame to encode */
365
40.9k
        s_out_buf.u4_is_last = ps_entropy->u4_is_last;
366
367
        /* frame idx */
368
40.9k
        s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
369
40.9k
        s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
370
371
        /********************************************************************/
372
        /*      initialize the bit stream buffer                            */
373
        /********************************************************************/
374
40.9k
        ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
375
376
        /********************************************************************/
377
        /*                    BEGIN HEADER GENERATION                       */
378
        /********************************************************************/
379
40.9k
        if (1 == ps_entropy->i4_gen_header)
380
0
        {
381
            /* generate sps */
382
0
            ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
383
0
                                                             &ps_codec->s_cfg.s_vui);
384
0
            RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
385
            /* generate pps */
386
0
            ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
387
0
            RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
388
389
            /* reset i4_gen_header */
390
0
            ps_entropy->i4_gen_header = 0;
391
0
        }
392
393
        /* populate slice header */
394
40.9k
        ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
395
396
        /* Starting bitstream offset for header in bits */
397
40.9k
        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
398
399
        /* generate sei */
400
40.9k
        u4_insert_per_idr = (NAL_SLICE_IDR == ps_slice_hdr->i1_nal_unit_type);
401
402
40.9k
        memset(&s_sei, 0, sizeof(sei_params_t));
403
40.9k
        s_sei.u1_sei_mdcv_params_present_flag =
404
40.9k
                    ps_codec->s_cfg.s_sei.u1_sei_mdcv_params_present_flag;
405
40.9k
        s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
406
40.9k
        s_sei.u1_sei_cll_params_present_flag =
407
40.9k
                    ps_codec->s_cfg.s_sei.u1_sei_cll_params_present_flag;
408
40.9k
        s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
409
40.9k
        s_sei.u1_sei_ave_params_present_flag =
410
40.9k
                    ps_codec->s_cfg.s_sei.u1_sei_ave_params_present_flag;
411
40.9k
        s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
412
40.9k
        s_sei.u1_sei_ccv_params_present_flag = 0;
413
40.9k
        s_sei.s_sei_ccv_params =
414
40.9k
                    ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].s_sei_ccv;
415
40.9k
        s_sei.u1_sei_sii_params_present_flag = ps_codec->s_cfg.s_sei.u1_sei_sii_params_present_flag;
416
40.9k
        s_sei.s_sei_sii_params = ps_codec->s_cfg.s_sei.s_sei_sii_params;
417
418
40.9k
        if((1 == ps_sps->i1_vui_parameters_present_flag) &&
419
40.9k
           (1 == ps_codec->s_cfg.s_vui.u1_video_signal_type_present_flag) &&
420
40.9k
           (1 == ps_codec->s_cfg.s_vui.u1_colour_description_present_flag) &&
421
40.9k
           (2 != ps_codec->s_cfg.s_vui.u1_colour_primaries) &&
422
40.9k
           (2 != ps_codec->s_cfg.s_vui.u1_matrix_coefficients) &&
423
40.9k
           (2 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
424
40.9k
           (4 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
425
40.9k
           (5 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics))
426
40.9k
        {
427
40.9k
            s_sei.u1_sei_ccv_params_present_flag =
428
40.9k
            ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag;
429
40.9k
        }
430
431
40.9k
        if((1 == s_sei.u1_sei_mdcv_params_present_flag && u4_insert_per_idr) ||
432
40.9k
           (1 == s_sei.u1_sei_cll_params_present_flag && u4_insert_per_idr) ||
433
40.9k
           (1 == s_sei.u1_sei_ave_params_present_flag && u4_insert_per_idr) ||
434
40.9k
           (1 == s_sei.u1_sei_ccv_params_present_flag) ||
435
40.9k
           (1 == s_sei.u1_sei_sii_params_present_flag))
436
19.9k
        {
437
19.9k
            ps_entropy->i4_error_code =
438
19.9k
                    ih264e_generate_sei(ps_bitstrm, &s_sei, u4_insert_per_idr);
439
19.9k
            RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
440
19.9k
        }
441
40.9k
        ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag = 0;
442
443
        /* generate slice header */
444
40.9k
        ps_entropy->i4_error_code = ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
445
40.9k
                                                                  ps_pps, ps_sps);
446
40.9k
        RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
447
        /* once start of frame / slice is done, you can reset it */
448
        /* it is the responsibility of the caller to set this flag */
449
40.9k
        ps_entropy->i4_sof = 0;
450
451
40.9k
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
452
12.8k
        {
453
12.8k
            BITSTREAM_BYTE_ALIGN(ps_bitstrm);
454
12.8k
            BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
455
12.8k
            RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
456
12.8k
            ih264e_init_cabac_ctxt(ps_entropy);
457
12.8k
        }
458
459
        /* Ending bitstream offset for header in bits */
460
40.9k
        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
461
40.9k
        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
462
40.9k
                        bitstream_end_offset - bitstream_start_offset;
463
40.9k
    }
464
465
    /* begin entropy coding for the mb set */
466
6.80M
    while (u4_mb_idx < u4_mb_end_idx)
467
6.65M
    {
468
        /* init ptrs/indices */
469
6.65M
        if (ps_entropy->i4_mb_x == i4_wd_mbs)
470
0
        {
471
0
            ps_entropy->i4_mb_y++;
472
0
            ps_entropy->i4_mb_x = 0;
473
474
            /* packed mb coeff data */
475
0
            ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
476
0
                            ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
477
478
            /* packed mb header data */
479
0
            ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
480
0
                            ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
481
482
            /* proc map */
483
0
            pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
484
485
            /* entropy map */
486
0
            pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
487
0
        }
488
489
6.65M
        DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
490
6.65M
        ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
491
6.65M
        ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
492
493
        /* wait until the curr mb is core coded */
494
        /* The wait for curr mb to be core coded is essential when entropy is launched
495
         * as a separate job
496
         */
497
6.65M
        while (1)
498
6.65M
        {
499
6.65M
            volatile UWORD8 *pu1_buf1;
500
6.65M
            WORD32 idx = ps_entropy->i4_mb_x;
501
502
6.65M
            pu1_buf1 = pu1_proc_map + idx;
503
6.65M
            if (*pu1_buf1)
504
6.65M
                break;
505
252
            ithread_yield();
506
252
        }
507
508
509
        /* write mb layer */
510
6.65M
        ps_entropy->i4_error_code = ps_codec->pf_write_mb_syntax_layer
511
6.65M
                        [ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
512
6.65M
        RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
513
514
        /* Starting bitstream offset for header in bits */
515
6.63M
        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
516
517
        /* set entropy map */
518
6.63M
        pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
519
520
6.63M
        u4_mb_idx++;
521
6.63M
        ps_entropy->i4_mb_x++;
522
        /* check for eof */
523
6.63M
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
524
3.07M
        {
525
3.07M
            if (ps_entropy->i4_mb_x < i4_wd_mbs)
526
3.00M
            {
527
3.00M
                ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
528
3.00M
            }
529
3.07M
        }
530
531
6.63M
        if (ps_entropy->i4_mb_x == i4_wd_mbs)
532
149k
        {
533
            /* if slices are enabled */
534
149k
            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
535
0
            {
536
                /* current slice index */
537
0
                WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
538
539
                /* slice map */
540
0
                UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
541
542
                /* No need to open a slice at end of frame. The current slice can be closed at the time
543
                 * of signaling eof flag.
544
                 */
545
0
                if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
546
0
                                                != pu1_slice_idx[u4_mb_idx]))
547
0
                {
548
0
                    if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
549
0
                    { /* mb skip run */
550
0
                        if ((i4_slice_type != ISLICE)
551
0
                                        && *ps_entropy->pi4_mb_skip_run)
552
0
                        {
553
0
                            if (*ps_entropy->pi4_mb_skip_run)
554
0
                            {
555
0
                                PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
556
0
                                            ps_entropy->i4_error_code, "mb skip run");
557
0
                                *ps_entropy->pi4_mb_skip_run = 0;
558
0
                                RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
559
0
                            }
560
0
                        }
561
                        /* put rbsp trailing bits for the previous slice */
562
0
                        ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
563
0
                        RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
564
0
                    }
565
0
                    else
566
0
                    {
567
0
                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
568
0
                    }
569
570
                    /* update slice header pointer */
571
0
                    i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
572
0
                    ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
573
0
                    ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
574
575
                    /* populate slice header */
576
0
                    ps_entropy->i4_mb_start_add = u4_mb_idx;
577
0
                    ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
578
0
                                                 ps_sps);
579
580
                    /* generate slice header */
581
0
                    ps_entropy->i4_error_code = ih264e_generate_slice_header(
582
0
                                    ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
583
0
                    RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
584
0
                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
585
0
                    {
586
0
                        BITSTREAM_BYTE_ALIGN(ps_bitstrm);
587
0
                        BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
588
0
                        ih264e_init_cabac_ctxt(ps_entropy);
589
0
                    }
590
0
                }
591
0
                else
592
0
                {
593
0
                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
594
0
                                    && u4_mb_idx != u4_mb_cnt)
595
0
                    {
596
0
                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
597
0
                    }
598
0
                }
599
0
            }
600
149k
        }
601
602
        /* Ending bitstream offset for header in bits */
603
6.63M
        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
604
6.63M
        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
605
6.63M
                        bitstream_end_offset - bitstream_start_offset;
606
6.63M
    }
607
608
    /* check for eof */
609
149k
    if (u4_mb_idx == u4_mb_cnt)
610
24.4k
    {
611
        /* set end of frame flag */
612
24.4k
        ps_entropy->i4_eof = 1;
613
24.4k
    }
614
124k
    else
615
124k
    {
616
124k
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
617
124k
                        && ps_codec->s_cfg.e_slice_mode
618
61.8k
                                        != IVE_SLICE_MODE_BLOCKS)
619
61.8k
        {
620
61.8k
            bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
621
61.8k
            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
622
61.8k
            bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
623
61.8k
            ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
624
61.8k
                            bitstream_end_offset - bitstream_start_offset;
625
61.8k
        }
626
124k
    }
627
628
149k
    if (ps_entropy->i4_eof)
629
24.4k
    {
630
24.4k
        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
631
24.4k
        if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
632
18.4k
        {
633
            /* mb skip run */
634
18.4k
            if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
635
3.30k
            {
636
3.30k
                if (*ps_entropy->pi4_mb_skip_run)
637
3.30k
                {
638
3.30k
                    PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
639
3.30k
                                 ps_entropy->i4_error_code, "mb skip run");
640
3.22k
                    *ps_entropy->pi4_mb_skip_run = 0;
641
3.22k
                    RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
642
3.22k
                }
643
3.30k
            }
644
            /* put rbsp trailing bits */
645
18.3k
             ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
646
18.3k
             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
647
18.0k
        }
648
6.04k
        else
649
6.04k
        {
650
6.04k
            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
651
6.04k
        }
652
24.1k
        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
653
24.1k
        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
654
24.1k
                        bitstream_end_offset - bitstream_start_offset;
655
656
24.1k
        DEBUG("entropy status %x", ps_entropy->i4_error_code);
657
24.1k
    }
658
659
148k
    return ps_entropy->i4_error_code;
660
149k
}
661
662
/**
663
*******************************************************************************
664
*
665
* @brief Packs header information of a mb in to a buffer
666
*
667
* @par Description:
668
*  After deciding the mode info of a macroblock, the syntax elements
669
*  associated with the mb are packed and stored. The entropy thread unpacks
670
*  this buffer and generates the end bit stream.
671
*
672
* @param[in] ps_proc
673
*  Pointer to the current process context
674
*
675
* @returns error status
676
*
677
* @remarks none
678
*
679
*******************************************************************************
680
*/
681
IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
682
6.64M
{
683
    /* curr mb type */
684
6.64M
    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
685
686
    /* pack mb syntax layer of curr mb (used for entropy coding) */
687
6.64M
    if (u4_mb_type == I4x4)
688
2.90M
    {
689
        /* pointer to mb header storage space */
690
2.90M
        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
691
2.90M
        mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
692
693
        /* temp var */
694
2.90M
        WORD32 i4, byte;
695
696
        /* mb type plus mode */
697
2.90M
        ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
698
699
        /* cbp */
700
2.90M
        ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
701
702
        /* mb qp delta */
703
2.90M
        ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
704
705
        /* sub mb modes */
706
26.0M
        for (i4 = 0; i4 < 16; i4 ++)
707
23.1M
        {
708
23.1M
            byte = 0;
709
710
23.1M
            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
711
23.1M
                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
712
22.7M
            {
713
22.7M
                byte |= 1;
714
22.7M
            }
715
397k
            else
716
397k
            {
717
718
397k
                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
719
397k
                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
720
245k
                {
721
245k
                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
722
245k
                }
723
151k
                else
724
151k
                {
725
151k
                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
726
151k
                }
727
397k
            }
728
729
23.1M
            i4++;
730
731
23.1M
            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
732
23.1M
                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
733
22.8M
            {
734
22.8M
                byte |= 16;
735
22.8M
            }
736
294k
            else
737
294k
            {
738
739
294k
                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
740
294k
                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
741
136k
                {
742
136k
                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
743
136k
                }
744
157k
                else
745
157k
                {
746
157k
                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
747
157k
                }
748
294k
            }
749
750
23.1M
            ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] =  byte;
751
23.1M
        }
752
753
        /* end of mb layer */
754
2.90M
        pu1_ptr += sizeof(mb_hdr_i4x4_t);
755
2.90M
        ps_proc->pv_mb_header_data = pu1_ptr;
756
2.90M
    }
757
3.74M
    else if (u4_mb_type == I16x16)
758
3.62M
    {
759
        /* pointer to mb header storage space */
760
3.62M
        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
761
3.62M
        mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
762
763
        /* mb type plus mode */
764
3.62M
        ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
765
766
        /* cbp */
767
3.62M
        ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
768
769
        /* mb qp delta */
770
3.62M
        ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
771
772
        /* end of mb layer */
773
3.62M
        pu1_ptr += sizeof(mb_hdr_i16x16_t);
774
3.62M
        ps_proc->pv_mb_header_data = pu1_ptr;
775
3.62M
    }
776
116k
    else if (u4_mb_type == P16x16)
777
40.1k
    {
778
        /* pointer to mb header storage space */
779
40.1k
        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
780
40.1k
        mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
781
782
        /* mb type */
783
40.1k
        ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
784
785
        /* cbp */
786
40.1k
        ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
787
788
        /* mb qp delta */
789
40.1k
        ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
790
791
40.1k
        ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
792
793
40.1k
        ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
794
795
        /* end of mb layer */
796
40.1k
        pu1_ptr += sizeof(mb_hdr_p16x16_t);
797
40.1k
        ps_proc->pv_mb_header_data = pu1_ptr;
798
40.1k
    }
799
76.4k
    else if (u4_mb_type == PSKIP)
800
12.0k
    {
801
        /* pointer to mb header storage space */
802
12.0k
        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
803
12.0k
        mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
804
805
        /* mb type */
806
12.0k
        ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
807
808
        /* end of mb layer */
809
12.0k
        pu1_ptr += sizeof(mb_hdr_pskip_t);
810
12.0k
        ps_proc->pv_mb_header_data = pu1_ptr;
811
12.0k
    }
812
64.4k
    else if(u4_mb_type == B16x16)
813
45.7k
    {
814
815
        /* pointer to mb header storage space */
816
45.7k
        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
817
45.7k
        mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
818
819
45.7k
        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
820
821
        /* mb type plus mode */
822
45.7k
        ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
823
824
        /* cbp */
825
45.7k
        ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
826
827
        /* mb qp delta */
828
45.7k
        ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
829
830
        /* l0 & l1 me data */
831
45.7k
        if (u4_pred_mode != PRED_L1)
832
27.4k
        {
833
27.4k
            ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
834
27.4k
                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
835
836
27.4k
            ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
837
27.4k
                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
838
27.4k
        }
839
45.7k
        if (u4_pred_mode != PRED_L0)
840
24.8k
        {
841
24.8k
            ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
842
24.8k
                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
843
844
24.8k
            ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
845
24.8k
                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
846
24.8k
        }
847
848
        /* end of mb layer */
849
45.7k
        pu1_ptr += sizeof(mb_hdr_b16x16_t);
850
45.7k
        ps_proc->pv_mb_header_data = pu1_ptr;
851
852
45.7k
    }
853
18.6k
    else if(u4_mb_type == BDIRECT)
854
13.1k
    {
855
        /* pointer to mb header storage space */
856
13.1k
        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
857
13.1k
        mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
858
859
        /* mb type plus mode */
860
13.1k
        ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
861
862
        /* cbp */
863
13.1k
        ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
864
865
        /* mb qp delta */
866
13.1k
        ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
867
868
        /* end of mb layer */
869
13.1k
        pu1_ptr += sizeof(mb_hdr_bdirect_t);
870
13.1k
        ps_proc->pv_mb_header_data = pu1_ptr;
871
872
13.1k
    }
873
5.44k
    else if(u4_mb_type == BSKIP)
874
5.82k
    {
875
5.82k
        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
876
877
        /* pointer to mb header storage space */
878
5.82k
        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
879
5.82k
        mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
880
881
        /* mb type plus mode */
882
5.82k
        ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
883
884
        /* end of mb layer */
885
5.82k
        pu1_ptr += sizeof(mb_hdr_bskip_t);
886
5.82k
        ps_proc->pv_mb_header_data = pu1_ptr;
887
5.82k
    }
888
889
6.64M
    return IH264E_SUCCESS;
890
6.64M
}
891
892
/**
893
*******************************************************************************
894
*
895
* @brief   update process context after encoding an mb. This involves preserving
896
* the current mb information for later use, initialize the proc ctxt elements to
897
* encode next mb.
898
*
899
* @par Description:
900
*  This function performs house keeping tasks after encoding an mb.
901
*  After encoding an mb, various elements of the process context needs to be
902
*  updated to encode the next mb. For instance, the source, recon and reference
903
*  pointers, mb indices have to be adjusted to the next mb. The slice index of
904
*  the current mb needs to be updated. If mb qp modulation is enabled, then if
905
*  the qp changes the quant param structure needs to be updated. Also to encoding
906
*  the next mb, the current mb info is used as part of mode prediction or mv
907
*  prediction. Hence the current mb info has to preserved at top/top left/left
908
*  locations.
909
*
910
* @param[in] ps_proc
911
*  Pointer to the current process context
912
*
913
* @returns none
914
*
915
* @remarks none
916
*
917
*******************************************************************************
918
*/
919
WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
920
6.66M
{
921
    /* error status */
922
6.66M
    WORD32 error_status = IH264_SUCCESS;
923
924
    /* codec context */
925
6.66M
    codec_t *ps_codec = ps_proc->ps_codec;
926
927
    /* curr mb indices */
928
6.66M
    WORD32 i4_mb_x = ps_proc->i4_mb_x;
929
6.66M
    WORD32 i4_mb_y = ps_proc->i4_mb_y;
930
931
    /* mb syntax elements of neighbors */
932
6.66M
    mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
933
6.66M
    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
934
6.66M
    mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
935
936
    /* curr mb type */
937
6.66M
    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
938
939
    /* curr mb type */
940
6.66M
    UWORD32 u4_is_intra = ps_proc->u4_is_intra;
941
942
    /* width in mbs */
943
6.66M
    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
944
945
    /*height in mbs*/
946
6.66M
    WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
947
948
    /* proc map */
949
6.66M
    UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
950
951
    /* deblk context */
952
6.66M
    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
953
954
    /* deblk bs context */
955
6.66M
    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
956
957
    /* top row motion vector info */
958
6.66M
    enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
959
960
    /* top left mb motion vector */
961
6.66M
    enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
962
963
    /* left mb motion vector */
964
6.66M
    enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
965
966
    /* sub mb modes */
967
6.66M
    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
968
969
    /*************************************************************/
970
    /* During MV prediction, when top right mb is not available, */
971
    /* top left mb info. is used for prediction. Hence the curr  */
972
    /* top, which will be top left for the next mb needs to be   */
973
    /* preserved before updating it with curr mb info.           */
974
    /*************************************************************/
975
976
    /* mb type, mb class, csbp */
977
6.66M
    *ps_top_left_syn = *ps_top_syn;
978
979
6.66M
    if (ps_proc->i4_slice_type != ISLICE)
980
195k
    {
981
        /*****************************************/
982
        /* update top left with top info results */
983
        /*****************************************/
984
        /* mv */
985
195k
        *ps_top_left_mb_pu = *ps_top_row_pu;
986
195k
    }
987
988
    /*************************************************/
989
    /* update top and left with curr mb info results */
990
    /*************************************************/
991
992
    /* mb type */
993
6.66M
    ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
994
995
    /* mb class */
996
6.66M
    ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
997
998
    /* csbp */
999
6.66M
    ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
1000
1001
    /* distortion */
1002
6.66M
    ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
1003
1004
6.66M
    if (u4_is_intra)
1005
6.53M
    {
1006
        /* mb / sub mb modes */
1007
6.53M
        if (I16x16 == u4_mb_type)
1008
3.62M
        {
1009
3.62M
            pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
1010
3.62M
        }
1011
2.90M
        else if (I4x4 == u4_mb_type)
1012
2.90M
        {
1013
2.90M
            ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1014
2.90M
            ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1015
2.90M
        }
1016
18.4E
        else if (I8x8 == u4_mb_type)
1017
0
        {
1018
0
            memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1019
0
            memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1020
0
        }
1021
1022
6.53M
        if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
1023
78.1k
        {
1024
            /* mv */
1025
78.1k
            *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1026
78.1k
        }
1027
1028
6.53M
        *ps_proc->pu4_mb_pu_cnt = 1;
1029
6.53M
    }
1030
130k
    else
1031
130k
    {
1032
        /* mv */
1033
130k
        *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1034
130k
    }
1035
1036
    /*
1037
     * Mark that the MB has been coded intra
1038
     * So that future AIRs can skip it
1039
     */
1040
6.66M
    ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
1041
1042
    /**************************************************/
1043
    /* pack mb header info. for entropy coding        */
1044
    /**************************************************/
1045
6.66M
    ih264e_pack_header_data(ps_proc);
1046
1047
    /* update previous mb qp */
1048
6.66M
    ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1049
1050
    /* store qp */
1051
6.66M
    ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1052
1053
    /*
1054
     * We need to sync the cache to make sure that the nmv content of proc
1055
     * is updated to cache properly
1056
     */
1057
6.66M
    DATA_SYNC();
1058
1059
    /* Just before finishing the row, enqueue the job in to entropy queue.
1060
     * The master thread depending on its convenience shall dequeue it and
1061
     * performs entropy.
1062
     *
1063
     * WARN !! Placing this block post proc map update can cause queuing of
1064
     * entropy jobs in out of order.
1065
     */
1066
6.66M
    if (i4_mb_x == i4_wd_mbs - 1)
1067
173k
    {
1068
        /* job structures */
1069
173k
        job_t s_job;
1070
1071
        /* job class */
1072
173k
        s_job.i4_cmd = CMD_ENTROPY;
1073
1074
        /* number of mbs to be processed in the current job */
1075
173k
        s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1076
1077
        /* job start index x */
1078
173k
        s_job.i2_mb_x = 0;
1079
1080
        /* job start index y */
1081
173k
        s_job.i2_mb_y = ps_proc->i4_mb_y;
1082
1083
        /* proc base idx */
1084
173k
        s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1085
1086
        /* queue the job */
1087
173k
        error_status = ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1088
173k
        if(error_status != IH264_SUCCESS)
1089
0
        {
1090
0
            return error_status;
1091
0
        }
1092
173k
        if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1093
39.2k
            ih264_list_terminate(ps_codec->pv_entropy_jobq);
1094
173k
    }
1095
1096
    /* update intra cost if valid */
1097
6.66M
    if (ps_proc->i4_mb_intra_cost != INT_MAX)
1098
6.63M
    {
1099
6.63M
        ps_codec->pi4_mb_intra_cost[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->i4_mb_intra_cost;
1100
6.63M
    }
1101
1102
    /* update proc map */
1103
6.66M
    pu1_proc_map[i4_mb_x] = 1;
1104
1105
    /**************************************************/
1106
    /* update proc ctxt elements for encoding next mb */
1107
    /**************************************************/
1108
    /* update indices */
1109
6.66M
    i4_mb_x ++;
1110
6.66M
    ps_proc->i4_mb_x = i4_mb_x;
1111
1112
6.66M
    if (ps_proc->i4_mb_x == i4_wd_mbs)
1113
173k
    {
1114
173k
        ps_proc->i4_mb_y++;
1115
173k
        ps_proc->i4_mb_x = 0;
1116
173k
    }
1117
1118
    /* update slice index */
1119
6.66M
    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1120
1121
    /* update buffers pointers */
1122
6.66M
    ps_proc->pu1_src_buf_luma += MB_SIZE;
1123
6.66M
    ps_proc->pu1_rec_buf_luma += MB_SIZE;
1124
6.66M
    ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1125
6.66M
    ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1126
1127
    /*
1128
     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1129
     * the stride per MB is MB_SIZE
1130
     */
1131
6.66M
    ps_proc->pu1_src_buf_chroma += MB_SIZE;
1132
6.66M
    ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1133
6.66M
    ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1134
6.66M
    ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1135
1136
    /* Reset cost, distortion params */
1137
6.66M
    ps_proc->i4_mb_cost = INT_MAX;
1138
6.66M
    ps_proc->i4_mb_intra_cost = INT_MAX;
1139
6.66M
    ps_proc->i4_mb_distortion = SHRT_MAX;
1140
1141
6.66M
    ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1142
1143
6.66M
    ps_proc->pu4_mb_pu_cnt += 1;
1144
1145
    /* Update colocated pu */
1146
6.66M
    if (ps_proc->i4_slice_type == BSLICE)
1147
100k
        ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1148
1149
    /* deblk ctxts */
1150
6.66M
    if (ps_proc->u4_disable_deblock_level != 1)
1151
5.39M
    {
1152
        /* indices */
1153
5.39M
        ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1154
5.39M
        ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1155
1156
#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1157
        ps_deblk->i4_mb_x ++;
1158
1159
        ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1160
        /*
1161
         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1162
         * the stride per MB is MB_SIZE
1163
         */
1164
        ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1165
#endif
1166
5.39M
    }
1167
1168
6.66M
    return error_status;
1169
6.66M
}
1170
1171
/**
1172
*******************************************************************************
1173
*
1174
* @brief   initialize process context.
1175
*
1176
* @par Description:
1177
*  Before dispatching the current job to process thread, the process context
1178
*  associated with the job is initialized. Usually every job aims to encode one
1179
*  row of mb's. Basing on the row indices provided by the job, the process
1180
*  context's buffer ptrs, slice indices and other elements that are necessary
1181
*  during core-coding are initialized.
1182
*
1183
* @param[in] ps_proc
1184
*  Pointer to the current process context
1185
*
1186
* @returns error status
1187
*
1188
* @remarks none
1189
*
1190
*******************************************************************************
1191
*/
1192
IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1193
173k
{
1194
    /* codec context */
1195
173k
    codec_t *ps_codec = ps_proc->ps_codec;
1196
1197
    /* nmb processing context*/
1198
173k
    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1199
1200
    /* indices */
1201
173k
    WORD32 i4_mb_x, i4_mb_y;
1202
1203
    /* strides */
1204
173k
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
1205
173k
    WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1206
173k
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1207
1208
    /* quant params */
1209
173k
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1210
1211
    /* deblk ctxt */
1212
173k
    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1213
1214
    /* deblk bs context */
1215
173k
    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1216
1217
    /* Pointer to mv_buffer of current frame */
1218
173k
    mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1219
1220
    /* Pointers for color space conversion */
1221
173k
    UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1222
1223
    /* Pad the MB to support non standard sizes */
1224
173k
    UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1225
173k
    UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1226
173k
    UWORD16 u2_num_rows = MB_SIZE;
1227
173k
    WORD32 convert_uv_only;
1228
1229
    /********************************************************************/
1230
    /*                            BEGIN INIT                            */
1231
    /********************************************************************/
1232
1233
173k
    i4_mb_x = ps_proc->i4_mb_x;
1234
173k
    i4_mb_y = ps_proc->i4_mb_y;
1235
1236
    /* Number of mbs processed in one loop of process function */
1237
173k
    ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1238
173k
    ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1239
1240
    /* init buffer pointers */
1241
173k
    convert_uv_only = 1;
1242
173k
    if (u4_pad_bottom_sz || u4_pad_right_sz ||
1243
173k
        ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1244
173k
        ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1245
173k
        ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1246
162k
    {
1247
162k
        if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1248
39.2k
            u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1249
162k
        ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1250
162k
        i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1251
162k
        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1252
162k
        convert_uv_only = 0;
1253
162k
    }
1254
11.2k
    else
1255
11.2k
    {
1256
11.2k
        i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1257
11.2k
        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1258
11.2k
    }
1259
1260
1261
173k
    if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1262
173k
        ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1263
173k
        ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1264
173k
        ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1265
173k
        u4_pad_bottom_sz || u4_pad_right_sz)
1266
168k
    {
1267
168k
        if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1268
168k
            (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1269
69.4k
            ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1270
1271
168k
        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1272
168k
        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1273
168k
    }
1274
5.16k
    else
1275
5.16k
    {
1276
5.16k
        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1277
5.16k
        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1278
5.16k
    }
1279
1280
173k
    ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1281
173k
    ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1282
1283
    /* Temporal back and forward reference buffer */
1284
173k
    ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1285
173k
    ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1286
173k
    ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1287
173k
    ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1288
1289
    /*
1290
     * Do color space conversion
1291
     * NOTE : We assume there that the number of MB's to process will not span multiple rows
1292
     */
1293
173k
    switch (ps_codec->s_cfg.e_inp_color_fmt)
1294
173k
    {
1295
29.9k
        case IV_YUV_420SP_UV:
1296
74.6k
        case IV_YUV_420SP_VU:
1297
            /* In case of 420 semi-planar input, copy last few rows to intermediate
1298
               buffer as few SIMD functions access upto 16 more bytes.
1299
               This data will be padded if required */
1300
74.6k
            if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1301
69.4k
            {
1302
69.4k
                WORD32 num_rows = MB_SIZE;
1303
69.4k
                UWORD8 *pu1_src;
1304
69.4k
                UWORD8 *pu1_dst;
1305
69.4k
                WORD32 i;
1306
69.4k
                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1307
69.4k
                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1308
1309
69.4k
                pu1_dst = ps_proc->pu1_src_buf_luma;
1310
1311
69.4k
                if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1312
20.2k
                    num_rows = MB_SIZE - u4_pad_bottom_sz;
1313
985k
                for (i = 0; i < num_rows; i++)
1314
915k
                {
1315
915k
                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1316
915k
                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1317
915k
                    pu1_dst += ps_proc->i4_src_strd;
1318
915k
                }
1319
69.4k
                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1320
69.4k
                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1321
69.4k
                pu1_dst = ps_proc->pu1_src_buf_chroma;
1322
1323
                /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1324
                 * due to interleaved input
1325
                 */
1326
69.4k
                if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1327
20.2k
                    num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1328
49.2k
                else
1329
49.2k
                    num_rows = BLK8x8SIZE;
1330
527k
                for (i = 0; i < num_rows; i++)
1331
457k
                {
1332
457k
                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1333
457k
                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1334
457k
                    pu1_dst += ps_proc->i4_src_chroma_strd;
1335
457k
                }
1336
1337
69.4k
            }
1338
74.6k
            break;
1339
1340
62.9k
        case IV_YUV_420P :
1341
62.9k
            pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1342
62.9k
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1343
1344
62.9k
            pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1345
62.9k
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1346
1347
62.9k
            pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1348
62.9k
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1349
1350
62.9k
            ps_codec->pf_ih264e_conv_420p_to_420sp(
1351
62.9k
                            pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1352
62.9k
                            ps_proc->pu1_src_buf_luma,
1353
62.9k
                            ps_proc->pu1_src_buf_chroma, u2_num_rows,
1354
62.9k
                            ps_codec->s_cfg.u4_disp_wd,
1355
62.9k
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1356
62.9k
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1357
62.9k
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1358
62.9k
                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1359
62.9k
                            convert_uv_only);
1360
62.9k
            break;
1361
1362
36.1k
        case IV_YUV_422ILE :
1363
36.1k
            pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1364
36.1k
                              + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1365
1366
36.1k
            ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1367
36.1k
                            ps_proc->pu1_src_buf_luma,
1368
36.1k
                            ps_proc->pu1_src_buf_chroma,
1369
36.1k
                            ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1370
36.1k
                            ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1371
36.1k
                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1372
36.1k
                            ps_proc->i4_src_chroma_strd,
1373
36.1k
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1374
36.1k
            break;
1375
1376
0
        default:
1377
0
            break;
1378
173k
    }
1379
1380
173k
    if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1381
115k
    {
1382
115k
        UWORD32 u4_pad_wd, u4_pad_ht;
1383
115k
        u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1384
115k
        u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1385
115k
        u4_pad_ht = MB_SIZE;
1386
115k
        if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1387
23.7k
            u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1388
1389
115k
        ih264_pad_right_luma(
1390
115k
                        ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1391
115k
                        ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1392
1393
115k
        ih264_pad_right_chroma(
1394
115k
                        ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1395
115k
                        ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1396
115k
    }
1397
1398
173k
    if (ps_proc->i4_mb_y && ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) {
1399
27.3k
        UWORD8 *pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] +
1400
27.3k
                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE) -
1401
27.3k
                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1402
27.3k
        UWORD8 *pu1_dst = ps_proc->pu1_src_buf_luma - ps_proc->i4_src_strd;
1403
27.3k
        memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1404
27.3k
        if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) {
1405
22.4k
            pu1_dst += ps_codec->s_cfg.u4_disp_wd;
1406
22.4k
            memset(pu1_dst, pu1_dst[-1], u4_pad_right_sz);
1407
22.4k
        }
1408
27.3k
    }
1409
1410
    /* pad bottom edge */
1411
173k
    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1412
26.0k
    {
1413
26.0k
        ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1414
26.0k
                         ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1415
1416
26.0k
        ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1417
26.0k
                         ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1418
26.0k
    }
1419
1420
1421
    /* packed mb coeff data */
1422
173k
    ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1423
1424
    /* packed mb header data */
1425
173k
    ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1426
1427
    /* slice index */
1428
173k
    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1429
1430
    /*********************************************************************/
1431
    /* ih264e_init_quant_params() routine is called at the pic init level*/
1432
    /* this would have initialized the qp.                               */
1433
    /* TODO_LATER: currently it is assumed that quant params donot change*/
1434
    /* across mb's. When they do calculate update ps_qp_params accordingly*/
1435
    /*********************************************************************/
1436
1437
    /* init mv buffer ptr */
1438
173k
    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1439
173k
                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1440
1441
    /* Init co-located mv buffer */
1442
173k
    ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1443
173k
                        ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1444
1445
173k
    if (i4_mb_y == 0)
1446
40.9k
    {
1447
40.9k
        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1448
40.9k
    }
1449
132k
    else
1450
132k
    {
1451
132k
        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1452
132k
                                    ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1453
132k
    }
1454
1455
173k
    ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1456
1457
    /* mb type */
1458
173k
    ps_proc->u4_mb_type = I16x16;
1459
1460
    /* lambda */
1461
173k
    if (ps_codec->pic_type == PIC_B)
1462
35.8k
    {
1463
35.8k
        ps_proc->u4_lambda = gu1_qp_lambdaB[ps_qp_params->u1_mb_qp];
1464
35.8k
    }
1465
137k
    else
1466
137k
    {
1467
137k
        ps_proc->u4_lambda = gu1_qp_lambdaIP[ps_qp_params->u1_mb_qp];
1468
137k
    }
1469
1470
    /* mb distortion */
1471
173k
    ps_proc->i4_mb_distortion = SHRT_MAX;
1472
1473
173k
    if (i4_mb_x == 0)
1474
173k
    {
1475
173k
        ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1476
1477
173k
        ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1478
1479
173k
        ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1480
1481
173k
        if (i4_mb_y == 0)
1482
40.9k
        {
1483
40.9k
            memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1484
40.9k
        }
1485
173k
    }
1486
1487
    /* mb cost */
1488
173k
    ps_proc->i4_mb_cost = INT_MAX;
1489
173k
    ps_proc->i4_mb_intra_cost = INT_MAX;
1490
1491
    /**********************/
1492
    /* init deblk context */
1493
    /**********************/
1494
173k
    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1495
    /* deblk lags the current mb proc by 1 row */
1496
    /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1497
    /* Hence to deblk MB 0 of row 0, you have to wait till MB 0 of row 1 is encoded. */
1498
    /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1499
173k
    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1500
1501
    /* buffer ptrs */
1502
173k
    ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1503
173k
    ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1504
1505
    /* init deblk bs context */
1506
    /* mb indices */
1507
173k
    ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1508
173k
    ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1509
1510
    /* init n_mb_process  context */
1511
173k
    ps_n_mb_ctxt->i4_mb_x = 0;
1512
173k
    ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1513
173k
    ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1514
1515
173k
    return IH264E_SUCCESS;
1516
173k
}
1517
1518
/**
1519
*******************************************************************************
1520
*
1521
* @brief This function performs luma & chroma padding
1522
*
1523
* @par Description:
1524
*
1525
* @param[in] ps_proc
1526
*  Process context corresponding to the job
1527
*
1528
* @param[in] pu1_curr_pic_luma
1529
*  Pointer to luma buffer
1530
*
1531
* @param[in] pu1_curr_pic_chroma
1532
*  Pointer to chroma buffer
1533
*
1534
* @param[in] i4_mb_x
1535
*  mb index x
1536
*
1537
* @param[in] i4_mb_y
1538
*  mb index y
1539
*
1540
*  @param[in] i4_pad_ht
1541
*  number of rows to be padded
1542
*
1543
* @returns  error status
1544
*
1545
* @remarks none
1546
*
1547
*******************************************************************************
1548
*/
1549
IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1550
                                       UWORD8 *pu1_curr_pic_luma,
1551
                                       UWORD8 *pu1_curr_pic_chroma,
1552
                                       WORD32 i4_mb_x,
1553
                                       WORD32 i4_mb_y,
1554
                                       WORD32 i4_pad_ht)
1555
0
{
1556
    /* codec context */
1557
0
    codec_t *ps_codec = ps_proc->ps_codec;
1558
1559
    /* strides */
1560
0
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1561
1562
0
    if (i4_mb_x == 0)
1563
0
    {
1564
        /* padding left luma */
1565
0
        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1566
1567
        /* padding left chroma */
1568
0
        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1569
0
    }
1570
0
    if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1571
0
    {
1572
        /* padding right luma */
1573
0
        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1574
1575
        /* padding right chroma */
1576
0
        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1577
1578
0
        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1579
0
        {
1580
0
            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1581
0
            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1582
1583
            /* padding bottom luma */
1584
0
            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1585
1586
            /* padding bottom chroma */
1587
0
            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1588
0
        }
1589
0
    }
1590
1591
0
    if (i4_mb_y == 0)
1592
0
    {
1593
0
        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1594
0
        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1595
0
        WORD32 wd = MB_SIZE;
1596
1597
0
        if (i4_mb_x == 0)
1598
0
        {
1599
0
            pu1_rec_luma -= PAD_LEFT;
1600
0
            pu1_rec_chroma -= PAD_LEFT;
1601
1602
0
            wd += PAD_LEFT;
1603
0
        }
1604
0
        if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1605
0
        {
1606
0
            wd += PAD_RIGHT;
1607
0
        }
1608
1609
        /* padding top luma */
1610
0
        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1611
1612
        /* padding top chroma */
1613
0
        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1614
0
    }
1615
1616
0
    return IH264E_SUCCESS;
1617
0
}
1618
1619
/**
1620
*******************************************************************************
1621
*
1622
* @brief This function performs deblocking, padding and halfpel generation for
1623
*  'n' MBs
1624
*
1625
* @par Description:
1626
*
1627
* @param[in] ps_proc
1628
*  Process context corresponding to the job
1629
*
1630
* @param[in] pu1_curr_pic_luma
1631
* Current MB being processed(Luma)
1632
*
1633
* @param[in] pu1_curr_pic_chroma
1634
* Current MB being processed(Chroma)
1635
*
1636
* @param[in] i4_mb_x
1637
* Column value of current MB processed
1638
*
1639
* @param[in] i4_mb_y
1640
* Curent row processed
1641
*
1642
* @returns  error status
1643
*
1644
* @remarks none
1645
*
1646
*******************************************************************************
1647
*/
1648
IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1649
                                                     UWORD8 *pu1_curr_pic_luma,
1650
                                                     UWORD8 *pu1_curr_pic_chroma,
1651
                                                     WORD32 i4_mb_x,
1652
                                                     WORD32 i4_mb_y)
1653
6.56M
{
1654
    /* codec context */
1655
6.56M
    codec_t *ps_codec = ps_proc->ps_codec;
1656
1657
    /* n_mb processing context */
1658
6.56M
    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1659
1660
    /* deblk context */
1661
6.56M
    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1662
1663
    /* strides */
1664
6.56M
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1665
1666
    /* loop variables */
1667
6.56M
    WORD32 row, i, j, col;
1668
1669
    /* Padding Width */
1670
6.56M
    UWORD32 u4_pad_wd;
1671
1672
    /* deblk_map of the row being deblocked */
1673
6.56M
    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1674
1675
    /* deblk_map_previous row */
1676
6.56M
    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1677
1678
6.56M
    WORD32 u4_pad_top = 0;
1679
1680
6.56M
    WORD32 u4_deblk_prev_row = 0;
1681
1682
    /* Number of mbs to be processed */
1683
6.56M
    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1684
1685
    /* Number of mbs  actually processed
1686
     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1687
6.56M
    WORD32 i4_n_mb_process_count = 0;
1688
1689
6.56M
    UWORD8 *pu1_pad_bottom_src = NULL;
1690
1691
6.56M
    UWORD8 *pu1_pad_src_luma = NULL;
1692
6.56M
    UWORD8 *pu1_pad_src_chroma = NULL;
1693
1694
6.56M
    if (ps_proc->u4_disable_deblock_level == 1)
1695
1.22M
    {
1696
        /* If left most MB is processed, then pad left */
1697
1.22M
        if (i4_mb_x == 0)
1698
30.8k
        {
1699
            /* padding left luma */
1700
30.8k
            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1701
1702
            /* padding left chroma */
1703
30.8k
            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1704
30.8k
        }
1705
        /*last col*/
1706
1.22M
        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1707
30.8k
        {
1708
            /* padding right luma */
1709
30.8k
            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1710
1711
            /* padding right chroma */
1712
30.8k
            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1713
30.8k
        }
1714
1.22M
    }
1715
1716
6.56M
    if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1717
6.43M
    {
1718
        /* if number of mb's to be processed are less than 'N', go back.
1719
         * exception to the above clause is end of row */
1720
6.43M
        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1721
6.31M
        {
1722
6.31M
            return IH264E_SUCCESS;
1723
6.31M
        }
1724
120k
        else
1725
120k
        {
1726
120k
            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1727
1728
            /* performing deblocking for required number of MBs */
1729
120k
            if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1730
89.4k
            {
1731
89.4k
                u4_deblk_prev_row = 1;
1732
1733
                /* checking whether the top rows are deblocked */
1734
5.32M
                for (col = 0; col < i4_n_mb_process_count; col++)
1735
5.23M
                {
1736
5.23M
                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1737
5.23M
                }
1738
1739
                /* checking whether the top right MB is deblocked */
1740
89.4k
                if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1741
0
                {
1742
0
                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1743
0
                }
1744
1745
                /* Top or Top right MBs not deblocked */
1746
89.4k
                if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1747
0
                {
1748
0
                    return IH264E_SUCCESS;
1749
0
                }
1750
1751
5.32M
                for (row = 0; row < i4_n_mb_process_count; row++)
1752
5.23M
                {
1753
5.23M
                    ih264e_deblock_mb(ps_proc, ps_deblk);
1754
1755
5.23M
                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1756
1757
5.23M
                    if (ps_deblk->i4_mb_y > 0)
1758
5.13M
                    {
1759
5.13M
                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1760
78.6k
                        {
1761
                            /* padding left luma */
1762
78.6k
                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1763
1764
                            /* padding left chroma */
1765
78.6k
                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1766
78.6k
                        }
1767
1768
5.13M
                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1769
78.6k
                        {
1770
                            /* padding right luma */
1771
78.6k
                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1772
1773
                            /* padding right chroma */
1774
78.6k
                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1775
78.6k
                        }
1776
5.13M
                    }
1777
5.23M
                    ps_deblk->i4_mb_x++;
1778
1779
5.23M
                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1780
5.23M
                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1781
1782
5.23M
                }
1783
89.4k
            }
1784
30.8k
            else if(i4_mb_y > 0)
1785
22.1k
            {
1786
22.1k
                ps_deblk->i4_mb_x += i4_n_mb_process_count;
1787
1788
22.1k
                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1789
22.1k
                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1790
22.1k
            }
1791
1792
120k
            if (i4_mb_y == 2)
1793
3.70k
            {
1794
3.70k
                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1795
3.70k
                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1796
1797
3.70k
                if (ps_n_mb_ctxt->i4_mb_x == 0)
1798
3.70k
                {
1799
3.70k
                    u4_pad_wd += PAD_LEFT;
1800
3.70k
                    u4_pad_top = -PAD_LEFT;
1801
3.70k
                }
1802
1803
3.70k
                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1804
3.70k
                {
1805
3.70k
                    u4_pad_wd += PAD_RIGHT;
1806
3.70k
                }
1807
1808
                /* padding top luma */
1809
3.70k
                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1810
1811
                /* padding top chroma */
1812
3.70k
                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1813
3.70k
            }
1814
1815
120k
            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1816
1817
120k
            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1818
120k
            {
1819
120k
                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1820
24.6k
                {
1821
                    /* Bottom Padding is done in one stretch for the entire width */
1822
24.6k
                    if (ps_proc->u4_disable_deblock_level != 1)
1823
16.3k
                    {
1824
16.3k
                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1825
1826
16.3k
                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1827
1828
16.3k
                        ps_n_mb_ctxt->i4_mb_x = 0;
1829
16.3k
                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1830
16.3k
                        ps_deblk->i4_mb_x = 0;
1831
16.3k
                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1832
1833
                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1834
16.3k
                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1835
1836
16.3k
                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1837
1838
16.3k
                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1839
1840
32.7k
                        for (i = 0; i < j; i++)
1841
16.3k
                        {
1842
122k
                            for (col = 0; col < i4_n_mbs; col++)
1843
106k
                            {
1844
106k
                                ih264e_deblock_mb(ps_proc, ps_deblk);
1845
1846
106k
                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1847
1848
106k
                                ps_deblk->i4_mb_x++;
1849
106k
                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1850
106k
                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1851
106k
                                ps_n_mb_ctxt->i4_mb_x++;
1852
106k
                            }
1853
16.3k
                        }
1854
1855
16.3k
                        for (col = 0; col < i4_n_mb_process_count; col++)
1856
0
                        {
1857
0
                            ih264e_deblock_mb(ps_proc, ps_deblk);
1858
1859
0
                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1860
1861
0
                            ps_deblk->i4_mb_x++;
1862
0
                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1863
0
                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1864
0
                            ps_n_mb_ctxt->i4_mb_x++;
1865
0
                        }
1866
1867
16.3k
                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1868
1869
16.3k
                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1870
1871
                        /* padding left luma */
1872
16.3k
                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1873
1874
                        /* padding left chroma */
1875
16.3k
                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1876
1877
16.3k
                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1878
16.3k
                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1879
1880
                        /* padding left luma */
1881
16.3k
                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1882
1883
                        /* padding left chroma */
1884
16.3k
                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1885
1886
16.3k
                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1887
1888
16.3k
                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1889
1890
                        /* padding right luma */
1891
16.3k
                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1892
1893
                        /* padding right chroma */
1894
16.3k
                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1895
1896
16.3k
                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1897
16.3k
                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1898
1899
                        /* padding right luma */
1900
16.3k
                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1901
1902
                        /* padding right chroma */
1903
16.3k
                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1904
1905
16.3k
                    }
1906
1907
                    /* In case height is less than 2 MBs pad top */
1908
24.6k
                    if (ps_proc->i4_ht_mbs <= 2)
1909
20.9k
                    {
1910
20.9k
                        UWORD8 *pu1_pad_top_src;
1911
                        /* padding top luma */
1912
20.9k
                        pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1913
20.9k
                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1914
1915
                        /* padding top chroma */
1916
20.9k
                        pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1917
20.9k
                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1918
20.9k
                    }
1919
1920
                    /* padding bottom luma */
1921
24.6k
                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1922
24.6k
                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1923
1924
                    /* padding bottom chroma */
1925
24.6k
                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1926
24.6k
                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1927
24.6k
                }
1928
120k
            }
1929
120k
        }
1930
6.43M
    }
1931
1932
248k
    return IH264E_SUCCESS;
1933
6.56M
}
1934
1935
1936
/**
1937
*******************************************************************************
1938
*
1939
* @brief This function performs luma & chroma encoding for a set of mb's.
1940
*
1941
* @par Description:
1942
*  The mb to be coded is taken and is evaluated over a predefined set of modes
1943
*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1944
*  is selected and using intra/inter prediction filters, prediction is carried out.
1945
*  The deviation between src and pred signal constitutes error signal. This error
1946
*  signal is transformed (hierarchical transform if necessary) and quantized. The
1947
*  quantized residue is packed into the entropy buffer for entropy coding. This is
1948
*  repeated for all the mb's enlisted under the job.
1949
*
1950
* @param[in] ps_proc
1951
*  Process context corresponding to the job
1952
*
1953
* @returns  error status
1954
*
1955
* @remarks none
1956
*
1957
*******************************************************************************
1958
*/
1959
WORD32 ih264e_process(process_ctxt_t *ps_proc)
1960
173k
{
1961
    /* error status */
1962
173k
    WORD32 error_status = IH264_SUCCESS;
1963
1964
    /* codec context */
1965
173k
    codec_t *ps_codec = ps_proc->ps_codec;
1966
1967
    /* cbp luma, chroma */
1968
173k
    UWORD32 u4_cbp_l, u4_cbp_c;
1969
1970
    /* width in mbs */
1971
173k
    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1972
1973
    /* loop var */
1974
173k
    WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1975
1976
    /* valid modes */
1977
173k
    UWORD32 u4_valid_modes = 0;
1978
1979
    /* gate threshold */
1980
173k
    WORD32 i4_gate_threshold = 0;
1981
1982
    /* is intra */
1983
173k
    WORD32 luma_idx, chroma_idx, is_intra;
1984
1985
    /* temp variables */
1986
173k
    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1987
1988
    /*
1989
     * list of modes for evaluation
1990
     * -------------------------------------------------------------------------
1991
     * Note on enabling I4x4 and I16x16
1992
     * At very low QP's the hadamard transform in I16x16 will push up the maximum
1993
     * coeff value very high. CAVLC may not be able to represent the value and
1994
     * hence the stream may not be decodable in some clips.
1995
     * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1996
     */
1997
173k
    if (ps_proc->i4_slice_type == ISLICE)
1998
101k
    {
1999
101k
        if (ps_proc->u4_frame_qp > 10)
2000
65.2k
        {
2001
            /* enable intra 16x16 */
2002
65.2k
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2003
2004
            /* enable intra 8x8 */
2005
65.2k
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
2006
65.2k
        }
2007
2008
        /* enable intra 4x4 */
2009
101k
        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2010
101k
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2011
2012
101k
    }
2013
72.3k
    else if (ps_proc->i4_slice_type == PSLICE)
2014
36.5k
    {
2015
36.5k
        if (ps_proc->u4_frame_qp > 10)
2016
22.2k
        {
2017
            /* enable intra 16x16 */
2018
18.4E
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2019
22.2k
        }
2020
2021
        /* enable intra 4x4 */
2022
36.5k
        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2023
6.16k
        {
2024
6.16k
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2025
6.16k
        }
2026
36.5k
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2027
2028
        /* enable inter P16x16 */
2029
36.5k
        u4_valid_modes |= (1 << P16x16);
2030
36.5k
    }
2031
35.8k
    else if (ps_proc->i4_slice_type == BSLICE)
2032
35.8k
    {
2033
35.8k
        if (ps_proc->u4_frame_qp > 10)
2034
22.8k
        {
2035
            /* enable intra 16x16 */
2036
22.8k
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2037
22.8k
        }
2038
2039
        /* enable intra 4x4 */
2040
35.8k
        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2041
3.78k
        {
2042
3.78k
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2043
3.78k
        }
2044
35.8k
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2045
2046
        /* enable inter B16x16 */
2047
35.8k
        u4_valid_modes |= (1 << B16x16);
2048
35.8k
    }
2049
2050
    /* init entropy */
2051
173k
    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
2052
173k
    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
2053
173k
    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
2054
2055
    /* compute recon when :
2056
     *   1. current frame is to be used as a reference
2057
     *   2. dump recon for bit stream sanity check
2058
     */
2059
173k
    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
2060
173k
                                ps_codec->s_cfg.u4_enable_recon ||
2061
173k
                                ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR;
2062
2063
    /* Encode 'n' macroblocks,
2064
     * 'n' being the number of mbs dictated by current proc ctxt */
2065
6.83M
    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
2066
6.64M
    {
2067
        /* since we have not yet found sad, we have not yet got min sad */
2068
        /* we need to initialize these variables for each MB */
2069
        /* TODO how to get the min sad into the codec */
2070
6.64M
        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2071
6.64M
        ps_proc->u4_min_sad_reached = 0;
2072
2073
        /* mb analysis */
2074
6.64M
        {
2075
            /* temp var */
2076
6.64M
            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2077
2078
            /* force intra refresh ? */
2079
6.64M
            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2080
6.64M
                            (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2081
2082
            /* evaluate inter 16x16 modes */
2083
6.64M
            if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2084
195k
            {
2085
                /* compute nmb me */
2086
195k
                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2087
72.3k
                {
2088
72.3k
                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2089
72.3k
                                                       i4_wd_mbs - ps_proc->i4_mb_x));
2090
72.3k
                }
2091
2092
                /* set pointers to ME data appropriately for other modules to use */
2093
195k
                {
2094
195k
                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2095
2096
                    /* get the min sad condition for current mb */
2097
195k
                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2098
195k
                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2099
195k
                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2100
195k
                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2101
195k
                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2102
2103
195k
                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2104
195k
                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2105
195k
                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2106
2107
                    /* get the best sub pel buffer */
2108
195k
                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2109
195k
                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2110
195k
                }
2111
195k
                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2112
195k
            }
2113
6.44M
            else
2114
6.44M
            {
2115
                /* Derive neighbor availability for the current macroblock */
2116
6.44M
                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2117
2118
6.44M
                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2119
6.44M
            }
2120
2121
            /*
2122
             * If air says intra, we need to force the following code path to evaluate intra
2123
             * The easy way is just to say that the inter cost is too much
2124
             */
2125
6.64M
            if (!i4_air_enable_inter)
2126
593k
            {
2127
593k
                ps_proc->u4_min_sad_reached = 0;
2128
593k
                ps_proc->i4_mb_cost = INT_MAX;
2129
593k
                ps_proc->i4_mb_distortion = INT_MAX;
2130
593k
            }
2131
6.04M
            else if (ps_proc->u4_mb_type == PSKIP)
2132
0
            {
2133
0
                goto UPDATE_MB_INFO;
2134
0
            }
2135
2136
            /* wait until the proc of [top + 1] mb is computed.
2137
             * We wait till the proc dependencies are satisfied */
2138
6.64M
             if(ps_proc->i4_mb_y > 0)
2139
6.47M
             {
2140
                /* proc map */
2141
6.47M
                UWORD8  *pu1_proc_map_top;
2142
2143
6.47M
                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2144
2145
27.8M
                while (1)
2146
27.8M
                {
2147
27.8M
                    volatile UWORD8 *pu1_buf;
2148
27.8M
                    WORD32 idx = i4_mb_idx + 1;
2149
2150
27.8M
                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2151
27.8M
                    pu1_buf =  pu1_proc_map_top + idx;
2152
27.8M
                    if(*pu1_buf)
2153
6.47M
                        break;
2154
21.4M
                    ithread_yield();
2155
21.4M
                }
2156
6.47M
            }
2157
2158
            /* If we already have the minimum sad, there is no point in searching for sad again */
2159
6.64M
            if (ps_proc->u4_min_sad_reached == 0 || ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)
2160
6.65M
            {
2161
                /* intra gating in inter slices */
2162
                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2163
6.65M
                if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2164
96.1k
                {
2165
                    /* distortion of neighboring blocks */
2166
96.1k
                    WORD32 i4_distortion[4];
2167
2168
96.1k
                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2169
2170
96.1k
                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2171
2172
96.1k
                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2173
2174
96.1k
                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2175
2176
96.1k
                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2177
2178
96.1k
                }
2179
2180
2181
                /* If we are going to force intra we need to evaluate intra irrespective of gating */
2182
6.65M
                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2183
6.61M
                {
2184
                    /* evaluate intra 4x4 modes */
2185
6.61M
                    if (u4_valid_modes & (1 << I4x4))
2186
5.25M
                    {
2187
5.25M
                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2188
2.13M
                        {
2189
2.13M
                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2190
2.13M
                        }
2191
3.11M
                        else
2192
3.11M
                        {
2193
3.11M
                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2194
3.11M
                        }
2195
5.25M
                    }
2196
2197
                    /* evaluate intra 16x16 modes */
2198
6.61M
                    if (u4_valid_modes & (1 << I16x16))
2199
4.28M
                    {
2200
4.28M
                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2201
4.28M
                    }
2202
2203
                    /* evaluate intra 8x8 modes */
2204
6.61M
                    if (u4_valid_modes & (1 << I8x8))
2205
0
                    {
2206
0
                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2207
0
                    }
2208
2209
6.61M
                }
2210
6.65M
            }
2211
6.64M
        }
2212
2213
        /* is intra */
2214
6.64M
        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2215
6.52M
        {
2216
6.52M
            luma_idx = ps_proc->u4_mb_type;
2217
6.52M
            chroma_idx = 0;
2218
6.52M
            is_intra = 1;
2219
2220
            /* evaluate chroma blocks for intra */
2221
6.52M
            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2222
6.52M
        }
2223
120k
        else
2224
120k
        {
2225
120k
            luma_idx = 3;
2226
120k
            chroma_idx = 1;
2227
120k
            is_intra = 0;
2228
120k
        }
2229
6.64M
        ps_proc->u4_is_intra = is_intra;
2230
6.64M
        ps_proc->ps_pu->b1_intra_flag = is_intra;
2231
2232
        /* redo MV pred of neighbors in the case intra mb */
2233
        /* TODO : currently called unconditionally, needs to be called only in the case of intra
2234
         * to modify neighbors */
2235
6.64M
        if (ps_proc->i4_slice_type != ISLICE)
2236
195k
        {
2237
195k
            ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2238
195k
        }
2239
2240
        /* Perform luma mb core coding */
2241
6.64M
        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2242
2243
        /* Perform chroma mb core coding */
2244
6.64M
        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2245
2246
        /* coded block pattern */
2247
6.64M
        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2248
2249
6.64M
        if (!ps_proc->u4_is_intra)
2250
116k
        {
2251
116k
            if (ps_proc->i4_slice_type == BSLICE)
2252
64.7k
            {
2253
64.7k
                if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2254
19.0k
                {
2255
19.0k
                    ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2256
19.0k
                }
2257
64.7k
            }
2258
52.2k
            else if(!ps_proc->u4_cbp)
2259
15.2k
            {
2260
15.2k
                if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2261
12.0k
                {
2262
12.0k
                    ps_proc->u4_mb_type = PSKIP;
2263
12.0k
                }
2264
15.2k
            }
2265
116k
        }
2266
2267
6.66M
UPDATE_MB_INFO:
2268
2269
        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2270
6.66M
        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2271
2272
        /**********************************************************************/
2273
        /* if disable deblock level is '0' this implies enable deblocking for */
2274
        /* all edges of all macroblocks with out any restrictions             */
2275
        /*                                                                    */
2276
        /* if disable deblock level is '1' this implies disable deblocking for*/
2277
        /* all edges of all macroblocks with out any restrictions             */
2278
        /*                                                                    */
2279
        /* if disable deblock level is '2' this implies enable deblocking for */
2280
        /* all edges of all macroblocks except edges overlapping with slice   */
2281
        /* boundaries. This option is not currently supported by the encoder  */
2282
        /* hence the slice map should be of no significance to perform debloc */
2283
        /* king                                                               */
2284
        /**********************************************************************/
2285
2286
6.66M
        if (ps_proc->u4_compute_recon)
2287
6.56M
        {
2288
            /* deblk context */
2289
            /* src pointers */
2290
6.56M
            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2291
6.56M
            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2292
2293
            /* src indices */
2294
6.56M
            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2295
6.56M
            UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2296
2297
            /* compute blocking strength */
2298
6.56M
            if (ps_proc->u4_disable_deblock_level != 1)
2299
5.33M
            {
2300
5.33M
                ih264e_compute_bs(ps_proc);
2301
5.33M
            }
2302
2303
            /* nmb deblocking and hpel and padding */
2304
6.56M
            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2305
6.56M
                                                  pu1_cur_pic_chroma, i4_mb_x,
2306
6.56M
                                                  i4_mb_y);
2307
6.56M
        }
2308
2309
        /* update the context for coding the next mb */
2310
6.66M
        error_status = ih264e_update_proc_ctxt(ps_proc);
2311
6.66M
        if(error_status != IH264E_SUCCESS)
2312
0
        {
2313
0
            return error_status;
2314
0
        }
2315
        /* Once the last row is processed, mark the buffer status appropriately */
2316
6.66M
        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2317
39.2k
        {
2318
            /* Pointer to current picture buffer structure */
2319
39.2k
            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2320
2321
            /* Pointer to current picture's mv buffer structure */
2322
39.2k
            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2323
2324
            /**********************************************************************/
2325
            /* if disable deblock level is '0' this implies enable deblocking for */
2326
            /* all edges of all macroblocks with out any restrictions             */
2327
            /*                                                                    */
2328
            /* if disable deblock level is '1' this implies disable deblocking for*/
2329
            /* all edges of all macroblocks with out any restrictions             */
2330
            /*                                                                    */
2331
            /* if disable deblock level is '2' this implies enable deblocking for */
2332
            /* all edges of all macroblocks except edges overlapping with slice   */
2333
            /* boundaries. This option is not currently supported by the encoder  */
2334
            /* hence the slice map should be of no significance to perform debloc */
2335
            /* king                                                               */
2336
            /**********************************************************************/
2337
39.2k
            error_status = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
2338
39.2k
                                                ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2339
39.2k
            if(error_status != IH264E_SUCCESS)
2340
0
            {
2341
0
                return error_status;
2342
0
            }
2343
39.2k
            error_status = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
2344
39.2k
                                                ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2345
39.2k
            if(error_status != IH264E_SUCCESS)
2346
0
            {
2347
0
                return error_status;
2348
0
            }
2349
39.2k
            if (ps_codec->s_cfg.u4_enable_recon)
2350
0
            {
2351
                /* pic cnt */
2352
0
                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2353
2354
                /* rec buffers */
2355
0
                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2356
2357
                /* is last? */
2358
0
                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2359
2360
                /* frame time stamp */
2361
0
                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2362
0
                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2363
0
            }
2364
2365
39.2k
        }
2366
6.66M
    }
2367
2368
192k
    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2369
2370
192k
    return error_status;
2371
173k
}
2372
2373
/**
2374
*******************************************************************************
2375
*
2376
* @brief
2377
*  Function to update rc context after encoding
2378
*
2379
* @par   Description
2380
*  This function updates the rate control context after the frame is encoded.
2381
*  Number of bits consumed by the current frame, frame distortion, frame cost,
2382
*  number of intra/inter mb's, ... are passed on to rate control context for
2383
*  updating the rc model.
2384
*
2385
* @param[in] ps_codec
2386
*  Handle to codec context
2387
*
2388
* @param[in] ctxt_sel
2389
*  frame context selector
2390
*
2391
* @param[in] pic_cnt
2392
*  pic count
2393
*
2394
* @returns i4_stuffing_byte
2395
*  number of stuffing bytes (if necessary)
2396
*
2397
* @remarks
2398
*
2399
*******************************************************************************
2400
*/
2401
WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2402
40.9k
{
2403
    /* proc set base idx */
2404
40.9k
    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2405
2406
    /* proc ctxt */
2407
40.9k
    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2408
2409
    /* entropy context */
2410
40.9k
    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
2411
2412
    /* Bitstream structure */
2413
40.9k
    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
2414
2415
    /* frame qp */
2416
40.9k
    UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2417
2418
    /* cbr rc return status */
2419
40.9k
    WORD32 i4_stuffing_byte = 0;
2420
2421
    /* current frame stats */
2422
40.9k
    frame_info_t s_frame_info;
2423
40.9k
    picture_type_e rc_pic_type;
2424
2425
    /* temp var */
2426
40.9k
    WORD32 i, j;
2427
2428
    /********************************************************************/
2429
    /*                            BEGIN INIT                            */
2430
    /********************************************************************/
2431
2432
    /* init frame info */
2433
40.9k
    irc_init_frame_info(&s_frame_info);
2434
2435
    /* get frame info */
2436
136k
    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2437
95.4k
    {
2438
        /*****************************************************************/
2439
        /* One frame can be encoded by max of u4_num_cores threads       */
2440
        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2441
        /* u4_num_cores threads                                          */
2442
        /*****************************************************************/
2443
286k
        for (j = 0; j< MAX_MB_TYPE; j++)
2444
190k
        {
2445
190k
            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2446
2447
190k
            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2448
2449
190k
            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2450
190k
        }
2451
2452
95.4k
        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2453
2454
95.4k
        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2455
2456
        /*****************************************************************/
2457
        /* gather number of residue and header bits consumed by the frame*/
2458
        /*****************************************************************/
2459
95.4k
        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2460
95.4k
    }
2461
2462
    /* get pic type */
2463
40.9k
    switch (ps_codec->pic_type)
2464
40.9k
    {
2465
1.78k
        case PIC_I:
2466
11.0k
        case PIC_IDR:
2467
11.0k
            rc_pic_type = I_PIC;
2468
11.0k
            break;
2469
15.1k
        case PIC_P:
2470
15.1k
            rc_pic_type = P_PIC;
2471
15.1k
            break;
2472
14.6k
        case PIC_B:
2473
14.6k
            rc_pic_type = B_PIC;
2474
14.6k
            break;
2475
0
        default:
2476
0
            assert(0);
2477
0
            break;
2478
40.9k
    }
2479
2480
    /* update rc lib with current frame stats */
2481
40.9k
    i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2482
40.9k
                                          &(s_frame_info),
2483
40.9k
                                          ps_codec->s_rate_control.pps_pd_frm_rate,
2484
40.9k
                                          ps_codec->s_rate_control.pps_time_stamp,
2485
40.9k
                                          ps_codec->s_rate_control.pps_frame_time,
2486
40.9k
                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2487
40.9k
                                          &rc_pic_type,
2488
40.9k
                                          i4_is_first_frm,
2489
40.9k
                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2490
40.9k
                                          u1_frame_qp,
2491
40.9k
                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
2492
40.9k
                                          &ps_codec->s_rate_control.i4_avg_activity);
2493
2494
    /* cbr rc - house keeping */
2495
40.9k
    if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
2496
0
    {
2497
0
         ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
2498
         // If an IDR frame was skipped, restore frame num and IDR pic id
2499
0
         if (ps_codec->u4_is_idr == 1)
2500
0
         {
2501
0
             ps_codec->i4_frame_num = ps_codec->i4_restore_frame_num;
2502
0
             ps_codec->i4_idr_pic_id--;
2503
0
         }
2504
0
    }
2505
40.9k
    else if (i4_stuffing_byte)
2506
0
    {
2507
        /* add filler nal units */
2508
0
        ps_entropy->i4_error_code = ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuffing_byte);
2509
0
    }
2510
2511
    /*
2512
     * Frame number is to be incremented only if the current frame is a
2513
     * reference frame. After each successful frame encode, we increment
2514
     * frame number by 1
2515
     */
2516
40.9k
    if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
2517
40.9k
                    && ps_codec->u4_is_curr_frm_ref)
2518
26.2k
    {
2519
26.2k
        ps_codec->i4_frame_num++;
2520
26.2k
    }
2521
    /********************************************************************/
2522
    /*      signal the output                                           */
2523
    /********************************************************************/
2524
40.9k
    ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
2525
40.9k
                    ps_entropy->ps_bitstrm->u4_strm_buf_offset;
2526
2527
40.9k
    return ps_entropy->i4_error_code;
2528
40.9k
}
2529
2530
/**
2531
*******************************************************************************
2532
*
2533
* @brief
2534
*  entry point of a spawned encoder thread
2535
*
2536
* @par Description:
2537
*  The encoder thread dequeues a proc/entropy job from the encoder queue and
2538
*  calls necessary routines.
2539
*
2540
* @param[in] pv_proc
2541
*  Process context corresponding to the thread
2542
*
2543
* @returns  error status
2544
*
2545
* @remarks
2546
*
2547
*******************************************************************************
2548
*/
2549
WORD32 ih264e_process_thread(void *pv_proc)
2550
36.6M
{
2551
    /* error status */
2552
36.6M
    IH264_ERROR_T ret = IH264_SUCCESS;
2553
36.6M
    WORD32 error_status = IH264_SUCCESS;
2554
2555
    /* proc ctxt */
2556
36.6M
    process_ctxt_t *ps_proc = pv_proc;
2557
2558
    /* codec ctxt */
2559
36.6M
    codec_t *ps_codec = ps_proc->ps_codec;
2560
2561
    /* structure to represent a processing job entry */
2562
36.6M
    job_t s_job;
2563
2564
    /* blocking call : entropy dequeue is non-blocking till all
2565
     * the proc jobs are processed */
2566
36.6M
    WORD32 is_blocking = 0;
2567
2568
    /* codec context selector */
2569
36.6M
    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2570
2571
    /* set affinity */
2572
36.6M
    ithread_set_affinity(ps_proc->i4_id);
2573
2574
36.6M
    ps_proc->i4_error_code = IH264_SUCCESS;
2575
42.4M
    while(1)
2576
42.4M
    {
2577
        /* dequeue a job from the entropy queue */
2578
42.4M
        {
2579
42.4M
            int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2580
2581
42.4M
            volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2582
2583
            /* have the lock */
2584
42.4M
            if (error == 0)
2585
42.5M
            {
2586
42.5M
                if (*pu4_buf == 0)
2587
28.6M
                {
2588
                    /* no entropy threads are active, try dequeuing a job from the entropy queue */
2589
28.6M
                    ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2590
28.6M
                    if (IH264_SUCCESS == ret)
2591
173k
                    {
2592
173k
                        *pu4_buf = 1;
2593
173k
                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2594
173k
                        goto WORKER;
2595
173k
                    }
2596
28.4M
                    else if(is_blocking)
2597
26.8k
                    {
2598
26.8k
                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2599
26.8k
                        break;
2600
26.8k
                    }
2601
28.6M
                }
2602
42.3M
                ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2603
42.3M
            }
2604
42.4M
        }
2605
2606
        /* dequeue a job from the process queue */
2607
42.2M
        ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2608
42.2M
        if (IH264_SUCCESS != ret)
2609
42.1M
        {
2610
42.1M
            if(ps_proc->i4_id)
2611
36.6M
                break;
2612
5.50M
            else
2613
5.50M
            {
2614
5.50M
                is_blocking = 1;
2615
5.50M
                continue;
2616
5.50M
            }
2617
42.1M
        }
2618
2619
347k
WORKER:
2620
        /* choose appropriate proc context based on proc_base_idx */
2621
347k
        ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2622
2623
347k
        switch (s_job.i4_cmd)
2624
347k
        {
2625
173k
            case CMD_PROCESS:
2626
173k
                ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2627
173k
                ps_proc->i4_mb_x = s_job.i2_mb_x;
2628
173k
                ps_proc->i4_mb_y = s_job.i2_mb_y;
2629
2630
                /* init process context */
2631
173k
                ih264e_init_proc_ctxt(ps_proc);
2632
2633
                /* core code all mbs enlisted under the current job */
2634
173k
                error_status = ih264e_process(ps_proc);
2635
173k
                if(error_status !=IH264_SUCCESS)
2636
0
                {
2637
0
                    ps_proc->i4_error_code = error_status;
2638
0
                    return ret;
2639
0
                }
2640
173k
                break;
2641
2642
173k
            case CMD_ENTROPY:
2643
173k
                ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2644
173k
                ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2645
173k
                ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2646
2647
                /* init entropy */
2648
173k
                ih264e_init_entropy_ctxt(ps_proc);
2649
2650
                /* entropy code all mbs enlisted under the current job */
2651
173k
                error_status = ih264e_entropy(ps_proc);
2652
2653
                /* Dont execute any further instructions until store synchronization took place */
2654
173k
                DATA_SYNC();
2655
2656
                /* allow threads to dequeue entropy jobs */
2657
173k
                ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
2658
2659
173k
                if (error_status != IH264_SUCCESS)
2660
24.8k
                {
2661
24.8k
                    ps_proc->i4_error_code = error_status;
2662
24.8k
                    return ret;
2663
24.8k
                }
2664
148k
                break;
2665
2666
148k
            default:
2667
0
                ps_proc->i4_error_code = IH264_FAIL;
2668
0
                return ret;
2669
347k
        }
2670
347k
    }
2671
2672
36.6M
    return ret;
2673
36.6M
}