Coverage Report

Created: 2025-11-05 07:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/hme_interface.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*****************************************************************************/
22
/* File Includes                                                             */
23
/*****************************************************************************/
24
/* System include files */
25
#include <stdio.h>
26
#include <string.h>
27
#include <stdlib.h>
28
#include <assert.h>
29
#include <stdarg.h>
30
#include <math.h>
31
#include <limits.h>
32
33
/* User include files */
34
#include "ihevc_typedefs.h"
35
#include "itt_video_api.h"
36
#include "ihevce_api.h"
37
38
#include "rc_cntrl_param.h"
39
#include "rc_frame_info_collector.h"
40
#include "rc_look_ahead_params.h"
41
42
#include "ihevc_defs.h"
43
#include "ihevc_structs.h"
44
#include "ihevc_platform_macros.h"
45
#include "ihevc_deblk.h"
46
#include "ihevc_itrans_recon.h"
47
#include "ihevc_chroma_itrans_recon.h"
48
#include "ihevc_chroma_intra_pred.h"
49
#include "ihevc_intra_pred.h"
50
#include "ihevc_inter_pred.h"
51
#include "ihevc_mem_fns.h"
52
#include "ihevc_padding.h"
53
#include "ihevc_weighted_pred.h"
54
#include "ihevc_sao.h"
55
#include "ihevc_resi_trans.h"
56
#include "ihevc_quant_iquant_ssd.h"
57
#include "ihevc_cabac_tables.h"
58
59
#include "ihevce_defs.h"
60
#include "ihevce_lap_enc_structs.h"
61
#include "ihevce_multi_thrd_structs.h"
62
#include "ihevce_multi_thrd_funcs.h"
63
#include "ihevce_me_common_defs.h"
64
#include "ihevce_had_satd.h"
65
#include "ihevce_error_codes.h"
66
#include "ihevce_bitstream.h"
67
#include "ihevce_cabac.h"
68
#include "ihevce_rdoq_macros.h"
69
#include "ihevce_function_selector.h"
70
#include "ihevce_enc_structs.h"
71
#include "ihevce_entropy_structs.h"
72
#include "ihevce_cmn_utils_instr_set_router.h"
73
#include "ihevce_enc_loop_structs.h"
74
#include "ihevce_bs_compute_ctb.h"
75
#include "ihevce_global_tables.h"
76
#include "ihevce_dep_mngr_interface.h"
77
#include "hme_datatype.h"
78
#include "hme_interface.h"
79
#include "hme_common_defs.h"
80
#include "hme_defs.h"
81
#include "ihevce_me_instr_set_router.h"
82
#include "hme_globals.h"
83
#include "hme_utils.h"
84
#include "hme_coarse.h"
85
#include "hme_refine.h"
86
#include "hme_err_compute.h"
87
#include "hme_common_utils.h"
88
#include "hme_search_algo.h"
89
#include "ihevce_profile.h"
90
91
/*****************************************************************************/
92
/* Function Definitions                                                      */
93
/*****************************************************************************/
94
95
void hme_init_globals()
96
6.76k
{
97
6.76k
    GRID_PT_T id;
98
6.76k
    S32 i, j;
99
    /*************************************************************************/
100
    /* Initialize the lookup table for x offset, y offset, optimized mask    */
101
    /* based on grid id. The design is as follows:                           */
102
    /*                                                                       */
103
    /*     a  b  c  d                                                        */
104
    /*    TL  T TR  e                                                        */
105
    /*     L  C  R  f                                                        */
106
    /*    BL  B BR                                                           */
107
    /*                                                                       */
108
    /*  IF a non corner pt, like T is the new minima, then we need to        */
109
    /*  evaluate only 3 new pts, in this case, a, b, c. So the optimal       */
110
    /*  grid mask would reflect this. If a corner pt like TR is the new      */
111
    /*  minima, then we need to evaluate 5 new pts, in this case, b, c, d,   */
112
    /*  e and f. So the grid mask will have 5 pts enabled.                   */
113
    /*************************************************************************/
114
115
6.76k
    id = PT_C;
116
6.76k
    gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
117
6.76k
    gai1_grid_id_to_x[id] = 0;
118
6.76k
    gai1_grid_id_to_y[id] = 0;
119
6.76k
    gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
120
6.76k
    gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
121
122
6.76k
    id = PT_L;
123
6.76k
    gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL);
124
6.76k
    gai1_grid_id_to_x[id] = -1;
125
6.76k
    gai1_grid_id_to_y[id] = 0;
126
6.76k
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
127
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
128
129
6.76k
    id = PT_R;
130
6.76k
    gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR);
131
6.76k
    gai1_grid_id_to_x[id] = 1;
132
6.76k
    gai1_grid_id_to_y[id] = 0;
133
6.76k
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
134
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
135
136
6.76k
    id = PT_T;
137
6.76k
    gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR);
138
6.76k
    gai1_grid_id_to_x[id] = 0;
139
6.76k
    gai1_grid_id_to_y[id] = -1;
140
6.76k
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
141
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
142
143
6.76k
    id = PT_B;
144
6.76k
    gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR);
145
6.76k
    gai1_grid_id_to_x[id] = 0;
146
6.76k
    gai1_grid_id_to_y[id] = 1;
147
6.76k
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
148
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
149
150
6.76k
    id = PT_TL;
151
6.76k
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T];
152
6.76k
    gai1_grid_id_to_x[id] = -1;
153
6.76k
    gai1_grid_id_to_y[id] = -1;
154
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L);
155
156
6.76k
    id = PT_TR;
157
6.76k
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T];
158
6.76k
    gai1_grid_id_to_x[id] = 1;
159
6.76k
    gai1_grid_id_to_y[id] = -1;
160
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R);
161
162
6.76k
    id = PT_BL;
163
6.76k
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B];
164
6.76k
    gai1_grid_id_to_x[id] = -1;
165
6.76k
    gai1_grid_id_to_y[id] = 1;
166
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B);
167
168
6.76k
    id = PT_BR;
169
6.76k
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B];
170
6.76k
    gai1_grid_id_to_x[id] = 1;
171
6.76k
    gai1_grid_id_to_y[id] = 1;
172
6.76k
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B);
173
174
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8;
175
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4;
176
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4;
177
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8;
178
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8;
179
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4;
180
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4;
181
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4;
182
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4;
183
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID;
184
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID;
185
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID;
186
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID;
187
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID;
188
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID;
189
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID;
190
6.76k
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID;
191
192
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16;
193
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8;
194
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8;
195
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16;
196
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16;
197
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8;
198
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8;
199
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8;
200
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8;
201
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4;
202
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12;
203
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12;
204
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4;
205
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16;
206
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16;
207
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16;
208
6.76k
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16;
209
210
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32;
211
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16;
212
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16;
213
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32;
214
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32;
215
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16;
216
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16;
217
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16;
218
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16;
219
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8;
220
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24;
221
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24;
222
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8;
223
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32;
224
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32;
225
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32;
226
6.76k
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32;
227
228
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64;
229
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32;
230
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32;
231
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64;
232
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64;
233
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32;
234
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32;
235
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32;
236
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32;
237
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16;
238
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48;
239
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48;
240
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16;
241
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64;
242
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64;
243
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64;
244
6.76k
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64;
245
246
6.76k
    gau1_num_parts_in_part_type[PRT_2Nx2N] = 1;
247
6.76k
    gau1_num_parts_in_part_type[PRT_2NxN] = 2;
248
6.76k
    gau1_num_parts_in_part_type[PRT_Nx2N] = 2;
249
6.76k
    gau1_num_parts_in_part_type[PRT_NxN] = 4;
250
6.76k
    gau1_num_parts_in_part_type[PRT_2NxnU] = 2;
251
6.76k
    gau1_num_parts_in_part_type[PRT_2NxnD] = 2;
252
6.76k
    gau1_num_parts_in_part_type[PRT_nLx2N] = 2;
253
6.76k
    gau1_num_parts_in_part_type[PRT_nRx2N] = 2;
254
255
60.9k
    for(i = 0; i < MAX_PART_TYPES; i++)
256
270k
        for(j = 0; j < MAX_NUM_PARTS; j++)
257
216k
            ge_part_type_to_part_id[i][j] = PART_ID_INVALID;
258
259
    /* 2Nx2N only one partition */
260
6.76k
    ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N;
261
262
    /* 2NxN 2 partitions */
263
6.76k
    ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T;
264
6.76k
    ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B;
265
266
    /* Nx2N 2 partitions */
267
6.76k
    ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L;
268
6.76k
    ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R;
269
270
    /* NxN 4 partitions */
271
6.76k
    ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL;
272
6.76k
    ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR;
273
6.76k
    ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL;
274
6.76k
    ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR;
275
276
    /* AMP 2Nx (N/2 + 3N/2) 2 partitions */
277
6.76k
    ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T;
278
6.76k
    ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B;
279
280
    /* AMP 2Nx (3N/2 + N/2) 2 partitions */
281
6.76k
    ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T;
282
6.76k
    ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B;
283
284
    /* AMP (N/2 + 3N/2) x 2N 2 partitions */
285
6.76k
    ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L;
286
6.76k
    ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R;
287
288
    /* AMP (3N/2 + N/2) x 2N 2 partitions */
289
6.76k
    ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L;
290
6.76k
    ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R;
291
292
    /*************************************************************************/
293
    /* initialize attributes for each partition id within the cu.            */
294
    /*************************************************************************/
295
6.76k
    {
296
6.76k
        part_attr_t *ps_part_attr;
297
298
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N];
299
6.76k
        ps_part_attr->u1_x_start = 0;
300
6.76k
        ps_part_attr->u1_y_start = 0;
301
6.76k
        ps_part_attr->u1_x_count = 8;
302
6.76k
        ps_part_attr->u1_y_count = 8;
303
304
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T];
305
6.76k
        ps_part_attr->u1_x_start = 0;
306
6.76k
        ps_part_attr->u1_y_start = 0;
307
6.76k
        ps_part_attr->u1_x_count = 8;
308
6.76k
        ps_part_attr->u1_y_count = 4;
309
310
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B];
311
6.76k
        ps_part_attr->u1_x_start = 0;
312
6.76k
        ps_part_attr->u1_y_start = 4;
313
6.76k
        ps_part_attr->u1_x_count = 8;
314
6.76k
        ps_part_attr->u1_y_count = 4;
315
316
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L];
317
6.76k
        ps_part_attr->u1_x_start = 0;
318
6.76k
        ps_part_attr->u1_y_start = 0;
319
6.76k
        ps_part_attr->u1_x_count = 4;
320
6.76k
        ps_part_attr->u1_y_count = 8;
321
322
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R];
323
6.76k
        ps_part_attr->u1_x_start = 4;
324
6.76k
        ps_part_attr->u1_y_start = 0;
325
6.76k
        ps_part_attr->u1_x_count = 4;
326
6.76k
        ps_part_attr->u1_y_count = 8;
327
328
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL];
329
6.76k
        ps_part_attr->u1_x_start = 0;
330
6.76k
        ps_part_attr->u1_y_start = 0;
331
6.76k
        ps_part_attr->u1_x_count = 4;
332
6.76k
        ps_part_attr->u1_y_count = 4;
333
334
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR];
335
6.76k
        ps_part_attr->u1_x_start = 4;
336
6.76k
        ps_part_attr->u1_y_start = 0;
337
6.76k
        ps_part_attr->u1_x_count = 4;
338
6.76k
        ps_part_attr->u1_y_count = 4;
339
340
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL];
341
6.76k
        ps_part_attr->u1_x_start = 0;
342
6.76k
        ps_part_attr->u1_y_start = 4;
343
6.76k
        ps_part_attr->u1_x_count = 4;
344
6.76k
        ps_part_attr->u1_y_count = 4;
345
346
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR];
347
6.76k
        ps_part_attr->u1_x_start = 4;
348
6.76k
        ps_part_attr->u1_y_start = 4;
349
6.76k
        ps_part_attr->u1_x_count = 4;
350
6.76k
        ps_part_attr->u1_y_count = 4;
351
352
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
353
6.76k
        ps_part_attr->u1_x_start = 0;
354
6.76k
        ps_part_attr->u1_y_start = 0;
355
6.76k
        ps_part_attr->u1_x_count = 8;
356
6.76k
        ps_part_attr->u1_y_count = 2;
357
358
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B];
359
6.76k
        ps_part_attr->u1_x_start = 0;
360
6.76k
        ps_part_attr->u1_y_start = 2;
361
6.76k
        ps_part_attr->u1_x_count = 8;
362
6.76k
        ps_part_attr->u1_y_count = 6;
363
364
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T];
365
6.76k
        ps_part_attr->u1_x_start = 0;
366
6.76k
        ps_part_attr->u1_y_start = 0;
367
6.76k
        ps_part_attr->u1_x_count = 8;
368
6.76k
        ps_part_attr->u1_y_count = 6;
369
370
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B];
371
6.76k
        ps_part_attr->u1_x_start = 0;
372
6.76k
        ps_part_attr->u1_y_start = 6;
373
6.76k
        ps_part_attr->u1_x_count = 8;
374
6.76k
        ps_part_attr->u1_y_count = 2;
375
376
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L];
377
6.76k
        ps_part_attr->u1_x_start = 0;
378
6.76k
        ps_part_attr->u1_y_start = 0;
379
6.76k
        ps_part_attr->u1_x_count = 2;
380
6.76k
        ps_part_attr->u1_y_count = 8;
381
382
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R];
383
6.76k
        ps_part_attr->u1_x_start = 2;
384
6.76k
        ps_part_attr->u1_y_start = 0;
385
6.76k
        ps_part_attr->u1_x_count = 6;
386
6.76k
        ps_part_attr->u1_y_count = 8;
387
388
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L];
389
6.76k
        ps_part_attr->u1_x_start = 0;
390
6.76k
        ps_part_attr->u1_y_start = 0;
391
6.76k
        ps_part_attr->u1_x_count = 6;
392
6.76k
        ps_part_attr->u1_y_count = 8;
393
394
6.76k
        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R];
395
6.76k
        ps_part_attr->u1_x_start = 6;
396
6.76k
        ps_part_attr->u1_y_start = 0;
397
6.76k
        ps_part_attr->u1_x_count = 2;
398
6.76k
        ps_part_attr->u1_y_count = 8;
399
6.76k
    }
400
175k
    for(i = 0; i < NUM_BLK_SIZES; i++)
401
169k
        ge_blk_size_to_cu_size[i] = CU_INVALID;
402
403
6.76k
    ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8;
404
6.76k
    ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16;
405
6.76k
    ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32;
406
6.76k
    ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64;
407
408
    /* This is the reverse, given cU size, get blk size */
409
6.76k
    ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8;
410
6.76k
    ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16;
411
6.76k
    ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32;
412
6.76k
    ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64;
413
414
6.76k
    gau1_is_vert_part[PRT_2Nx2N] = 0;
415
6.76k
    gau1_is_vert_part[PRT_2NxN] = 0;
416
6.76k
    gau1_is_vert_part[PRT_Nx2N] = 1;
417
6.76k
    gau1_is_vert_part[PRT_NxN] = 1;
418
6.76k
    gau1_is_vert_part[PRT_2NxnU] = 0;
419
6.76k
    gau1_is_vert_part[PRT_2NxnD] = 0;
420
6.76k
    gau1_is_vert_part[PRT_nLx2N] = 1;
421
6.76k
    gau1_is_vert_part[PRT_nRx2N] = 1;
422
423
    /* Initialise the number of best results for the full pell refinement */
424
6.76k
    gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2;
425
6.76k
    gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0;
426
6.76k
    gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0;
427
6.76k
    gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0;
428
6.76k
    gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0;
429
6.76k
    gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1;
430
6.76k
    gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1;
431
6.76k
    gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1;
432
6.76k
    gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1;
433
6.76k
    gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1;
434
6.76k
    gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0;
435
6.76k
    gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0;
436
6.76k
    gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1;
437
6.76k
    gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1;
438
6.76k
    gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0;
439
6.76k
    gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0;
440
6.76k
    gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1;
441
442
6.76k
    gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2;
443
6.76k
    gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0;
444
6.76k
    gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0;
445
6.76k
    gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0;
446
6.76k
    gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0;
447
6.76k
    gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1;
448
6.76k
    gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1;
449
6.76k
    gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1;
450
6.76k
    gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1;
451
6.76k
    gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1;
452
6.76k
    gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0;
453
6.76k
    gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0;
454
6.76k
    gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1;
455
6.76k
    gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1;
456
6.76k
    gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0;
457
6.76k
    gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0;
458
6.76k
    gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1;
459
460
6.76k
    gau1_num_best_results_MS[PART_ID_2Nx2N] = 2;
461
6.76k
    gau1_num_best_results_MS[PART_ID_2NxN_T] = 0;
462
6.76k
    gau1_num_best_results_MS[PART_ID_2NxN_B] = 0;
463
6.76k
    gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0;
464
6.76k
    gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0;
465
6.76k
    gau1_num_best_results_MS[PART_ID_NxN_TL] = 1;
466
6.76k
    gau1_num_best_results_MS[PART_ID_NxN_TR] = 1;
467
6.76k
    gau1_num_best_results_MS[PART_ID_NxN_BL] = 1;
468
6.76k
    gau1_num_best_results_MS[PART_ID_NxN_BR] = 1;
469
6.76k
    gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1;
470
6.76k
    gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0;
471
6.76k
    gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0;
472
6.76k
    gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1;
473
6.76k
    gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1;
474
6.76k
    gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0;
475
6.76k
    gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0;
476
6.76k
    gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1;
477
478
6.76k
    gau1_num_best_results_HS[PART_ID_2Nx2N] = 2;
479
6.76k
    gau1_num_best_results_HS[PART_ID_2NxN_T] = 0;
480
6.76k
    gau1_num_best_results_HS[PART_ID_2NxN_B] = 0;
481
6.76k
    gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0;
482
6.76k
    gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0;
483
6.76k
    gau1_num_best_results_HS[PART_ID_NxN_TL] = 0;
484
6.76k
    gau1_num_best_results_HS[PART_ID_NxN_TR] = 0;
485
6.76k
    gau1_num_best_results_HS[PART_ID_NxN_BL] = 0;
486
6.76k
    gau1_num_best_results_HS[PART_ID_NxN_BR] = 0;
487
6.76k
    gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0;
488
6.76k
    gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0;
489
6.76k
    gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0;
490
6.76k
    gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0;
491
6.76k
    gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0;
492
6.76k
    gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0;
493
6.76k
    gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0;
494
6.76k
    gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0;
495
496
6.76k
    gau1_num_best_results_XS[PART_ID_2Nx2N] = 2;
497
6.76k
    gau1_num_best_results_XS[PART_ID_2NxN_T] = 0;
498
6.76k
    gau1_num_best_results_XS[PART_ID_2NxN_B] = 0;
499
6.76k
    gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0;
500
6.76k
    gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0;
501
6.76k
    gau1_num_best_results_XS[PART_ID_NxN_TL] = 0;
502
6.76k
    gau1_num_best_results_XS[PART_ID_NxN_TR] = 0;
503
6.76k
    gau1_num_best_results_XS[PART_ID_NxN_BL] = 0;
504
6.76k
    gau1_num_best_results_XS[PART_ID_NxN_BR] = 0;
505
6.76k
    gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0;
506
6.76k
    gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0;
507
6.76k
    gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0;
508
6.76k
    gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0;
509
6.76k
    gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0;
510
6.76k
    gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0;
511
6.76k
    gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0;
512
6.76k
    gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0;
513
514
6.76k
    gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
515
6.76k
    gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0;
516
6.76k
    gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0;
517
6.76k
    gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0;
518
6.76k
    gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0;
519
6.76k
    gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0;
520
6.76k
    gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0;
521
6.76k
    gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0;
522
6.76k
    gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0;
523
6.76k
    gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0;
524
6.76k
    gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0;
525
6.76k
    gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0;
526
6.76k
    gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0;
527
6.76k
    gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0;
528
6.76k
    gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0;
529
6.76k
    gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0;
530
6.76k
    gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0;
531
532
    /* Top right validity for each part id */
533
6.76k
    gau1_partid_tr_valid[PART_ID_2Nx2N] = 1;
534
6.76k
    gau1_partid_tr_valid[PART_ID_2NxN_T] = 1;
535
6.76k
    gau1_partid_tr_valid[PART_ID_2NxN_B] = 0;
536
6.76k
    gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1;
537
6.76k
    gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1;
538
6.76k
    gau1_partid_tr_valid[PART_ID_NxN_TL] = 1;
539
6.76k
    gau1_partid_tr_valid[PART_ID_NxN_TR] = 1;
540
6.76k
    gau1_partid_tr_valid[PART_ID_NxN_BL] = 1;
541
6.76k
    gau1_partid_tr_valid[PART_ID_NxN_BR] = 0;
542
6.76k
    gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1;
543
6.76k
    gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0;
544
6.76k
    gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1;
545
6.76k
    gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0;
546
6.76k
    gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1;
547
6.76k
    gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1;
548
6.76k
    gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1;
549
6.76k
    gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1;
550
551
    /* Bot Left validity for each part id */
552
6.76k
    gau1_partid_bl_valid[PART_ID_2Nx2N] = 1;
553
6.76k
    gau1_partid_bl_valid[PART_ID_2NxN_T] = 1;
554
6.76k
    gau1_partid_bl_valid[PART_ID_2NxN_B] = 1;
555
6.76k
    gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1;
556
6.76k
    gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0;
557
6.76k
    gau1_partid_bl_valid[PART_ID_NxN_TL] = 1;
558
6.76k
    gau1_partid_bl_valid[PART_ID_NxN_TR] = 0;
559
6.76k
    gau1_partid_bl_valid[PART_ID_NxN_BL] = 1;
560
6.76k
    gau1_partid_bl_valid[PART_ID_NxN_BR] = 0;
561
6.76k
    gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1;
562
6.76k
    gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1;
563
6.76k
    gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1;
564
6.76k
    gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1;
565
6.76k
    gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1;
566
6.76k
    gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0;
567
6.76k
    gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1;
568
6.76k
    gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0;
569
570
    /*Part id to part num of this partition id in the CU */
571
6.76k
    gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0;
572
6.76k
    gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0;
573
6.76k
    gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1;
574
6.76k
    gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0;
575
6.76k
    gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1;
576
6.76k
    gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0;
577
6.76k
    gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1;
578
6.76k
    gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2;
579
6.76k
    gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3;
580
6.76k
    gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0;
581
6.76k
    gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1;
582
6.76k
    gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0;
583
6.76k
    gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1;
584
6.76k
    gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0;
585
6.76k
    gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1;
586
6.76k
    gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0;
587
6.76k
    gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1;
588
589
    /*Which partition type does this partition id belong to */
590
6.76k
    ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N;
591
6.76k
    ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN;
592
6.76k
    ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN;
593
6.76k
    ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N;
594
6.76k
    ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N;
595
6.76k
    ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN;
596
6.76k
    ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN;
597
6.76k
    ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN;
598
6.76k
    ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN;
599
6.76k
    ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU;
600
6.76k
    ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU;
601
6.76k
    ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD;
602
6.76k
    ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD;
603
6.76k
    ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N;
604
6.76k
    ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N;
605
6.76k
    ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N;
606
6.76k
    ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N;
607
608
    /*************************************************************************/
609
    /* Set up the bits to be taken up for the part type. This is equally     */
610
    /* divided up between the various partitions in the part-type.           */
611
    /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as         */
612
    /* partition 2Nx2N.                                                      */
613
    /*************************************************************************/
614
    /* 1 bit for 2Nx2N partition */
615
6.76k
    gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2;
616
617
    /* 3 bits for symmetric part types, so 1.5 bits per partition */
618
6.76k
    gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3;
619
6.76k
    gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3;
620
6.76k
    gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3;
621
6.76k
    gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3;
622
623
    /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */
624
6.76k
    gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2;
625
6.76k
    gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2;
626
6.76k
    gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2;
627
6.76k
    gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2;
628
629
    /* 4 bits for AMP so 2 bits per partition */
630
6.76k
    gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4;
631
6.76k
    gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4;
632
6.76k
    gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4;
633
6.76k
    gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4;
634
6.76k
    gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4;
635
6.76k
    gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4;
636
6.76k
    gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4;
637
6.76k
    gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4;
638
6.76k
}
639
640
/**
641
********************************************************************************
642
*  @fn     hme_enc_num_alloc()
643
*
644
*  @brief  returns number of memtabs that is required by hme module
645
*
646
*  @return   Number of memtabs required
647
********************************************************************************
648
*/
649
S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)
650
60.9k
{
651
60.9k
    if(i4_num_me_frm_pllel > 1)
652
0
    {
653
0
        return ((S32)MAX_HME_ENC_TOT_MEMTABS);
654
0
    }
655
60.9k
    else
656
60.9k
    {
657
60.9k
        return ((S32)MIN_HME_ENC_TOT_MEMTABS);
658
60.9k
    }
659
60.9k
}
660
661
/**
662
********************************************************************************
663
*  @fn     hme_coarse_num_alloc()
664
*
665
*  @brief  returns number of memtabs that is required by hme module
666
*
667
*  @return   Number of memtabs required
668
********************************************************************************
669
*/
670
S32 hme_coarse_num_alloc()
671
60.9k
{
672
60.9k
    return ((S32)HME_COARSE_TOT_MEMTABS);
673
60.9k
}
674
675
/**
676
********************************************************************************
677
*  @fn     hme_coarse_dep_mngr_num_alloc()
678
*
679
*  @brief  returns number of memtabs that is required by Dep Mngr for hme module
680
*
681
*  @return   Number of memtabs required
682
********************************************************************************
683
*/
684
WORD32 hme_coarse_dep_mngr_num_alloc()
685
40.6k
{
686
40.6k
    return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs()));
687
40.6k
}
688
689
/**
********************************************************************************
*  @fn     hme_validate_init_prms()
*
*  @brief  Range-checks the HME init parameters
*
*  @param[in] ps_prms : init params; num_simulcast_layers, a_wd, a_ht and
*                       max_num_ref are inspected
*
*  @return   0 when every check passes, -1 on the first violation
********************************************************************************
*/
S32 hme_validate_init_prms(hme_init_prms_t *ps_prms)
{
    S32 num_enc_layers = ps_prms->num_simulcast_layers;
    S32 coarsest_enc_id;

    /* At least one encode layer; the final layer has got to be a non encode */
    /* coarse layer, so at most MAX_NUM_LAYERS - 1 encode layers             */
    if((num_enc_layers > (MAX_NUM_LAYERS - 1)) || (num_enc_layers < 1))
        return (-1);

    /* Index is safe to use only after the layer count has been validated */
    coarsest_enc_id = num_enc_layers - 1;

    /* Width and height of the coarsest encode layer must each be at least */
    /* twice the minimum allowed in any layer                              */
    if((ps_prms->a_wd[coarsest_enc_id] < 2 * (MIN_WD_COARSE)) ||
       (ps_prms->a_ht[coarsest_enc_id] < 2 * (MIN_HT_COARSE)))
        return (-1);

    /* Reference count must lie in [0, MAX_NUM_REF] */
    if((ps_prms->max_num_ref > MAX_NUM_REF) || (ps_prms->max_num_ref < 0))
        return (-1);

    return (0);
}
713
void hme_set_layer_res_attrs(
714
    layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc)
715
240k
{
716
240k
    ps_layer->i4_wd = wd;
717
240k
    ps_layer->i4_ht = ht;
718
240k
    ps_layer->i4_disp_wd = disp_wd;
719
240k
    ps_layer->i4_disp_ht = disp_ht;
720
240k
    if(0 == u1_enc)
721
172k
    {
722
172k
        ps_layer->i4_inp_stride = wd + 32 + 4;
723
172k
        ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16;
724
172k
        ps_layer->i4_pad_x_inp = 16;
725
172k
        ps_layer->i4_pad_y_inp = 16;
726
172k
        ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset;
727
172k
    }
728
240k
}
729
730
/**
731
********************************************************************************
732
*  @fn     hme_coarse_get_layer1_mv_bank_ref_idx_size()
733
*
734
*  @brief  returns the MV bank and ref idx size of Layer 1 (penultimate)
735
*
736
*  @return   none
737
********************************************************************************
738
*/
739
void hme_coarse_get_layer1_mv_bank_ref_idx_size(
740
    S32 n_tot_layers,
741
    S32 *a_wd,
742
    S32 *a_ht,
743
    S32 max_num_ref,
744
    S32 *pi4_mv_bank_size,
745
    S32 *pi4_ref_idx_size)
746
6.76k
{
747
6.76k
    S32 num_blks, num_mvs_per_blk, num_ref;
748
6.76k
    S32 num_cols, num_rows, num_mvs_per_row;
749
6.76k
    S32 is_explicit_store = 1;
750
6.76k
    S32 wd, ht, num_layers_explicit_search;
751
6.76k
    S32 num_results, use_4x4;
752
6.76k
    wd = a_wd[1];
753
6.76k
    ht = a_ht[1];
754
755
    /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */
756
    /* frames in all but final layer In final layer, it could be 1/2 */
757
    //ps_hme_init_prms->num_layers_explicit_search = 3;
758
6.76k
    num_layers_explicit_search = 3;
759
760
6.76k
    if(num_layers_explicit_search <= 0)
761
0
        num_layers_explicit_search = n_tot_layers - 1;
762
763
6.76k
    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
764
765
    /* Possibly implicit search for lower (finer) layers */
766
6.76k
    if(n_tot_layers - 1 > num_layers_explicit_search)
767
0
        is_explicit_store = 0;
768
769
    /* coarsest layer alwasy uses 4x4 blks to store results */
770
6.76k
    if(1 == (n_tot_layers - 1))
771
0
    {
772
        /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
773
        //ps_hme_init_prms->max_num_results_coarse = 4;
774
        //vijay : with new algo in coarseset layer this has to be revisited
775
0
        num_results = 4;
776
0
    }
777
6.76k
    else
778
6.76k
    {
779
        /* Every refinement layer stores a max of 2 results per partition */
780
        //ps_hme_init_prms->max_num_results = 2;
781
6.76k
        num_results = 2;
782
6.76k
    }
783
6.76k
    use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0);
784
785
6.76k
    num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
786
6.76k
    num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
787
788
6.76k
    if(is_explicit_store)
789
6.76k
        num_ref = max_num_ref;
790
0
    else
791
0
        num_ref = 2;
792
793
6.76k
    num_blks = num_cols * num_rows;
794
6.76k
    num_mvs_per_blk = num_ref * num_results;
795
6.76k
    num_mvs_per_row = num_mvs_per_blk * num_cols;
796
797
    /* stroe the sizes */
798
6.76k
    *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
799
6.76k
    *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08);
800
801
6.76k
    return;
802
6.76k
}
803
/**
804
********************************************************************************
805
*  @fn     hme_alloc_init_layer_mv_bank()
806
*
807
*  @brief  memory alloc and init function for MV bank
808
*
809
*  @return   Number of memtabs required
810
********************************************************************************
811
*/
812
S32 hme_alloc_init_layer_mv_bank(
813
    hme_memtab_t *ps_memtab,
814
    S32 max_num_results,
815
    S32 max_num_ref,
816
    S32 use_4x4,
817
    S32 mem_avail,
818
    S32 u1_enc,
819
    S32 wd,
820
    S32 ht,
821
    S32 is_explicit_store,
822
    hme_mv_t **pps_mv_base,
823
    S08 **pi1_ref_idx_base,
824
    S32 *pi4_num_mvs_per_row)
825
82.8k
{
826
82.8k
    S32 count = 0;
827
82.8k
    S32 size;
828
82.8k
    S32 num_blks, num_mvs_per_blk;
829
82.8k
    S32 num_ref;
830
82.8k
    S32 num_cols, num_rows, num_mvs_per_row;
831
832
82.8k
    if(is_explicit_store)
833
15.1k
        num_ref = max_num_ref;
834
67.6k
    else
835
67.6k
        num_ref = 2;
836
837
    /* MV Bank allocation takes into consideration following */
838
    /* number of results per reference x max num refrences is the amount     */
839
    /* bufffered up per blk. Numbero f blks in pic deps on the blk size,     */
840
    /* which could be either 4x4 or 8x8.                                     */
841
82.8k
    num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
842
82.8k
    num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
843
844
82.8k
    if(u1_enc)
845
67.6k
    {
846
        /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
847
67.6k
        WORD32 num_ctb_cols = ((wd + 63) >> 6);
848
67.6k
        WORD32 num_ctb_rows = ((ht + 63) >> 6);
849
850
67.6k
        num_cols = (num_ctb_cols << 3) + 2;
851
67.6k
        num_rows = (num_ctb_rows << 3) + 2;
852
67.6k
    }
853
82.8k
    num_blks = num_cols * num_rows;
854
82.8k
    num_mvs_per_blk = num_ref * max_num_results;
855
82.8k
    num_mvs_per_row = num_mvs_per_blk * num_cols;
856
857
82.8k
    size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
858
82.8k
    if(mem_avail)
859
41.4k
    {
860
        /* store this for run time verifications */
861
41.4k
        *pi4_num_mvs_per_row = num_mvs_per_row;
862
41.4k
        ASSERT(ps_memtab[count].size == size);
863
41.4k
        *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem;
864
41.4k
    }
865
41.4k
    else
866
41.4k
    {
867
41.4k
        ps_memtab[count].size = size;
868
41.4k
        ps_memtab[count].align = 4;
869
41.4k
        ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
870
41.4k
    }
871
872
82.8k
    count++;
873
    /* Ref idx takes the same route as mvbase */
874
875
82.8k
    size = num_blks * num_mvs_per_blk * sizeof(S08);
876
82.8k
    if(mem_avail)
877
41.4k
    {
878
41.4k
        ASSERT(ps_memtab[count].size == size);
879
41.4k
        *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem;
880
41.4k
    }
881
41.4k
    else
882
41.4k
    {
883
41.4k
        ps_memtab[count].size = size;
884
41.4k
        ps_memtab[count].align = 4;
885
41.4k
        ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
886
41.4k
    }
887
82.8k
    count++;
888
889
82.8k
    return (count);
890
82.8k
}
891
/**
892
********************************************************************************
893
*  @fn     hme_alloc_init_layer()
894
*
895
*  @brief  memory alloc and init function
896
*
897
*  @return   Number of memtabs required
898
********************************************************************************
899
*/
900
S32 hme_alloc_init_layer(
901
    hme_memtab_t *ps_memtab,
902
    S32 max_num_results,
903
    S32 max_num_ref,
904
    S32 use_4x4,
905
    S32 mem_avail,
906
    S32 u1_enc,
907
    S32 wd,
908
    S32 ht,
909
    S32 disp_wd,
910
    S32 disp_ht,
911
    S32 segment_layer,
912
    S32 is_explicit_store,
913
    layer_ctxt_t **pps_layer)
914
240k
{
915
240k
    S32 count = 0;
916
240k
    layer_ctxt_t *ps_layer = NULL;
917
240k
    S32 size;
918
240k
    S32 num_ref;
919
920
240k
    ARG_NOT_USED(segment_layer);
921
922
240k
    if(is_explicit_store)
923
172k
        num_ref = max_num_ref;
924
67.6k
    else
925
67.6k
        num_ref = 2;
926
927
    /* We do not store 4x4 results for encoding layers */
928
240k
    if(u1_enc)
929
67.6k
        use_4x4 = 0;
930
931
240k
    size = sizeof(layer_ctxt_t);
932
240k
    if(mem_avail)
933
120k
    {
934
120k
        ASSERT(ps_memtab[count].size == size);
935
120k
        ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem;
936
120k
        *pps_layer = ps_layer;
937
120k
    }
938
120k
    else
939
120k
    {
940
120k
        ps_memtab[count].size = size;
941
120k
        ps_memtab[count].align = 8;
942
120k
        ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
943
120k
    }
944
945
240k
    count++;
946
947
    /* Input luma buffer allocated only for non encode case */
948
240k
    if(0 == u1_enc)
949
172k
    {
950
        /* Allocate input with padding of 16 pixels */
951
172k
        size = (wd + 32 + 4) * (ht + 32 + 4);
952
172k
        if(mem_avail)
953
86.1k
        {
954
86.1k
            ASSERT(ps_memtab[count].size == size);
955
86.1k
            ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem;
956
86.1k
        }
957
86.1k
        else
958
86.1k
        {
959
86.1k
            ps_memtab[count].size = size;
960
86.1k
            ps_memtab[count].align = 16;
961
86.1k
            ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
962
86.1k
        }
963
172k
        count++;
964
172k
    }
965
966
    /* Allocate memory or just the layer mvbank strcture. */
967
    /* TODO : see if this can be removed by moving it to layer_ctxt */
968
240k
    size = sizeof(layer_mv_t);
969
970
240k
    if(mem_avail)
971
120k
    {
972
120k
        ASSERT(ps_memtab[count].size == size);
973
120k
        ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem;
974
120k
    }
975
120k
    else
976
120k
    {
977
120k
        ps_memtab[count].size = size;
978
120k
        ps_memtab[count].align = 8;
979
120k
        ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
980
120k
    }
981
982
240k
    count++;
983
984
240k
    if(mem_avail)
985
120k
    {
986
120k
        hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc);
987
120k
    }
988
989
240k
    return (count);
990
240k
}
991
992
S32 hme_alloc_init_search_nodes(
993
    search_results_t *ps_search_results,
994
    hme_memtab_t *ps_memtabs,
995
    S32 mem_avail,
996
    S32 max_num_ref,
997
    S32 max_num_results)
998
297k
{
999
297k
    S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS;
1000
297k
    S32 j, k;
1001
297k
    search_node_t *ps_search_node;
1002
1003
297k
    if(mem_avail == 0)
1004
148k
    {
1005
148k
        ps_memtabs->size = size;
1006
148k
        ps_memtabs->align = 4;
1007
148k
        ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM;
1008
148k
        return (1);
1009
148k
    }
1010
1011
148k
    ps_search_node = (search_node_t *)ps_memtabs->pu1_mem;
1012
148k
    ASSERT(ps_memtabs->size == size);
1013
    /****************************************************************************/
1014
    /* For each CU, we search and store N best results, per partition, per ref  */
1015
    /* So, number of memtabs is  num_refs * num_parts                           */
1016
    /****************************************************************************/
1017
460k
    for(j = 0; j < max_num_ref; j++)
1018
311k
    {
1019
5.60M
        for(k = 0; k < TOT_NUM_PARTS; k++)
1020
5.29M
        {
1021
5.29M
            ps_search_results->aps_part_results[j][k] = ps_search_node;
1022
5.29M
            ps_search_node += max_num_results;
1023
5.29M
        }
1024
311k
    }
1025
148k
    return (1);
1026
148k
}
1027
1028
/**
********************************************************************************
*  @fn     hme_derive_num_layers()
*
*  @brief  Derives the total number of HME layers (encode + non encode) and
*          fills in the width, height and display width, height of every
*          layer beyond the encode layers
*
*  @param[in] n_enc_layers : number of encode (simulcast) layers
*
*  @param[in,out] p_wd, p_ht : entries [0, n_enc_layers) are inputs; entries
*                 from n_enc_layers up to the returned count are written
*
*  @param[out] p_disp_wd, p_disp_ht : display width / height of every layer
*              up to the returned count
*
*  @return   Total number of layers
********************************************************************************
*/
S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht)
{
    S32 i;
    /* We keep downscaling by 2 till we hit one of the conditions:           */
    /* 1. MAX_NUM_LAYERS reached.                                            */
    /* 2. Width or ht goes below min width and ht allowed at coarsest layer  */
    ASSERT(n_enc_layers < MAX_NUM_LAYERS);
    ASSERT(n_enc_layers > 0);
    ASSERT(p_wd[0] <= HME_MAX_WIDTH);
    ASSERT(p_ht[0] <= HME_MAX_HEIGHT);

    p_disp_wd[0] = p_wd[0];
    p_disp_ht[0] = p_ht[0];
    /*************************************************************************/
    /* Verify that for simulcast, lower layer to higher layer ratio is bet   */
    /* 2 (dyadic) and 1.33. Typically it should be 1.5.                      */
    /* TODO : for interlace, we may choose to have additional downscaling for*/
    /* width alone in coarsest layer to next layer.                          */
    /*************************************************************************/
    for(i = 1; i < n_enc_layers; i++)
    {
        S32 wd1, wd2, ht1, ht2;
        wd1 = FLOOR16(p_wd[i - 1] >> 1);
        wd2 = CEIL16((p_wd[i - 1] * 3) >> 2);
        ASSERT(p_wd[i] >= wd1);
        ASSERT(p_wd[i] <= wd2);
        ht1 = FLOOR16(p_ht[i - 1] >> 1);
        ht2 = CEIL16((p_ht[i - 1] * 3) >> 2);
        ASSERT(p_ht[i] >= ht1);
        ASSERT(p_ht[i] <= ht2);

        /* The derivation loop below reads p_disp_wd/ht[n_enc_layers - 1],   */
        /* so every encode layer needs a defined display size. As for layer  */
        /* 0, the layer's own size is used.                                  */
        p_disp_wd[i] = p_wd[i];
        p_disp_ht[i] = p_ht[i];
    }
    ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE);
    ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE);

    for(i = n_enc_layers; i < MAX_NUM_LAYERS; i++)
    {
        /* Stop once the previous layer is too small to be halved again */
        if((p_wd[i - 1] < 2 * MIN_WD_COARSE) || (p_ht[i - 1] < 2 * MIN_HT_COARSE))
        {
            return (i);
        }
        /* Use CEIL16 to facilitate 16x16 searches in future, or to do       */
        /* segmentation study in future                                      */
        p_wd[i] = CEIL16(p_wd[i - 1] >> 1);
        p_ht[i] = CEIL16(p_ht[i - 1] >> 1);

        /* Display size is halved without any rounding up */
        p_disp_wd[i] = p_disp_wd[i - 1] >> 1;
        p_disp_ht[i] = p_disp_ht[i - 1] >> 1;
    }
    return (i);
}
1078
1079
/**
1080
********************************************************************************
1081
*  @fn     hme_get_mv_blk_size()
1082
*
1083
*  @brief  returns whether blk uses 4x4 size or something else.
1084
*
1085
*  @param[in] enable_4x4 : input param from application to enable 4x4
1086
*
1087
*  @param[in] layer_id : id of current layer (0 finest)
1088
*
1089
*  @param[in] num_layeers : total num layers
1090
*
1091
*  @param[in] is_enc : Whether encoding enabled for layer
1092
*
1093
*  @return   1 for 4x4 blks, 0 for 8x8
1094
********************************************************************************
1095
*/
1096
S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
1097
727k
{
1098
727k
    S32 use_4x4 = enable_4x4;
1099
1100
727k
    if((layer_id <= 1) && (num_layers >= 4))
1101
33.9k
        use_4x4 = USE_4x4_IN_L1;
1102
727k
    if(layer_id == num_layers - 1)
1103
126k
        use_4x4 = 1;
1104
727k
    if(is_enc)
1105
294k
        use_4x4 = 0;
1106
1107
727k
    return (use_4x4);
1108
727k
}
1109
1110
/**
1111
********************************************************************************
1112
*  @fn     hme_enc_alloc_init_mem()
1113
*
1114
*  @brief  Requests/ assign memory based on mem avail
1115
*
1116
*  @param[in] ps_memtabs : memtab array
1117
*
1118
*  @param[in] ps_prms : init prms
1119
*
1120
*  @param[in] pv_ctxt : ME ctxt
1121
*
1122
*  @param[in] mem_avail : request/assign flag
1123
*
1124
*  @return   1 for 4x4 blks, 0 for 8x8
1125
********************************************************************************
1126
*/
1127
S32 hme_enc_alloc_init_mem(
1128
    hme_memtab_t *ps_memtabs,
1129
    hme_init_prms_t *ps_prms,
1130
    void *pv_ctxt,
1131
    S32 mem_avail,
1132
    S32 i4_num_me_frm_pllel)
1133
13.5k
{
1134
13.5k
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
1135
13.5k
    me_ctxt_t *ps_ctxt;
1136
13.5k
    S32 count = 0, size, i, j, use_4x4;
1137
13.5k
    S32 n_tot_layers, n_enc_layers;
1138
13.5k
    S32 num_layers_explicit_search;
1139
13.5k
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1140
13.5k
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1141
13.5k
    S32 num_results;
1142
13.5k
    S32 num_thrds;
1143
13.5k
    S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1144
1145
    /* MV bank changes */
1146
13.5k
    hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1147
13.5k
    S32 i4_num_mvs_per_row = 0;
1148
13.5k
    S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1149
1150
13.5k
    n_enc_layers = ps_prms->num_simulcast_layers;
1151
1152
    /* Memtab 0: handle */
1153
13.5k
    size = sizeof(me_master_ctxt_t);
1154
13.5k
    if(mem_avail)
1155
6.76k
    {
1156
        /* store the number of processing threads */
1157
6.76k
        ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1158
6.76k
    }
1159
6.76k
    else
1160
6.76k
    {
1161
6.76k
        ps_memtabs[count].size = size;
1162
6.76k
        ps_memtabs[count].align = 8;
1163
6.76k
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1164
6.76k
    }
1165
1166
13.5k
    count++;
1167
1168
    /* Memtab 1: ME threads ctxt */
1169
13.5k
    size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
1170
13.5k
    if(mem_avail)
1171
6.76k
    {
1172
6.76k
        me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;
1173
1174
        /* store the indivisual thread ctxt pointers */
1175
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1176
6.76k
        {
1177
6.76k
            ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1178
6.76k
        }
1179
6.76k
    }
1180
6.76k
    else
1181
6.76k
    {
1182
6.76k
        ps_memtabs[count].size = size;
1183
6.76k
        ps_memtabs[count].align = 8;
1184
6.76k
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1185
6.76k
    }
1186
1187
13.5k
    count++;
1188
1189
    /* Memtab 2: ME frame ctxts */
1190
13.5k
    size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
1191
13.5k
    if(mem_avail)
1192
6.76k
    {
1193
6.76k
        me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;
1194
1195
13.5k
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1196
6.76k
        {
1197
            /* store the indivisual thread ctxt pointers */
1198
13.5k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1199
6.76k
            {
1200
6.76k
                ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;
1201
1202
6.76k
                ps_me_frm_tmp_ctxt++;
1203
6.76k
            }
1204
6.76k
        }
1205
6.76k
    }
1206
6.76k
    else
1207
6.76k
    {
1208
6.76k
        ps_memtabs[count].size = size;
1209
6.76k
        ps_memtabs[count].align = 8;
1210
6.76k
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1211
6.76k
    }
1212
1213
13.5k
    count++;
1214
1215
13.5k
    memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1216
13.5k
    memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1217
    /*************************************************************************/
1218
    /* Derive the number of HME layers, including both encoded and non encode*/
1219
    /* This function also derives the width and ht of each layer.            */
1220
    /*************************************************************************/
1221
13.5k
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
1222
13.5k
    num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1223
13.5k
    if(num_layers_explicit_search <= 0)
1224
0
        num_layers_explicit_search = n_tot_layers - 1;
1225
1226
13.5k
    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1227
1228
13.5k
    if(mem_avail)
1229
6.76k
    {
1230
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1231
6.76k
        {
1232
6.76k
            me_frm_ctxt_t *ps_frm_ctxt;
1233
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1234
1235
13.5k
            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1236
6.76k
            {
1237
6.76k
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1238
1239
6.76k
                memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
1240
6.76k
                memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);
1241
1242
                /* only one enocde layer is used */
1243
6.76k
                ps_frm_ctxt->num_layers = 1;
1244
1245
6.76k
                ps_frm_ctxt->i4_wd = a_wd[0];
1246
6.76k
                ps_frm_ctxt->i4_ht = a_ht[0];
1247
                /*
1248
            memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
1249
            memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
1250
*/
1251
6.76k
                ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1252
6.76k
                ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
1253
6.76k
                ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1254
6.76k
                ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
1255
6.76k
            }
1256
6.76k
        }
1257
6.76k
    }
1258
1259
    /* Memtabs : Layers MV bank for encode layer */
1260
    /* Each ref_desr in master ctxt will have seperate layer ctxt */
1261
1262
81.2k
    for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1263
67.6k
    {
1264
135k
        for(j = 0; j < 1; j++)
1265
67.6k
        {
1266
67.6k
            S32 is_explicit_store = 1;
1267
67.6k
            S32 wd, ht;
1268
67.6k
            U08 u1_enc = 1;
1269
67.6k
            wd = a_wd[j];
1270
67.6k
            ht = a_ht[j];
1271
1272
            /* Possibly implicit search for lower (finer) layers */
1273
67.6k
            if(n_tot_layers - j > num_layers_explicit_search)
1274
67.6k
                is_explicit_store = 0;
1275
1276
            /* Even if explicit search, we store only 2 results (L0 and L1) */
1277
            /* in finest layer */
1278
67.6k
            if(j == 0)
1279
67.6k
            {
1280
67.6k
                is_explicit_store = 0;
1281
67.6k
            }
1282
1283
            /* coarsest layer alwasy uses 4x4 blks to store results */
1284
67.6k
            if(j == n_tot_layers - 1)
1285
0
            {
1286
0
                num_results = ps_prms->max_num_results_coarse;
1287
0
            }
1288
67.6k
            else
1289
67.6k
            {
1290
67.6k
                num_results = ps_prms->max_num_results;
1291
67.6k
                if(j == 0)
1292
67.6k
                    num_results = 1;
1293
67.6k
            }
1294
67.6k
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1295
1296
67.6k
            count += hme_alloc_init_layer_mv_bank(
1297
67.6k
                &ps_memtabs[count],
1298
67.6k
                num_results,
1299
67.6k
                ps_prms->max_num_ref,
1300
67.6k
                use_4x4,
1301
67.6k
                mem_avail,
1302
67.6k
                u1_enc,
1303
67.6k
                wd,
1304
67.6k
                ht,
1305
67.6k
                is_explicit_store,
1306
67.6k
                &aps_mv_bank[i],
1307
67.6k
                &api1_ref_idx[i],
1308
67.6k
                &i4_num_mvs_per_row);
1309
67.6k
        }
1310
67.6k
    }
1311
1312
    /* Memtabs : Layers * num-ref + 1 */
1313
81.2k
    for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1314
67.6k
    {
1315
        /* layer memory allocated only for enocde layer */
1316
135k
        for(j = 0; j < 1; j++)
1317
67.6k
        {
1318
67.6k
            layer_ctxt_t *ps_layer;
1319
67.6k
            S32 is_explicit_store = 1;
1320
67.6k
            S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1321
67.6k
            S32 wd, ht;
1322
67.6k
            U08 u1_enc = 1;
1323
67.6k
            wd = a_wd[j];
1324
67.6k
            ht = a_ht[j];
1325
1326
            /* Possibly implicit search for lower (finer) layers */
1327
67.6k
            if(n_tot_layers - j > num_layers_explicit_search)
1328
67.6k
                is_explicit_store = 0;
1329
1330
            /* Even if explicit search, we store only 2 results (L0 and L1) */
1331
            /* in finest layer */
1332
67.6k
            if(j == 0)
1333
67.6k
            {
1334
67.6k
                is_explicit_store = 0;
1335
67.6k
            }
1336
1337
            /* coarsest layer alwasy uses 4x4 blks to store results */
1338
67.6k
            if(j == n_tot_layers - 1)
1339
0
            {
1340
0
                num_results = ps_prms->max_num_results_coarse;
1341
0
            }
1342
67.6k
            else
1343
67.6k
            {
1344
67.6k
                num_results = ps_prms->max_num_results;
1345
67.6k
                if(j == 0)
1346
67.6k
                    num_results = 1;
1347
67.6k
            }
1348
67.6k
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1349
1350
67.6k
            count += hme_alloc_init_layer(
1351
67.6k
                &ps_memtabs[count],
1352
67.6k
                num_results,
1353
67.6k
                ps_prms->max_num_ref,
1354
67.6k
                use_4x4,
1355
67.6k
                mem_avail,
1356
67.6k
                u1_enc,
1357
67.6k
                wd,
1358
67.6k
                ht,
1359
67.6k
                a_disp_wd[j],
1360
67.6k
                a_disp_ht[j],
1361
67.6k
                segment_this_layer,
1362
67.6k
                is_explicit_store,
1363
67.6k
                &ps_layer);
1364
67.6k
            if(mem_avail)
1365
33.8k
            {
1366
                /* same ps_layer memory pointer is stored in all the threads */
1367
67.6k
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1368
33.8k
                {
1369
33.8k
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1370
33.8k
                    ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1371
33.8k
                }
1372
1373
                /* store the MV bank pointers */
1374
33.8k
                ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
1375
33.8k
                ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
1376
33.8k
                ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
1377
33.8k
            }
1378
67.6k
        }
1379
67.6k
    }
1380
1381
    /* Memtabs : Buf Mgr for predictor bufs and working mem */
1382
    /* TODO : Parameterise this appropriately */
1383
13.5k
    size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;
1384
1385
13.5k
    if(mem_avail)
1386
6.76k
    {
1387
6.76k
        U08 *pu1_mem = ps_memtabs[count].pu1_mem;
1388
1389
6.76k
        ASSERT(ps_memtabs[count].size == size);
1390
1391
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1392
6.76k
        {
1393
6.76k
            me_frm_ctxt_t *ps_frm_ctxt;
1394
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1395
1396
13.5k
            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1397
6.76k
            {
1398
6.76k
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1399
1400
6.76k
                hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);
1401
1402
6.76k
                if(i4_num_me_frm_pllel != 1)
1403
0
                {
1404
                    /* update the memory buffer pointer */
1405
0
                    pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1406
0
                }
1407
6.76k
            }
1408
6.76k
            if(i4_num_me_frm_pllel == 1)
1409
6.76k
            {
1410
6.76k
                pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1411
6.76k
            }
1412
6.76k
        }
1413
6.76k
    }
1414
6.76k
    else
1415
6.76k
    {
1416
6.76k
        ps_memtabs[count].size = size;
1417
6.76k
        ps_memtabs[count].align = 4;
1418
6.76k
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1419
6.76k
    }
1420
13.5k
    count++;
1421
1422
    /*************************************************************************/
1423
    /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
1424
    /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred  */
1425
    /*************************************************************************/
1426
13.5k
    size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
1427
13.5k
           i4_num_me_frm_pllel;
1428
1429
13.5k
    if(mem_avail)
1430
6.76k
    {
1431
6.76k
        S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;
1432
1433
6.76k
        ASSERT(ps_memtabs[count].size == size);
1434
1435
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1436
6.76k
        {
1437
6.76k
            me_frm_ctxt_t *ps_frm_ctxt;
1438
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1439
1440
13.5k
            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1441
6.76k
            {
1442
6.76k
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1443
1444
6.76k
                ps_frm_ctxt->pi2_inp_bck = pi2_mem;
1445
                /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1446
6.76k
                if(i4_num_me_frm_pllel != 1)
1447
0
                {
1448
0
                    pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1449
0
                }
1450
6.76k
            }
1451
6.76k
            if(i4_num_me_frm_pllel == 1)
1452
6.76k
            {
1453
6.76k
                pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1454
6.76k
            }
1455
6.76k
        }
1456
6.76k
    }
1457
6.76k
    else
1458
6.76k
    {
1459
6.76k
        ps_memtabs[count].size = size;
1460
6.76k
        ps_memtabs[count].align = 16;
1461
6.76k
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1462
6.76k
    }
1463
1464
13.5k
    count++;
1465
1466
    /* Allocate a memtab for each histogram. As many as num ref and number of threads */
1467
    /* Loop across for each ME_FRM in PARALLEL */
1468
27.0k
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1469
13.5k
    {
1470
67.6k
        for(i = 0; i < ps_prms->max_num_ref; i++)
1471
54.1k
        {
1472
54.1k
            size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
1473
54.1k
            if(mem_avail)
1474
27.0k
            {
1475
27.0k
                mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
1476
1477
27.0k
                ASSERT(size == ps_memtabs[count].size);
1478
1479
                /* divide the memory across the threads */
1480
54.1k
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1481
27.0k
                {
1482
27.0k
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1483
1484
27.0k
                    ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
1485
27.0k
                    ps_mv_hist++;
1486
27.0k
                }
1487
27.0k
            }
1488
27.0k
            else
1489
27.0k
            {
1490
27.0k
                ps_memtabs[count].size = size;
1491
27.0k
                ps_memtabs[count].align = 8;
1492
27.0k
                ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1493
27.0k
            }
1494
54.1k
            count++;
1495
54.1k
        }
1496
13.5k
        if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1497
0
        {
1498
            /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1499
            /** bring the count back to earlier value if there are no me frames in parallel. don't decrement for last loop **/
1500
0
            count -= ps_prms->max_num_ref;
1501
0
        }
1502
13.5k
    }
1503
1504
    /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
1505
27.0k
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1506
13.5k
    {
1507
13.5k
        S32 count_cpy = count;
1508
27.0k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1509
13.5k
        {
1510
13.5k
            if(mem_avail)
1511
6.76k
            {
1512
6.76k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1513
6.76k
            }
1514
1515
297k
            for(i = 0; i < 21; i++)
1516
284k
            {
1517
284k
                search_results_t *ps_search_results = NULL;
1518
284k
                if(mem_avail)
1519
142k
                {
1520
142k
                    if(i < 16)
1521
108k
                    {
1522
108k
                        ps_search_results =
1523
108k
                            &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
1524
108k
                    }
1525
33.8k
                    else if(i < 20)
1526
27.0k
                    {
1527
27.0k
                        ps_search_results =
1528
27.0k
                            &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
1529
27.0k
                        ps_search_results->ps_cu_results =
1530
27.0k
                            &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
1531
27.0k
                    }
1532
6.76k
                    else if(i == 20)
1533
6.76k
                    {
1534
6.76k
                        ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
1535
6.76k
                        ps_search_results->ps_cu_results =
1536
6.76k
                            &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
1537
6.76k
                    }
1538
0
                    else
1539
0
                    {
1540
                        /* 8x8 search results are not required in LO ME */
1541
0
                        ASSERT(0);
1542
0
                    }
1543
142k
                }
1544
284k
                count += hme_alloc_init_search_nodes(
1545
284k
                    ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
1546
284k
            }
1547
13.5k
        }
1548
1549
13.5k
        if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1550
0
        {
1551
0
            count = count_cpy;
1552
0
        }
1553
13.5k
    }
1554
1555
    /* Weighted inputs, one for each ref + one non weighted */
1556
27.0k
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1557
13.5k
    {
1558
13.5k
        size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
1559
13.5k
        if(mem_avail)
1560
6.76k
        {
1561
6.76k
            U08 *pu1_mem;
1562
6.76k
            ASSERT(ps_memtabs[count].size == size);
1563
6.76k
            pu1_mem = ps_memtabs[count].pu1_mem;
1564
1565
13.5k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1566
6.76k
            {
1567
6.76k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1568
1569
40.6k
                for(i = 0; i < ps_prms->max_num_ref + 1; i++)
1570
33.8k
                {
1571
33.8k
                    ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
1572
33.8k
                    pu1_mem += (ctb_wd * ctb_wd);
1573
33.8k
                }
1574
6.76k
            }
1575
6.76k
        }
1576
6.76k
        else
1577
6.76k
        {
1578
6.76k
            ps_memtabs[count].size = size;
1579
6.76k
            ps_memtabs[count].align = 16;
1580
6.76k
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1581
6.76k
        }
1582
13.5k
        if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
1583
13.5k
        {
1584
13.5k
            count++;
1585
13.5k
        }
1586
13.5k
    }
1587
1588
    /* if memory is allocated then initialise the frm prms ptr of each thrd */
1589
13.5k
    if(mem_avail)
1590
6.76k
    {
1591
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1592
6.76k
        {
1593
6.76k
            me_frm_ctxt_t *ps_frm_ctxt;
1594
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1595
1596
13.5k
            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1597
6.76k
            {
1598
6.76k
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1599
1600
6.76k
                ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
1601
6.76k
                ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
1602
6.76k
            }
1603
6.76k
        }
1604
6.76k
    }
1605
1606
    /* Memory allocation for use in Clustering */
1607
13.5k
    if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
1608
6.54k
    {
1609
13.0k
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1610
6.54k
        {
1611
6.54k
            size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
1612
6.54k
                   sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
1613
6.54k
            size *= ps_prms->i4_num_proc_thrds;
1614
1615
6.54k
            if(mem_avail)
1616
3.27k
            {
1617
3.27k
                U08 *pu1_mem;
1618
1619
3.27k
                ASSERT(ps_memtabs[count].size == size);
1620
3.27k
                pu1_mem = ps_memtabs[count].pu1_mem;
1621
1622
6.54k
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1623
3.27k
                {
1624
3.27k
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1625
1626
3.27k
                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
1627
3.27k
                    pu1_mem += (16 * sizeof(cluster_16x16_blk_t));
1628
1629
3.27k
                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
1630
3.27k
                    pu1_mem += (4 * sizeof(cluster_32x32_blk_t));
1631
1632
3.27k
                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
1633
3.27k
                    pu1_mem += (sizeof(cluster_64x64_blk_t));
1634
1635
3.27k
                    ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
1636
3.27k
                        (ctb_cluster_info_t *)pu1_mem;
1637
3.27k
                    pu1_mem += (sizeof(ctb_cluster_info_t));
1638
3.27k
                }
1639
3.27k
            }
1640
3.27k
            else
1641
3.27k
            {
1642
3.27k
                ps_memtabs[count].size = size;
1643
3.27k
                ps_memtabs[count].align = 16;
1644
3.27k
                ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1645
3.27k
            }
1646
1647
6.54k
            if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1648
6.54k
            {
1649
6.54k
                count++;
1650
6.54k
            }
1651
6.54k
        }
1652
6.54k
    }
1653
6.99k
    else if(mem_avail)
1654
3.49k
    {
1655
6.99k
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1656
3.49k
        {
1657
6.99k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1658
3.49k
            {
1659
3.49k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1660
1661
3.49k
                ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;
1662
1663
3.49k
                ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;
1664
1665
3.49k
                ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;
1666
1667
3.49k
                ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
1668
3.49k
            }
1669
3.49k
        }
1670
3.49k
    }
1671
1672
27.0k
    for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1673
13.5k
    {
1674
13.5k
        size = sizeof(fullpel_refine_ctxt_t);
1675
13.5k
        size *= ps_prms->i4_num_proc_thrds;
1676
1677
13.5k
        if(mem_avail)
1678
6.76k
        {
1679
6.76k
            U08 *pu1_mem;
1680
1681
6.76k
            ASSERT(ps_memtabs[count].size == size);
1682
6.76k
            pu1_mem = ps_memtabs[count].pu1_mem;
1683
1684
13.5k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1685
6.76k
            {
1686
6.76k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1687
1688
6.76k
                ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
1689
6.76k
                    (fullpel_refine_ctxt_t *)pu1_mem;
1690
6.76k
                pu1_mem += (sizeof(fullpel_refine_ctxt_t));
1691
6.76k
            }
1692
6.76k
        }
1693
6.76k
        else
1694
6.76k
        {
1695
6.76k
            ps_memtabs[count].size = size;
1696
6.76k
            ps_memtabs[count].align = 16;
1697
6.76k
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1698
6.76k
        }
1699
1700
13.5k
        if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1701
13.5k
        {
1702
13.5k
            count++;
1703
13.5k
        }
1704
13.5k
    }
1705
1706
    /* Memory for ihevce_me_optimised_function_list_t struct  */
1707
13.5k
    if(mem_avail)
1708
6.76k
    {
1709
6.76k
        ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
1710
6.76k
    }
1711
6.76k
    else
1712
6.76k
    {
1713
6.76k
        ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
1714
6.76k
        ps_memtabs[count].align = 16;
1715
6.76k
        ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1716
6.76k
    }
1717
1718
13.5k
    ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
1719
13.5k
    return (count);
1720
13.5k
}
1721
1722
/**
1723
********************************************************************************
1724
*  @fn     hme_coarse_alloc_init_mem()
1725
*
1726
*  @brief  Two-pass memtab helper for coarse ME: with mem_avail == 0 it fills in the size/align/attribute of every memtab it needs; with mem_avail != 0 it hands the supplied memtab buffers out to the master and per-thread contexts. Both passes must advance count identically
1727
*
1728
*  @param[in,out] ps_memtabs : memtab array; this function writes size/align/e_mem_attr when mem_avail == 0 and reads pu1_mem (and size, in ASSERTs) when mem_avail != 0
1729
*
1730
*  @param[in] ps_prms : init prms (thread count, layer dimensions, max refs/results, quality preset)
1731
*
1732
*  @param[in] pv_ctxt : coarse ME master ctxt (cast to coarse_me_master_ctxt_t *); only dereferenced when mem_avail != 0
1733
*
1734
*  @param[in] mem_avail : 0 = request pass (describe memtabs), non-zero = assign pass (distribute buffers)
1735
*
1736
*  @return  number of memtabs walked (final value of count)
1737
********************************************************************************
1738
*/
1739
S32 hme_coarse_alloc_init_mem(
1740
    hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
1741
13.5k
{
1742
13.5k
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
1743
13.5k
    coarse_me_ctxt_t *ps_ctxt;
1744
13.5k
    S32 count = 0, size, i, j, use_4x4, wd;
1745
13.5k
    S32 n_tot_layers;
1746
13.5k
    S32 num_layers_explicit_search;
1747
13.5k
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1748
13.5k
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1749
13.5k
    S32 num_results;
1750
13.5k
    S32 num_thrds;
1751
    //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1752
13.5k
    S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
1753
13.5k
    S32 layer1_blk_width = 8;  // 8x8 search
1754
13.5k
    S32 blk_shift;
1755
1756
    /* Per-layer MV bank base pointers, ref idx base pointers and MVs-per-row, filled in by the MV bank loop below */
1757
13.5k
    hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
1758
13.5k
    S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
1759
13.5k
    S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };
1760
1761
    /* Memtab 0: handle (the coarse_me_master_ctxt_t itself) */
1762
13.5k
    size = sizeof(coarse_me_master_ctxt_t);
1763
13.5k
    if(mem_avail)
1764
6.76k
    {
1765
        /* store the number of processing threads */
1766
6.76k
        ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1767
6.76k
    }
1768
6.76k
    else
1769
6.76k
    {
1770
6.76k
        ps_memtabs[count].size = size;
1771
6.76k
        ps_memtabs[count].align = 8;
1772
6.76k
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1773
6.76k
    }
1774
1775
13.5k
    count++;
1776
1777
    /* Memtab 1: ME threads ctxt, one coarse_me_ctxt_t per processing thread */
1778
13.5k
    size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t);
1779
13.5k
    if(mem_avail)
1780
6.76k
    {
1781
6.76k
        coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem;
1782
1783
        /* store the individual thread ctxt pointers */
1784
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1785
6.76k
        {
1786
6.76k
            ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1787
6.76k
        }
1788
6.76k
    }
1789
6.76k
    else
1790
6.76k
    {
1791
6.76k
        ps_memtabs[count].size = size;
1792
6.76k
        ps_memtabs[count].align = 8;
1793
6.76k
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1794
6.76k
    }
1795
1796
13.5k
    count++;
1797
1798
13.5k
    memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1799
13.5k
    memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1800
    /*************************************************************************/
1801
    /* Derive the number of HME layers, including both encoded and non encode*/
1802
    /* This function also derives the width and ht of each layer.            */
1803
    /*************************************************************************/
1804
13.5k
    n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
1805
1806
13.5k
    num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1807
1808
13.5k
    if(num_layers_explicit_search <= 0)
1809
0
        num_layers_explicit_search = n_tot_layers - 1;
1810
1811
13.5k
    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1812
1813
13.5k
    if(mem_avail)
1814
6.76k
    {
1815
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1816
6.76k
        {
1817
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1818
6.76k
            memset(ps_ctxt->u1_encode, 0, n_tot_layers);
1819
1820
            /* encode layer should be excluded during processing */
1821
6.76k
            ps_ctxt->num_layers = n_tot_layers;
1822
1823
6.76k
            memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
1824
6.76k
            memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
1825
1826
6.76k
            ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1827
6.76k
            ps_ctxt->max_num_results = ps_prms->max_num_results;
1828
6.76k
            ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1829
6.76k
            ps_ctxt->max_num_ref = ps_prms->max_num_ref;
1830
6.76k
        }
1831
6.76k
    }
1832
1833
    /* Memtabs : Layers MV bank for total layers - 2 (layer 0 is not visited, layer 1 gets no memtab here) */
1834
    /* for penultimate layer MV bank will be initialised at every frame level */
1835
42.2k
    for(j = 1; j < n_tot_layers; j++)
1836
28.7k
    {
1837
28.7k
        S32 is_explicit_store = 1;
1838
28.7k
        S32 wd, ht;
1839
28.7k
        U08 u1_enc = 0;
1840
28.7k
        wd = a_wd[j];
1841
28.7k
        ht = a_ht[j];
1842
1843
        /* Possibly implicit search for lower (finer) layers */
1844
28.7k
        if(n_tot_layers - j > num_layers_explicit_search)
1845
0
            is_explicit_store = 0;
1846
1847
        /* Even if explicit search, we store only 2 results (L0 and L1) */
1848
        /* in finest layer (never taken in this loop: j starts at 1) */
1849
28.7k
        if(j == 0)
1850
0
        {
1851
0
            is_explicit_store = 0;
1852
0
        }
1853
1854
        /* coarsest layer always uses 4x4 blks to store results */
1855
28.7k
        if(j == n_tot_layers - 1)
1856
13.5k
        {
1857
13.5k
            num_results = ps_prms->max_num_results_coarse;
1858
13.5k
        }
1859
15.1k
        else
1860
15.1k
        {
1861
15.1k
            num_results = ps_prms->max_num_results;
1862
15.1k
            if(j == 0)
1863
0
                num_results = 1;
1864
15.1k
        }
1865
28.7k
        use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1866
1867
        /* for penultimate layer (j == 1) only compute the MVs-per-row and store it; bank pointers stay NULL and count is not advanced */
1868
28.7k
        if(j == 1)
1869
13.5k
        {
1870
13.5k
            S32 num_blks, num_mvs_per_blk, num_ref;
1871
13.5k
            S32 num_cols, num_rows, num_mvs_per_row;
1872
1873
13.5k
            num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
1874
13.5k
            num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
1875
1876
13.5k
            if(is_explicit_store)
1877
13.5k
                num_ref = ps_prms->max_num_ref;
1878
0
            else
1879
0
                num_ref = 2;
1880
1881
13.5k
            num_blks = num_cols * num_rows;
1882
13.5k
            num_mvs_per_blk = num_ref * num_results;
1883
13.5k
            num_mvs_per_row = num_mvs_per_blk * num_cols;
1884
1885
13.5k
            ai4_num_mvs_per_row[j] = num_mvs_per_row;
1886
13.5k
            aps_mv_bank[j] = NULL;
1887
13.5k
            api1_ref_idx[j] = NULL;
1888
13.5k
        }
1889
15.1k
        else
1890
15.1k
        {
1891
15.1k
            count += hme_alloc_init_layer_mv_bank(
1892
15.1k
                &ps_memtabs[count],
1893
15.1k
                num_results,
1894
15.1k
                ps_prms->max_num_ref,
1895
15.1k
                use_4x4,
1896
15.1k
                mem_avail,
1897
15.1k
                u1_enc,
1898
15.1k
                wd,
1899
15.1k
                ht,
1900
15.1k
                is_explicit_store,
1901
15.1k
                &aps_mv_bank[j],
1902
15.1k
                &api1_ref_idx[j],
1903
15.1k
                &ai4_num_mvs_per_row[j]);
1904
15.1k
        }
1905
28.7k
    }
1906
1907
    /* Memtabs : Layers * (num-ref + 1 + NUM_BUFS_DECOMP_HME) layer descriptors */
1908
94.7k
    for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1909
81.2k
    {
1910
        /* for all layers except encode layer (layer 0) */
1911
253k
        for(j = 1; j < n_tot_layers; j++)
1912
172k
        {
1913
172k
            layer_ctxt_t *ps_layer;
1914
172k
            S32 is_explicit_store = 1;
1915
172k
            S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1916
172k
            S32 wd, ht;
1917
172k
            U08 u1_enc = 0;
1918
172k
            wd = a_wd[j];
1919
172k
            ht = a_ht[j];
1920
1921
            /* Possibly implicit search for lower (finer) layers */
1922
172k
            if(n_tot_layers - j > num_layers_explicit_search)
1923
0
                is_explicit_store = 0;
1924
1925
            /* Even if explicit search, we store only 2 results (L0 and L1) */
1926
            /* in finest layer (never taken in this loop: j starts at 1) */
1927
172k
            if(j == 0)
1928
0
            {
1929
0
                is_explicit_store = 0;
1930
0
            }
1931
1932
            /* coarsest layer always uses 4x4 blks to store results */
1933
172k
            if(j == n_tot_layers - 1)
1934
81.2k
            {
1935
81.2k
                num_results = ps_prms->max_num_results_coarse;
1936
81.2k
            }
1937
91.1k
            else
1938
91.1k
            {
1939
91.1k
                num_results = ps_prms->max_num_results;
1940
91.1k
                if(j == 0)
1941
0
                    num_results = 1;
1942
91.1k
            }
1943
172k
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1944
1945
172k
            count += hme_alloc_init_layer(
1946
172k
                &ps_memtabs[count],
1947
172k
                num_results,
1948
172k
                ps_prms->max_num_ref,
1949
172k
                use_4x4,
1950
172k
                mem_avail,
1951
172k
                u1_enc,
1952
172k
                wd,
1953
172k
                ht,
1954
172k
                a_disp_wd[j],
1955
172k
                a_disp_ht[j],
1956
172k
                segment_this_layer,
1957
172k
                is_explicit_store,
1958
172k
                &ps_layer);
1959
172k
            if(mem_avail)
1960
86.1k
            {
1961
                /* same ps_layer memory pointer is stored in all the threads */
1962
172k
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1963
86.1k
                {
1964
86.1k
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1965
86.1k
                    ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1966
86.1k
                }
1967
1968
                /* store the MV bank pointers gathered per layer by the MV bank loop above */
1969
86.1k
                ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j];
1970
86.1k
                ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j];
1971
86.1k
                ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j];
1972
86.1k
            }
1973
172k
        }
1974
81.2k
    }
1975
1976
    /* Memtabs : Prev Row search node at coarsest layer; wd below is the coarsest layer width */
1977
13.5k
    wd = a_wd[n_tot_layers - 1];
1978
1979
    /* Allocate a memtab for storing 4x4 SADs for n rows (n = number of threads + 1). One memtab per ref */
1980
13.5k
    num_rows = ps_prms->i4_num_proc_thrds + 1;
1981
13.5k
    if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
1982
8.28k
        search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
1983
5.24k
    else
1984
5.24k
        search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
1985
1986
    /* shift factor: log2 of the 4x4 block width */
1987
13.5k
    blk_shift = 2; /*4x4*/
1988
13.5k
    search_step >>= 1;
1989
1990
13.5k
    sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) *
1991
13.5k
                         ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step);
1992
13.5k
    sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size;
1993
1994
13.5k
    size = num_rows * sad_4x4_block_stride * sizeof(S16);
1995
67.6k
    for(i = 0; i < ps_prms->max_num_ref; i++)
1996
54.1k
    {
1997
54.1k
        if(mem_avail)
1998
27.0k
        {
1999
27.0k
            ASSERT(size == ps_memtabs[count].size);
2000
2001
            /* same row memory pointer is stored in all the threads */
2002
54.1k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2003
27.0k
            {
2004
27.0k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2005
27.0k
                ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem;
2006
27.0k
            }
2007
27.0k
        }
2008
27.0k
        else
2009
27.0k
        {
2010
27.0k
            ps_memtabs[count].size = size;
2011
27.0k
            ps_memtabs[count].align = 4;
2012
27.0k
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2013
27.0k
        }
2014
54.1k
        count++;
2015
54.1k
    }
2016
2017
    /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2018
13.5k
    size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2019
67.6k
    for(i = 0; i < ps_prms->max_num_ref; i++)
2020
54.1k
    {
2021
54.1k
        if(mem_avail)
2022
27.0k
        {
2023
27.0k
            ASSERT(size == ps_memtabs[count].size);
2024
2025
            /* same row memory pointer is stored in all the threads */
2026
54.1k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2027
27.0k
            {
2028
27.0k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2029
27.0k
                ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] =
2030
27.0k
                    (search_node_t *)ps_memtabs[count].pu1_mem;
2031
27.0k
            }
2032
27.0k
        }
2033
27.0k
        else
2034
27.0k
        {
2035
27.0k
            ps_memtabs[count].size = size;
2036
27.0k
            ps_memtabs[count].align = 4;
2037
27.0k
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2038
27.0k
        }
2039
54.1k
        count++;
2040
54.1k
    }
2041
    /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2042
13.5k
    size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2043
67.6k
    for(i = 0; i < ps_prms->max_num_ref; i++)
2044
54.1k
    {
2045
54.1k
        if(mem_avail)
2046
27.0k
        {
2047
27.0k
            ASSERT(size == ps_memtabs[count].size);
2048
2049
            /* same row memory pointer is stored in all the threads */
2050
54.1k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2051
27.0k
            {
2052
27.0k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2053
27.0k
                ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] =
2054
27.0k
                    (search_node_t *)ps_memtabs[count].pu1_mem;
2055
27.0k
            }
2056
27.0k
        }
2057
27.0k
        else
2058
27.0k
        {
2059
27.0k
            ps_memtabs[count].size = size;
2060
27.0k
            ps_memtabs[count].align = 4;
2061
27.0k
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2062
27.0k
        }
2063
54.1k
        count++;
2064
54.1k
    }
2065
2066
    /* Allocate a memtab for each histogram: one memtab per ref, holding one mv_hist_t per thread */
2067
67.6k
    for(i = 0; i < ps_prms->max_num_ref; i++)
2068
54.1k
    {
2069
54.1k
        size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
2070
54.1k
        if(mem_avail)
2071
27.0k
        {
2072
27.0k
            mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
2073
2074
27.0k
            ASSERT(size == ps_memtabs[count].size);
2075
2076
            /* divide the memory across the threads */
2077
54.1k
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2078
27.0k
            {
2079
27.0k
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2080
27.0k
                ps_ctxt->aps_mv_hist[i] = ps_mv_hist;
2081
27.0k
                ps_mv_hist++;
2082
27.0k
            }
2083
27.0k
        }
2084
27.0k
        else
2085
27.0k
        {
2086
27.0k
            ps_memtabs[count].size = size;
2087
27.0k
            ps_memtabs[count].align = 8;
2088
27.0k
            ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
2089
27.0k
        }
2090
54.1k
        count++;
2091
54.1k
    }
2092
2093
    /* Memtabs : Search nodes for 8x8 blks, one set per thread (ps_search_results stays NULL in the request pass) */
2094
27.0k
    for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2095
13.5k
    {
2096
13.5k
        search_results_t *ps_search_results = NULL;
2097
2098
13.5k
        if(mem_avail)
2099
6.76k
        {
2100
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2101
6.76k
        }
2102
2103
13.5k
        if(mem_avail)
2104
6.76k
        {
2105
6.76k
            ps_search_results = &ps_ctxt->s_search_results_8x8;
2106
6.76k
        }
2107
13.5k
        count += hme_alloc_init_search_nodes(
2108
13.5k
            ps_search_results,
2109
13.5k
            &ps_memtabs[count],
2110
13.5k
            mem_avail,
2111
13.5k
            ps_prms->max_num_ref,
2112
13.5k
            ps_prms->max_num_results);
2113
13.5k
    }
2114
2115
    /* Weighted inputs: (max_num_ref + 1) buffers of layer1_blk_width x layer1_blk_width bytes per thread, all in one memtab */
2116
13.5k
    size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width *
2117
13.5k
           ps_prms->i4_num_proc_thrds;
2118
13.5k
    if(mem_avail)
2119
6.76k
    {
2120
6.76k
        U08 *pu1_mem;
2121
6.76k
        ASSERT(ps_memtabs[count].size == size);
2122
6.76k
        pu1_mem = ps_memtabs[count].pu1_mem;
2123
2124
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2125
6.76k
        {
2126
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2127
2128
40.6k
            for(i = 0; i < ps_prms->max_num_ref + 1; i++)
2129
33.8k
            {
2130
33.8k
                ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
2131
33.8k
                pu1_mem += (layer1_blk_width * layer1_blk_width);
2132
33.8k
            }
2133
6.76k
        }
2134
6.76k
    }
2135
6.76k
    else
2136
6.76k
    {
2137
6.76k
        ps_memtabs[count].size = size;
2138
6.76k
        ps_memtabs[count].align = 16;
2139
6.76k
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2140
6.76k
    }
2141
13.5k
    count++;
2142
2143
    /* if memory is allocated then initialise the frm prms and ref map ptrs of each thrd to the master ctxt's copies */
2144
13.5k
    if(mem_avail)
2145
6.76k
    {
2146
13.5k
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2147
6.76k
        {
2148
6.76k
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2149
2150
6.76k
            ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms;
2151
6.76k
            ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map;
2152
6.76k
        }
2153
6.76k
    }
2154
2155
    /* Memory for ihevce_me_optimised_function_list_t struct  */
2156
13.5k
    if(mem_avail)
2157
6.76k
    {
2158
6.76k
        ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
2159
6.76k
    }
2160
6.76k
    else
2161
6.76k
    {
2162
6.76k
        ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
2163
6.76k
        ps_memtabs[count].align = 16;
2164
6.76k
        ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2165
6.76k
    }
2166
2167
    //ASSERT(count < hme_enc_num_alloc());
2168
13.5k
    ASSERT(count < hme_coarse_num_alloc());
2169
13.5k
    return (count);
2170
13.5k
}
2171
2172
/*!
2173
******************************************************************************
2174
* \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
2175
*
2176
* \brief Returns to the caller key attributes relevant for dependency manager,
2177
*        ie, the number of vertical units in each layer
2178
*
2179
* \par Description:
2180
*    This function requires the precondition that the width and ht of encode
2181
*    layer is known.
2182
*    The number of layers, number of vertical units in each layer, and for
2183
*    each vertical unit in each layer, its dependency on previous layer's units
2184
*    From ME's perspective, a vertical unit is one which is smallest min size
2185
*    vertically (and spans the entire row horizontally). This is CTB for encode
2186
*    layer, and 8x8 / 4x4 for non encode layers.
2187
*
2188
* \param[in] num_layers : Number of ME Layers
2189
* \param[in] pai4_ht    : Array storing ht at each layer
2190
* \param[in] pai4_wd    : Array storing wd at each layer
2191
* \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
2192
*                     entry has num vertical units in that particular layer
2193
*
2194
* \return
2195
*    None
2196
*
2197
* \author
2198
*  Ittiam
2199
*
2200
*****************************************************************************
2201
*/
2202
void ihevce_coarse_me_get_lyr_prms_dep_mngr(
    WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr)
{
    /* Fixed settings used by this routine: CTB size of 64, one encode layer */
    /* (layer 0) and the 4x4 flag that is forwarded to hme_get_blk_size()    */
    const WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1;
    /* Vertical unit height and vertical unit count of the layer in hand */
    WORD32 unit_ht, num_vert, i;
    /* Per layer flag : 1 for encode layers, 0 for all other layers */
    UWORD8 au1_encode[MAX_NUM_LAYERS];

    /* Layer widths do not influence the number of vertical units */
    (void)pai4_wd;

    memset(au1_encode, 0, num_layers);
    memset(au1_encode, 1, num_enc_layers);

    /* Unit height of the last (coarsest) layer : CTB size if that layer is */
    /* an encode layer, block size reported by hme_get_blk_size() otherwise */
    /* Note : the height of layer (num_layers - 2) is deliberately not read */
    /* here; it is fetched inside the loop below, which keeps the function  */
    /* free of an out of bounds read when num_layers is 1                   */
    if(au1_encode[num_layers - 1])
    {
        unit_ht = ctb_size;
    }
    else
    {
        unit_ht = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0);
    }

    /* Rounded up number of units, plus the one additional row that the */
    /* coarsest HME layer needs at the end of the frame                 */
    num_vert = (pai4_ht[num_layers - 1] + unit_ht - 1) / unit_ht;
    num_vert++;

    /*************************************************************************/
    /* Walk from the coarsest layer towards layer 1 : store the count of the */
    /* current layer, then derive the count of the next finer layer          */
    /*************************************************************************/
    for(i = num_layers - 1; i > 0; i--)
    {
        pai4_num_vert_units_in_lyr[i] = num_vert;

        /* Encode layers use the CTB as vertical unit, others use the block */
        unit_ht = hme_get_blk_size(use_4x4, i - 1, num_layers, 0);
        if(au1_encode[i - 1])
        {
            unit_ht = ctb_size;
        }

        num_vert = (pai4_ht[i - 1] + unit_ht - 1) / unit_ht;
    }

    /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
    /* set the number of vertical units */
    pai4_num_vert_units_in_lyr[0] = num_vert;
}
2268
2269
/**
2270
********************************************************************************
2271
*  @fn     hme_coarse_dep_mngr_alloc_mem()
2272
*
2273
*  @brief  Requests memory for HME Dep Mngr
2274
*
2275
* \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2276
* \param[in] ps_init_prms : Create time static parameters
2277
* \param[in] i4_mem_space : memspace in which memory request should be done
2278
*
2279
*  @return  number of memtabs
2280
********************************************************************************
2281
*/
2282
WORD32 hme_coarse_dep_mngr_alloc_mem(
2283
    iv_mem_rec_t *ps_mem_tab,
2284
    ihevce_static_cfg_params_t *ps_init_prms,
2285
    WORD32 i4_mem_space,
2286
    WORD32 i4_num_proc_thrds,
2287
    WORD32 i4_resolution_id)
2288
6.76k
{
2289
6.76k
    WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2290
6.76k
    WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2291
6.76k
    WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2292
6.76k
    WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2293
6.76k
    WORD32 min_cu_size;
2294
2295
    /* get the min cu size from config params */
2296
6.76k
    min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2297
2298
6.76k
    min_cu_size = 1 << min_cu_size;
2299
2300
    /* Get the width and heights of different decomp layers */
2301
6.76k
    *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2302
6.76k
            SET_CTB_ALIGN(
2303
6.76k
                ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2304
2305
6.76k
    *a_ht =
2306
6.76k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2307
6.76k
        SET_CTB_ALIGN(
2308
6.76k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2309
2310
6.76k
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2311
6.76k
    ASSERT(n_tot_layers >= 3);
2312
2313
    /* --- Get the number of vartical units in each layer for dep. mngr -- */
2314
6.76k
    ihevce_coarse_me_get_lyr_prms_dep_mngr(
2315
6.76k
        n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2316
2317
    /* Fill memtabs for HME layers,except for L0 layer */
2318
21.1k
    for(i = 1; i < n_tot_layers; i++)
2319
14.3k
    {
2320
14.3k
        n_dep_tabs += ihevce_dmgr_get_mem_recs(
2321
14.3k
            &ps_mem_tab[n_dep_tabs],
2322
14.3k
            DEP_MNGR_ROW_ROW_SYNC,
2323
14.3k
            ai4_num_vert_units_in_lyr[i],
2324
14.3k
            1, /* Number of Col Tiles :  Not supported in PreEnc */
2325
14.3k
            i4_num_proc_thrds,
2326
14.3k
            i4_mem_space);
2327
14.3k
    }
2328
2329
6.76k
    ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
2330
2331
6.76k
    return (n_dep_tabs);
2332
6.76k
}
2333
2334
/**
2335
********************************************************************************
2336
*  @fn     hme_coarse_dep_mngr_init()
2337
*
2338
*  @brief  Assign memory for HME Dep Mngr
2339
*
2340
* \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2341
* \param[in] ps_init_prms : Create time static parameters
2342
*  @param[in] pv_ctxt : ME ctxt
2343
* \param[in] pv_osal_handle : Osal handle
2344
*
2345
*  @return  number of memtabs
2346
********************************************************************************
2347
*/
2348
WORD32 hme_coarse_dep_mngr_init(
2349
    iv_mem_rec_t *ps_mem_tab,
2350
    ihevce_static_cfg_params_t *ps_init_prms,
2351
    void *pv_ctxt,
2352
    void *pv_osal_handle,
2353
    WORD32 i4_num_proc_thrds,
2354
    WORD32 i4_resolution_id)
2355
6.76k
{
2356
6.76k
    WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2357
6.76k
    WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2358
6.76k
    WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2359
6.76k
    WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2360
6.76k
    WORD32 min_cu_size;
2361
2362
6.76k
    coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2363
2364
    /* get the min cu size from config params */
2365
6.76k
    min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2366
2367
6.76k
    min_cu_size = 1 << min_cu_size;
2368
2369
    /* Get the width and heights of different decomp layers */
2370
6.76k
    *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2371
6.76k
            SET_CTB_ALIGN(
2372
6.76k
                ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2373
6.76k
    *a_ht =
2374
6.76k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2375
6.76k
        SET_CTB_ALIGN(
2376
6.76k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2377
2378
6.76k
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2379
6.76k
    ASSERT(n_tot_layers >= 3);
2380
2381
    /* --- Get the number of vartical units in each layer for dep. mngr -- */
2382
6.76k
    ihevce_coarse_me_get_lyr_prms_dep_mngr(
2383
6.76k
        n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2384
2385
    /* --- HME sync Dep Mngr Mem init --    */
2386
21.1k
    for(i = 1; i < n_tot_layers; i++)
2387
14.3k
    {
2388
14.3k
        WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift;
2389
2390
14.3k
        if(i == (n_tot_layers - 1)) /* coarsest layer */
2391
6.76k
            blk_size_shift = 2;
2392
7.59k
        else
2393
7.59k
            blk_size_shift = 3; /* refine layers */
2394
2395
14.3k
        GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic);
2396
2397
        /* Coarsest layer : 1 block extra, since the last block */
2398
14.3k
        if(i == (n_tot_layers - 1)) /*  in a row needs East block */
2399
6.76k
            num_blks_in_row += 1;
2400
2401
        /* Note : i-1, only for HME layers, L0 is separate */
2402
14.3k
        ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init(
2403
14.3k
            &ps_mem_tab[n_dep_tabs],
2404
14.3k
            pv_osal_handle,
2405
14.3k
            DEP_MNGR_ROW_ROW_SYNC,
2406
14.3k
            ai4_num_vert_units_in_lyr[i],
2407
14.3k
            num_blks_in_row,
2408
14.3k
            1, /* Number of Col Tiles : Not supported in PreEnc */
2409
14.3k
            i4_num_proc_thrds,
2410
14.3k
            1 /*Sem disabled*/
2411
14.3k
        );
2412
2413
14.3k
        n_dep_tabs += ihevce_dmgr_get_num_mem_recs();
2414
14.3k
    }
2415
2416
6.76k
    return n_dep_tabs;
2417
6.76k
}
2418
2419
/**
2420
********************************************************************************
2421
*  @fn     hme_coarse_dep_mngr_reg_sem()
2422
*
2423
*  @brief  Assign semaphores for HME Dep Mngr
2424
*
2425
* \param[in] pv_ctxt : pointer to Coarse ME ctxt
2426
* \param[in] ppv_sem_hdls : Array of semaphore handles
2427
* \param[in] i4_num_proc_thrds : Number of processing threads
2428
*
2429
*  @return  none
2430
********************************************************************************
2431
*/
2432
void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
{
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
    /* The layer count is taken from the context of thread 0 */
    coarse_me_ctxt_t *ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
    WORD32 sync_idx = 0;

    /* --- Register the semaphore handles with every HME sync dep. mngr --- */
    /* Entry sync_idx of the handle array belongs to layer (sync_idx + 1)   */
    while((sync_idx + 1) < ps_thrd0_ctxt->num_layers)
    {
        ihevce_dmgr_reg_sem_hdls(
            ps_master_ctxt->apv_dep_mngr_hme_sync[sync_idx], ppv_sem_hdls, i4_num_proc_thrds);
        sync_idx++;
    }
}
2447
2448
/**
2449
********************************************************************************
2450
*  @fn     hme_coarse_dep_mngr_delete()
2451
*
2452
*    Destroy Coarse ME Dep Mngr module
2453
*   Note : Only Destroys the resources allocated in the module like
2454
*   semaphore,etc. Memory free is done Separately using memtabs
2455
*
2456
* \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2457
* \param[in] ps_init_prms : Create time static parameters
2458
*
2459
*  @return  none
2460
********************************************************************************
2461
*/
2462
void hme_coarse_dep_mngr_delete(
    void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
{
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
    WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
    WORD32 num_enc_lyrs = 1, num_lyrs, sync_idx;
    WORD32 tgt_wd, tgt_ht, min_cu_size;

    /* Min CU size in pixels, from its log2 value in the config params */
    min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
    min_cu_size = 1 << min_cu_size;

    /* Target resolution, extended by the SET_CTB_ALIGN() amount; used only */
    /* to re-derive the number of layers                                    */
    tgt_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
    tgt_ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
    a_wd[0] = tgt_wd + SET_CTB_ALIGN(tgt_wd, min_cu_size);
    a_ht[0] = tgt_ht + SET_CTB_ALIGN(tgt_ht, min_cu_size);

    num_lyrs = hme_derive_num_layers(num_enc_lyrs, a_wd, a_ht, a_disp_wd, a_disp_ht);
    ASSERT(num_lyrs >= 3);

    /* --- Delete the HME sync dep. mngr of every HME layer --- */
    /* The handle array covers HME layers only (L0 is separate), so it */
    /* holds (num_lyrs - 1) entries starting at index 0                */
    for(sync_idx = 0; (sync_idx + 1) < num_lyrs; sync_idx++)
    {
        ihevce_dmgr_del(ps_master_ctxt->apv_dep_mngr_hme_sync[sync_idx]);
    }
}
2495
2496
/**
2497
*******************************************************************************
2498
*  @fn     S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2499
*
2500
*  @brief  Fills up memtabs with memory information details required by HME
2501
*
2502
*  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2503
*              up its requirements of memory
2504
*
2505
*  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2506
*                       amt of memory
2507
*
2508
*  @return   Number of memtabs required
2509
*******************************************************************************
2510
*/
2511
S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
{
    S32 num_filled, num_total, idx;

    /* Reject invalid init params before touching the memtabs */
    if(hme_validate_init_prms(ps_prms) == -1)
    {
        return (-1);
    }

    /* Memtabs with real requirements are filled first (query mode) */
    num_filled = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel);
    num_total = hme_enc_num_alloc(i4_num_me_frm_pllel);

    /* Remaining memtabs get a small 4 byte persistent request */
    idx = num_filled;
    while(idx < num_total)
    {
        hme_memtab_t *ps_tab = &ps_memtabs[idx];

        ps_tab->size = 4;
        ps_tab->align = 4;
        ps_tab->e_mem_attr = HME_PERSISTENT_MEM;
        idx++;
    }

    return (num_total);
}
2529
2530
/**
2531
*******************************************************************************
2532
*  @fn     S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2533
*
2534
*  @brief  Fills up memtabs with memory information details required by Coarse HME
2535
*
2536
*  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2537
*              up its requirements of memory
2538
*
2539
*  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2540
*                       amt of memory
2541
*
2542
*  @return   Number of memtabs required
2543
*******************************************************************************
2544
*/
2545
S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
{
    S32 num_filled, num_total, idx;

    /* Reject invalid init params before touching the memtabs */
    if(hme_validate_init_prms(ps_prms) == -1)
    {
        return (-1);
    }

    /* Memtabs with real requirements are filled first (query mode) */
    num_filled = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0);
    num_total = hme_coarse_num_alloc();

    /* Remaining memtabs get a small 4 byte persistent request */
    idx = num_filled;
    while(idx < num_total)
    {
        hme_memtab_t *ps_tab = &ps_memtabs[idx];

        ps_tab->size = 4;
        ps_tab->align = 4;
        ps_tab->e_mem_attr = HME_PERSISTENT_MEM;
        idx++;
    }

    return (num_total);
}
2563
2564
/**
2565
*******************************************************************************
2566
*  @fn hme_coarse_dep_mngr_alloc
2567
*
2568
*  @brief  Fills up memtabs with memory information details required by Coarse HME
2569
*
2570
* \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2571
* \param[in] ps_init_prms : Create time static parameters
2572
* \param[in] i4_mem_space : memspace in which memory request should be done
2573
*
2574
*  @return   Number of memtabs required
2575
*******************************************************************************
2576
*/
2577
WORD32 hme_coarse_dep_mngr_alloc(
2578
    iv_mem_rec_t *ps_mem_tab,
2579
    ihevce_static_cfg_params_t *ps_init_prms,
2580
    WORD32 i4_mem_space,
2581
    WORD32 i4_num_proc_thrds,
2582
    WORD32 i4_resolution_id)
2583
6.76k
{
2584
6.76k
    S32 num, tot, i;
2585
2586
6.76k
    num = hme_coarse_dep_mngr_alloc_mem(
2587
6.76k
        ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
2588
6.76k
    tot = hme_coarse_dep_mngr_num_alloc();
2589
57.5k
    for(i = num; i < tot; i++)
2590
50.8k
    {
2591
50.8k
        ps_mem_tab[i].i4_mem_size = 4;
2592
50.8k
        ps_mem_tab[i].i4_mem_alignment = 4;
2593
50.8k
        ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
2594
50.8k
    }
2595
6.76k
    return (tot);
2596
6.76k
}
2597
2598
/**
2599
********************************************************************************
2600
*  @fn     hme_coarse_init_ctxt()
2601
*
2602
*  @brief  initialise context memory
2603
*
2604
*  @param[in] ps_prms : init prms
2605
*
2606
*  @param[in] pv_ctxt : ME ctxt
2607
*
2608
*  @return  none
2609
********************************************************************************
2610
*/
2611
void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
{
    coarse_me_ctxt_t *ps_thrd_ctxt;
    S32 thrd_id, row, desc_id, lyr_id;
    S32 num_row_bufs;

    /* Initialise the parameters into the context of every thread */
    thrd_id = 0;
    while(thrd_id < ps_master_ctxt->i4_num_proc_thrds)
    {
        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];

        /* Keep a private copy of the init prms in the context */
        ps_thrd_ctxt->s_init_prms = *ps_prms;

        /* No picture processed yet */
        ps_thrd_ctxt->i4_prev_poc = -1;

        ps_thrd_ctxt->num_b_frms = ps_prms->num_b_frms;

        /* Point the per list lookup pointers at their backing tables */
        ps_thrd_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_thrd_ctxt->au1_ref_bits_tlu_lc[0][0];
        ps_thrd_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_thrd_ctxt->au1_ref_bits_tlu_lc[1][0];

        /* Row index lookup table : each row maps to one of the           */
        /* (num proc threads + 1) row buffers in a round robin fashion    */
        ps_thrd_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
        num_row_bufs = ps_thrd_ctxt->i4_num_row_bufs;
        for(row = 0; row < ((HEVCE_MAX_HEIGHT >> 1) >> 2); row++)
        {
            ps_thrd_ctxt->ai4_row_index[row] = (row % num_row_bufs);
        }

        thrd_id++;
    }

    /* since same layer desc pointer is stored in all the threads ctxt */
    /* layer init is done only using 0th thread ctxt                   */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* Mark every layer descriptor as unfilled (poc of -1); layer 0 is */
    /* not touched here                                                */
    for(desc_id = 0; desc_id < ps_thrd_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; desc_id++)
    {
        for(lyr_id = 1; lyr_id < ps_thrd_ctxt->num_layers; lyr_id++)
        {
            layer_ctxt_t *ps_lyr = ps_thrd_ctxt->as_ref_descr[desc_id].aps_layers[lyr_id];

            ps_lyr->i4_poc = -1;
            ps_lyr->ppu1_list_inp = &ps_thrd_ctxt->apu1_list_inp[lyr_id][0];

            /* Clear the global mvs */
            memset(
                ps_lyr->s_global_mv,
                0,
                sizeof(hme_mv_t) * ps_thrd_ctxt->max_num_ref * NUM_GMV_LOBES);
        }
    }
}
2660
2661
/**
2662
********************************************************************************
2663
*  @fn     hme_enc_init_ctxt()
2664
*
2665
*  @brief  initialise context memory
2666
*
2667
*  @param[in] ps_prms : init prms
2668
*
2669
*  @param[in] pv_ctxt : ME ctxt
2670
*
2671
*  @return  none
2672
********************************************************************************
2673
*/
2674
void hme_enc_init_ctxt(
2675
    me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt)
2676
6.76k
{
2677
6.76k
    S32 i, j, num_thrds;
2678
6.76k
    me_ctxt_t *ps_ctxt;
2679
6.76k
    me_frm_ctxt_t *ps_frm_ctxt;
2680
2681
    /* initialise the parameters in context of all threads */
2682
13.5k
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
2683
6.76k
    {
2684
6.76k
        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2685
        /* Store Tile params base into ME context */
2686
6.76k
        ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base;
2687
2688
13.5k
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
2689
6.76k
        {
2690
6.76k
            ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
2691
2692
            /* Copy the init prms to context */
2693
6.76k
            ps_ctxt->s_init_prms = *ps_prms;
2694
2695
            /* Initialize some other variables in ctxt */
2696
6.76k
            ps_frm_ctxt->i4_prev_poc = INVALID_POC;
2697
2698
6.76k
            ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size;
2699
2700
6.76k
            ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms;
2701
2702
6.76k
            ps_frm_ctxt->i4_is_prev_frame_reference = 0;
2703
2704
6.76k
            ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
2705
2706
            /* Initialize mv grids for L0 and L1 used in final refinement layer */
2707
6.76k
            {
2708
6.76k
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]);
2709
6.76k
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]);
2710
6.76k
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]);
2711
6.76k
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]);
2712
6.76k
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]);
2713
6.76k
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]);
2714
6.76k
            }
2715
2716
6.76k
            ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0];
2717
6.76k
            ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0];
2718
6.76k
        }
2719
6.76k
    }
2720
2721
    /* since same layer desc pointer is stored in all the threads ctxt */
2722
    /* layer init is done only using 0th thread ctxt                   */
2723
6.76k
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
2724
2725
6.76k
    ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0];
2726
2727
    /* Initialize all layers descriptors to have -1 = poc meaning unfilled */
2728
13.5k
    for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++)
2729
6.76k
    {
2730
        /* only enocde layer is processed */
2731
13.5k
        for(j = 0; j < 1; j++)
2732
6.76k
        {
2733
6.76k
            layer_ctxt_t *ps_layer;
2734
6.76k
            ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2735
6.76k
            ps_layer->i4_poc = INVALID_POC;
2736
6.76k
            ps_layer->i4_is_free = 1;
2737
6.76k
            ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0];
2738
6.76k
            ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0];
2739
6.76k
            ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0];
2740
6.76k
            ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0];
2741
6.76k
            ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0];
2742
6.76k
            ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0];
2743
2744
6.76k
            memset(
2745
6.76k
                ps_layer->s_global_mv,
2746
6.76k
                0,
2747
6.76k
                sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES);
2748
6.76k
        }
2749
6.76k
    }
2750
6.76k
}
2751
2752
/**
2753
*******************************************************************************
2754
*  @fn     S32 hme_enc_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms,rc_quant_t *ps_rc_quant_ctxt)
2755
*
2756
*  @brief  Initialises the Encode Layer HME ctxt
2757
*
2758
*  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2759
*              up its requirements of memory
2760
*
2761
*  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2762
*                       amt of memory
2763
*
2764
*  @return   0 on success, -1 if the init params are invalid or more memtabs are used than reserved
2765
*******************************************************************************
2766
*/
2767
S32 hme_enc_init(
2768
    void *pv_ctxt,
2769
    hme_memtab_t *ps_memtabs,
2770
    hme_init_prms_t *ps_prms,
2771
    rc_quant_t *ps_rc_quant_ctxt,
2772
    WORD32 i4_num_me_frm_pllel)
2773
6.76k
{
2774
6.76k
    S32 num, tot;
2775
6.76k
    me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt;
2776
2777
6.76k
    tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2778
    /* Validation of init params */
2779
6.76k
    if(-1 == hme_validate_init_prms(ps_prms))
2780
0
        return (-1);
2781
2782
6.76k
    num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel);
2783
6.76k
    if(num > tot)
2784
0
        return (-1);
2785
2786
    /* Initialize all enumerations based globals */
2787
    //hme_init_globals(); /* done as part of coarse me */
2788
2789
    /* Copy the memtabs into the context for returning during free */
2790
6.76k
    memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2791
2792
    /* initialize the context and related buffers */
2793
6.76k
    hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt);
2794
6.76k
    return (0);
2795
6.76k
}
2796
2797
/**
2798
*******************************************************************************
2799
*  @fn     S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2800
*
2801
*  @brief  Initialises the Coarse HME ctxt
2802
*
2803
*  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2804
*              up its requirements of memory
2805
*
2806
*  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2807
*                       amt of memory
2808
*
2809
*  @return   0 on success, -1 if the init params are invalid or more memtabs are used than reserved
2810
*******************************************************************************
2811
*/
2812
S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
{
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
    S32 num_reserved, num_used;

    /* Number of memtabs reserved for the coarse HME */
    num_reserved = hme_coarse_num_alloc();

    /* Reject invalid init params */
    if(hme_validate_init_prms(ps_prms) == -1)
    {
        return (-1);
    }

    /* Assign the allocated memory to the context (init mode) and make */
    /* sure no more memtabs were consumed than were reserved           */
    num_used = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1);
    if(num_used > num_reserved)
    {
        return (-1);
    }

    /* Initialize all enumerations based globals */
    hme_init_globals();

    /* Keep a copy of the memtabs in the context for returning during free */
    memcpy(ps_master_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * num_reserved);

    /* Initialise the context and related buffers */
    hme_coarse_init_ctxt(ps_master_ctxt, ps_prms);

    return (0);
}
2837
2838
/**
2839
*******************************************************************************
2840
*  @fn     S32 hme_set_resolution(void *pv_me_ctxt,
2841
*                                   S32 n_enc_layers,
2842
*                                   S32 *p_wd,
2843
*                                   S32 *p_ht
2844
*
2845
*  @brief  Sets up the layers based on resolution information.
2846
*
2847
*  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2848
*
2849
*  @param[in] n_enc_layers : Number of layers encoded
2850
*
2851
*  @param[in] p_wd : Pointer to an array having widths for each encode layer
2852
*
2853
*  @param[in] p_ht : Pointer to an array having heights for each encode layer
2854
*
2855
*  @return   void
2856
*******************************************************************************
2857
*/
2858
2859
void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id)
{
    /* Only the encode layer (layer 0) is processed by this function */
    const S32 enc_lyr = 0;
    me_ctxt_t *ps_thrd_ctxt;
    me_frm_ctxt_t *ps_frm_ctxt;
    S32 num_lyrs, max_explicit_lyrs, num_explicit_lyrs, desc_id;
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];

    /* Local copy of the encode layer dimensions given by the caller */
    memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
    memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));

    ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
    ps_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

    /*************************************************************************/
    /* Derive the number of HME layers, including both encoded and non encode*/
    /* This function also derives the width and ht of each layer.            */
    /*************************************************************************/
    num_lyrs = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);

    /* Explicit search layer count : a non positive request, or one beyond */
    /* (total layers - 1), falls back to (total layers - 1)                */
    max_explicit_lyrs = num_lyrs - 1;
    num_explicit_lyrs = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search;
    if((num_explicit_lyrs <= 0) || (num_explicit_lyrs > max_explicit_lyrs))
    {
        num_explicit_lyrs = max_explicit_lyrs;
    }
    ps_frm_ctxt->num_layers_explicit_search = num_explicit_lyrs;

    /* Flag the first n_enc_layers layers as encode layers, the rest as not */
    memset(ps_frm_ctxt->u1_encode, 0, num_lyrs);
    memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);

    /* only encode layer should be processed */
    ps_frm_ctxt->num_layers = num_lyrs;

    ps_frm_ctxt->i4_wd = a_wd[0];
    ps_frm_ctxt->i4_ht = a_ht[0];

    /* Set the resolution attributes of the encode layer in each of the */
    /* (max_num_ref + 1) layer descriptors                              */
    for(desc_id = 0; desc_id < ps_frm_ctxt->max_num_ref + 1; desc_id++)
    {
        U08 u1_is_enc = ps_frm_ctxt->u1_encode[enc_lyr];
        layer_ctxt_t *ps_lyr = ps_thrd_ctxt->as_ref_descr[desc_id].aps_layers[enc_lyr];

        hme_set_layer_res_attrs(
            ps_lyr,
            a_wd[enc_lyr],
            a_ht[enc_lyr],
            a_disp_wd[enc_lyr],
            a_disp_ht[enc_lyr],
            u1_is_enc);
    }
}
2909
2910
/**
2911
*******************************************************************************
2912
*  @fn     S32 hme_coarse_set_resolution(void *pv_me_ctxt,
2913
*                                   S32 n_enc_layers,
2914
*                                   S32 *p_wd,
2915
*                                   S32 *p_ht
2916
*
2917
*  @brief  Sets up the layers based on resolution information.
2918
*
2919
*  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2920
*
2921
*  @param[in] n_enc_layers : Number of layers encoded
2922
*
2923
*  @param[in] p_wd : Pointer to an array having widths for each encode layer
2924
*
2925
*  @param[in] p_ht : Pointer to an array having heights for each encode layer
2926
*
2927
*  @return   void
2928
*******************************************************************************
2929
*/
2930
2931
void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht)
2932
6.76k
{
2933
6.76k
    S32 n_tot_layers, num_layers_explicit_search, i, j;
2934
6.76k
    coarse_me_ctxt_t *ps_ctxt;
2935
6.76k
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
2936
6.76k
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
2937
6.76k
    memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
2938
6.76k
    memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
2939
2940
6.76k
    ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
2941
    /*************************************************************************/
2942
    /* Derive the number of HME layers, including both encoded and non encode*/
2943
    /* This function also derives the width and ht of each layer.            */
2944
    /*************************************************************************/
2945
6.76k
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2946
6.76k
    num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search;
2947
6.76k
    if(num_layers_explicit_search <= 0)
2948
0
        num_layers_explicit_search = n_tot_layers - 1;
2949
2950
6.76k
    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
2951
6.76k
    ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
2952
6.76k
    memset(ps_ctxt->u1_encode, 0, n_tot_layers);
2953
6.76k
    memset(ps_ctxt->u1_encode, 1, n_enc_layers);
2954
2955
    /* encode layer should be excluded */
2956
6.76k
    ps_ctxt->num_layers = n_tot_layers;
2957
2958
6.76k
    memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
2959
6.76k
    memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
2960
2961
    /* Memtabs : Layers * num-ref + 1 */
2962
47.3k
    for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
2963
40.6k
    {
2964
126k
        for(j = 1; j < n_tot_layers; j++)
2965
86.1k
        {
2966
86.1k
            S32 wd, ht;
2967
86.1k
            layer_ctxt_t *ps_layer;
2968
86.1k
            U08 u1_enc = ps_ctxt->u1_encode[j];
2969
86.1k
            wd = a_wd[j];
2970
86.1k
            ht = a_ht[j];
2971
86.1k
            ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2972
86.1k
            hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
2973
86.1k
        }
2974
40.6k
    }
2975
6.76k
}
2976
2977
/**
*******************************************************************************
*  @brief  Searches the layer descriptors of the ME context for the one whose
*          layer 0 matches both the given POC and the given IDR GOP number.
*
*  @param[in] ps_ctxt : ME context holding the layer descriptors
*
*  @param[in] i4_poc : POC to look for
*
*  @param[in] i4_idr_gop_num : IDR GOP number to look for
*
*  @param[in] i4_num_me_frm_pllel : Multiplier applied to max_num_ref when
*                                   computing how many descriptors to scan
*
*  @return   Index of the matching descriptor; -1 (after ASSERT) if none
*******************************************************************************
*/
S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    S32 descr_id = 0;

    while(descr_id < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
    {
        layer_ctxt_t *ps_layer0 = ps_ctxt->as_ref_descr[descr_id].aps_layers[0];

        if((ps_layer0->i4_poc == i4_poc) && (ps_layer0->i4_idr_gop_num == i4_idr_gop_num))
        {
            return descr_id;
        }

        descr_id++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
2991
2992
/**
*******************************************************************************
*  @brief  Searches the layer descriptors of the coarse ME context for the one
*          whose layer 1 carries the given POC.
*
*  @param[in] ps_ctxt : Coarse ME context holding the layer descriptors
*
*  @param[in] i4_poc : POC to look for
*
*  @return   Index of the matching descriptor; -1 (after ASSERT) if none
*******************************************************************************
*/
S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
{
    S32 descr_id = 0;

    while(descr_id < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME)
    {
        layer_ctxt_t *ps_layer1 = ps_ctxt->as_ref_descr[descr_id].aps_layers[1];

        if(ps_layer1->i4_poc == i4_poc)
        {
            return descr_id;
        }

        descr_id++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3005
3006
/**
*******************************************************************************
*  @brief  Finds the first layer descriptor whose layer 0 is flagged free
*          (i4_is_free == 1), clears that flag to claim it and returns its
*          index.
*
*  @param[in, out] ps_ctxt : ME context holding the layer descriptors
*
*  @param[in] i4_num_me_frm_pllel : Multiplier applied to max_num_ref when
*                                   computing how many descriptors to scan
*
*  @return   Index of the claimed descriptor; -1 (after ASSERT) if none is free
*******************************************************************************
*/
S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel)
{
    S32 descr_id = 0;

    while(descr_id < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
    {
        layer_ctxt_t *ps_layer0 = ps_ctxt->as_ref_descr[descr_id].aps_layers[0];

        if(1 == ps_layer0->i4_is_free)
        {
            /* Claim the descriptor before handing it out */
            ps_layer0->i4_is_free = 0;
            return descr_id;
        }

        descr_id++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3022
3023
S32 hme_coarse_find_free_descr_idx(void *pv_ctxt)
3024
95.3k
{
3025
95.3k
    S32 i;
3026
3027
95.3k
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt;
3028
3029
268k
    for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
3030
268k
    {
3031
268k
        if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1)
3032
95.3k
            return i;
3033
268k
    }
3034
    /* Should not come here */
3035
0
    ASSERT(0);
3036
0
    return (-1);
3037
0
}
3038
3039
/**
*******************************************************************************
*  @brief  Releases the layer descriptors that hold the given POCs so that
*          they can be reused for a fresh picture. A descriptor is released by
*          setting i4_is_free = 1 in its layer 0.
*
*  @param[in, out] pv_me_ctxt : ME handle (me_ctxt_t)
*
*  @param[in] p_pocs_to_remove : List of POCs to release, terminated by
*                                INVALID_POC
*
*  @param[in] i4_idr_gop_num : IDR GOP number the POCs belong to
*
*  @param[in] i4_num_me_frm_pllel : Forwarded to hme_find_descr_idx()
*
*  @return   void
*******************************************************************************
*/
void hme_discard_frm(
    void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
    S32 count = 0;

    /* Search for the id of the layer descriptor that has this poc */
    while(p_pocs_to_remove[count] != INVALID_POC)
    {
        S32 idx;

        /* The list is expected to carry a single POC */
        ASSERT(count == 0);

        idx = hme_find_descr_idx(
            ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel);

        /*********************************************************************/
        /* hme_find_descr_idx() reports "not found" as -1. Using that value  */
        /* as an array index would access memory before as_ref_descr, so the */
        /* entry is skipped instead.                                         */
        /*********************************************************************/
        if(idx >= 0)
        {
            /* Setting i4_is_free = 1 in layer 0 invalidates this layer ctxt */
            /* Now this can be used for a fresh picture.                     */
            ps_ctxt->as_ref_descr[idx].aps_layers[0]->i4_is_free = 1;
        }

        count++;
    }
}
3064
3065
/**
*******************************************************************************
*  @brief  Releases the coarse ME layer descriptors that hold the given POCs.
*          A descriptor is released by setting POC = -1 in layers
*          1 .. num_layers - 1.
*
*  @param[in, out] pv_me_ctxt : Coarse ME handle (coarse_me_ctxt_t)
*
*  @param[in] p_pocs_to_remove : List of POCs to release, terminated by -1
*
*  @return   void
*******************************************************************************
*/
void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 count = 0;

    /* Search for the id of the layer descriptor that has this poc */
    while(p_pocs_to_remove[count] != -1)
    {
        S32 idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[count]);

        /*********************************************************************/
        /* hme_coarse_find_descr_idx() reports "not found" as -1. Using that */
        /* value as an array index would access memory before as_ref_descr,  */
        /* so the entry is skipped instead.                                  */
        /*********************************************************************/
        if(idx >= 0)
        {
            layers_descr_t *ps_descr = &ps_ctxt->as_ref_descr[idx];
            S32 i;

            /*****************************************************************/
            /* Setting poc = -1 in all layers invalidates this layer ctxt    */
            /* Now this can be used for a fresh picture.                     */
            /*****************************************************************/
            for(i = 1; i < ps_ctxt->num_layers; i++)
            {
                ps_descr->aps_layers[i]->i4_poc = -1;
            }
        }

        count++;
    }
}
3087
3088
void hme_update_layer_desc(
3089
    layers_descr_t *ps_layers_desc,
3090
    hme_ref_desc_t *ps_ref_desc,
3091
    S32 start_lyr_id,
3092
    S32 num_layers,
3093
    layers_descr_t *ps_curr_desc)
3094
294k
{
3095
294k
    layer_ctxt_t *ps_layer_ctxt, *ps_curr_layer;
3096
294k
    S32 i;
3097
590k
    for(i = start_lyr_id; i < num_layers; i++)
3098
295k
    {
3099
295k
        ps_layer_ctxt = ps_layers_desc->aps_layers[i];
3100
295k
        ps_curr_layer = ps_curr_desc->aps_layers[i];
3101
3102
295k
        ps_layer_ctxt->i4_poc = ps_ref_desc->i4_poc;
3103
295k
        ps_layer_ctxt->i4_idr_gop_num = ps_ref_desc->i4_GOP_num;
3104
3105
        /* Copy the recon planes for the given reference pic at given layer */
3106
295k
        ps_layer_ctxt->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i].pu1_rec_fxfy;
3107
295k
        ps_layer_ctxt->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i].pu1_rec_hxfy;
3108
295k
        ps_layer_ctxt->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i].pu1_rec_fxhy;
3109
295k
        ps_layer_ctxt->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i].pu1_rec_hxhy;
3110
3111
        /*********************************************************************/
3112
        /* reconstruction strides, offsets and padding info are copied for   */
3113
        /* this reference pic. It is assumed that these will be same across  */
3114
        /* pics, so even the current pic has this info updated, though the   */
3115
        /* current pic still does not have valid recon pointers.             */
3116
        /*********************************************************************/
3117
295k
        ps_layer_ctxt->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride;
3118
295k
        ps_layer_ctxt->i4_rec_offset = ps_ref_desc->as_ref_info[i].luma_offset;
3119
295k
        ps_layer_ctxt->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x;
3120
295k
        ps_layer_ctxt->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y;
3121
3122
295k
        ps_curr_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride;
3123
295k
        ps_curr_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x;
3124
295k
        ps_curr_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y;
3125
295k
    }
3126
294k
}
3127
3128
/**
*******************************************************************************
*  @brief  Registers a new input picture with ME: claims a free layer
*          descriptor, points every thread's frame context at it, records the
*          previous / current POC and fills in the layer 0 attributes of the
*          descriptor from the input description.
*
*  @param[in, out] pv_me_ctxt : ME master handle (me_master_ctxt_t)
*
*  @param[in] ps_inp_desc : Description of the input picture (POC, IDR GOP
*                           number, reference flag, per layer buffers)
*
*  @param[in] me_frm_id : Index of the ME frame context to update
*
*  @param[in] i4_thrd_id : Thread context used to obtain the free descriptor
*                          and the frame context read for u1_encode / i4_wd
*
*  @return   void
*******************************************************************************
*/
void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
    me_ctxt_t *ps_thrd_ctxt;
    me_frm_ctxt_t *ps_ctxt;

    hme_inp_buf_attr_t *ps_attr;
    S32 idx, i, i4_prev_poc;
    S32 num_thrds, prev_me_frm_id;

    /*************************************************************************/
    /* Picture level attributes do not depend on the thread, so they are read*/
    /* once up front. This also keeps them well defined for the layer setup  */
    /* below even if the per thread loop does not execute.                   */
    /*************************************************************************/
    S32 i4_poc = ps_inp_desc->i4_poc;
    S32 i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num;
    S32 i4_is_reference = ps_inp_desc->i4_is_reference;

    /* since same layer desc pointer is stored in all thread ctxt */
    /* a free idx is obtained using the given thread ctxt pointer */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];

    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

    /* Deriving the previous poc from previous frames context */
    prev_me_frm_id = (me_frm_id == 0) ? (MAX_NUM_ME_PARALLEL - 1) : (me_frm_id - 1);

    i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc;

    /* Obtain an empty layer descriptor */
    idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
    if(idx < 0)
    {
        /* No free descriptor was reported (-1); indexing as_ref_descr with */
        /* it would access memory outside the array, so nothing is updated. */
        return;
    }
    ps_desc = &ps_thrd_ctxt->as_ref_descr[idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        me_ctxt_t *ps_tmp_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        me_frm_ctxt_t *ps_tmp_frm_ctxt = ps_tmp_thrd_ctxt->aps_me_frm_prms[me_frm_id];

        ps_tmp_frm_ctxt->ps_curr_descr = &ps_tmp_thrd_ctxt->as_ref_descr[idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc;
        ps_tmp_frm_ctxt->i4_curr_poc = i4_poc;
    }

    /* only encode layer */
    for(i = 0; i < 1; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num;
        ps_layer_ctxt->i4_is_reference = i4_is_reference;
        ps_layer_ctxt->i4_non_ref_free = 0;

        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.*/
            S32 wd, dst_stride;

            ASSERT(i != 0);

            wd = ps_ctxt->i4_wd;

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }
}
3219
3220
/**
*******************************************************************************
*  @brief  Registers a new input picture with coarse ME: points every thread
*          context at the descriptor i4_curr_idx, rolls the previous / current
*          POC and fills in the attributes of layers 1 .. num_layers - 1 of
*          that descriptor.
*
*  @param[in, out] pv_me_ctxt : Coarse ME master handle
*                               (coarse_me_master_ctxt_t)
*
*  @param[in] ps_inp_desc : Description of the input picture (POC and per
*                           layer buffers)
*
*  @param[in] i4_curr_idx : Index of the layer descriptor to use for this
*                           picture
*
*  @return   void
*******************************************************************************
*/
void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx)
{
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    coarse_me_ctxt_t *ps_ctxt;
    layers_descr_t *ps_desc;
    S32 i, num_thrds;

    /*************************************************************************/
    /* The POC does not depend on the thread, so it is read once up front.   */
    /* This also keeps it well defined for the layer setup below even if the */
    /* per thread loop does not execute.                                     */
    /*************************************************************************/
    S32 i4_poc = ps_inp_desc->i4_poc;

    /* since same layer desc pointer is stored in all thread ctxt */
    /* the descriptor is obtained using 0th thread ctxt pointer   */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
    ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        coarse_me_ctxt_t *ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

        ps_thrd_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[i4_curr_idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_thrd_ctxt->i4_prev_poc = ps_thrd_ctxt->i4_curr_poc;
        ps_thrd_ctxt->i4_curr_poc = i4_poc;
    }

    /* since same layer desc pointer is stored in all thread ctxt */
    /* following processing is done using 0th thread ctxt pointer */

    /* only non encode layer */
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        layer_ctxt_t *ps_layer_ctxt = ps_desc->aps_layers[i];

        ps_layer_ctxt->i4_poc = i4_poc;

        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            hme_inp_buf_attr_t *ps_attr = &ps_inp_desc->s_layer_desc[i];

            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.           */
            /* decomp of lower layers happens on a separate pass  */
            /* Coarse Me should export the pointers to the caller */
            S32 wd, dst_stride;

            ASSERT(i != 0);

            /* stride is derived from the width of the layer above (i - 1) */
            wd = ps_ctxt->a_wd[i - 1];

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }
}
3284
3285
/**
*******************************************************************************
*  @brief  Decides how many results are kept per partition for a layer.
*          Returns 1 when the layer specific RESTRICT_NUM_PARTITION_LEVEL_*
*          switch is enabled and the quality preset is in that layer's
*          restricted set; MAX_RESULTS_PER_PART otherwise.
*
*  @param[in] u1_layer_id : Id of the layer (0, 1 and 2 have special rules)
*
*  @param[in] u1_num_layers : Total number of layers; the layer 2 rule only
*                             applies when this is greater than 3
*
*  @param[in] e_quality_preset : ME quality preset
*
*  @return   Number of results per partition
*******************************************************************************
*/
static __inline U08 hme_determine_num_results_per_part(
    U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset)
{
    S32 i4_single_result = 0;

    if((0 == u1_layer_id) && !!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1)
    {
        /* Layer 0 : every known preset is restricted to one result */
        i4_single_result =
            (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
            (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset) ||
            (ME_HIGH_QUALITY == e_quality_preset) || (ME_PRISTINE_QUALITY == e_quality_preset);
    }
    else if((1 == u1_layer_id) && !!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1)
    {
        /* Layer 1 : XS25, HQ and PQ are restricted to one result */
        i4_single_result = (ME_XTREME_SPEED_25 == e_quality_preset) ||
                           (ME_HIGH_QUALITY == e_quality_preset) ||
                           (ME_PRISTINE_QUALITY == e_quality_preset);
    }
    else if(
        (2 == u1_layer_id) && (u1_num_layers > 3) &&
        !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1)
    {
        /* Layer 2 : only the speed presets are restricted to one result */
        i4_single_result =
            (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
            (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset);
    }

    return (U08)(i4_single_result ? 1 : MAX_RESULTS_PER_PART);
}
3357
3358
static __inline void hme_max_search_cands_per_search_cand_loc_populator(
3359
    hme_frm_prms_t *ps_frm_prms,
3360
    U08 *pu1_num_fpel_search_cands,
3361
    U08 u1_layer_id,
3362
    ME_QUALITY_PRESETS_T e_quality_preset)
3363
288k
{
3364
288k
    if(0 == u1_layer_id)
3365
127k
    {
3366
127k
        S32 i;
3367
3368
1.65M
        for(i = 0; i < NUM_SEARCH_CAND_LOCATIONS; i++)
3369
1.52M
        {
3370
1.52M
            switch(e_quality_preset)
3371
1.52M
            {
3372
0
#if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC
3373
433k
            case ME_XTREME_SPEED_25:
3374
637k
            case ME_XTREME_SPEED:
3375
725k
            case ME_HIGH_SPEED:
3376
974k
            case ME_MEDIUM_SPEED:
3377
974k
            {
3378
974k
                pu1_num_fpel_search_cands[i] = 1;
3379
3380
974k
                break;
3381
725k
            }
3382
0
#endif
3383
551k
            default:
3384
551k
            {
3385
551k
                pu1_num_fpel_search_cands[i] =
3386
551k
                    MAX(2,
3387
551k
                        MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) *
3388
551k
                            ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)i) + 1));
3389
3390
551k
                break;
3391
725k
            }
3392
1.52M
            }
3393
1.52M
        }
3394
127k
    }
3395
288k
}
3396
3397
static __inline U08
3398
    hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3399
288k
{
3400
288k
    U08 u1_num_cands = 2;
3401
3402
288k
    if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS)
3403
127k
    {
3404
127k
        switch(e_quality_preset)
3405
127k
        {
3406
36.1k
        case ME_XTREME_SPEED_25:
3407
53.0k
        case ME_XTREME_SPEED:
3408
60.4k
        case ME_HIGH_SPEED:
3409
81.2k
        case ME_MEDIUM_SPEED:
3410
81.2k
        {
3411
81.2k
            u1_num_cands = 1;
3412
3413
81.2k
            break;
3414
60.4k
        }
3415
45.9k
        default:
3416
45.9k
        {
3417
45.9k
            u1_num_cands = 2;
3418
3419
45.9k
            break;
3420
60.4k
        }
3421
127k
        }
3422
127k
    }
3423
3424
288k
    return u1_num_cands;
3425
288k
}
3426
3427
static __inline U08
3428
    hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3429
288k
{
3430
288k
    U08 i;
3431
3432
288k
    U08 u1_num_centers = 0;
3433
3434
288k
    if(0 == u1_layer_id)
3435
127k
    {
3436
127k
        switch(e_quality_preset)
3437
127k
        {
3438
36.1k
        case ME_XTREME_SPEED_25:
3439
36.1k
        {
3440
650k
            for(i = 0; i < TOT_NUM_PARTS; i++)
3441
614k
            {
3442
614k
                u1_num_centers += gau1_num_best_results_XS25[i];
3443
614k
            }
3444
3445
36.1k
            break;
3446
0
        }
3447
16.9k
        case ME_XTREME_SPEED:
3448
16.9k
        {
3449
304k
            for(i = 0; i < TOT_NUM_PARTS; i++)
3450
287k
            {
3451
287k
                u1_num_centers += gau1_num_best_results_XS[i];
3452
287k
            }
3453
3454
16.9k
            break;
3455
0
        }
3456
7.39k
        case ME_HIGH_SPEED:
3457
7.39k
        {
3458
133k
            for(i = 0; i < TOT_NUM_PARTS; i++)
3459
125k
            {
3460
125k
                u1_num_centers += gau1_num_best_results_HS[i];
3461
125k
            }
3462
3463
7.39k
            break;
3464
0
        }
3465
20.7k
        case ME_MEDIUM_SPEED:
3466
20.7k
        {
3467
372k
            for(i = 0; i < TOT_NUM_PARTS; i++)
3468
352k
            {
3469
352k
                u1_num_centers += gau1_num_best_results_MS[i];
3470
352k
            }
3471
3472
20.7k
            break;
3473
0
        }
3474
9.02k
        case ME_HIGH_QUALITY:
3475
9.02k
        {
3476
162k
            for(i = 0; i < TOT_NUM_PARTS; i++)
3477
153k
            {
3478
153k
                u1_num_centers += gau1_num_best_results_HQ[i];
3479
153k
            }
3480
3481
9.02k
            break;
3482
0
        }
3483
36.9k
        case ME_PRISTINE_QUALITY:
3484
36.9k
        {
3485
664k
            for(i = 0; i < TOT_NUM_PARTS; i++)
3486
627k
            {
3487
627k
                u1_num_centers += gau1_num_best_results_PQ[i];
3488
627k
            }
3489
3490
36.9k
            break;
3491
0
        }
3492
127k
        }
3493
127k
    }
3494
3495
288k
    return u1_num_centers;
3496
288k
}
3497
3498
/**
*******************************************************************************
*  @brief  Returns the maximum number of sub pel refinement centers. For
*          layer 0 this is (2Nx2N candidates + 4 * NxN candidates), truncated
*          to U08; for every other layer it is 0.
*
*  @param[in] u1_layer_id : Id of the layer
*
*  @param[in] u1_max_2Nx2N_subpel_cands : Max sub pel candidates for 2Nx2N
*
*  @param[in] u1_max_NxN_subpel_cands : Max sub pel candidates per NxN part
*
*  @return   Number of sub pel refinement centers
*******************************************************************************
*/
static __inline U08 hme_determine_max_num_subpel_refine_centers(
    U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands)
{
    if(0 != u1_layer_id)
    {
        return 0;
    }

    return (U08)(u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands);
}
3510
3511
void hme_set_refine_prms(
3512
    void *pv_refine_prms,
3513
    U08 u1_encode,
3514
    S32 num_ref,
3515
    S32 layer_id,
3516
    S32 num_layers,
3517
    S32 num_layers_explicit_search,
3518
    S32 use_4x4,
3519
    hme_frm_prms_t *ps_frm_prms,
3520
    double **ppd_intra_costs,
3521
    me_coding_params_t *ps_me_coding_tools)
3522
288k
{
3523
288k
    refine_prms_t *ps_refine_prms = (refine_prms_t *)pv_refine_prms;
3524
3525
288k
    ps_refine_prms->i4_encode = u1_encode;
3526
288k
    ps_refine_prms->bidir_enabled = ps_frm_prms->bidir_enabled;
3527
288k
    ps_refine_prms->i4_layer_id = layer_id;
3528
    /*************************************************************************/
3529
    /* Refinement layers have two lambdas, one for closed loop, another for  */
3530
    /* open loop. Non encode layers use only open loop lambda.               */
3531
    /*************************************************************************/
3532
288k
    ps_refine_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf;
3533
288k
    ps_refine_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf;
3534
288k
    ps_refine_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift;
3535
288k
    ps_refine_prms->lambda_inp =
3536
288k
        ((float)ps_refine_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
3537
288k
    ps_refine_prms->lambda_recon =
3538
288k
        ((float)ps_refine_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
3539
3540
288k
    if((u1_encode) && (NULL != ppd_intra_costs))
3541
63.5k
    {
3542
63.5k
        ps_refine_prms->pd_intra_costs = ppd_intra_costs[layer_id];
3543
63.5k
    }
3544
3545
    /* Explicit or implicit depends on number of layers having eplicit search */
3546
288k
    if((layer_id == 0) || (num_layers - layer_id > num_layers_explicit_search))
3547
127k
    {
3548
127k
        ps_refine_prms->explicit_ref = 0;
3549
127k
        ps_refine_prms->i4_num_ref_fpel = MIN(2, num_ref);
3550
127k
    }
3551
160k
    else
3552
160k
    {
3553
160k
        ps_refine_prms->explicit_ref = 1;
3554
160k
        ps_refine_prms->i4_num_ref_fpel = num_ref;
3555
160k
    }
3556
3557
288k
    ps_refine_prms->e_search_complexity = SEARCH_CX_HIGH;
3558
3559
288k
    ps_refine_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine;
3560
288k
    ps_refine_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine;
3561
3562
288k
    if(u1_encode)
3563
127k
    {
3564
127k
        ps_refine_prms->i4_num_mvbank_results = 1;
3565
127k
        ps_refine_prms->i4_use_rec_in_fpel = 1;
3566
127k
        ps_refine_prms->i4_num_steps_fpel_refine = 1;
3567
3568
127k
        if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
3569
36.9k
        {
3570
36.9k
            ps_refine_prms->i4_num_fpel_results = 4;
3571
36.9k
            ps_refine_prms->i4_num_32x32_merge_results = 4;
3572
36.9k
            ps_refine_prms->i4_num_64x64_merge_results = 4;
3573
36.9k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
3574
36.9k
            ps_refine_prms->i4_use_satd_subpel = 1;
3575
36.9k
            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3576
36.9k
            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3577
36.9k
            ps_refine_prms->u1_subpel_candt_threshold = 1;
3578
36.9k
            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3579
36.9k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
3580
36.9k
            ps_refine_prms->limit_active_partitions = 0;
3581
36.9k
        }
3582
90.2k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
3583
9.02k
        {
3584
9.02k
            ps_refine_prms->i4_num_fpel_results = 4;
3585
9.02k
            ps_refine_prms->i4_num_32x32_merge_results = 4;
3586
9.02k
            ps_refine_prms->i4_num_64x64_merge_results = 4;
3587
9.02k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
3588
9.02k
            ps_refine_prms->i4_use_satd_subpel = 1;
3589
9.02k
            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3590
9.02k
            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3591
9.02k
            ps_refine_prms->u1_subpel_candt_threshold = 2;
3592
9.02k
            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3593
9.02k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
3594
9.02k
            ps_refine_prms->limit_active_partitions = 0;
3595
9.02k
        }
3596
81.2k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
3597
20.7k
        {
3598
20.7k
            ps_refine_prms->i4_num_fpel_results = 1;
3599
20.7k
            ps_refine_prms->i4_num_32x32_merge_results = 2;
3600
20.7k
            ps_refine_prms->i4_num_64x64_merge_results = 2;
3601
20.7k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3602
20.7k
            ps_refine_prms->i4_use_satd_subpel = 1;
3603
20.7k
            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3604
20.7k
            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3605
20.7k
            ps_refine_prms->u1_subpel_candt_threshold = 3;
3606
20.7k
            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3607
20.7k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
3608
20.7k
            ps_refine_prms->limit_active_partitions = 1;
3609
20.7k
        }
3610
60.4k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
3611
7.39k
        {
3612
7.39k
            ps_refine_prms->i4_num_fpel_results = 1;
3613
7.39k
            ps_refine_prms->i4_num_32x32_merge_results = 2;
3614
7.39k
            ps_refine_prms->i4_num_64x64_merge_results = 2;
3615
7.39k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3616
7.39k
            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3617
7.39k
            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3618
7.39k
            ps_refine_prms->i4_use_satd_subpel = 0;
3619
7.39k
            ps_refine_prms->u1_subpel_candt_threshold = 0;
3620
7.39k
            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3621
7.39k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
3622
7.39k
            ps_refine_prms->limit_active_partitions = 1;
3623
7.39k
        }
3624
53.0k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
3625
16.9k
        {
3626
16.9k
            ps_refine_prms->i4_num_fpel_results = 1;
3627
16.9k
            ps_refine_prms->i4_num_32x32_merge_results = 2;
3628
16.9k
            ps_refine_prms->i4_num_64x64_merge_results = 2;
3629
16.9k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3630
16.9k
            ps_refine_prms->i4_use_satd_subpel = 0;
3631
16.9k
            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3632
16.9k
            ps_refine_prms->u1_max_subpel_candts_NxN = 0;
3633
16.9k
            ps_refine_prms->u1_subpel_candt_threshold = 0;
3634
16.9k
            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3635
16.9k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
3636
16.9k
            ps_refine_prms->limit_active_partitions = 1;
3637
16.9k
        }
3638
36.1k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
3639
36.1k
        {
3640
36.1k
            ps_refine_prms->i4_num_fpel_results = 1;
3641
36.1k
            ps_refine_prms->i4_num_32x32_merge_results = 2;
3642
36.1k
            ps_refine_prms->i4_num_64x64_merge_results = 2;
3643
36.1k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3644
36.1k
            ps_refine_prms->i4_use_satd_subpel = 0;
3645
36.1k
            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3646
36.1k
            ps_refine_prms->u1_max_subpel_candts_NxN = 0;
3647
36.1k
            ps_refine_prms->u1_subpel_candt_threshold = 0;
3648
36.1k
            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3649
36.1k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
3650
36.1k
            ps_refine_prms->limit_active_partitions = 1;
3651
36.1k
        }
3652
127k
    }
3653
160k
    else
3654
160k
    {
3655
160k
        ps_refine_prms->i4_num_fpel_results = 2;
3656
160k
        ps_refine_prms->i4_use_rec_in_fpel = 0;
3657
160k
        ps_refine_prms->i4_num_steps_fpel_refine = 1;
3658
160k
        ps_refine_prms->i4_num_steps_hpel_refine = 0;
3659
160k
        ps_refine_prms->i4_num_steps_qpel_refine = 0;
3660
3661
160k
        if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
3662
10.0k
        {
3663
10.0k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3664
10.0k
            ps_refine_prms->i4_use_satd_subpel = 1;
3665
10.0k
            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3666
10.0k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
3667
10.0k
        }
3668
150k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
3669
19.6k
        {
3670
19.6k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3671
19.6k
            ps_refine_prms->i4_use_satd_subpel = 0;
3672
19.6k
            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3673
19.6k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
3674
19.6k
        }
3675
131k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
3676
49.7k
        {
3677
49.7k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3678
49.7k
            ps_refine_prms->i4_use_satd_subpel = 0;
3679
49.7k
            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3680
49.7k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
3681
49.7k
        }
3682
81.3k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
3683
44.4k
        {
3684
44.4k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
3685
44.4k
            ps_refine_prms->i4_use_satd_subpel = 1;
3686
44.4k
            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3687
44.4k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
3688
44.4k
        }
3689
36.9k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
3690
12.4k
        {
3691
12.4k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
3692
12.4k
            ps_refine_prms->i4_use_satd_subpel = 1;
3693
12.4k
            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3694
12.4k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
3695
12.4k
        }
3696
24.4k
        else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
3697
24.4k
        {
3698
24.4k
            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3699
24.4k
            ps_refine_prms->i4_use_satd_subpel = 1;
3700
24.4k
            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3701
24.4k
            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
3702
24.4k
        }
3703
3704
        /* Following fields unused in the non-encode layers */
3705
        /* But setting the same to default values           */
3706
160k
        ps_refine_prms->i4_num_32x32_merge_results = 4;
3707
160k
        ps_refine_prms->i4_num_64x64_merge_results = 4;
3708
3709
160k
        if(!ps_frm_prms->bidir_enabled)
3710
140k
        {
3711
140k
            ps_refine_prms->limit_active_partitions = 0;
3712
140k
        }
3713
20.2k
        else
3714
20.2k
        {
3715
20.2k
            ps_refine_prms->limit_active_partitions = 1;
3716
20.2k
        }
3717
160k
    }
3718
3719
288k
    ps_refine_prms->i4_enable_4x4_part =
3720
288k
        hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode);
3721
3722
288k
    if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line)
3723
288k
    {
3724
288k
        ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
3725
288k
            layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
3726
3727
288k
        hme_max_search_cands_per_search_cand_loc_populator(
3728
288k
            ps_frm_prms,
3729
288k
            ps_refine_prms->au1_num_fpel_search_cands,
3730
288k
            layer_id,
3731
288k
            ps_me_coding_tools->e_me_quality_presets);
3732
3733
288k
        ps_refine_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands(
3734
288k
            layer_id, ps_me_coding_tools->e_me_quality_presets);
3735
3736
288k
        ps_refine_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers(
3737
288k
            layer_id, ps_me_coding_tools->e_me_quality_presets);
3738
3739
288k
        ps_refine_prms->u1_max_num_subpel_refine_centers =
3740
288k
            hme_determine_max_num_subpel_refine_centers(
3741
288k
                layer_id,
3742
288k
                ps_refine_prms->u1_max_subpel_candts_2Nx2N,
3743
288k
                ps_refine_prms->u1_max_subpel_candts_NxN);
3744
288k
    }
3745
0
    else
3746
0
    {
3747
0
        if(0 == layer_id)
3748
0
        {
3749
0
            ps_refine_prms->i4_num_results_per_part =
3750
0
                ps_me_coding_tools->u1_num_results_per_part_in_l0me;
3751
0
        }
3752
0
        else if(1 == layer_id)
3753
0
        {
3754
0
            ps_refine_prms->i4_num_results_per_part =
3755
0
                ps_me_coding_tools->u1_num_results_per_part_in_l1me;
3756
0
        }
3757
0
        else if((2 == layer_id) && (num_layers > 3))
3758
0
        {
3759
0
            ps_refine_prms->i4_num_results_per_part =
3760
0
                ps_me_coding_tools->u1_num_results_per_part_in_l2me;
3761
0
        }
3762
0
        else
3763
0
        {
3764
0
            ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
3765
0
                layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
3766
0
        }
3767
3768
0
        memset(
3769
0
            ps_refine_prms->au1_num_fpel_search_cands,
3770
0
            ps_me_coding_tools->u1_max_num_coloc_cands,
3771
0
            sizeof(ps_refine_prms->au1_num_fpel_search_cands));
3772
3773
0
        ps_refine_prms->u1_max_2nx2n_tu_recur_cands =
3774
0
            ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands;
3775
3776
0
        ps_refine_prms->u1_max_num_fpel_refine_centers =
3777
0
            ps_me_coding_tools->u1_max_num_fpel_refine_centers;
3778
3779
0
        ps_refine_prms->u1_max_num_subpel_refine_centers =
3780
0
            ps_me_coding_tools->u1_max_num_subpel_refine_centers;
3781
0
    }
3782
3783
288k
    if(layer_id != 0)
3784
160k
    {
3785
160k
        ps_refine_prms->i4_num_mvbank_results = ps_refine_prms->i4_num_results_per_part;
3786
160k
    }
3787
3788
    /* 4 * lambda */
3789
288k
    ps_refine_prms->sdi_threshold =
3790
288k
        (ps_refine_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >>
3791
288k
        (ps_frm_prms->lambda_q_shift - 2);
3792
3793
288k
    ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb =
3794
288k
        MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled;
3795
288k
}
3796
3797
/**
********************************************************************************
*  @fn     hme_set_ctb_boundary_attrs
*
*  @brief  Fills the attributes of one CTB type given how many 8x8 units of the
*          CTB are valid horizontally and vertically: the list of valid 16x16
*          blks, their 8x8 masks, and the 32x32 / 64x64 merge flags
*
*  @param[out] ps_attrs : attributes structure to be populated
*
*  @param[in] num_8x8_horz : number of valid 8x8 units along the width
*
*  @param[in] num_8x8_vert : number of valid 8x8 units along the height
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert)
{
    blk_ctb_attrs_t *ps_cur_blk = &ps_attrs->as_blk_attrs[0];
    S32 valid_16x16_bits = 0;
    S32 horz_pattern, vert_pattern;
    S32 enc_idx;

    /* 16x16 blk count per direction; an odd 8x8 count is rounded up so that */
    /* a 16x16 blk that is only half covered is still counted                */
    const S32 cols_16x16 = (num_8x8_horz + 1) >> 1;
    const S32 rows_16x16 = (num_8x8_vert + 1) >> 1;

    ps_attrs->u1_num_blks_in_ctb = (U08)(cols_16x16 * rows_16x16);

    /*************************************************************************/
    /* Visit all 16 possible 16x16 blks in encode order:                     */
    /* 0   1   4   5                                                         */
    /* 2   3   6   7                                                         */
    /* 8   9   12  13                                                        */
    /* 10  11  14  15                                                        */
    /* Blks outside the valid area are dropped, and the surviving ones are   */
    /* packed contiguously into as_blk_attrs[]. E.g. with 2 valid 16x16      */
    /* columns and 4 valid rows, blks 0-3 and 8-11 survive, so blks 8-11 are */
    /* stored at positions 4-7 and bits 0-3 and 8-11 of cu_16x16_valid_flag  */
    /* are set.                                                              */
    /*************************************************************************/
    for(enc_idx = 0; enc_idx < 16; enc_idx++)
    {
        const S32 col = gau1_encode_to_raster_x[enc_idx];
        const S32 row = gau1_encode_to_raster_y[enc_idx];

        if((col < cols_16x16) && (row < rows_16x16))
        {
            U08 u1_mask_8x8 = 0xF;

            /* Mark the 16x16 blk at this encode position as valid */
            valid_16x16_bits |= (1 << enc_idx);

            /* 16x16 blk crosses the right border: drop 8x8 blks 1 and 3 */
            if(((col << 1) + 2) > num_8x8_horz)
            {
                u1_mask_8x8 &= 0x5;
            }
            /* 16x16 blk crosses the bottom border: drop 8x8 blks 2 and 3 */
            if(((row << 1) + 2) > num_8x8_vert)
            {
                u1_mask_8x8 &= 0x3;
            }

            ps_cur_blk->u1_blk_id_in_full_ctb = enc_idx;
            ps_cur_blk->u1_blk_x = col;
            ps_cur_blk->u1_blk_y = row;
            ps_cur_blk->u1_blk_8x8_mask = u1_mask_8x8;
            ps_cur_blk++;
        }
    }

    ps_attrs->cu_16x16_valid_flag = valid_16x16_bits;

    /* A 32x32 merge is allowed only where it is possible both horizontally  */
    /* and vertically, so the two per-direction patterns are ANDed together  */
    horz_pattern = (num_8x8_horz < 4) ? 0x0 : ((num_8x8_horz < 8) ? 0x5 : 0xF);
    vert_pattern = (num_8x8_vert < 4) ? 0x0 : ((num_8x8_vert < 8) ? 0x3 : 0xF);

    ps_attrs->u1_merge_to_32x32_flag = (U08)(horz_pattern & vert_pattern);

    /* 64x64 merge is attempted only when all four 32x32 merges are possible */
    ps_attrs->u1_merge_to_64x64_flag = (ps_attrs->u1_merge_to_32x32_flag == 0xF) ? 1 : 0;
}
3872
3873
/**
********************************************************************************
*  @fn     hme_set_ctb_attrs
*
*  @brief  Populates the CTB attribute entries (centre, right boundary, bottom
*          boundary, bottom-right boundary) for a picture of the given size
*
*  @param[out] ps_attrs : array of attributes indexed by CTB_CENTRE,
*                         CTB_RT_PIC_BOUNDARY, CTB_BOT_PIC_BOUNDARY and
*                         CTB_BOT_RT_PIC_BOUNDARY
*
*  @param[in] wd : picture width
*
*  @param[in] ht : picture height
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht)
{
    /* Low 6 bits of each dimension: the part that does not fill a 64 wide   */
    /* (or 64 high) unit. Non zero means the last CTB column / row is cropped */
    const S32 rem_wd = wd & 63;
    const S32 rem_ht = ht & 63;

    /* Cropped extents expressed in whole 8x8 units */
    const S32 partial_cols = rem_wd >> 3;
    const S32 partial_rows = rem_ht >> 3;

    if(rem_wd != 0)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], partial_cols, 8);
    }

    if(rem_ht != 0)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, partial_rows);
    }

    /* The corner entry is needed only when both dimensions are cropped */
    if((rem_wd != 0) && (rem_ht != 0))
    {
        hme_set_ctb_boundary_attrs(
            &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], partial_cols, partial_rows);
    }

    /* The centre entry always describes a complete CTB: 8 x 8 units of 8x8 */
    hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8);
}
3895
3896
/**
3897
********************************************************************************
3898
*  @fn     hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3899
*
3900
*  @brief  When we have an mv with ref id "poc_to" for which predictor to be
3901
*          computed, and predictor is ref id "poc_from", this funciton returns
3902
*          scale factor in Q8 for such a purpose
3903
*
3904
*  @param[in] curr_poc : input picture poc
3905
*
3906
*  @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled
3907
*
3908
*  @param[in] poc_to : POC of hte pic, pointed to by ref id to be scaled to
3909
*
3910
*  @return Scale factor in Q8 format
3911
********************************************************************************
3912
*/
3913
S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3914
761k
{
3915
761k
    S32 td, tx, tb;
3916
761k
    S16 i2_scf;
3917
    /*************************************************************************/
3918
    /* Approximate scale factor: 256 * num / denom                           */
3919
    /* num = curr_poc - poc_to, denom = curr_poc - poc_from                  */
3920
    /* Exact implementation as per standard.                                 */
3921
    /*************************************************************************/
3922
3923
761k
    tb = HME_CLIP((curr_poc - poc_to), -128, 127);
3924
761k
    td = HME_CLIP((curr_poc - poc_from), -128, 127);
3925
3926
761k
    tx = (16384 + (ABS(td) >> 1)) / td;
3927
    //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127);
3928
761k
    i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095);
3929
3930
761k
    return (i2_scf);
3931
761k
}
3932
3933
/**
3934
********************************************************************************
3935
*  @fn     hme_process_frm_init
3936
*
3937
*  @brief  HME frame level initialsation processing function
3938
*
3939
*  @param[in] pv_me_ctxt : ME ctxt pointer
3940
*
3941
*  @param[in] ps_ref_map : Reference map prms pointer
3942
*
3943
*  @param[in] ps_frm_prms :Pointer to frame params
3944
*
3945
*  called only for encode layer
3946
*
3947
*  @return Scale factor in Q8 format
3948
********************************************************************************
3949
*/
3950
void hme_process_frm_init(
3951
    void *pv_me_ctxt,
3952
    hme_ref_map_t *ps_ref_map,
3953
    hme_frm_prms_t *ps_frm_prms,
3954
    WORD32 i4_me_frm_id,
3955
    WORD32 i4_num_me_frm_pllel)
3956
95.3k
{
3957
95.3k
    me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
3958
95.3k
    me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
3959
3960
95.3k
    S32 i, j, desc_idx;
3961
95.3k
    S16 i2_max_x = 0, i2_max_y = 0;
3962
3963
    /* Set the Qp of current frm passed by caller. Required for intra cost */
3964
95.3k
    ps_ctxt->frm_qstep = ps_frm_prms->qstep;
3965
95.3k
    ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8;
3966
3967
    /* Bidir enabled or not */
3968
95.3k
    ps_ctxt->s_frm_prms = *ps_frm_prms;
3969
3970
    /*************************************************************************/
3971
    /* Set up the ref pic parameters across all layers. For this, we do the  */
3972
    /* following: the application has given us a ref pic list, we go index   */
3973
    /* by index and pick up the picture. A picture can be uniquely be mapped */
3974
    /* to a POC. So we search all layer descriptor array to find the POC     */
3975
    /* Once found, we update all attributes in this descriptor.              */
3976
    /* During this updation process we also create an index of descriptor id */
3977
    /* to ref id mapping. It is important to find the same POC in the layers */
3978
    /* descr strcture since it holds the pyramid inputs for non encode layers*/
3979
    /* Apart from this, e also update array containing the index of the descr*/
3980
    /* During processing for ease of access, each layer has a pointer to aray*/
3981
    /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
3982
    /* we update this too.                                                   */
3983
    /*************************************************************************/
3984
95.3k
    ps_ctxt->num_ref_past = 0;
3985
95.3k
    ps_ctxt->num_ref_future = 0;
3986
242k
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
3987
147k
    {
3988
147k
        S32 ref_id_lc, idx;
3989
147k
        hme_ref_desc_t *ps_ref_desc;
3990
3991
147k
        ps_ref_desc = &ps_ref_map->as_ref_desc[i];
3992
147k
        ref_id_lc = ps_ref_desc->i1_ref_id_lc;
3993
        /* Obtain the id of descriptor that contains this POC */
3994
147k
        idx = hme_find_descr_idx(
3995
147k
            ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel);
3996
3997
        /* Update all layers in this descr with the reference attributes */
3998
147k
        hme_update_layer_desc(
3999
147k
            &ps_thrd_ctxt->as_ref_descr[idx],
4000
147k
            ps_ref_desc,
4001
147k
            0,
4002
147k
            1,  //ps_ctxt->num_layers,
4003
147k
            ps_ctxt->ps_curr_descr);
4004
4005
        /* Update the pointer holder for the recon planes */
4006
147k
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0];
4007
147k
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy =
4008
147k
            &ps_ctxt->apu1_list_rec_fxfy[0][0];
4009
147k
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy =
4010
147k
            &ps_ctxt->apu1_list_rec_hxfy[0][0];
4011
147k
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy =
4012
147k
            &ps_ctxt->apu1_list_rec_fxhy[0][0];
4013
147k
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy =
4014
147k
            &ps_ctxt->apu1_list_rec_hxhy[0][0];
4015
147k
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon =
4016
147k
            &ps_ctxt->apv_list_dep_mngr[0][0];
4017
4018
        /* Update the array having ref id lc to descr id mapping */
4019
147k
        ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
4020
4021
        /* From ref id lc we need to work out the POC, So update this array */
4022
147k
        ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
4023
4024
        /* When computing costs in L0 and L1 directions, we need the */
4025
        /* respective ref id L0 and L1, so update this mapping */
4026
147k
        ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
4027
147k
        ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
4028
147k
        if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
4029
135k
        {
4030
135k
            ps_ctxt->au1_is_past[ref_id_lc] = 1;
4031
135k
            ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
4032
135k
            ps_ctxt->num_ref_past++;
4033
135k
        }
4034
12.1k
        else
4035
12.1k
        {
4036
12.1k
            ps_ctxt->au1_is_past[ref_id_lc] = 0;
4037
12.1k
            ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
4038
12.1k
            ps_ctxt->num_ref_future++;
4039
12.1k
        }
4040
4041
147k
        if(1 == ps_ctxt->i4_wt_pred_enable_flag)
4042
0
        {
4043
            /* copy the weight and offsets from current ref desc */
4044
0
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
4045
4046
            /* inv weight is stored in Q15 format */
4047
0
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4048
0
                ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
4049
0
            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
4050
0
        }
4051
147k
        else
4052
147k
        {
4053
            /* store default wt and offset*/
4054
147k
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
4055
4056
            /* inv weight is stored in Q15 format */
4057
147k
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4058
147k
                ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
4059
4060
147k
            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
4061
147k
        }
4062
147k
    }
4063
4064
95.3k
    ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
4065
95.3k
    ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
4066
4067
    /*************************************************************************/
4068
    /* Preparation of the TLU for bits for reference indices.                */
4069
    /* Special case is that of numref = 2. (TEV)                             */
4070
    /* Other cases uses UEV                                                  */
4071
    /*************************************************************************/
4072
1.24M
    for(i = 0; i < MAX_NUM_REF; i++)
4073
1.14M
    {
4074
1.14M
        ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
4075
1.14M
        ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
4076
1.14M
    }
4077
4078
95.3k
    if(ps_ref_map->i4_num_ref == 2)
4079
31.0k
    {
4080
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
4081
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
4082
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
4083
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
4084
31.0k
    }
4085
64.3k
    else if(ps_ref_map->i4_num_ref > 2)
4086
16.4k
    {
4087
77.9k
        for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4088
61.4k
        {
4089
61.4k
            S32 l0, l1;
4090
61.4k
            l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
4091
61.4k
            l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
4092
61.4k
            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
4093
61.4k
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
4094
61.4k
        }
4095
16.4k
    }
4096
4097
    /*************************************************************************/
4098
    /* Preparation of the scaling factors for reference indices. The scale   */
4099
    /* factor depends on distance of the two ref indices from current input  */
4100
    /* in terms of poc delta.                                                */
4101
    /*************************************************************************/
4102
242k
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4103
147k
    {
4104
528k
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4105
380k
        {
4106
380k
            S16 i2_scf_q8;
4107
380k
            S32 poc_from, poc_to;
4108
4109
380k
            poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
4110
380k
            poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
4111
4112
380k
            i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
4113
380k
            ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
4114
380k
        }
4115
147k
    }
4116
4117
    /*************************************************************************/
4118
    /* We store simplified look ups for 4 hpel planes and inp y plane for    */
4119
    /* every layer and for every ref id in the layer. So update these lookups*/
4120
    /*************************************************************************/
4121
190k
    for(i = 0; i < 1; i++)
4122
95.3k
    {
4123
95.3k
        U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy;
4124
95.3k
        U08 **ppu1_inp;
4125
95.3k
        void **ppvlist_dep_mngr;
4126
95.3k
        layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4127
4128
95.3k
        ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0];
4129
95.3k
        ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0];
4130
95.3k
        ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0];
4131
95.3k
        ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0];
4132
95.3k
        ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0];
4133
95.3k
        ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
4134
242k
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4135
147k
        {
4136
147k
            hme_ref_desc_t *ps_ref_desc;
4137
147k
            hme_ref_buf_info_t *ps_buf_info;
4138
147k
            layer_ctxt_t *ps_layer;
4139
147k
            S32 ref_id_lc;
4140
4141
147k
            ps_ref_desc = &ps_ref_map->as_ref_desc[j];
4142
147k
            ps_buf_info = &ps_ref_desc->as_ref_info[i];
4143
147k
            ref_id_lc = ps_ref_desc->i1_ref_id_lc;
4144
4145
147k
            desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
4146
147k
            ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i];
4147
4148
147k
            ppu1_inp[j] = ps_buf_info->pu1_ref_src;
4149
147k
            ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
4150
147k
            ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
4151
147k
            ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
4152
147k
            ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
4153
147k
            ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr;
4154
4155
            /* Update the curr descriptors reference pointers here */
4156
147k
            ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src;
4157
147k
            ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
4158
147k
            ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
4159
147k
            ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
4160
147k
            ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
4161
147k
        }
4162
95.3k
    }
4163
    /*************************************************************************/
4164
    /* The mv range for each layer is computed. For dyadic layers it will    */
4165
    /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
4166
    /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
4167
    /*************************************************************************/
4168
190k
    for(i = 0; i < 1; i++)
4169
95.3k
    {
4170
95.3k
        layer_ctxt_t *ps_layer_ctxt;
4171
95.3k
        if(i == 0)
4172
95.3k
        {
4173
95.3k
            i2_max_x = ps_frm_prms->i2_mv_range_x;
4174
95.3k
            i2_max_y = ps_frm_prms->i2_mv_range_y;
4175
95.3k
        }
4176
0
        else
4177
0
        {
4178
0
            i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd));
4179
0
            i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht));
4180
0
        }
4181
95.3k
        ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4182
95.3k
        ps_layer_ctxt->i2_max_mv_x = i2_max_x;
4183
95.3k
        ps_layer_ctxt->i2_max_mv_y = i2_max_y;
4184
4185
        /*********************************************************************/
4186
        /* Every layer maintains a reference id lc to POC mapping. This is   */
4187
        /* because the mapping is unique for every frm. Also, in next frm,   */
4188
        /* we require colocated mvs which means scaling according to temporal*/
4189
        /*distance. Hence this mapping needs to be maintained in every       */
4190
        /* layer ctxt                                                        */
4191
        /*********************************************************************/
4192
95.3k
        memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
4193
95.3k
        if(ps_ref_map->i4_num_ref)
4194
71.3k
        {
4195
71.3k
            memcpy(
4196
71.3k
                ps_layer_ctxt->ai4_ref_id_to_poc_lc,
4197
71.3k
                ps_ctxt->ai4_ref_idx_to_poc_lc,
4198
71.3k
                ps_ref_map->i4_num_ref * sizeof(S32));
4199
71.3k
        }
4200
95.3k
    }
4201
4202
95.3k
    return;
4203
95.3k
}
4204
4205
/**
4206
********************************************************************************
4207
*  @fn     hme_coarse_process_frm_init
4208
*
4209
*  @brief  HME frame level initialisation processing function
4210
*
4211
*  @param[in] pv_me_ctxt : ME ctxt pointer
4212
*
4213
*  @param[in] ps_ref_map : Reference map prms pointer
4214
*
4215
*  @param[in] ps_frm_prms :Pointer to frame params
4216
*
4217
*  @return None
4218
********************************************************************************
4219
*/
4220
void hme_coarse_process_frm_init(
4221
    void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms)
4222
95.3k
{
4223
95.3k
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4224
95.3k
    S32 i, j, desc_idx;
4225
95.3k
    S16 i2_max_x = 0, i2_max_y = 0;
4226
4227
    /* Set the Qp of current frm passed by caller. Required for intra cost */
4228
95.3k
    ps_ctxt->frm_qstep = ps_frm_prms->qstep;
4229
4230
    /* Bidir enabled or not */
4231
95.3k
    ps_ctxt->s_frm_prms = *ps_frm_prms;
4232
4233
    /*************************************************************************/
4234
    /* Set up the ref pic parameters across all layers. For this, we do the  */
4235
    /* following: the application has given us a ref pic list, we go index   */
4236
    /* by index and pick up the picture. A picture can be uniquely be mapped */
4237
    /* to a POC. So we search all layer descriptor array to find the POC     */
4238
    /* Once found, we update all attributes in this descriptor.              */
4239
    /* During this updation process we also create an index of descriptor id */
4240
    /* to ref id mapping. It is important to find the same POC in the layers */
4241
    /* descr strcture since it holds the pyramid inputs for non encode layers*/
4242
    /* Apart from this, e also update array containing the index of the descr*/
4243
    /* During processing for ease of access, each layer has a pointer to aray*/
4244
    /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
4245
    /* we update this too.                                                   */
4246
    /*************************************************************************/
4247
95.3k
    ps_ctxt->num_ref_past = 0;
4248
95.3k
    ps_ctxt->num_ref_future = 0;
4249
242k
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4250
147k
    {
4251
147k
        S32 ref_id_lc, idx;
4252
147k
        hme_ref_desc_t *ps_ref_desc;
4253
4254
147k
        ps_ref_desc = &ps_ref_map->as_ref_desc[i];
4255
147k
        ref_id_lc = ps_ref_desc->i1_ref_id_lc;
4256
        /* Obtain the id of descriptor that contains this POC */
4257
147k
        idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc);
4258
4259
        /* Update all layers in this descr with the reference attributes */
4260
147k
        hme_update_layer_desc(
4261
147k
            &ps_ctxt->as_ref_descr[idx],
4262
147k
            ps_ref_desc,
4263
147k
            1,
4264
147k
            ps_ctxt->num_layers - 1,
4265
147k
            ps_ctxt->ps_curr_descr);
4266
4267
        /* Update the array having ref id lc to descr id mapping */
4268
147k
        ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
4269
4270
        /* From ref id lc we need to work out the POC, So update this array */
4271
147k
        ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
4272
4273
        /* From ref id lc we need to work out the display num, So update this array */
4274
147k
        ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num;
4275
4276
        /* When computing costs in L0 and L1 directions, we need the */
4277
        /* respective ref id L0 and L1, so update this mapping */
4278
147k
        ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
4279
147k
        ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
4280
147k
        if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
4281
135k
        {
4282
135k
            ps_ctxt->au1_is_past[ref_id_lc] = 1;
4283
135k
            ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
4284
135k
            ps_ctxt->num_ref_past++;
4285
135k
        }
4286
12.1k
        else
4287
12.1k
        {
4288
12.1k
            ps_ctxt->au1_is_past[ref_id_lc] = 0;
4289
12.1k
            ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
4290
12.1k
            ps_ctxt->num_ref_future++;
4291
12.1k
        }
4292
147k
        if(1 == ps_ctxt->i4_wt_pred_enable_flag)
4293
0
        {
4294
            /* copy the weight and offsets from current ref desc */
4295
0
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
4296
4297
            /* inv weight is stored in Q15 format */
4298
0
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4299
0
                ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
4300
4301
0
            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
4302
0
        }
4303
147k
        else
4304
147k
        {
4305
            /* store default wt and offset*/
4306
147k
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
4307
4308
            /* inv weight is stored in Q15 format */
4309
147k
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4310
147k
                ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
4311
4312
147k
            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
4313
147k
        }
4314
147k
    }
4315
4316
95.3k
    ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
4317
95.3k
    ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
4318
4319
    /*************************************************************************/
4320
    /* Preparation of the TLU for bits for reference indices.                */
4321
    /* Special case is that of numref = 2. (TEV)                             */
4322
    /* Other cases uses UEV                                                  */
4323
    /*************************************************************************/
4324
1.24M
    for(i = 0; i < MAX_NUM_REF; i++)
4325
1.14M
    {
4326
1.14M
        ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
4327
1.14M
        ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
4328
1.14M
    }
4329
4330
95.3k
    if(ps_ref_map->i4_num_ref == 2)
4331
31.0k
    {
4332
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
4333
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
4334
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
4335
31.0k
        ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
4336
31.0k
    }
4337
64.3k
    else if(ps_ref_map->i4_num_ref > 2)
4338
16.4k
    {
4339
77.9k
        for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4340
61.4k
        {
4341
61.4k
            S32 l0, l1;
4342
61.4k
            l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
4343
61.4k
            l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
4344
61.4k
            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
4345
61.4k
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
4346
61.4k
        }
4347
16.4k
    }
4348
4349
    /*************************************************************************/
4350
    /* Preparation of the scaling factors for reference indices. The scale   */
4351
    /* factor depends on distance of the two ref indices from current input  */
4352
    /* in terms of poc delta.                                                */
4353
    /*************************************************************************/
4354
242k
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4355
147k
    {
4356
528k
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4357
380k
        {
4358
380k
            S16 i2_scf_q8;
4359
380k
            S32 poc_from, poc_to;
4360
4361
380k
            poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
4362
380k
            poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
4363
4364
380k
            i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
4365
380k
            ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
4366
380k
        }
4367
147k
    }
4368
4369
    /*************************************************************************/
4370
    /* We store simplified look ups for inp y plane for                      */
4371
    /* every layer and for every ref id in the layer.                        */
4372
    /*************************************************************************/
4373
287k
    for(i = 1; i < ps_ctxt->num_layers; i++)
4374
192k
    {
4375
192k
        U08 **ppu1_inp;
4376
4377
192k
        ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
4378
488k
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4379
295k
        {
4380
295k
            hme_ref_desc_t *ps_ref_desc;
4381
295k
            hme_ref_buf_info_t *ps_buf_info;
4382
295k
            layer_ctxt_t *ps_layer;
4383
295k
            S32 ref_id_lc;
4384
4385
295k
            ps_ref_desc = &ps_ref_map->as_ref_desc[j];
4386
295k
            ps_buf_info = &ps_ref_desc->as_ref_info[i];
4387
295k
            ref_id_lc = ps_ref_desc->i1_ref_id_lc;
4388
4389
295k
            desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
4390
295k
            ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i];
4391
4392
295k
            ppu1_inp[j] = ps_layer->pu1_inp;
4393
295k
        }
4394
192k
    }
4395
    /*************************************************************************/
4396
    /* The mv range for each layer is computed. For dyadic layers it will    */
4397
    /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
4398
    /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
4399
    /*************************************************************************/
4400
4401
    /* set to layer 0 search range params */
4402
95.3k
    i2_max_x = ps_frm_prms->i2_mv_range_x;
4403
95.3k
    i2_max_y = ps_frm_prms->i2_mv_range_y;
4404
4405
287k
    for(i = 1; i < ps_ctxt->num_layers; i++)
4406
192k
    {
4407
192k
        layer_ctxt_t *ps_layer_ctxt;
4408
4409
192k
        {
4410
192k
            i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1]));
4411
192k
            i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1]));
4412
192k
        }
4413
192k
        ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4414
192k
        ps_layer_ctxt->i2_max_mv_x = i2_max_x;
4415
192k
        ps_layer_ctxt->i2_max_mv_y = i2_max_y;
4416
4417
        /*********************************************************************/
4418
        /* Every layer maintains a reference id lc to POC mapping. This is   */
4419
        /* because the mapping is unique for every frm. Also, in next frm,   */
4420
        /* we require colocated mvs which means scaling according to temporal*/
4421
        /*distance. Hence this mapping needs to be maintained in every       */
4422
        /* layer ctxt                                                        */
4423
        /*********************************************************************/
4424
192k
        memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
4425
192k
        if(ps_ref_map->i4_num_ref)
4426
143k
        {
4427
143k
            memcpy(
4428
143k
                ps_layer_ctxt->ai4_ref_id_to_poc_lc,
4429
143k
                ps_ctxt->ai4_ref_idx_to_poc_lc,
4430
143k
                ps_ref_map->i4_num_ref * sizeof(S32));
4431
143k
            memcpy(
4432
143k
                ps_layer_ctxt->ai4_ref_id_to_disp_num,
4433
143k
                ps_ctxt->ai4_ref_idx_to_disp_num,
4434
143k
                ps_ref_map->i4_num_ref * sizeof(S32));
4435
143k
        }
4436
192k
    }
4437
4438
95.3k
    return;
4439
95.3k
}
4440
4441
/**
4442
********************************************************************************
4443
*  @fn     hme_process_frm
4444
*
4445
*  @brief  HME frame level processing function
4446
*
4447
*  @param[in] pv_me_ctxt : ME ctxt pointer
4448
*
4449
*  @param[in] ps_ref_map : Reference map prms pointer
4450
*
4451
*  @param[in] ppd_intra_costs : pointer to array of intra cost cost buffers for each layer
4452
*
4453
*  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4454
*
4455
*  @param[in] pf_ext_update_fxn : function pointer to update CTb results
4456
*
4457
*  @param[in] pf_get_intra_cu_and_cost :function pointer to get intra cu size and cost
4458
*
4459
*  @param[in] ps_multi_thrd_ctxt :function pointer to get intra cu size and cost
4460
*
4461
*  @return Scale factor in Q8 format
4462
********************************************************************************
4463
*/
4464
4465
void hme_process_frm(
4466
    void *pv_me_ctxt,
4467
    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
4468
    hme_ref_map_t *ps_ref_map,
4469
    double **ppd_intra_costs,
4470
    hme_frm_prms_t *ps_frm_prms,
4471
    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
4472
    void *pv_coarse_layer,
4473
    void *pv_multi_thrd_ctxt,
4474
    S32 i4_frame_parallelism_level,
4475
    S32 thrd_id,
4476
    S32 i4_me_frm_id)
4477
63.5k
{
4478
63.5k
    refine_prms_t s_refine_prms;
4479
63.5k
    me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
4480
63.5k
    me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
4481
4482
63.5k
    S32 lyr_job_type;
4483
63.5k
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4484
63.5k
    layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
4485
4486
63.5k
    ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4487
4488
63.5k
    lyr_job_type = ME_JOB_ENC_LYR;
4489
    /*************************************************************************/
4490
    /* Final L0 layer ME call                                                */
4491
    /*************************************************************************/
4492
63.5k
    {
4493
        /* Set the CTB attributes dependin on corner/rt edge/bot edge/center*/
4494
63.5k
        hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht);
4495
4496
63.5k
        hme_set_refine_prms(
4497
63.5k
            &s_refine_prms,
4498
63.5k
            ps_ctxt->u1_encode[0],
4499
63.5k
            ps_ref_map->i4_num_ref,
4500
63.5k
            0,
4501
63.5k
            ps_ctxt->num_layers,
4502
63.5k
            ps_ctxt->num_layers_explicit_search,
4503
63.5k
            ps_thrd_ctxt->s_init_prms.use_4x4,
4504
63.5k
            ps_frm_prms,
4505
63.5k
            ppd_intra_costs,
4506
63.5k
            &ps_thrd_ctxt->s_init_prms.s_me_coding_tools);
4507
4508
63.5k
        hme_refine(
4509
63.5k
            ps_thrd_ctxt,
4510
63.5k
            &s_refine_prms,
4511
63.5k
            pf_ext_update_fxn,
4512
63.5k
            ps_coarse_layer,
4513
63.5k
            ps_multi_thrd_ctxt,
4514
63.5k
            lyr_job_type,
4515
63.5k
            thrd_id,
4516
63.5k
            i4_me_frm_id,
4517
63.5k
            ps_l0_ipe_input);
4518
4519
        /* Set current ref pic status which will used as perv frame ref pic */
4520
63.5k
        if(i4_frame_parallelism_level)
4521
0
        {
4522
0
            ps_ctxt->i4_is_prev_frame_reference = 0;
4523
0
        }
4524
63.5k
        else
4525
63.5k
        {
4526
63.5k
            ps_ctxt->i4_is_prev_frame_reference =
4527
63.5k
                ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
4528
63.5k
                    ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
4529
63.5k
        }
4530
63.5k
    }
4531
4532
63.5k
    return;
4533
63.5k
}
4534
4535
/**
4536
********************************************************************************
4537
*  @fn     hme_coarse_process_frm
4538
*
4539
*  @brief  HME frame level processing function (coarse + refine)
4540
*
4541
*  @param[in] pv_me_ctxt : ME ctxt pointer
4542
*
4543
*  @param[in] ps_ref_map : Reference map prms pointer
4544
*
4545
*  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4546
*
4547
*  @param[in] ps_multi_thrd_ctxt :Multi thread related ctxt
4548
*
4549
*  @return Scale factor in Q8 format
4550
********************************************************************************
4551
*/
4552
4553
void hme_coarse_process_frm(
4554
    void *pv_me_ctxt,
4555
    hme_ref_map_t *ps_ref_map,
4556
    hme_frm_prms_t *ps_frm_prms,
4557
    void *pv_multi_thrd_ctxt,
4558
    WORD32 i4_ping_pong,
4559
    void **ppv_dep_mngr_hme_sync)
4560
95.3k
{
4561
95.3k
    S16 i2_max;
4562
95.3k
    S32 layer_id;
4563
95.3k
    coarse_prms_t s_coarse_prms;
4564
95.3k
    refine_prms_t s_refine_prms;
4565
95.3k
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4566
95.3k
    S32 lyr_job_type;
4567
95.3k
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4568
4569
95.3k
    ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4570
    /*************************************************************************/
4571
    /* Fire processing of all layers, starting with coarsest layer.          */
4572
    /*************************************************************************/
4573
95.3k
    layer_id = ps_ctxt->num_layers - 1;
4574
95.3k
    i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
4575
95.3k
    i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
4576
95.3k
    s_coarse_prms.i4_layer_id = layer_id;
4577
95.3k
    {
4578
95.3k
        S32 log_start_step;
4579
        /* Based on Preset, set the starting step size for Refinement */
4580
95.3k
        if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
4581
32.6k
        {
4582
32.6k
            log_start_step = 0;
4583
32.6k
        }
4584
62.6k
        else
4585
62.6k
        {
4586
62.6k
            log_start_step = 1;
4587
62.6k
        }
4588
4589
95.3k
        s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
4590
95.3k
        s_coarse_prms.i4_start_step = 1 << log_start_step;
4591
95.3k
    }
4592
95.3k
    s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref;
4593
95.3k
    s_coarse_prms.do_full_search = 1;
4594
95.3k
    if(s_coarse_prms.do_full_search)
4595
95.3k
    {
4596
        /* Set to 2 or 4 */
4597
95.3k
        if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
4598
32.6k
            s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
4599
62.6k
        else if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets >= ME_MEDIUM_SPEED)
4600
62.6k
            s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
4601
95.3k
    }
4602
95.3k
    s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse;
4603
4604
    /* Coarse layer uses only 1 lambda, i.e. the one for open loop ME */
4605
95.3k
    s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf;
4606
95.3k
    s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift;
4607
95.3k
    s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0);
4608
4609
95.3k
    hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync);
4610
4611
    /* all refinement layer processed in the loop below */
4612
95.3k
    layer_id--;
4613
95.3k
    lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1;
4614
4615
    /*************************************************************************/
4616
    /* This loop will run for all refine layers (non- encode layers)          */
4617
    /*************************************************************************/
4618
192k
    while(layer_id > 0)
4619
96.8k
    {
4620
96.8k
        hme_set_refine_prms(
4621
96.8k
            &s_refine_prms,
4622
96.8k
            ps_ctxt->u1_encode[layer_id],
4623
96.8k
            ps_ref_map->i4_num_ref,
4624
96.8k
            layer_id,
4625
96.8k
            ps_ctxt->num_layers,
4626
96.8k
            ps_ctxt->num_layers_explicit_search,
4627
96.8k
            ps_ctxt->s_init_prms.use_4x4,
4628
96.8k
            ps_frm_prms,
4629
96.8k
            NULL,
4630
96.8k
            &ps_ctxt->s_init_prms.s_me_coding_tools);
4631
4632
96.8k
        hme_refine_no_encode(
4633
96.8k
            ps_ctxt,
4634
96.8k
            &s_refine_prms,
4635
96.8k
            ps_multi_thrd_ctxt,
4636
96.8k
            lyr_job_type,
4637
96.8k
            i4_ping_pong,
4638
96.8k
            ppv_dep_mngr_hme_sync);
4639
4640
96.8k
        layer_id--;
4641
96.8k
        lyr_job_type++;
4642
96.8k
    }
4643
95.3k
}
4644
/**
4645
********************************************************************************
4646
*  @fn     hme_fill_neighbour_mvs
4647
*
4648
*  @brief  HME neighbour MV population function
4649
*
4650
*  @param[in] pps_mv_grid : MV grid array pointer
4651
*
4652
*  @param[in] i4_ctb_x : CTB pos X
4653
4654
*  @param[in] i4_ctb_y : CTB pos Y
4655
*
4656
*  @remarks :  Needs to be populated for proper implementation of cost fxn
4657
*
4658
*  @return Scale factor in Q8 format
4659
********************************************************************************
4660
*/
4661
void hme_fill_neighbour_mvs(
4662
    mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt)
4663
0
{
4664
    /* TODO : Needs to be populated for proper implementation of cost fxn */
4665
0
    ARG_NOT_USED(pps_mv_grid);
4666
0
    ARG_NOT_USED(i4_ctb_x);
4667
0
    ARG_NOT_USED(i4_ctb_y);
4668
0
    ARG_NOT_USED(i4_num_ref);
4669
0
    ARG_NOT_USED(pv_ctxt);
4670
0
}
4671
4672
/**
4673
*******************************************************************************
4674
*  @fn     void hme_get_active_pocs_list(void *pv_me_ctxt,
4675
*                                       S32 *p_pocs_buffered_in_me)
4676
*
4677
*  @brief  Returns the list of active POCs in ME ctxt
4678
*
4679
*  @param[in] pv_me_ctxt : handle to ME context
4680
*
4681
*  @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4682
*                                      populates with pocs active
4683
*
4684
*  @return   void
4685
*******************************************************************************
4686
*/
4687
WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel)
4688
95.3k
{
4689
95.3k
    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
4690
95.3k
    S32 i, count = 0;
4691
4692
572k
    for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
4693
476k
    {
4694
476k
        S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
4695
476k
        S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free;
4696
4697
476k
        if((i4_is_free == 0) && (poc != INVALID_POC))
4698
470k
        {
4699
470k
            count++;
4700
470k
        }
4701
476k
    }
4702
95.3k
    if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
4703
88.7k
    {
4704
88.7k
        return 1;
4705
88.7k
    }
4706
6.65k
    else
4707
6.65k
    {
4708
6.65k
        return 0;
4709
6.65k
    }
4710
95.3k
}
4711
4712
/**
4713
*******************************************************************************
4714
*  @fn     void hme_coarse_get_active_pocs_list(void *pv_me_ctxt,
4715
*                                       S32 *p_pocs_buffered_in_me)
4716
*
4717
*  @brief  Returns the list of active POCs in ME ctxt
4718
*
4719
*  @param[in] pv_me_ctxt : handle to ME context
4720
*
4721
*  @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4722
*                                      populates with pocs active
4723
*
4724
*  @return   void
4725
*******************************************************************************
4726
*/
4727
void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me)
4728
95.3k
{
4729
95.3k
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4730
95.3k
    S32 i, count = 0;
4731
4732
667k
    for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
4733
572k
    {
4734
572k
        S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc;
4735
4736
572k
        if(poc != -1)
4737
295k
        {
4738
295k
            p_pocs_buffered_in_me[count] = poc;
4739
295k
            count++;
4740
295k
        }
4741
572k
    }
4742
95.3k
    p_pocs_buffered_in_me[count] = -1;
4743
95.3k
}
4744
4745
/**
*******************************************************************************
*  @fn     S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers,
*                               S32 encode)
*
*  @brief  Returns the block size used for motion estimation in a layer.
*
*  @param[in] use_4x4 : not referenced by this function
*
*  @param[in] layer_id : id of the layer being queried
*
*  @param[in] n_layers : total number of layers; layer n_layers - 1 is the
*                        coarsest
*
*  @param[in] encode : non zero if the layer is an encode layer
*
*  @return   4 for the coarsest layer (checked first, so it wins even when
*            layer_id is 0 or encode is set), 16 for layer 0 or an encode
*            layer, 8 for every other layer
*******************************************************************************
*/
S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode)
{
    S32 blk_size;

    if(layer_id == n_layers - 1)
    {
        /* coarsest layer uses 4x4 blks */
        blk_size = 4;
    }
    else if((layer_id != 0) && (!encode))
    {
        /* Intermediate non encode layers use 8 */
        blk_size = 8;
    }
    else
    {
        /* lowermost layer/encode layer uses 16x16 */
        blk_size = 16;
    }

    return blk_size;
}