/src/libhevc/encoder/hme_interface.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*****************************************************************************/ |
22 | | /* File Includes */ |
23 | | /*****************************************************************************/ |
24 | | /* System include files */ |
25 | | #include <stdio.h> |
26 | | #include <string.h> |
27 | | #include <stdlib.h> |
28 | | #include <assert.h> |
29 | | #include <stdarg.h> |
30 | | #include <math.h> |
31 | | #include <limits.h> |
32 | | |
33 | | /* User include files */ |
34 | | #include "ihevc_typedefs.h" |
35 | | #include "itt_video_api.h" |
36 | | #include "ihevce_api.h" |
37 | | |
38 | | #include "rc_cntrl_param.h" |
39 | | #include "rc_frame_info_collector.h" |
40 | | #include "rc_look_ahead_params.h" |
41 | | |
42 | | #include "ihevc_defs.h" |
43 | | #include "ihevc_structs.h" |
44 | | #include "ihevc_platform_macros.h" |
45 | | #include "ihevc_deblk.h" |
46 | | #include "ihevc_itrans_recon.h" |
47 | | #include "ihevc_chroma_itrans_recon.h" |
48 | | #include "ihevc_chroma_intra_pred.h" |
49 | | #include "ihevc_intra_pred.h" |
50 | | #include "ihevc_inter_pred.h" |
51 | | #include "ihevc_mem_fns.h" |
52 | | #include "ihevc_padding.h" |
53 | | #include "ihevc_weighted_pred.h" |
54 | | #include "ihevc_sao.h" |
55 | | #include "ihevc_resi_trans.h" |
56 | | #include "ihevc_quant_iquant_ssd.h" |
57 | | #include "ihevc_cabac_tables.h" |
58 | | |
59 | | #include "ihevce_defs.h" |
60 | | #include "ihevce_lap_enc_structs.h" |
61 | | #include "ihevce_multi_thrd_structs.h" |
62 | | #include "ihevce_multi_thrd_funcs.h" |
63 | | #include "ihevce_me_common_defs.h" |
64 | | #include "ihevce_had_satd.h" |
65 | | #include "ihevce_error_codes.h" |
66 | | #include "ihevce_bitstream.h" |
67 | | #include "ihevce_cabac.h" |
68 | | #include "ihevce_rdoq_macros.h" |
69 | | #include "ihevce_function_selector.h" |
70 | | #include "ihevce_enc_structs.h" |
71 | | #include "ihevce_entropy_structs.h" |
72 | | #include "ihevce_cmn_utils_instr_set_router.h" |
73 | | #include "ihevce_enc_loop_structs.h" |
74 | | #include "ihevce_bs_compute_ctb.h" |
75 | | #include "ihevce_global_tables.h" |
76 | | #include "ihevce_dep_mngr_interface.h" |
77 | | #include "hme_datatype.h" |
78 | | #include "hme_interface.h" |
79 | | #include "hme_common_defs.h" |
80 | | #include "hme_defs.h" |
81 | | #include "ihevce_me_instr_set_router.h" |
82 | | #include "hme_globals.h" |
83 | | #include "hme_utils.h" |
84 | | #include "hme_coarse.h" |
85 | | #include "hme_refine.h" |
86 | | #include "hme_err_compute.h" |
87 | | #include "hme_common_utils.h" |
88 | | #include "hme_search_algo.h" |
89 | | #include "ihevce_profile.h" |
90 | | |
91 | | /*****************************************************************************/ |
92 | | /* Function Definitions */ |
93 | | /*****************************************************************************/ |
94 | | |
95 | | void hme_init_globals() |
96 | 0 | { |
97 | 0 | GRID_PT_T id; |
98 | 0 | S32 i, j; |
99 | | /*************************************************************************/ |
100 | | /* Initialize the lookup table for x offset, y offset, optimized mask */ |
101 | | /* based on grid id. The design is as follows: */ |
102 | | /* */ |
103 | | /* a b c d */ |
104 | | /* TL T TR e */ |
105 | | /* L C R f */ |
106 | | /* BL B BR */ |
107 | | /* */ |
108 | | /* If a non-corner pt like T is the new minimum, then we need to */ |
109 | | /* evaluate only 3 new pts, in this case a, b, c. So the optimal */ |
110 | | /* grid mask would reflect this. If a corner pt like TR is the new */ |
111 | | /* minimum, then we need to evaluate 5 new pts, in this case b, c, */ |
112 | | /* d, e and f. So the grid mask will have 5 pts enabled. */ |
113 | | /*************************************************************************/ |
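A minimal sketch (not part of the source) of how these tables could drive one refinement step, assuming BIT_EN(x) expands to (1 << (x)) and that the GRID_PT_T ids run contiguously from PT_C to PT_BR; the previous-minimum id and centre used below are hypothetical:

    GRID_PT_T pt, prev_min_id = PT_T;   /* hypothetical previous minimum       */
    S32 prev_cx = 0, prev_cy = 0;       /* hypothetical previous grid centre   */
    S32 mask = gai4_opt_grid_mask[prev_min_id];
    S32 cx = prev_cx + gai1_grid_id_to_x[prev_min_id]; /* recentre on minimum  */
    S32 cy = prev_cy + gai1_grid_id_to_y[prev_min_id];

    for(pt = PT_C; pt <= PT_BR; pt++)
    {
        if(mask & BIT_EN(pt))
        {
            /* evaluate cost only at the genuinely new points:
               (cx + gai1_grid_id_to_x[pt], cy + gai1_grid_id_to_y[pt]) */
        }
    }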
114 | |
115 | 0 | id = PT_C; |
116 | 0 | gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C)); |
117 | 0 | gai1_grid_id_to_x[id] = 0; |
118 | 0 | gai1_grid_id_to_y[id] = 0; |
119 | 0 | gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C)); |
120 | 0 | gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C)); |
121 | |
122 | 0 | id = PT_L; |
123 | 0 | gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL); |
124 | 0 | gai1_grid_id_to_x[id] = -1; |
125 | 0 | gai1_grid_id_to_y[id] = 0; |
126 | 0 | gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B); |
127 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B); |
128 | |
129 | 0 | id = PT_R; |
130 | 0 | gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR); |
131 | 0 | gai1_grid_id_to_x[id] = 1; |
132 | 0 | gai1_grid_id_to_y[id] = 0; |
133 | 0 | gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B); |
134 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B); |
135 | |
136 | 0 | id = PT_T; |
137 | 0 | gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR); |
138 | 0 | gai1_grid_id_to_x[id] = 0; |
139 | 0 | gai1_grid_id_to_y[id] = -1; |
140 | 0 | gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T); |
141 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T); |
142 | |
143 | 0 | id = PT_B; |
144 | 0 | gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR); |
145 | 0 | gai1_grid_id_to_x[id] = 0; |
146 | 0 | gai1_grid_id_to_y[id] = 1; |
147 | 0 | gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R); |
148 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R); |
149 | |
150 | 0 | id = PT_TL; |
151 | 0 | gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T]; |
152 | 0 | gai1_grid_id_to_x[id] = -1; |
153 | 0 | gai1_grid_id_to_y[id] = -1; |
154 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L); |
155 | |
156 | 0 | id = PT_TR; |
157 | 0 | gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T]; |
158 | 0 | gai1_grid_id_to_x[id] = 1; |
159 | 0 | gai1_grid_id_to_y[id] = -1; |
160 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R); |
161 | |
162 | 0 | id = PT_BL; |
163 | 0 | gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B]; |
164 | 0 | gai1_grid_id_to_x[id] = -1; |
165 | 0 | gai1_grid_id_to_y[id] = 1; |
166 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B); |
167 | |
168 | 0 | id = PT_BR; |
169 | 0 | gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B]; |
170 | 0 | gai1_grid_id_to_x[id] = 1; |
171 | 0 | gai1_grid_id_to_y[id] = 1; |
172 | 0 | gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B); |
173 | |
174 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8; |
175 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4; |
176 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4; |
177 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8; |
178 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8; |
179 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4; |
180 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4; |
181 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4; |
182 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4; |
183 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID; |
184 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID; |
185 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID; |
186 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID; |
187 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID; |
188 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID; |
189 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID; |
190 | 0 | ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID; |
191 | |
192 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16; |
193 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8; |
194 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8; |
195 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16; |
196 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16; |
197 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8; |
198 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8; |
199 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8; |
200 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8; |
201 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4; |
202 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12; |
203 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12; |
204 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4; |
205 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16; |
206 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16; |
207 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16; |
208 | 0 | ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16; |
209 | |
210 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32; |
211 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16; |
212 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16; |
213 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32; |
214 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32; |
215 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16; |
216 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16; |
217 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16; |
218 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16; |
219 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8; |
220 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24; |
221 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24; |
222 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8; |
223 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32; |
224 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32; |
225 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32; |
226 | 0 | ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32; |
227 | |
228 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64; |
229 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32; |
230 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32; |
231 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64; |
232 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64; |
233 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32; |
234 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32; |
235 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32; |
236 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32; |
237 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16; |
238 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48; |
239 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48; |
240 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16; |
241 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64; |
242 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64; |
243 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64; |
244 | 0 | ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64; |
245 | |
246 | 0 | gau1_num_parts_in_part_type[PRT_2Nx2N] = 1; |
247 | 0 | gau1_num_parts_in_part_type[PRT_2NxN] = 2; |
248 | 0 | gau1_num_parts_in_part_type[PRT_Nx2N] = 2; |
249 | 0 | gau1_num_parts_in_part_type[PRT_NxN] = 4; |
250 | 0 | gau1_num_parts_in_part_type[PRT_2NxnU] = 2; |
251 | 0 | gau1_num_parts_in_part_type[PRT_2NxnD] = 2; |
252 | 0 | gau1_num_parts_in_part_type[PRT_nLx2N] = 2; |
253 | 0 | gau1_num_parts_in_part_type[PRT_nRx2N] = 2; |
254 | |
255 | 0 | for(i = 0; i < MAX_PART_TYPES; i++) |
256 | 0 | for(j = 0; j < MAX_NUM_PARTS; j++) |
257 | 0 | ge_part_type_to_part_id[i][j] = PART_ID_INVALID; |
258 | | |
259 | | /* 2Nx2N only one partition */ |
260 | 0 | ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N; |
261 | | |
262 | | /* 2NxN 2 partitions */ |
263 | 0 | ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T; |
264 | 0 | ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B; |
265 | | |
266 | | /* Nx2N 2 partitions */ |
267 | 0 | ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L; |
268 | 0 | ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R; |
269 | | |
270 | | /* NxN 4 partitions */ |
271 | 0 | ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL; |
272 | 0 | ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR; |
273 | 0 | ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL; |
274 | 0 | ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR; |
275 | | |
276 | | /* AMP 2Nx (N/2 + 3N/2) 2 partitions */ |
277 | 0 | ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T; |
278 | 0 | ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B; |
279 | | |
280 | | /* AMP 2Nx (3N/2 + N/2) 2 partitions */ |
281 | 0 | ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T; |
282 | 0 | ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B; |
283 | | |
284 | | /* AMP (N/2 + 3N/2) x 2N 2 partitions */ |
285 | 0 | ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L; |
286 | 0 | ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R; |
287 | | |
288 | | /* AMP (3N/2 + N/2) x 2N 2 partitions */ |
289 | 0 | ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L; |
290 | 0 | ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R; |
291 | | |
292 | | /*************************************************************************/ |
293 | | /* initialize attributes for each partition id within the cu. */ |
294 | | /*************************************************************************/ |
295 | 0 | { |
296 | 0 | part_attr_t *ps_part_attr; |
297 | |
298 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N]; |
299 | 0 | ps_part_attr->u1_x_start = 0; |
300 | 0 | ps_part_attr->u1_y_start = 0; |
301 | 0 | ps_part_attr->u1_x_count = 8; |
302 | 0 | ps_part_attr->u1_y_count = 8; |
303 | |
304 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T]; |
305 | 0 | ps_part_attr->u1_x_start = 0; |
306 | 0 | ps_part_attr->u1_y_start = 0; |
307 | 0 | ps_part_attr->u1_x_count = 8; |
308 | 0 | ps_part_attr->u1_y_count = 4; |
309 | |
310 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B]; |
311 | 0 | ps_part_attr->u1_x_start = 0; |
312 | 0 | ps_part_attr->u1_y_start = 4; |
313 | 0 | ps_part_attr->u1_x_count = 8; |
314 | 0 | ps_part_attr->u1_y_count = 4; |
315 | |
316 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L]; |
317 | 0 | ps_part_attr->u1_x_start = 0; |
318 | 0 | ps_part_attr->u1_y_start = 0; |
319 | 0 | ps_part_attr->u1_x_count = 4; |
320 | 0 | ps_part_attr->u1_y_count = 8; |
321 | |
322 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R]; |
323 | 0 | ps_part_attr->u1_x_start = 4; |
324 | 0 | ps_part_attr->u1_y_start = 0; |
325 | 0 | ps_part_attr->u1_x_count = 4; |
326 | 0 | ps_part_attr->u1_y_count = 8; |
327 | |
328 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL]; |
329 | 0 | ps_part_attr->u1_x_start = 0; |
330 | 0 | ps_part_attr->u1_y_start = 0; |
331 | 0 | ps_part_attr->u1_x_count = 4; |
332 | 0 | ps_part_attr->u1_y_count = 4; |
333 | |
334 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR]; |
335 | 0 | ps_part_attr->u1_x_start = 4; |
336 | 0 | ps_part_attr->u1_y_start = 0; |
337 | 0 | ps_part_attr->u1_x_count = 4; |
338 | 0 | ps_part_attr->u1_y_count = 4; |
339 | |
340 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL]; |
341 | 0 | ps_part_attr->u1_x_start = 0; |
342 | 0 | ps_part_attr->u1_y_start = 4; |
343 | 0 | ps_part_attr->u1_x_count = 4; |
344 | 0 | ps_part_attr->u1_y_count = 4; |
345 | |
346 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR]; |
347 | 0 | ps_part_attr->u1_x_start = 4; |
348 | 0 | ps_part_attr->u1_y_start = 4; |
349 | 0 | ps_part_attr->u1_x_count = 4; |
350 | 0 | ps_part_attr->u1_y_count = 4; |
351 | |
352 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T]; |
353 | 0 | ps_part_attr->u1_x_start = 0; |
354 | 0 | ps_part_attr->u1_y_start = 0; |
355 | 0 | ps_part_attr->u1_x_count = 8; |
356 | 0 | ps_part_attr->u1_y_count = 2; |
357 | |
358 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B]; |
359 | 0 | ps_part_attr->u1_x_start = 0; |
360 | 0 | ps_part_attr->u1_y_start = 2; |
361 | 0 | ps_part_attr->u1_x_count = 8; |
362 | 0 | ps_part_attr->u1_y_count = 6; |
363 | |
364 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T]; |
365 | 0 | ps_part_attr->u1_x_start = 0; |
366 | 0 | ps_part_attr->u1_y_start = 0; |
367 | 0 | ps_part_attr->u1_x_count = 8; |
368 | 0 | ps_part_attr->u1_y_count = 6; |
369 | |
370 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B]; |
371 | 0 | ps_part_attr->u1_x_start = 0; |
372 | 0 | ps_part_attr->u1_y_start = 6; |
373 | 0 | ps_part_attr->u1_x_count = 8; |
374 | 0 | ps_part_attr->u1_y_count = 2; |
375 | |
376 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L]; |
377 | 0 | ps_part_attr->u1_x_start = 0; |
378 | 0 | ps_part_attr->u1_y_start = 0; |
379 | 0 | ps_part_attr->u1_x_count = 2; |
380 | 0 | ps_part_attr->u1_y_count = 8; |
381 | |
382 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R]; |
383 | 0 | ps_part_attr->u1_x_start = 2; |
384 | 0 | ps_part_attr->u1_y_start = 0; |
385 | 0 | ps_part_attr->u1_x_count = 6; |
386 | 0 | ps_part_attr->u1_y_count = 8; |
387 | |
388 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L]; |
389 | 0 | ps_part_attr->u1_x_start = 0; |
390 | 0 | ps_part_attr->u1_y_start = 0; |
391 | 0 | ps_part_attr->u1_x_count = 6; |
392 | 0 | ps_part_attr->u1_y_count = 8; |
393 | |
394 | 0 | ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R]; |
395 | 0 | ps_part_attr->u1_x_start = 6; |
396 | 0 | ps_part_attr->u1_y_start = 0; |
397 | 0 | ps_part_attr->u1_x_count = 2; |
398 | 0 | ps_part_attr->u1_y_count = 8; |
399 | 0 | } |
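The u1_x_start/u1_y_start/u1_x_count/u1_y_count values above appear to be expressed in eighths of the CU dimension, so pixel geometry for a given CU size can be recovered as in this illustrative fragment (cu_size and the locals are hypothetical):

    part_attr_t *ps_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
    S32 cu_size = 32;                              /* e.g. a 32x32 CU           */
    S32 x0 = (ps_attr->u1_x_start * cu_size) >> 3; /* 0                         */
    S32 y0 = (ps_attr->u1_y_start * cu_size) >> 3; /* 0                         */
    S32 w = (ps_attr->u1_x_count * cu_size) >> 3;  /* 32: full CU width         */
    S32 h = (ps_attr->u1_y_count * cu_size) >> 3;  /* 8 : top quarter (2N x nU) */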
400 | 0 | for(i = 0; i < NUM_BLK_SIZES; i++) |
401 | 0 | ge_blk_size_to_cu_size[i] = CU_INVALID; |
402 | |
403 | 0 | ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8; |
404 | 0 | ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16; |
405 | 0 | ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32; |
406 | 0 | ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64; |
407 | | |
408 | | /* This is the reverse, given cU size, get blk size */ |
409 | 0 | ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8; |
410 | 0 | ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16; |
411 | 0 | ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32; |
412 | 0 | ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64; |
413 | |
414 | 0 | gau1_is_vert_part[PRT_2Nx2N] = 0; |
415 | 0 | gau1_is_vert_part[PRT_2NxN] = 0; |
416 | 0 | gau1_is_vert_part[PRT_Nx2N] = 1; |
417 | 0 | gau1_is_vert_part[PRT_NxN] = 1; |
418 | 0 | gau1_is_vert_part[PRT_2NxnU] = 0; |
419 | 0 | gau1_is_vert_part[PRT_2NxnD] = 0; |
420 | 0 | gau1_is_vert_part[PRT_nLx2N] = 1; |
421 | 0 | gau1_is_vert_part[PRT_nRx2N] = 1; |
422 | | |
423 | | /* Initialise the number of best results for the full-pel refinement */ |
424 | 0 | gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2; |
425 | 0 | gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0; |
426 | 0 | gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0; |
427 | 0 | gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0; |
428 | 0 | gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0; |
429 | 0 | gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1; |
430 | 0 | gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1; |
431 | 0 | gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1; |
432 | 0 | gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1; |
433 | 0 | gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1; |
434 | 0 | gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0; |
435 | 0 | gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0; |
436 | 0 | gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1; |
437 | 0 | gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1; |
438 | 0 | gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0; |
439 | 0 | gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0; |
440 | 0 | gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1; |
441 | |
442 | 0 | gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2; |
443 | 0 | gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0; |
444 | 0 | gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0; |
445 | 0 | gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0; |
446 | 0 | gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0; |
447 | 0 | gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1; |
448 | 0 | gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1; |
449 | 0 | gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1; |
450 | 0 | gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1; |
451 | 0 | gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1; |
452 | 0 | gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0; |
453 | 0 | gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0; |
454 | 0 | gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1; |
455 | 0 | gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1; |
456 | 0 | gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0; |
457 | 0 | gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0; |
458 | 0 | gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1; |
459 | |
460 | 0 | gau1_num_best_results_MS[PART_ID_2Nx2N] = 2; |
461 | 0 | gau1_num_best_results_MS[PART_ID_2NxN_T] = 0; |
462 | 0 | gau1_num_best_results_MS[PART_ID_2NxN_B] = 0; |
463 | 0 | gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0; |
464 | 0 | gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0; |
465 | 0 | gau1_num_best_results_MS[PART_ID_NxN_TL] = 1; |
466 | 0 | gau1_num_best_results_MS[PART_ID_NxN_TR] = 1; |
467 | 0 | gau1_num_best_results_MS[PART_ID_NxN_BL] = 1; |
468 | 0 | gau1_num_best_results_MS[PART_ID_NxN_BR] = 1; |
469 | 0 | gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1; |
470 | 0 | gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0; |
471 | 0 | gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0; |
472 | 0 | gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1; |
473 | 0 | gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1; |
474 | 0 | gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0; |
475 | 0 | gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0; |
476 | 0 | gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1; |
477 | |
478 | 0 | gau1_num_best_results_HS[PART_ID_2Nx2N] = 2; |
479 | 0 | gau1_num_best_results_HS[PART_ID_2NxN_T] = 0; |
480 | 0 | gau1_num_best_results_HS[PART_ID_2NxN_B] = 0; |
481 | 0 | gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0; |
482 | 0 | gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0; |
483 | 0 | gau1_num_best_results_HS[PART_ID_NxN_TL] = 0; |
484 | 0 | gau1_num_best_results_HS[PART_ID_NxN_TR] = 0; |
485 | 0 | gau1_num_best_results_HS[PART_ID_NxN_BL] = 0; |
486 | 0 | gau1_num_best_results_HS[PART_ID_NxN_BR] = 0; |
487 | 0 | gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0; |
488 | 0 | gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0; |
489 | 0 | gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0; |
490 | 0 | gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0; |
491 | 0 | gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0; |
492 | 0 | gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0; |
493 | 0 | gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0; |
494 | 0 | gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0; |
495 | |
496 | 0 | gau1_num_best_results_XS[PART_ID_2Nx2N] = 2; |
497 | 0 | gau1_num_best_results_XS[PART_ID_2NxN_T] = 0; |
498 | 0 | gau1_num_best_results_XS[PART_ID_2NxN_B] = 0; |
499 | 0 | gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0; |
500 | 0 | gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0; |
501 | 0 | gau1_num_best_results_XS[PART_ID_NxN_TL] = 0; |
502 | 0 | gau1_num_best_results_XS[PART_ID_NxN_TR] = 0; |
503 | 0 | gau1_num_best_results_XS[PART_ID_NxN_BL] = 0; |
504 | 0 | gau1_num_best_results_XS[PART_ID_NxN_BR] = 0; |
505 | 0 | gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0; |
506 | 0 | gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0; |
507 | 0 | gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0; |
508 | 0 | gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0; |
509 | 0 | gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0; |
510 | 0 | gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0; |
511 | 0 | gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0; |
512 | 0 | gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0; |
513 | |
514 | 0 | gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25; |
515 | 0 | gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0; |
516 | 0 | gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0; |
517 | 0 | gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0; |
518 | 0 | gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0; |
519 | 0 | gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0; |
520 | 0 | gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0; |
521 | 0 | gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0; |
522 | 0 | gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0; |
523 | 0 | gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0; |
524 | 0 | gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0; |
525 | 0 | gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0; |
526 | 0 | gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0; |
527 | 0 | gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0; |
528 | 0 | gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0; |
529 | 0 | gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0; |
530 | 0 | gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0; |
531 | | |
532 | | /* Top right validity for each part id */ |
533 | 0 | gau1_partid_tr_valid[PART_ID_2Nx2N] = 1; |
534 | 0 | gau1_partid_tr_valid[PART_ID_2NxN_T] = 1; |
535 | 0 | gau1_partid_tr_valid[PART_ID_2NxN_B] = 0; |
536 | 0 | gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1; |
537 | 0 | gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1; |
538 | 0 | gau1_partid_tr_valid[PART_ID_NxN_TL] = 1; |
539 | 0 | gau1_partid_tr_valid[PART_ID_NxN_TR] = 1; |
540 | 0 | gau1_partid_tr_valid[PART_ID_NxN_BL] = 1; |
541 | 0 | gau1_partid_tr_valid[PART_ID_NxN_BR] = 0; |
542 | 0 | gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1; |
543 | 0 | gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0; |
544 | 0 | gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1; |
545 | 0 | gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0; |
546 | 0 | gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1; |
547 | 0 | gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1; |
548 | 0 | gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1; |
549 | 0 | gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1; |
550 | | |
551 | | /* Bot Left validity for each part id */ |
552 | 0 | gau1_partid_bl_valid[PART_ID_2Nx2N] = 1; |
553 | 0 | gau1_partid_bl_valid[PART_ID_2NxN_T] = 1; |
554 | 0 | gau1_partid_bl_valid[PART_ID_2NxN_B] = 1; |
555 | 0 | gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1; |
556 | 0 | gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0; |
557 | 0 | gau1_partid_bl_valid[PART_ID_NxN_TL] = 1; |
558 | 0 | gau1_partid_bl_valid[PART_ID_NxN_TR] = 0; |
559 | 0 | gau1_partid_bl_valid[PART_ID_NxN_BL] = 1; |
560 | 0 | gau1_partid_bl_valid[PART_ID_NxN_BR] = 0; |
561 | 0 | gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1; |
562 | 0 | gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1; |
563 | 0 | gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1; |
564 | 0 | gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1; |
565 | 0 | gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1; |
566 | 0 | gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0; |
567 | 0 | gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1; |
568 | 0 | gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0; |
569 | | |
570 | | /*Part id to part num of this partition id in the CU */ |
571 | 0 | gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0; |
572 | 0 | gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0; |
573 | 0 | gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1; |
574 | 0 | gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0; |
575 | 0 | gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1; |
576 | 0 | gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0; |
577 | 0 | gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1; |
578 | 0 | gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2; |
579 | 0 | gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3; |
580 | 0 | gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0; |
581 | 0 | gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1; |
582 | 0 | gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0; |
583 | 0 | gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1; |
584 | 0 | gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0; |
585 | 0 | gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1; |
586 | 0 | gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0; |
587 | 0 | gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1; |
588 | | |
589 | | /*Which partition type does this partition id belong to */ |
590 | 0 | ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N; |
591 | 0 | ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN; |
592 | 0 | ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN; |
593 | 0 | ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N; |
594 | 0 | ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N; |
595 | 0 | ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN; |
596 | 0 | ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN; |
597 | 0 | ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN; |
598 | 0 | ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN; |
599 | 0 | ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU; |
600 | 0 | ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU; |
601 | 0 | ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD; |
602 | 0 | ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD; |
603 | 0 | ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N; |
604 | 0 | ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N; |
605 | 0 | ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N; |
606 | 0 | ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N; |
607 | | |
608 | | /*************************************************************************/ |
609 | | /* Set up the bits to be taken up for the part type. This is equally */ |
610 | | /* divided up between the various partitions in the part-type. */ |
611 | | /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as */ |
612 | | /* partition 2Nx2N. */ |
613 | | /*************************************************************************/ |
614 | | /* 1 bit for 2Nx2N partition */ |
615 | 0 | gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2; |
616 | | |
617 | | /* 3 bits for symmetric part types, so 1.5 bits per partition */ |
618 | 0 | gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3; |
619 | 0 | gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3; |
620 | 0 | gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3; |
621 | 0 | gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3; |
622 | | |
623 | | /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */ |
624 | 0 | gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2; |
625 | 0 | gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2; |
626 | 0 | gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2; |
627 | 0 | gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2; |
628 | | |
629 | | /* 4 bits for AMP so 2 bits per partition */ |
630 | 0 | gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4; |
631 | 0 | gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4; |
632 | 0 | gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4; |
633 | 0 | gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4; |
634 | 0 | gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4; |
635 | 0 | gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4; |
636 | 0 | gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4; |
637 | 0 | gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4; |
638 | 0 | } |
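Since the table above uses a Q1 fixed-point convention (the stored value is twice the bit estimate), a cost term would fold it in roughly as below; lambda and the final shift are illustrative assumptions, not the encoder's exact cost model:

    U08 bits_q1 = gau1_bits_for_part_id_q1[PART_ID_2NxN_T]; /* 3 => 1.5 bits   */
    S32 lambda = 100;                                       /* hypothetical    */
    S32 part_cost = (bits_q1 * lambda) >> 1;                /* undo Q1 scaling */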
639 | | |
640 | | /** |
641 | | ******************************************************************************** |
642 | | * @fn hme_enc_num_alloc() |
643 | | * |
644 | | * @brief returns the number of memtabs required by the HME module |
645 | | * |
646 | | * @return Number of memtabs required |
647 | | ******************************************************************************** |
648 | | */ |
649 | | S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel) |
650 | 0 | { |
651 | 0 | if(i4_num_me_frm_pllel > 1) |
652 | 0 | { |
653 | 0 | return ((S32)MAX_HME_ENC_TOT_MEMTABS); |
654 | 0 | } |
655 | 0 | else |
656 | 0 | { |
657 | 0 | return ((S32)MIN_HME_ENC_TOT_MEMTABS); |
658 | 0 | } |
659 | 0 | } |
660 | | |
661 | | /** |
662 | | ******************************************************************************** |
663 | | * @fn hme_coarse_num_alloc() |
664 | | * |
665 | | * @brief returns the number of memtabs required by the HME module |
666 | | * |
667 | | * @return Number of memtabs required |
668 | | ******************************************************************************** |
669 | | */ |
670 | | S32 hme_coarse_num_alloc() |
671 | 0 | { |
672 | 0 | return ((S32)HME_COARSE_TOT_MEMTABS); |
673 | 0 | } |
674 | | |
675 | | /** |
676 | | ******************************************************************************** |
677 | | * @fn hme_coarse_dep_mngr_num_alloc() |
678 | | * |
679 | | * @brief returns the number of memtabs required by the Dep Mngr for the HME module |
680 | | * |
681 | | * @return Number of memtabs required |
682 | | ******************************************************************************** |
683 | | */ |
684 | | WORD32 hme_coarse_dep_mngr_num_alloc() |
685 | 0 | { |
686 | 0 | return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs())); |
687 | 0 | } |
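A caller-side sketch of how these queries are typically used to size the memtab arrays before the request/assign passes further below; the variables and the plain malloc() are hypothetical, with i4_num_me_frm_pllel supplied by the caller:

    S32 i4_num_me_frm_pllel = 1;
    S32 n_enc_tabs = hme_enc_num_alloc(i4_num_me_frm_pllel);
    S32 n_coarse_tabs = hme_coarse_num_alloc();
    hme_memtab_t *ps_enc_tabs =
        (hme_memtab_t *)malloc(n_enc_tabs * sizeof(hme_memtab_t));
    hme_memtab_t *ps_coarse_tabs =
        (hme_memtab_t *)malloc(n_coarse_tabs * sizeof(hme_memtab_t));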
688 | | |
689 | | S32 hme_validate_init_prms(hme_init_prms_t *ps_prms) |
690 | 0 | { |
691 | 0 | S32 n_layers = ps_prms->num_simulcast_layers; |
692 | | |
693 | | /* The final layer must be a non-encode coarse layer */ |
694 | 0 | if(n_layers > (MAX_NUM_LAYERS - 1)) |
695 | 0 | return (-1); |
696 | | |
697 | 0 | if(n_layers < 1) |
698 | 0 | return (-1); |
699 | | |
700 | | /* Width of the coarsest encode layer must be >= 2*min_wd, where min_wd */ |
701 | | /* represents the min allowed width in any layer. Ditto for ht. */ |
702 | 0 | if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE)) |
703 | 0 | return (-1); |
704 | 0 | if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE)) |
705 | 0 | return (-1); |
706 | 0 | if(ps_prms->max_num_ref > MAX_NUM_REF) |
707 | 0 | return (-1); |
708 | 0 | if(ps_prms->max_num_ref < 0) |
709 | 0 | return (-1); |
710 | | |
711 | 0 | return (0); |
712 | 0 | } |
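An illustrative parameter set that satisfies the checks above for a single simulcast layer at 1080p, assuming MAX_NUM_REF >= 4 and that MIN_WD_COARSE / MIN_HT_COARSE are well below 960 / 540; only the fields inspected by the function are filled in:

    hme_init_prms_t s_init_prms;
    memset(&s_init_prms, 0, sizeof(s_init_prms));
    s_init_prms.num_simulcast_layers = 1;
    s_init_prms.a_wd[0] = 1920;
    s_init_prms.a_ht[0] = 1080;
    s_init_prms.max_num_ref = 4;
    ASSERT(0 == hme_validate_init_prms(&s_init_prms));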
713 | | void hme_set_layer_res_attrs( |
714 | | layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc) |
715 | 0 | { |
716 | 0 | ps_layer->i4_wd = wd; |
717 | 0 | ps_layer->i4_ht = ht; |
718 | 0 | ps_layer->i4_disp_wd = disp_wd; |
719 | 0 | ps_layer->i4_disp_ht = disp_ht; |
720 | 0 | if(0 == u1_enc) |
721 | 0 | { |
722 | 0 | ps_layer->i4_inp_stride = wd + 32 + 4; |
723 | 0 | ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16; |
724 | 0 | ps_layer->i4_pad_x_inp = 16; |
725 | 0 | ps_layer->i4_pad_y_inp = 16; |
726 | 0 | ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset; |
727 | 0 | } |
728 | 0 | } |
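For a non-encode layer the numbers above work out as in this small worked example (wd = 960 assumed):

    S32 stride = 960 + 32 + 4;       /* i4_inp_stride = 996                     */
    S32 offset = (stride * 16) + 16; /* i4_inp_offset = 15952: pu1_inp skips    */
                                     /* 16 padding rows and 16 padding columns  */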
729 | | |
730 | | /** |
731 | | ******************************************************************************** |
732 | | * @fn hme_coarse_get_layer1_mv_bank_ref_idx_size() |
733 | | * |
734 | | * @brief returns the MV bank and ref idx size of Layer 1 (penultimate) |
735 | | * |
736 | | * @return none |
737 | | ******************************************************************************** |
738 | | */ |
739 | | void hme_coarse_get_layer1_mv_bank_ref_idx_size( |
740 | | S32 n_tot_layers, |
741 | | S32 *a_wd, |
742 | | S32 *a_ht, |
743 | | S32 max_num_ref, |
744 | | S32 *pi4_mv_bank_size, |
745 | | S32 *pi4_ref_idx_size) |
746 | 0 | { |
747 | 0 | S32 num_blks, num_mvs_per_blk, num_ref; |
748 | 0 | S32 num_cols, num_rows, num_mvs_per_row; |
749 | 0 | S32 is_explicit_store = 1; |
750 | 0 | S32 wd, ht, num_layers_explicit_search; |
751 | 0 | S32 num_results, use_4x4; |
752 | 0 | wd = a_wd[1]; |
753 | 0 | ht = a_ht[1]; |
754 | | |
755 | | /* Assuming about 4 layers for 1080p, we do explicit search across all ref */ |
756 | | /* frames in all but the final layer. In the final layer, it could be 1/2 */ |
757 | | //ps_hme_init_prms->num_layers_explicit_search = 3; |
758 | 0 | num_layers_explicit_search = 3; |
759 | |
|
760 | 0 | if(num_layers_explicit_search <= 0) |
761 | 0 | num_layers_explicit_search = n_tot_layers - 1; |
762 | |
|
763 | 0 | num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); |
764 | | |
765 | | /* Possibly implicit search for lower (finer) layers */ |
766 | 0 | if(n_tot_layers - 1 > num_layers_explicit_search) |
767 | 0 | is_explicit_store = 0; |
768 | | |
769 | | /* coarsest layer always uses 4x4 blks to store results */ |
770 | 0 | if(1 == (n_tot_layers - 1)) |
771 | 0 | { |
772 | | /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */ |
773 | | //ps_hme_init_prms->max_num_results_coarse = 4; |
774 | | //vijay : with new algo in coarsest layer this has to be revisited |
775 | 0 | num_results = 4; |
776 | 0 | } |
777 | 0 | else |
778 | 0 | { |
779 | | /* Every refinement layer stores a max of 2 results per partition */ |
780 | | //ps_hme_init_prms->max_num_results = 2; |
781 | 0 | num_results = 2; |
782 | 0 | } |
783 | 0 | use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0); |
784 | |
|
785 | 0 | num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2); |
786 | 0 | num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2); |
787 | |
|
788 | 0 | if(is_explicit_store) |
789 | 0 | num_ref = max_num_ref; |
790 | 0 | else |
791 | 0 | num_ref = 2; |
792 | |
|
793 | 0 | num_blks = num_cols * num_rows; |
794 | 0 | num_mvs_per_blk = num_ref * num_results; |
795 | 0 | num_mvs_per_row = num_mvs_per_blk * num_cols; |
796 | | |
797 | | /* store the sizes */ |
798 | 0 | *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t); |
799 | 0 | *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08); |
800 | |
|
801 | 0 | return; |
802 | 0 | } |
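A worked example of the sizing above, assuming n_tot_layers = 2 (so layer 1 is the coarsest layer), layer-1 dimensions of 320x180 and max_num_ref = 2:

    /* use_4x4 = 1 and num_results = 4 in the coarsest layer, so:
     *   num_cols = (320 >> 2) + 2 = 82,   num_rows = (180 >> 2) + 2 = 47
     *   num_blks = 82 * 47 = 3854,        num_mvs_per_blk = 2 * 4 = 8
     *   *pi4_mv_bank_size = 3854 * 8 * sizeof(hme_mv_t)
     *   *pi4_ref_idx_size = 3854 * 8 * sizeof(S08) = 30832 bytes */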
803 | | /** |
804 | | ******************************************************************************** |
805 | | * @fn hme_alloc_init_layer_mv_bank() |
806 | | * |
807 | | * @brief memory alloc and init function for MV bank |
808 | | * |
809 | | * @return Number of memtabs required |
810 | | ******************************************************************************** |
811 | | */ |
812 | | S32 hme_alloc_init_layer_mv_bank( |
813 | | hme_memtab_t *ps_memtab, |
814 | | S32 max_num_results, |
815 | | S32 max_num_ref, |
816 | | S32 use_4x4, |
817 | | S32 mem_avail, |
818 | | S32 u1_enc, |
819 | | S32 wd, |
820 | | S32 ht, |
821 | | S32 is_explicit_store, |
822 | | hme_mv_t **pps_mv_base, |
823 | | S08 **pi1_ref_idx_base, |
824 | | S32 *pi4_num_mvs_per_row) |
825 | 0 | { |
826 | 0 | S32 count = 0; |
827 | 0 | S32 size; |
828 | 0 | S32 num_blks, num_mvs_per_blk; |
829 | 0 | S32 num_ref; |
830 | 0 | S32 num_cols, num_rows, num_mvs_per_row; |
831 | |
|
832 | 0 | if(is_explicit_store) |
833 | 0 | num_ref = max_num_ref; |
834 | 0 | else |
835 | 0 | num_ref = 2; |
836 | | |
837 | | /* MV Bank allocation takes into consideration the following: */ |
838 | | /* number of results per reference x max num references is the amount */ |
839 | | /* buffered up per blk. Number of blks in pic depends on the blk size, */ |
840 | | /* which could be either 4x4 or 8x8. */ |
841 | 0 | num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2); |
842 | 0 | num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2); |
843 | |
|
844 | 0 | if(u1_enc) |
845 | 0 | { |
846 | | /* TODO: CTB64x64 is assumed. FIX according to actual CTB */ |
847 | 0 | WORD32 num_ctb_cols = ((wd + 63) >> 6); |
848 | 0 | WORD32 num_ctb_rows = ((ht + 63) >> 6); |
849 | |
|
850 | 0 | num_cols = (num_ctb_cols << 3) + 2; |
851 | 0 | num_rows = (num_ctb_rows << 3) + 2; |
852 | 0 | } |
853 | 0 | num_blks = num_cols * num_rows; |
854 | 0 | num_mvs_per_blk = num_ref * max_num_results; |
855 | 0 | num_mvs_per_row = num_mvs_per_blk * num_cols; |
856 | |
|
857 | 0 | size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t); |
858 | 0 | if(mem_avail) |
859 | 0 | { |
860 | | /* store this for run time verifications */ |
861 | 0 | *pi4_num_mvs_per_row = num_mvs_per_row; |
862 | 0 | ASSERT(ps_memtab[count].size == size); |
863 | 0 | *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem; |
864 | 0 | } |
865 | 0 | else |
866 | 0 | { |
867 | 0 | ps_memtab[count].size = size; |
868 | 0 | ps_memtab[count].align = 4; |
869 | 0 | ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; |
870 | 0 | } |
871 | | |
872 | 0 | count++; |
873 | | /* Ref idx takes the same route as mvbase */ |
874 | |
|
875 | 0 | size = num_blks * num_mvs_per_blk * sizeof(S08); |
876 | 0 | if(mem_avail) |
877 | 0 | { |
878 | 0 | ASSERT(ps_memtab[count].size == size); |
879 | 0 | *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem; |
880 | 0 | } |
881 | 0 | else |
882 | 0 | { |
883 | 0 | ps_memtab[count].size = size; |
884 | 0 | ps_memtab[count].align = 4; |
885 | 0 | ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; |
886 | 0 | } |
887 | 0 | count++; |
888 | |
|
889 | 0 | return (count); |
890 | 0 | } |
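A caller-side sketch of the two-pass request/assign protocol used by this function (and the other alloc/init routines below): the request pass fills in sizes and alignments, the caller provides pu1_mem, and the assign pass consumes it. allocate_aligned() and the argument values are hypothetical:

    hme_memtab_t as_tabs[2];
    hme_mv_t *ps_mv_base;
    S08 *pi1_ref_idx;
    S32 num_mvs_per_row, n, i;

    n = hme_alloc_init_layer_mv_bank(as_tabs, 4, 2, 1, 0 /* request */, 0,
                                     320, 180, 1, &ps_mv_base, &pi1_ref_idx,
                                     &num_mvs_per_row);
    for(i = 0; i < n; i++)
        as_tabs[i].pu1_mem =
            (U08 *)allocate_aligned(as_tabs[i].size, as_tabs[i].align);
    hme_alloc_init_layer_mv_bank(as_tabs, 4, 2, 1, 1 /* assign */, 0,
                                 320, 180, 1, &ps_mv_base, &pi1_ref_idx,
                                 &num_mvs_per_row);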
891 | | /** |
892 | | ******************************************************************************** |
893 | | * @fn hme_alloc_init_layer() |
894 | | * |
895 | | * @brief memory alloc and init function |
896 | | * |
897 | | * @return Number of memtabs required |
898 | | ******************************************************************************** |
899 | | */ |
900 | | S32 hme_alloc_init_layer( |
901 | | hme_memtab_t *ps_memtab, |
902 | | S32 max_num_results, |
903 | | S32 max_num_ref, |
904 | | S32 use_4x4, |
905 | | S32 mem_avail, |
906 | | S32 u1_enc, |
907 | | S32 wd, |
908 | | S32 ht, |
909 | | S32 disp_wd, |
910 | | S32 disp_ht, |
911 | | S32 segment_layer, |
912 | | S32 is_explicit_store, |
913 | | layer_ctxt_t **pps_layer) |
914 | 0 | { |
915 | 0 | S32 count = 0; |
916 | 0 | layer_ctxt_t *ps_layer = NULL; |
917 | 0 | S32 size; |
918 | 0 | S32 num_ref; |
919 | |
|
920 | 0 | ARG_NOT_USED(segment_layer); |
921 | |
|
922 | 0 | if(is_explicit_store) |
923 | 0 | num_ref = max_num_ref; |
924 | 0 | else |
925 | 0 | num_ref = 2; |
926 | | |
927 | | /* We do not store 4x4 results for encoding layers */ |
928 | 0 | if(u1_enc) |
929 | 0 | use_4x4 = 0; |
930 | |
|
931 | 0 | size = sizeof(layer_ctxt_t); |
932 | 0 | if(mem_avail) |
933 | 0 | { |
934 | 0 | ASSERT(ps_memtab[count].size == size); |
935 | 0 | ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem; |
936 | 0 | *pps_layer = ps_layer; |
937 | 0 | } |
938 | 0 | else |
939 | 0 | { |
940 | 0 | ps_memtab[count].size = size; |
941 | 0 | ps_memtab[count].align = 8; |
942 | 0 | ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; |
943 | 0 | } |
944 | | |
945 | 0 | count++; |
946 | | |
947 | | /* Input luma buffer allocated only for non encode case */ |
948 | 0 | if(0 == u1_enc) |
949 | 0 | { |
950 | | /* Allocate input with padding of 16 pixels */ |
951 | 0 | size = (wd + 32 + 4) * (ht + 32 + 4); |
952 | 0 | if(mem_avail) |
953 | 0 | { |
954 | 0 | ASSERT(ps_memtab[count].size == size); |
955 | 0 | ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem; |
956 | 0 | } |
957 | 0 | else |
958 | 0 | { |
959 | 0 | ps_memtab[count].size = size; |
960 | 0 | ps_memtab[count].align = 16; |
961 | 0 | ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; |
962 | 0 | } |
963 | 0 | count++; |
964 | 0 | } |
965 | | |
966 | | /* Allocate memory for just the layer mvbank structure. */ |
967 | | /* TODO : see if this can be removed by moving it to layer_ctxt */ |
968 | 0 | size = sizeof(layer_mv_t); |
969 | |
|
970 | 0 | if(mem_avail) |
971 | 0 | { |
972 | 0 | ASSERT(ps_memtab[count].size == size); |
973 | 0 | ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem; |
974 | 0 | } |
975 | 0 | else |
976 | 0 | { |
977 | 0 | ps_memtab[count].size = size; |
978 | 0 | ps_memtab[count].align = 8; |
979 | 0 | ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; |
980 | 0 | } |
981 | | |
982 | 0 | count++; |
983 | |
|
984 | 0 | if(mem_avail) |
985 | 0 | { |
986 | 0 | hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc); |
987 | 0 | } |
988 | |
|
989 | 0 | return (count); |
990 | 0 | } |
991 | | |
992 | | S32 hme_alloc_init_search_nodes( |
993 | | search_results_t *ps_search_results, |
994 | | hme_memtab_t *ps_memtabs, |
995 | | S32 mem_avail, |
996 | | S32 max_num_ref, |
997 | | S32 max_num_results) |
998 | 0 | { |
999 | 0 | S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS; |
1000 | 0 | S32 j, k; |
1001 | 0 | search_node_t *ps_search_node; |
1002 | |
|
1003 | 0 | if(mem_avail == 0) |
1004 | 0 | { |
1005 | 0 | ps_memtabs->size = size; |
1006 | 0 | ps_memtabs->align = 4; |
1007 | 0 | ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM; |
1008 | 0 | return (1); |
1009 | 0 | } |
1010 | | |
1011 | 0 | ps_search_node = (search_node_t *)ps_memtabs->pu1_mem; |
1012 | 0 | ASSERT(ps_memtabs->size == size); |
1013 | | /****************************************************************************/ |
1014 | | /* For each CU, we search and store N best results, per partition, per ref */ |
1015 | | /* So, number of memtabs is num_refs * num_parts */ |
1016 | | /****************************************************************************/ |
1017 | 0 | for(j = 0; j < max_num_ref; j++) |
1018 | 0 | { |
1019 | 0 | for(k = 0; k < TOT_NUM_PARTS; k++) |
1020 | 0 | { |
1021 | 0 | ps_search_results->aps_part_results[j][k] = ps_search_node; |
1022 | 0 | ps_search_node += max_num_results; |
1023 | 0 | } |
1024 | 0 | } |
1025 | 0 | return (1); |
1026 | 0 | } |
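The single scratch buffer requested above is carved per (reference, partition) pair, so the n-th best node for reference j and partition k sits at a fixed offset:

    /* ps_search_results->aps_part_results[j][k][n] is the node at offset
     *   ((j * TOT_NUM_PARTS + k) * max_num_results + n)
     * search_node_t entries from the start of the buffer. */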
1027 | | |
1028 | | S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht) |
1029 | 0 | { |
1030 | 0 | S32 i; |
1031 | | /* We keep downscaling by 2 till we hit one of the conditions: */ |
1032 | | /* 1. MAX_NUM_LAYERS reached. */ |
1033 | | /* 2. Width or ht goes below min width and ht allowed at coarsest layer */ |
1034 | 0 | ASSERT(n_enc_layers < MAX_NUM_LAYERS); |
1035 | 0 | ASSERT(n_enc_layers > 0); |
1036 | 0 | ASSERT(p_wd[0] <= HME_MAX_WIDTH); |
1037 | 0 | ASSERT(p_ht[0] <= HME_MAX_HEIGHT); |
1038 | | |
1039 | 0 | p_disp_wd[0] = p_wd[0]; |
1040 | 0 | p_disp_ht[0] = p_ht[0]; |
1041 | | /*************************************************************************/ |
1042 | | /* Verify that for simulcast, lower layer to higher layer ratio is between */ |
1043 | | /* 2 (dyadic) and 1.33. Typically it should be 1.5. */ |
1044 | | /* TODO : for interlace, we may choose to have additional downscaling for*/ |
1045 | | /* width alone in coarsest layer to next layer. */ |
1046 | | /*************************************************************************/ |
1047 | 0 | for(i = 1; i < n_enc_layers; i++) |
1048 | 0 | { |
1049 | 0 | S32 wd1, wd2, ht1, ht2; |
1050 | 0 | wd1 = FLOOR16(p_wd[i - 1] >> 1); |
1051 | 0 | wd2 = CEIL16((p_wd[i - 1] * 3) >> 2); |
1052 | 0 | ASSERT(p_wd[i] >= wd1); |
1053 | 0 | ASSERT(p_wd[i] <= wd2); |
1054 | 0 | ht1 = FLOOR16(p_ht[i - 1] >> 1); |
1055 | 0 | ht2 = CEIL16((p_ht[i - 1] * 3) >> 2); |
1056 | 0 | ASSERT(p_ht[i] >= ht1); |
1057 | 0 | ASSERT(p_ht[i] <= ht2); |
1058 | 0 | } |
1059 | 0 | ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE); |
1060 | 0 | ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE); |
1061 | | |
1062 | 0 | for(i = n_enc_layers; i < MAX_NUM_LAYERS; i++) |
1063 | 0 | { |
1064 | 0 | if((p_wd[i - 1] < 2 * MIN_WD_COARSE) || (p_ht[i - 1] < 2 * MIN_HT_COARSE)) |
1065 | 0 | { |
1066 | 0 | return (i); |
1067 | 0 | } |
1068 | | /* Use CEIL16 to facilitate 16x16 searches in future, or to do */ |
1069 | | /* segmentation study in future */ |
1070 | 0 | p_wd[i] = CEIL16(p_wd[i - 1] >> 1); |
1071 | 0 | p_ht[i] = CEIL16(p_ht[i - 1] >> 1); |
1072 | |
|
1073 | 0 | p_disp_wd[i] = p_disp_wd[i - 1] >> 1; |
1074 | 0 | p_disp_ht[i] = p_disp_ht[i - 1] >> 1; |
1075 | 0 | } |
1076 | 0 | return (i); |
1077 | 0 | } |
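A worked example of the derivation above for a single 1920x1080 encode layer, assuming CEIL16() rounds up to the next multiple of 16:

    /* layer 0 : 1920 x 1080  (encode)
     * layer 1 :  960 x  544  (CEIL16(540) = 544)
     * layer 2 :  480 x  272
     * layer 3 :  240 x  144  (CEIL16(136) = 144)
     * with the loop stopping earlier if MAX_NUM_LAYERS is reached or a
     * dimension drops below 2*MIN_WD_COARSE / 2*MIN_HT_COARSE. */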
1078 | | |
1079 | | /** |
1080 | | ******************************************************************************** |
1081 | | * @fn hme_get_mv_blk_size() |
1082 | | * |
1083 | | * @brief returns whether the blk uses 4x4 size or 8x8. |
1084 | | * |
1085 | | * @param[in] enable_4x4 : input param from application to enable 4x4 |
1086 | | * |
1087 | | * @param[in] layer_id : id of current layer (0 finest) |
1088 | | * |
1089 | | * @param[in] num_layers : total num layers |
1090 | | * |
1091 | | * @param[in] is_enc : Whether encoding enabled for layer |
1092 | | * |
1093 | | * @return 1 for 4x4 blks, 0 for 8x8 |
1094 | | ******************************************************************************** |
1095 | | */ |
1096 | | S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc) |
1097 | 0 | { |
1098 | 0 | S32 use_4x4 = enable_4x4; |
1099 | |
|
1100 | 0 | if((layer_id <= 1) && (num_layers >= 4)) |
1101 | 0 | use_4x4 = USE_4x4_IN_L1; |
1102 | 0 | if(layer_id == num_layers - 1) |
1103 | 0 | use_4x4 = 1; |
1104 | 0 | if(is_enc) |
1105 | 0 | use_4x4 = 0; |
1106 | |
|
1107 | 0 | return (use_4x4); |
1108 | 0 | } |
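Because later assignments override earlier ones, the effective priority of the checks above can be summarised as:

    /* is_enc                                 -> use_4x4 = 0 (8x8 blks)
     * else layer_id == num_layers - 1        -> use_4x4 = 1 (coarsest layer)
     * else layer_id <= 1 && num_layers >= 4  -> use_4x4 = USE_4x4_IN_L1
     * else                                   -> use_4x4 = enable_4x4 */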
1109 | | |
1110 | | /** |
1111 | | ******************************************************************************** |
1112 | | * @fn hme_enc_alloc_init_mem() |
1113 | | * |
1114 | | * @brief Requests / assigns memory based on the mem_avail flag |
1115 | | * |
1116 | | * @param[in] ps_memtabs : memtab array |
1117 | | * |
1118 | | * @param[in] ps_prms : init prms |
1119 | | * |
1120 | | * @param[in] pv_ctxt : ME ctxt |
1121 | | * |
1122 | | * @param[in] mem_avail : request/assign flag |
1123 | | * |
1124 | | * @return Number of memtabs requested / assigned |
1125 | | ******************************************************************************** |
1126 | | */ |
1127 | | S32 hme_enc_alloc_init_mem( |
1128 | | hme_memtab_t *ps_memtabs, |
1129 | | hme_init_prms_t *ps_prms, |
1130 | | void *pv_ctxt, |
1131 | | S32 mem_avail, |
1132 | | S32 i4_num_me_frm_pllel) |
1133 | 0 | { |
1134 | 0 | me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt; |
1135 | 0 | me_ctxt_t *ps_ctxt; |
1136 | 0 | S32 count = 0, size, i, j, use_4x4; |
1137 | 0 | S32 n_tot_layers, n_enc_layers; |
1138 | 0 | S32 num_layers_explicit_search; |
1139 | 0 | S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS]; |
1140 | 0 | S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; |
1141 | 0 | S32 num_results; |
1142 | 0 | S32 num_thrds; |
1143 | 0 | S32 ctb_wd = 1 << ps_prms->log_ctb_size; |
1144 | | |
1145 | | /* MV bank changes */ |
1146 | 0 | hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL }; |
1147 | 0 | S32 i4_num_mvs_per_row = 0; |
1148 | 0 | S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL }; |
1149 | |
|
1150 | 0 | n_enc_layers = ps_prms->num_simulcast_layers; |
1151 | | |
1152 | | /* Memtab 0: handle */ |
1153 | 0 | size = sizeof(me_master_ctxt_t); |
1154 | 0 | if(mem_avail) |
1155 | 0 | { |
1156 | | /* store the number of processing threads */ |
1157 | 0 | ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds; |
1158 | 0 | } |
1159 | 0 | else |
1160 | 0 | { |
1161 | 0 | ps_memtabs[count].size = size; |
1162 | 0 | ps_memtabs[count].align = 8; |
1163 | 0 | ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; |
1164 | 0 | } |
1165 | |
|
1166 | 0 | count++; |
1167 | | |
1168 | | /* Memtab 1: ME threads ctxt */ |
1169 | 0 | size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t); |
1170 | 0 | if(mem_avail) |
1171 | 0 | { |
1172 | 0 | me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem; |
1173 | | |
1174 | | /* store the individual thread ctxt pointers */ |
1175 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1176 | 0 | { |
1177 | 0 | ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++; |
1178 | 0 | } |
1179 | 0 | } |
1180 | 0 | else |
1181 | 0 | { |
1182 | 0 | ps_memtabs[count].size = size; |
1183 | 0 | ps_memtabs[count].align = 8; |
1184 | 0 | ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; |
1185 | 0 | } |
1186 | |
|
1187 | 0 | count++; |
1188 | | |
1189 | | /* Memtab 2: ME frame ctxts */ |
1190 | 0 | size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds; |
1191 | 0 | if(mem_avail) |
1192 | 0 | { |
1193 | 0 | me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem; |
1194 | |
|
1195 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1196 | 0 | { |
1197 | | /* store the individual thread ctxt pointers */ |
1198 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1199 | 0 | { |
1200 | 0 | ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt; |
1201 | |
|
1202 | 0 | ps_me_frm_tmp_ctxt++; |
1203 | 0 | } |
1204 | 0 | } |
1205 | 0 | } |
1206 | 0 | else |
1207 | 0 | { |
1208 | 0 | ps_memtabs[count].size = size; |
1209 | 0 | ps_memtabs[count].align = 8; |
1210 | 0 | ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; |
1211 | 0 | } |
1212 | |
|
1213 | 0 | count++; |
1214 | |
|
1215 | 0 | memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers); |
1216 | 0 | memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers); |
1217 | | /*************************************************************************/ |
1218 | | /* Derive the number of HME layers, including both encoded and non encode*/ |
1219 | | /* This function also derives the width and ht of each layer. */ |
1220 | | /*************************************************************************/ |
1221 | 0 | n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); |
1222 | 0 | num_layers_explicit_search = ps_prms->num_layers_explicit_search; |
1223 | 0 | if(num_layers_explicit_search <= 0) |
1224 | 0 | num_layers_explicit_search = n_tot_layers - 1; |
1225 | |
|
1226 | 0 | num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); |
1227 | |
|
1228 | 0 | if(mem_avail) |
1229 | 0 | { |
1230 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1231 | 0 | { |
1232 | 0 | me_frm_ctxt_t *ps_frm_ctxt; |
1233 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1234 | |
|
1235 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1236 | 0 | { |
1237 | 0 | ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i]; |
1238 | |
|
1239 | 0 | memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers); |
1240 | 0 | memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers); |
1241 | | |
1242 | | /* only one encode layer is used */ |
1243 | 0 | ps_frm_ctxt->num_layers = 1; |
1244 | |
|
1245 | 0 | ps_frm_ctxt->i4_wd = a_wd[0]; |
1246 | 0 | ps_frm_ctxt->i4_ht = a_ht[0]; |
1247 | | /* |
1248 | | memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers); |
1249 | | memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers); |
1250 | | */ |
1251 | 0 | ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search; |
1252 | 0 | ps_frm_ctxt->max_num_results = ps_prms->max_num_results; |
1253 | 0 | ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse; |
1254 | 0 | ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref; |
1255 | 0 | } |
1256 | 0 | } |
1257 | 0 | } |
1258 | | |
1259 | | /* Memtabs : Layers MV bank for encode layer */ |
1260 | | /* Each ref_descr in master ctxt will have a separate layer ctxt */ |
1261 | |
|
1262 | 0 | for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++) |
1263 | 0 | { |
1264 | 0 | for(j = 0; j < 1; j++) |
1265 | 0 | { |
1266 | 0 | S32 is_explicit_store = 1; |
1267 | 0 | S32 wd, ht; |
1268 | 0 | U08 u1_enc = 1; |
1269 | 0 | wd = a_wd[j]; |
1270 | 0 | ht = a_ht[j]; |
1271 | | |
1272 | | /* Possibly implicit search for lower (finer) layers */ |
1273 | 0 | if(n_tot_layers - j > num_layers_explicit_search) |
1274 | 0 | is_explicit_store = 0; |
1275 | | |
1276 | | /* Even if explicit search, we store only 2 results (L0 and L1) */ |
1277 | | /* in finest layer */ |
1278 | 0 | if(j == 0) |
1279 | 0 | { |
1280 | 0 | is_explicit_store = 0; |
1281 | 0 | } |
1282 | | |
1283 | |             /* coarsest layer always uses 4x4 blks to store results */ |
1284 | 0 | if(j == n_tot_layers - 1) |
1285 | 0 | { |
1286 | 0 | num_results = ps_prms->max_num_results_coarse; |
1287 | 0 | } |
1288 | 0 | else |
1289 | 0 | { |
1290 | 0 | num_results = ps_prms->max_num_results; |
1291 | 0 | if(j == 0) |
1292 | 0 | num_results = 1; |
1293 | 0 | } |
1294 | 0 | use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc); |
1295 | |
|
1296 | 0 | count += hme_alloc_init_layer_mv_bank( |
1297 | 0 | &ps_memtabs[count], |
1298 | 0 | num_results, |
1299 | 0 | ps_prms->max_num_ref, |
1300 | 0 | use_4x4, |
1301 | 0 | mem_avail, |
1302 | 0 | u1_enc, |
1303 | 0 | wd, |
1304 | 0 | ht, |
1305 | 0 | is_explicit_store, |
1306 | 0 | &aps_mv_bank[i], |
1307 | 0 | &api1_ref_idx[i], |
1308 | 0 | &i4_num_mvs_per_row); |
1309 | 0 | } |
1310 | 0 | } |
1311 | | |
1312 | | /* Memtabs : Layers * num-ref + 1 */ |
1313 | 0 | for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++) |
1314 | 0 | { |
1315 | |         /* layer memory allocated only for encode layer */ |
1316 | 0 | for(j = 0; j < 1; j++) |
1317 | 0 | { |
1318 | 0 | layer_ctxt_t *ps_layer; |
1319 | 0 | S32 is_explicit_store = 1; |
1320 | 0 | S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers; |
1321 | 0 | S32 wd, ht; |
1322 | 0 | U08 u1_enc = 1; |
1323 | 0 | wd = a_wd[j]; |
1324 | 0 | ht = a_ht[j]; |
1325 | | |
1326 | | /* Possibly implicit search for lower (finer) layers */ |
1327 | 0 | if(n_tot_layers - j > num_layers_explicit_search) |
1328 | 0 | is_explicit_store = 0; |
1329 | | |
1330 | | /* Even if explicit search, we store only 2 results (L0 and L1) */ |
1331 | | /* in finest layer */ |
1332 | 0 | if(j == 0) |
1333 | 0 | { |
1334 | 0 | is_explicit_store = 0; |
1335 | 0 | } |
1336 | | |
1337 | |             /* coarsest layer always uses 4x4 blks to store results */ |
1338 | 0 | if(j == n_tot_layers - 1) |
1339 | 0 | { |
1340 | 0 | num_results = ps_prms->max_num_results_coarse; |
1341 | 0 | } |
1342 | 0 | else |
1343 | 0 | { |
1344 | 0 | num_results = ps_prms->max_num_results; |
1345 | 0 | if(j == 0) |
1346 | 0 | num_results = 1; |
1347 | 0 | } |
1348 | 0 | use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc); |
1349 | |
|
1350 | 0 | count += hme_alloc_init_layer( |
1351 | 0 | &ps_memtabs[count], |
1352 | 0 | num_results, |
1353 | 0 | ps_prms->max_num_ref, |
1354 | 0 | use_4x4, |
1355 | 0 | mem_avail, |
1356 | 0 | u1_enc, |
1357 | 0 | wd, |
1358 | 0 | ht, |
1359 | 0 | a_disp_wd[j], |
1360 | 0 | a_disp_ht[j], |
1361 | 0 | segment_this_layer, |
1362 | 0 | is_explicit_store, |
1363 | 0 | &ps_layer); |
1364 | 0 | if(mem_avail) |
1365 | 0 | { |
1366 | | /* same ps_layer memory pointer is stored in all the threads */ |
1367 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1368 | 0 | { |
1369 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1370 | 0 | ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer; |
1371 | 0 | } |
1372 | | |
1373 | | /* store the MV bank pointers */ |
1374 | 0 | ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row; |
1375 | 0 | ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i]; |
1376 | 0 | ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i]; |
1377 | 0 | } |
1378 | 0 | } |
1379 | 0 | } |
1380 | | |
1381 | | /* Memtabs : Buf Mgr for predictor bufs and working mem */ |
1382 | | /* TODO : Parameterise this appropriately */ |
1383 | 0 | size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel; |
1384 | |
|
1385 | 0 | if(mem_avail) |
1386 | 0 | { |
1387 | 0 | U08 *pu1_mem = ps_memtabs[count].pu1_mem; |
1388 | |
|
1389 | 0 | ASSERT(ps_memtabs[count].size == size); |
1390 | | |
1391 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1392 | 0 | { |
1393 | 0 | me_frm_ctxt_t *ps_frm_ctxt; |
1394 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1395 | |
|
1396 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1397 | 0 | { |
1398 | 0 | ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i]; |
1399 | |
|
1400 | 0 | hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD); |
1401 | |
|
1402 | 0 | if(i4_num_me_frm_pllel != 1) |
1403 | 0 | { |
1404 | | /* update the memory buffer pointer */ |
1405 | 0 | pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD; |
1406 | 0 | } |
1407 | 0 | } |
1408 | 0 | if(i4_num_me_frm_pllel == 1) |
1409 | 0 | { |
1410 | 0 | pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD; |
1411 | 0 | } |
1412 | 0 | } |
1413 | 0 | } |
1414 | 0 | else |
1415 | 0 | { |
1416 | 0 | ps_memtabs[count].size = size; |
1417 | 0 | ps_memtabs[count].align = 4; |
1418 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
1419 | 0 | } |
1420 | 0 | count++; |
1421 | | |
1422 | | /*************************************************************************/ |
1423 | | /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */ |
1424 | | /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred */ |
1425 | | /*************************************************************************/ |
1426 | 0 | size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds * |
1427 | 0 | i4_num_me_frm_pllel; |
1428 | |
|
1429 | 0 | if(mem_avail) |
1430 | 0 | { |
1431 | 0 | S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem; |
1432 | |
|
1433 | 0 | ASSERT(ps_memtabs[count].size == size); |
1434 | | |
1435 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1436 | 0 | { |
1437 | 0 | me_frm_ctxt_t *ps_frm_ctxt; |
1438 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1439 | |
|
1440 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1441 | 0 | { |
1442 | 0 | ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i]; |
1443 | |
|
1444 | 0 | ps_frm_ctxt->pi2_inp_bck = pi2_mem; |
1445 | |                 /** If no ME frames are running in parallel, the other aps_me_frm_prms indices reuse the same memory **/ |
1446 | 0 | if(i4_num_me_frm_pllel != 1) |
1447 | 0 | { |
1448 | 0 | pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE); |
1449 | 0 | } |
1450 | 0 | } |
1451 | 0 | if(i4_num_me_frm_pllel == 1) |
1452 | 0 | { |
1453 | 0 | pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE); |
1454 | 0 | } |
1455 | 0 | } |
1456 | 0 | } |
1457 | 0 | else |
1458 | 0 | { |
1459 | 0 | ps_memtabs[count].size = size; |
1460 | 0 | ps_memtabs[count].align = 16; |
1461 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
1462 | 0 | } |
1463 | | |
1464 | 0 | count++; |
1465 | | |
1466 | | /* Allocate a memtab for each histogram. As many as num ref and number of threads */ |
1467 | | /* Loop across for each ME_FRM in PARALLEL */ |
1468 | 0 | for(j = 0; j < MAX_NUM_ME_PARALLEL; j++) |
1469 | 0 | { |
1470 | 0 | for(i = 0; i < ps_prms->max_num_ref; i++) |
1471 | 0 | { |
1472 | 0 | size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t); |
1473 | 0 | if(mem_avail) |
1474 | 0 | { |
1475 | 0 | mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem; |
1476 | |
|
1477 | 0 | ASSERT(size == ps_memtabs[count].size); |
1478 | | |
1479 | |                 /* divide the memory across the threads */ |
1480 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1481 | 0 | { |
1482 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1483 | |
|
1484 | 0 | ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist; |
1485 | 0 | ps_mv_hist++; |
1486 | 0 | } |
1487 | 0 | } |
1488 | 0 | else |
1489 | 0 | { |
1490 | 0 | ps_memtabs[count].size = size; |
1491 | 0 | ps_memtabs[count].align = 8; |
1492 | 0 | ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; |
1493 | 0 | } |
1494 | 0 | count++; |
1495 | 0 | } |
1496 | 0 | if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1))) |
1497 | 0 | { |
1498 | |             /** If no ME frames are running in parallel, the other aps_me_frm_prms indices reuse the same memory **/ |
1499 | |             /** bring the count back to the earlier value when there are no ME frames in parallel; don't decrement on the last iteration **/ |
1500 | 0 | count -= ps_prms->max_num_ref; |
1501 | 0 | } |
1502 | 0 | } |
1503 | | |
1504 | | /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */ |
1505 | 0 | for(j = 0; j < MAX_NUM_ME_PARALLEL; j++) |
1506 | 0 | { |
1507 | 0 | S32 count_cpy = count; |
1508 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1509 | 0 | { |
1510 | 0 | if(mem_avail) |
1511 | 0 | { |
1512 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1513 | 0 | } |
1514 | |
|
1515 | 0 | for(i = 0; i < 21; i++) |
1516 | 0 | { |
1517 | 0 | search_results_t *ps_search_results = NULL; |
1518 | 0 | if(mem_avail) |
1519 | 0 | { |
1520 | 0 | if(i < 16) |
1521 | 0 | { |
1522 | 0 | ps_search_results = |
1523 | 0 | &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i]; |
1524 | 0 | } |
1525 | 0 | else if(i < 20) |
1526 | 0 | { |
1527 | 0 | ps_search_results = |
1528 | 0 | &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16]; |
1529 | 0 | ps_search_results->ps_cu_results = |
1530 | 0 | &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16]; |
1531 | 0 | } |
1532 | 0 | else if(i == 20) |
1533 | 0 | { |
1534 | 0 | ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64; |
1535 | 0 | ps_search_results->ps_cu_results = |
1536 | 0 | &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results; |
1537 | 0 | } |
1538 | 0 | else |
1539 | 0 | { |
1540 | |                         /* 8x8 search results are not required in L0 ME */ |
1541 | 0 | ASSERT(0); |
1542 | 0 | } |
1543 | 0 | } |
1544 | 0 | count += hme_alloc_init_search_nodes( |
1545 | 0 | ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results); |
1546 | 0 | } |
1547 | 0 | } |
1548 | | |
1549 | 0 | if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1))) |
1550 | 0 | { |
1551 | 0 | count = count_cpy; |
1552 | 0 | } |
1553 | 0 | } |
1554 | | |
1555 | | /* Weighted inputs, one for each ref + one non weighted */ |
1556 | 0 | for(j = 0; j < MAX_NUM_ME_PARALLEL; j++) |
1557 | 0 | { |
1558 | 0 | size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds; |
1559 | 0 | if(mem_avail) |
1560 | 0 | { |
1561 | 0 | U08 *pu1_mem; |
1562 | 0 | ASSERT(ps_memtabs[count].size == size); |
1563 | 0 | pu1_mem = ps_memtabs[count].pu1_mem; |
1564 | |
|
1565 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1566 | 0 | { |
1567 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1568 | |
|
1569 | 0 | for(i = 0; i < ps_prms->max_num_ref + 1; i++) |
1570 | 0 | { |
1571 | 0 | ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem; |
1572 | 0 | pu1_mem += (ctb_wd * ctb_wd); |
1573 | 0 | } |
1574 | 0 | } |
1575 | 0 | } |
1576 | 0 | else |
1577 | 0 | { |
1578 | 0 | ps_memtabs[count].size = size; |
1579 | 0 | ps_memtabs[count].align = 16; |
1580 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
1581 | 0 | } |
1582 | 0 | if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1))) |
1583 | 0 | { |
1584 | 0 | count++; |
1585 | 0 | } |
1586 | 0 | } |
1587 | | |
1588 | |     /* if memory is allocated then initialise the frm prms ptr for each thread */ |
1589 | 0 | if(mem_avail) |
1590 | 0 | { |
1591 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1592 | 0 | { |
1593 | 0 | me_frm_ctxt_t *ps_frm_ctxt; |
1594 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1595 | |
|
1596 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1597 | 0 | { |
1598 | 0 | ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i]; |
1599 | |
|
1600 | 0 | ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i]; |
1601 | 0 | ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i]; |
1602 | 0 | } |
1603 | 0 | } |
1604 | 0 | } |
1605 | | |
1606 | | /* Memory allocation for use in Clustering */ |
1607 | 0 | if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY) |
1608 | 0 | { |
1609 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1610 | 0 | { |
1611 | 0 | size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) + |
1612 | 0 | sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t); |
1613 | 0 | size *= ps_prms->i4_num_proc_thrds; |
1614 | |
|
1615 | 0 | if(mem_avail) |
1616 | 0 | { |
1617 | 0 | U08 *pu1_mem; |
1618 | |
|
1619 | 0 | ASSERT(ps_memtabs[count].size == size); |
1620 | 0 | pu1_mem = ps_memtabs[count].pu1_mem; |
1621 | |
|
1622 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1623 | 0 | { |
1624 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1625 | |
|
1626 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem; |
1627 | 0 | pu1_mem += (16 * sizeof(cluster_16x16_blk_t)); |
1628 | |
|
1629 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem; |
1630 | 0 | pu1_mem += (4 * sizeof(cluster_32x32_blk_t)); |
1631 | |
|
1632 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem; |
1633 | 0 | pu1_mem += (sizeof(cluster_64x64_blk_t)); |
1634 | |
|
1635 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = |
1636 | 0 | (ctb_cluster_info_t *)pu1_mem; |
1637 | 0 | pu1_mem += (sizeof(ctb_cluster_info_t)); |
1638 | 0 | } |
1639 | 0 | } |
1640 | 0 | else |
1641 | 0 | { |
1642 | 0 | ps_memtabs[count].size = size; |
1643 | 0 | ps_memtabs[count].align = 16; |
1644 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
1645 | 0 | } |
1646 | | |
1647 | 0 | if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1))) |
1648 | 0 | { |
1649 | 0 | count++; |
1650 | 0 | } |
1651 | 0 | } |
1652 | 0 | } |
1653 | 0 | else if(mem_avail) |
1654 | 0 | { |
1655 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1656 | 0 | { |
1657 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1658 | 0 | { |
1659 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1660 | |
|
1661 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL; |
1662 | |
|
1663 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL; |
1664 | |
|
1665 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL; |
1666 | |
|
1667 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL; |
1668 | 0 | } |
1669 | 0 | } |
1670 | 0 | } |
1671 | | |
1672 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
1673 | 0 | { |
1674 | 0 | size = sizeof(fullpel_refine_ctxt_t); |
1675 | 0 | size *= ps_prms->i4_num_proc_thrds; |
1676 | |
|
1677 | 0 | if(mem_avail) |
1678 | 0 | { |
1679 | 0 | U08 *pu1_mem; |
1680 | |
|
1681 | 0 | ASSERT(ps_memtabs[count].size == size); |
1682 | 0 | pu1_mem = ps_memtabs[count].pu1_mem; |
1683 | |
|
1684 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1685 | 0 | { |
1686 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1687 | |
|
1688 | 0 | ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt = |
1689 | 0 | (fullpel_refine_ctxt_t *)pu1_mem; |
1690 | 0 | pu1_mem += (sizeof(fullpel_refine_ctxt_t)); |
1691 | 0 | } |
1692 | 0 | } |
1693 | 0 | else |
1694 | 0 | { |
1695 | 0 | ps_memtabs[count].size = size; |
1696 | 0 | ps_memtabs[count].align = 16; |
1697 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
1698 | 0 | } |
1699 | | |
1700 | 0 | if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1))) |
1701 | 0 | { |
1702 | 0 | count++; |
1703 | 0 | } |
1704 | 0 | } |
1705 | | |
1706 | | /* Memory for ihevce_me_optimised_function_list_t struct */ |
1707 | 0 | if(mem_avail) |
1708 | 0 | { |
1709 | 0 | ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem; |
1710 | 0 | } |
1711 | 0 | else |
1712 | 0 | { |
1713 | 0 | ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t); |
1714 | 0 | ps_memtabs[count].align = 16; |
1715 | 0 | ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
1716 | 0 | } |
1717 | |
|
1718 | 0 | ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel)); |
1719 | 0 | return (count); |
1720 | 0 | } |
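
hme_enc_alloc_init_mem above is written to run twice: with mem_avail == 0 it only records each memtab's size, alignment and memory attribute (a request pass), and with mem_avail == 1 it hands the caller-allocated pu1_mem blocks out to the per-thread contexts (an assign pass). The following is a minimal, self-contained sketch of that two-pass idiom only; demo_memtab_t is a simplified stand-in reusing the field names visible in the listing, and the sizes are arbitrary.

/* Illustrative sketch only -- not part of hme_interface.c */
#include <stdlib.h>

typedef struct
{
    unsigned char *pu1_mem; /* filled in by the caller between the two passes */
    int size;
    int align;
} demo_memtab_t;

/* request pass (mem_avail == 0) records size/align; assign pass consumes pu1_mem */
static int demo_alloc_init(demo_memtab_t *ps_memtabs, int mem_avail, void **ppv_handle)
{
    int count = 0;

    if(mem_avail)
    {
        *ppv_handle = ps_memtabs[count].pu1_mem; /* hand the block to the module */
    }
    else
    {
        ps_memtabs[count].size = 128; /* arbitrary demo size */
        ps_memtabs[count].align = 8;
    }
    count++;
    return count; /* number of memtabs touched, as in the real routine */
}

int main(void)
{
    demo_memtab_t as_tabs[4];
    void *pv_handle = NULL;
    int i, num;

    num = demo_alloc_init(as_tabs, 0, &pv_handle);   /* pass 1: query sizes */
    for(i = 0; i < num; i++)
        as_tabs[i].pu1_mem = (unsigned char *)malloc(as_tabs[i].size);
    demo_alloc_init(as_tabs, 1, &pv_handle);         /* pass 2: assign memory */

    for(i = 0; i < num; i++)
        free(as_tabs[i].pu1_mem);
    return 0;
}
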
1721 | | |
1722 | | /** |
1723 | | ******************************************************************************** |
1724 | | * @fn hme_coarse_alloc_init_mem() |
1725 | | * |
1726 | | * @brief Requests / assigns memory based on mem_avail |
1727 | | * |
1728 | | * @param[in] ps_memtabs : memtab array |
1729 | | * |
1730 | | * @param[in] ps_prms : init prms |
1731 | | * |
1732 | | * @param[in] pv_ctxt : ME ctxt |
1733 | | * |
1734 | | * @param[in] mem_avail : request/assign flag |
1735 | | * |
1736 | | * @return number of memtabs |
1737 | | ******************************************************************************** |
1738 | | */ |
1739 | | S32 hme_coarse_alloc_init_mem( |
1740 | | hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail) |
1741 | 0 | { |
1742 | 0 | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt; |
1743 | 0 | coarse_me_ctxt_t *ps_ctxt; |
1744 | 0 | S32 count = 0, size, i, j, use_4x4, wd; |
1745 | 0 | S32 n_tot_layers; |
1746 | 0 | S32 num_layers_explicit_search; |
1747 | 0 | S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS]; |
1748 | 0 | S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; |
1749 | 0 | S32 num_results; |
1750 | 0 | S32 num_thrds; |
1751 | | //S32 ctb_wd = 1 << ps_prms->log_ctb_size; |
1752 | 0 | S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows; |
1753 | 0 | S32 layer1_blk_width = 8; // 8x8 search |
1754 | 0 | S32 blk_shift; |
1755 | | |
1756 | | /* MV bank changes */ |
1757 | 0 | hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL }; |
1758 | 0 | S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 }; |
1759 | 0 | S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL }; |
1760 | | |
1761 | | /* Memtab 0: handle */ |
1762 | 0 | size = sizeof(coarse_me_master_ctxt_t); |
1763 | 0 | if(mem_avail) |
1764 | 0 | { |
1765 | | /* store the number of processing threads */ |
1766 | 0 | ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds; |
1767 | 0 | } |
1768 | 0 | else |
1769 | 0 | { |
1770 | 0 | ps_memtabs[count].size = size; |
1771 | 0 | ps_memtabs[count].align = 8; |
1772 | 0 | ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; |
1773 | 0 | } |
1774 | |
|
1775 | 0 | count++; |
1776 | | |
1777 | | /* Memtab 1: ME threads ctxt */ |
1778 | 0 | size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t); |
1779 | 0 | if(mem_avail) |
1780 | 0 | { |
1781 | 0 | coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem; |
1782 | | |
1783 | |         /* store the individual thread ctxt pointers */ |
1784 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1785 | 0 | { |
1786 | 0 | ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++; |
1787 | 0 | } |
1788 | 0 | } |
1789 | 0 | else |
1790 | 0 | { |
1791 | 0 | ps_memtabs[count].size = size; |
1792 | 0 | ps_memtabs[count].align = 8; |
1793 | 0 | ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; |
1794 | 0 | } |
1795 | |
|
1796 | 0 | count++; |
1797 | |
|
1798 | 0 | memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers); |
1799 | 0 | memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers); |
1800 | | /*************************************************************************/ |
1801 | | /* Derive the number of HME layers, including both encoded and non encode*/ |
1802 | | /* This function also derives the width and ht of each layer. */ |
1803 | | /*************************************************************************/ |
1804 | 0 | n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht); |
1805 | |
|
1806 | 0 | num_layers_explicit_search = ps_prms->num_layers_explicit_search; |
1807 | |
|
1808 | 0 | if(num_layers_explicit_search <= 0) |
1809 | 0 | num_layers_explicit_search = n_tot_layers - 1; |
1810 | |
|
1811 | 0 | num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); |
1812 | |
|
1813 | 0 | if(mem_avail) |
1814 | 0 | { |
1815 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1816 | 0 | { |
1817 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1818 | 0 | memset(ps_ctxt->u1_encode, 0, n_tot_layers); |
1819 | | |
1820 | | /* encode layer should be excluded during processing */ |
1821 | 0 | ps_ctxt->num_layers = n_tot_layers; |
1822 | |
|
1823 | 0 | memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers); |
1824 | 0 | memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers); |
1825 | |
|
1826 | 0 | ps_ctxt->num_layers_explicit_search = num_layers_explicit_search; |
1827 | 0 | ps_ctxt->max_num_results = ps_prms->max_num_results; |
1828 | 0 | ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse; |
1829 | 0 | ps_ctxt->max_num_ref = ps_prms->max_num_ref; |
1830 | 0 | } |
1831 | 0 | } |
1832 | | |
1833 | | /* Memtabs : Layers MV bank for total layers - 2 */ |
1834 | |     /* for penultimate layer MV bank will be initialised at every frame level */ |
1835 | 0 | for(j = 1; j < n_tot_layers; j++) |
1836 | 0 | { |
1837 | 0 | S32 is_explicit_store = 1; |
1838 | 0 | S32 wd, ht; |
1839 | 0 | U08 u1_enc = 0; |
1840 | 0 | wd = a_wd[j]; |
1841 | 0 | ht = a_ht[j]; |
1842 | | |
1843 | | /* Possibly implicit search for lower (finer) layers */ |
1844 | 0 | if(n_tot_layers - j > num_layers_explicit_search) |
1845 | 0 | is_explicit_store = 0; |
1846 | | |
1847 | | /* Even if explicit search, we store only 2 results (L0 and L1) */ |
1848 | | /* in finest layer */ |
1849 | 0 | if(j == 0) |
1850 | 0 | { |
1851 | 0 | is_explicit_store = 0; |
1852 | 0 | } |
1853 | | |
1854 | |         /* coarsest layer always uses 4x4 blks to store results */ |
1855 | 0 | if(j == n_tot_layers - 1) |
1856 | 0 | { |
1857 | 0 | num_results = ps_prms->max_num_results_coarse; |
1858 | 0 | } |
1859 | 0 | else |
1860 | 0 | { |
1861 | 0 | num_results = ps_prms->max_num_results; |
1862 | 0 | if(j == 0) |
1863 | 0 | num_results = 1; |
1864 | 0 | } |
1865 | 0 | use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc); |
1866 | | |
1867 | |         /* for the penultimate layer, compute the parameters and store them */ |
1868 | 0 | if(j == 1) |
1869 | 0 | { |
1870 | 0 | S32 num_blks, num_mvs_per_blk, num_ref; |
1871 | 0 | S32 num_cols, num_rows, num_mvs_per_row; |
1872 | |
|
1873 | 0 | num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2); |
1874 | 0 | num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2); |
1875 | |
|
1876 | 0 | if(is_explicit_store) |
1877 | 0 | num_ref = ps_prms->max_num_ref; |
1878 | 0 | else |
1879 | 0 | num_ref = 2; |
1880 | |
|
1881 | 0 | num_blks = num_cols * num_rows; |
1882 | 0 | num_mvs_per_blk = num_ref * num_results; |
1883 | 0 | num_mvs_per_row = num_mvs_per_blk * num_cols; |
1884 | |
|
1885 | 0 | ai4_num_mvs_per_row[j] = num_mvs_per_row; |
1886 | 0 | aps_mv_bank[j] = NULL; |
1887 | 0 | api1_ref_idx[j] = NULL; |
1888 | 0 | } |
1889 | 0 | else |
1890 | 0 | { |
1891 | 0 | count += hme_alloc_init_layer_mv_bank( |
1892 | 0 | &ps_memtabs[count], |
1893 | 0 | num_results, |
1894 | 0 | ps_prms->max_num_ref, |
1895 | 0 | use_4x4, |
1896 | 0 | mem_avail, |
1897 | 0 | u1_enc, |
1898 | 0 | wd, |
1899 | 0 | ht, |
1900 | 0 | is_explicit_store, |
1901 | 0 | &aps_mv_bank[j], |
1902 | 0 | &api1_ref_idx[j], |
1903 | 0 | &ai4_num_mvs_per_row[j]); |
1904 | 0 | } |
1905 | 0 | } |
1906 | | |
1907 | | /* Memtabs : Layers * num-ref + 1 */ |
1908 | 0 | for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) |
1909 | 0 | { |
1910 | |         /* for all layers except the encode layer */ |
1911 | 0 | for(j = 1; j < n_tot_layers; j++) |
1912 | 0 | { |
1913 | 0 | layer_ctxt_t *ps_layer; |
1914 | 0 | S32 is_explicit_store = 1; |
1915 | 0 | S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers; |
1916 | 0 | S32 wd, ht; |
1917 | 0 | U08 u1_enc = 0; |
1918 | 0 | wd = a_wd[j]; |
1919 | 0 | ht = a_ht[j]; |
1920 | | |
1921 | | /* Possibly implicit search for lower (finer) layers */ |
1922 | 0 | if(n_tot_layers - j > num_layers_explicit_search) |
1923 | 0 | is_explicit_store = 0; |
1924 | | |
1925 | | /* Even if explicit search, we store only 2 results (L0 and L1) */ |
1926 | | /* in finest layer */ |
1927 | 0 | if(j == 0) |
1928 | 0 | { |
1929 | 0 | is_explicit_store = 0; |
1930 | 0 | } |
1931 | | |
1932 | |             /* coarsest layer always uses 4x4 blks to store results */ |
1933 | 0 | if(j == n_tot_layers - 1) |
1934 | 0 | { |
1935 | 0 | num_results = ps_prms->max_num_results_coarse; |
1936 | 0 | } |
1937 | 0 | else |
1938 | 0 | { |
1939 | 0 | num_results = ps_prms->max_num_results; |
1940 | 0 | if(j == 0) |
1941 | 0 | num_results = 1; |
1942 | 0 | } |
1943 | 0 | use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc); |
1944 | |
|
1945 | 0 | count += hme_alloc_init_layer( |
1946 | 0 | &ps_memtabs[count], |
1947 | 0 | num_results, |
1948 | 0 | ps_prms->max_num_ref, |
1949 | 0 | use_4x4, |
1950 | 0 | mem_avail, |
1951 | 0 | u1_enc, |
1952 | 0 | wd, |
1953 | 0 | ht, |
1954 | 0 | a_disp_wd[j], |
1955 | 0 | a_disp_ht[j], |
1956 | 0 | segment_this_layer, |
1957 | 0 | is_explicit_store, |
1958 | 0 | &ps_layer); |
1959 | 0 | if(mem_avail) |
1960 | 0 | { |
1961 | | /* same ps_layer memory pointer is stored in all the threads */ |
1962 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
1963 | 0 | { |
1964 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
1965 | 0 | ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer; |
1966 | 0 | } |
1967 | | |
1968 | | /* store the MV bank pointers */ |
1969 | 0 | ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j]; |
1970 | 0 | ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j]; |
1971 | 0 | ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j]; |
1972 | 0 | } |
1973 | 0 | } |
1974 | 0 | } |
1975 | | |
1976 | | /* Memtabs : Prev Row search node at coarsest layer */ |
1977 | 0 | wd = a_wd[n_tot_layers - 1]; |
1978 | | |
1979 | | /* Allocate a memtab for storing 4x4 SADs for n rows. As many as num ref and number of threads */ |
1980 | 0 | num_rows = ps_prms->i4_num_proc_thrds + 1; |
1981 | 0 | if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED) |
1982 | 0 | search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY; |
1983 | 0 | else |
1984 | 0 | search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED; |
1985 | | |
1986 | | /*shift factor*/ |
1987 | 0 | blk_shift = 2; /*4x4*/ |
1988 | 0 | search_step >>= 1; |
1989 | |
|
1990 | 0 | sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) * |
1991 | 0 | ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step); |
1992 | 0 | sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size; |
1993 | |
|
1994 | 0 | size = num_rows * sad_4x4_block_stride * sizeof(S16); |
1995 | 0 | for(i = 0; i < ps_prms->max_num_ref; i++) |
1996 | 0 | { |
1997 | 0 | if(mem_avail) |
1998 | 0 | { |
1999 | 0 | ASSERT(size == ps_memtabs[count].size); |
2000 | | |
2001 | | /* same row memory pointer is stored in all the threads */ |
2002 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
2003 | 0 | { |
2004 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2005 | 0 | ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem; |
2006 | 0 | } |
2007 | 0 | } |
2008 | 0 | else |
2009 | 0 | { |
2010 | 0 | ps_memtabs[count].size = size; |
2011 | 0 | ps_memtabs[count].align = 4; |
2012 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
2013 | 0 | } |
2014 | 0 | count++; |
2015 | 0 | } |
2016 | | |
2017 | | /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */ |
2018 | 0 | size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t); |
2019 | 0 | for(i = 0; i < ps_prms->max_num_ref; i++) |
2020 | 0 | { |
2021 | 0 | if(mem_avail) |
2022 | 0 | { |
2023 | 0 | ASSERT(size == ps_memtabs[count].size); |
2024 | | |
2025 | | /* same row memory pointer is stored in all the threads */ |
2026 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
2027 | 0 | { |
2028 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2029 | 0 | ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] = |
2030 | 0 | (search_node_t *)ps_memtabs[count].pu1_mem; |
2031 | 0 | } |
2032 | 0 | } |
2033 | 0 | else |
2034 | 0 | { |
2035 | 0 | ps_memtabs[count].size = size; |
2036 | 0 | ps_memtabs[count].align = 4; |
2037 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
2038 | 0 | } |
2039 | 0 | count++; |
2040 | 0 | } |
2041 | | /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */ |
2042 | 0 | size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t); |
2043 | 0 | for(i = 0; i < ps_prms->max_num_ref; i++) |
2044 | 0 | { |
2045 | 0 | if(mem_avail) |
2046 | 0 | { |
2047 | 0 | ASSERT(size == ps_memtabs[count].size); |
2048 | | |
2049 | | /* same row memory pointer is stored in all the threads */ |
2050 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
2051 | 0 | { |
2052 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2053 | 0 | ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] = |
2054 | 0 | (search_node_t *)ps_memtabs[count].pu1_mem; |
2055 | 0 | } |
2056 | 0 | } |
2057 | 0 | else |
2058 | 0 | { |
2059 | 0 | ps_memtabs[count].size = size; |
2060 | 0 | ps_memtabs[count].align = 4; |
2061 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
2062 | 0 | } |
2063 | 0 | count++; |
2064 | 0 | } |
2065 | | |
2066 | | /* Allocate a memtab for each histogram. As many as num ref and number of threads */ |
2067 | 0 | for(i = 0; i < ps_prms->max_num_ref; i++) |
2068 | 0 | { |
2069 | 0 | size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t); |
2070 | 0 | if(mem_avail) |
2071 | 0 | { |
2072 | 0 | mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem; |
2073 | |
|
2074 | 0 | ASSERT(size == ps_memtabs[count].size); |
2075 | | |
2076 | |             /* divide the memory across the threads */ |
2077 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
2078 | 0 | { |
2079 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2080 | 0 | ps_ctxt->aps_mv_hist[i] = ps_mv_hist; |
2081 | 0 | ps_mv_hist++; |
2082 | 0 | } |
2083 | 0 | } |
2084 | 0 | else |
2085 | 0 | { |
2086 | 0 | ps_memtabs[count].size = size; |
2087 | 0 | ps_memtabs[count].align = 8; |
2088 | 0 | ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; |
2089 | 0 | } |
2090 | 0 | count++; |
2091 | 0 | } |
2092 | | |
2093 | | /* Memtabs : Search nodes for 8x8 blks */ |
2094 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
2095 | 0 | { |
2096 | 0 | search_results_t *ps_search_results = NULL; |
2097 | |
|
2098 | 0 | if(mem_avail) |
2099 | 0 | { |
2100 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2101 | 0 | } |
2102 | |
|
2103 | 0 | if(mem_avail) |
2104 | 0 | { |
2105 | 0 | ps_search_results = &ps_ctxt->s_search_results_8x8; |
2106 | 0 | } |
2107 | 0 | count += hme_alloc_init_search_nodes( |
2108 | 0 | ps_search_results, |
2109 | 0 | &ps_memtabs[count], |
2110 | 0 | mem_avail, |
2111 | 0 | ps_prms->max_num_ref, |
2112 | 0 | ps_prms->max_num_results); |
2113 | 0 | } |
2114 | | |
2115 | | /* Weighted inputs, one for each ref */ |
2116 | 0 | size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width * |
2117 | 0 | ps_prms->i4_num_proc_thrds; |
2118 | 0 | if(mem_avail) |
2119 | 0 | { |
2120 | 0 | U08 *pu1_mem; |
2121 | 0 | ASSERT(ps_memtabs[count].size == size); |
2122 | 0 | pu1_mem = ps_memtabs[count].pu1_mem; |
2123 | |
|
2124 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
2125 | 0 | { |
2126 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2127 | |
|
2128 | 0 | for(i = 0; i < ps_prms->max_num_ref + 1; i++) |
2129 | 0 | { |
2130 | 0 | ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem; |
2131 | 0 | pu1_mem += (layer1_blk_width * layer1_blk_width); |
2132 | 0 | } |
2133 | 0 | } |
2134 | 0 | } |
2135 | 0 | else |
2136 | 0 | { |
2137 | 0 | ps_memtabs[count].size = size; |
2138 | 0 | ps_memtabs[count].align = 16; |
2139 | 0 | ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
2140 | 0 | } |
2141 | 0 | count++; |
2142 | | |
2143 | |     /* if memory is allocated then initialise the frm prms ptr for each thread */ |
2144 | 0 | if(mem_avail) |
2145 | 0 | { |
2146 | 0 | for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) |
2147 | 0 | { |
2148 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2149 | |
|
2150 | 0 | ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms; |
2151 | 0 | ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map; |
2152 | 0 | } |
2153 | 0 | } |
2154 | | |
2155 | | /* Memory for ihevce_me_optimised_function_list_t struct */ |
2156 | 0 | if(mem_avail) |
2157 | 0 | { |
2158 | 0 | ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem; |
2159 | 0 | } |
2160 | 0 | else |
2161 | 0 | { |
2162 | 0 | ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t); |
2163 | 0 | ps_memtabs[count].align = 16; |
2164 | 0 | ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM; |
2165 | 0 | } |
2166 | | |
2167 | | //ASSERT(count < hme_enc_num_alloc()); |
2168 | 0 | ASSERT(count < hme_coarse_num_alloc()); |
2169 | 0 | return (count); |
2170 | 0 | } |
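
For the penultimate layer the routine above only records the MV-bank row geometry (num_cols, num_ref, num_mvs_per_row) instead of allocating the bank, since that bank is set up per frame. Below is a worked sketch of that row-sizing arithmetic; the layer dimensions, reference count and result count are assumptions chosen purely for illustration, not values taken from the encoder.

/* Illustrative arithmetic only; the input numbers below are assumed. */
#include <stdio.h>

int main(void)
{
    int wd = 960, ht = 540;      /* assumed penultimate-layer dimensions */
    int use_4x4 = 0;             /* 8x8 result blocks */
    int is_explicit_store = 0;   /* implicit search keeps only L0/L1 */
    int max_num_ref = 4, num_results = 2; /* assumed configuration */

    int num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
    int num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
    int num_ref = is_explicit_store ? max_num_ref : 2;
    int num_mvs_per_blk = num_ref * num_results;
    int num_mvs_per_row = num_mvs_per_blk * num_cols;

    printf("cols=%d rows=%d mvs/row=%d\n", num_cols, num_rows, num_mvs_per_row);
    /* with these assumptions: cols=122, rows=69, mvs/row=488 */
    return 0;
}
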
2171 | | |
2172 | | /*! |
2173 | | ****************************************************************************** |
2174 | | * \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif |
2175 | | * |
2176 | | * \brief Returns to the caller key attributes relevant for dependency manager, |
2177 | | * i.e., the number of vertical units in each layer |
2178 | | * |
2179 | | * \par Description: |
2180 | | * This function requires the precondition that the width and ht of the encode |
2181 | | * layer are known. |
2182 | | * It derives the number of layers, the number of vertical units in each layer, |
2183 | | * and, for each vertical unit in each layer, its dependency on the previous layer's units. |
2184 | | * From ME's perspective, a vertical unit is the smallest supported unit size |
2185 | | * vertically (and spans the entire row horizontally). This is a CTB for the encode |
2186 | | * layer, and 8x8 / 4x4 for non-encode layers. |
2187 | | * |
2188 | | * \param[in] num_layers : Number of ME Layers |
2189 | | * \param[in] pai4_ht : Array storing ht at each layer |
2190 | | * \param[in] pai4_wd : Array storing wd at each layer |
2191 | | * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each |
2192 | | * entry has num vertical units in that particular layer |
2193 | | * |
2194 | | * \return |
2195 | | * None |
2196 | | * |
2197 | | * \author |
2198 | | * Ittiam |
2199 | | * |
2200 | | ***************************************************************************** |
2201 | | */ |
2202 | | void ihevce_coarse_me_get_lyr_prms_dep_mngr( |
2203 | | WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr) |
2204 | 0 | { |
2205 | | /* Height of current and next layers */ |
2206 | 0 | WORD32 ht_c, ht_n; |
2207 | | /* Blk ht at a given layer and next layer*/ |
2208 | 0 | WORD32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n; |
2209 | | /* Number of vertical units in current and next layer */ |
2210 | 0 | WORD32 num_vert_c, num_vert_n; |
2211 | |
|
2212 | 0 | WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1, i; |
2213 | 0 | UWORD8 au1_encode[MAX_NUM_LAYERS]; |
2214 | |
|
2215 | 0 | memset(au1_encode, 0, num_layers); |
2216 | 0 | memset(au1_encode, 1, num_enc_layers); |
2217 | |
|
2218 | 0 | ht_n = pai4_ht[num_layers - 2]; |
2219 | 0 | ht_c = pai4_ht[num_layers - 1]; |
2220 | | |
2221 | | /* compute blk ht and unit ht for c and n */ |
2222 | 0 | if(au1_encode[num_layers - 1]) |
2223 | 0 | { |
2224 | 0 | blk_ht_c = 16; |
2225 | 0 | unit_ht_c = ctb_size; |
2226 | 0 | } |
2227 | 0 | else |
2228 | 0 | { |
2229 | 0 | blk_ht_c = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0); |
2230 | 0 | unit_ht_c = blk_ht_c; |
2231 | 0 | } |
2232 | |
|
2233 | 0 | num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c; |
2234 | |     /* For the new design, the coarsest HME layer needs  */ |
2235 | |     /* one additional row at the end of the frame,       */ |
2236 | |     /* hence num_vert_c is incremented by 1              */ |
2237 | 0 | num_vert_c++; |
2238 | | |
2239 | | /*************************************************************************/ |
2240 | | /* Run through each layer, set the number of vertical units */ |
2241 | | /*************************************************************************/ |
2242 | 0 | for(i = num_layers - 1; i > 0; i--) |
2243 | 0 | { |
2244 | 0 | pai4_num_vert_units_in_lyr[i] = num_vert_c; |
2245 | | |
2246 | | /* "n" is computed for first time */ |
2247 | 0 | ht_n = pai4_ht[i - 1]; |
2248 | 0 | blk_ht_n = hme_get_blk_size(use_4x4, i - 1, num_layers, 0); |
2249 | 0 | unit_ht_n = blk_ht_n; |
2250 | 0 | if(au1_encode[i - 1]) |
2251 | 0 | unit_ht_n = ctb_size; |
2252 | |
|
2253 | 0 | num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n; |
2254 | | |
2255 | | /* Compute the blk size and vert unit size in each layer */ |
2256 | | /* "c" denotes curr layer, and "n" denotes the layer to which result */ |
2257 | | /* is projected to */ |
2258 | 0 | ht_c = ht_n; |
2259 | 0 | blk_ht_c = blk_ht_n; |
2260 | 0 | unit_ht_c = unit_ht_n; |
2261 | 0 | num_vert_c = num_vert_n; |
2262 | 0 | } |
2263 | | |
2264 | | /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */ |
2265 | |     /* set the number of vertical units */ |
2266 | 0 | pai4_num_vert_units_in_lyr[0] = num_vert_c; |
2267 | 0 | } |
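
The vertical-unit counts returned above follow directly from the per-layer unit heights: a CTB row for the encode layer, an 8x8 block row for the refine layers, and a 4x4 block row (plus one extra row) for the coarsest layer, matching the block-size shifts used elsewhere in this file. A small worked example, with the per-layer heights below being assumptions for illustration only:

/* Illustrative sketch only; layer heights are assumed. */
#include <stdio.h>

int main(void)
{
    int ai4_ht[4] = { 1088, 544, 272, 136 };   /* layer 0 .. layer 3 (assumed) */
    int ai4_unit_ht[4] = { 64, 8, 8, 4 };      /* CTB, refine blk, refine blk, coarse blk */
    int i;

    for(i = 0; i < 4; i++)
    {
        int num_vert = (ai4_ht[i] + ai4_unit_ht[i] - 1) / ai4_unit_ht[i];
        if(i == 3)
            num_vert++;   /* coarsest layer: one extra row at the end of the frame */
        printf("layer %d: %d vertical units\n", i, num_vert);
    }
    /* expected with these assumptions: 17, 68, 34, 35 */
    return 0;
}
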
2268 | | |
2269 | | /** |
2270 | | ******************************************************************************** |
2271 | | * @fn hme_coarse_dep_mngr_alloc_mem() |
2272 | | * |
2273 | | * @brief Requests memory for HME Dep Mngr |
2274 | | * |
2275 | | * \param[in,out] ps_mem_tab : pointer to memory descriptors table |
2276 | | * \param[in] ps_init_prms : Create time static parameters |
2277 | | * \param[in] i4_mem_space : memspace in which memory request should be done |
2278 | | * |
2279 | | * @return number of memtabs |
2280 | | ******************************************************************************** |
2281 | | */ |
2282 | | WORD32 hme_coarse_dep_mngr_alloc_mem( |
2283 | | iv_mem_rec_t *ps_mem_tab, |
2284 | | ihevce_static_cfg_params_t *ps_init_prms, |
2285 | | WORD32 i4_mem_space, |
2286 | | WORD32 i4_num_proc_thrds, |
2287 | | WORD32 i4_resolution_id) |
2288 | 0 | { |
2289 | 0 | WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS]; |
2290 | 0 | WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; |
2291 | 0 | WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS]; |
2292 | 0 | WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i; |
2293 | 0 | WORD32 min_cu_size; |
2294 | | |
2295 | | /* get the min cu size from config params */ |
2296 | 0 | min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size; |
2297 | |
|
2298 | 0 | min_cu_size = 1 << min_cu_size; |
2299 | | |
2300 | | /* Get the width and heights of different decomp layers */ |
2301 | 0 | *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + |
2302 | 0 | SET_CTB_ALIGN( |
2303 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size); |
2304 | |
|
2305 | 0 | *a_ht = |
2306 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height + |
2307 | 0 | SET_CTB_ALIGN( |
2308 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size); |
2309 | |
|
2310 | 0 | n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); |
2311 | 0 | ASSERT(n_tot_layers >= 3); |
2312 | | |
2313 | |     /* --- Get the number of vertical units in each layer for dep. mngr -- */ |
2314 | 0 | ihevce_coarse_me_get_lyr_prms_dep_mngr( |
2315 | 0 | n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]); |
2316 | | |
2317 | |     /* Fill memtabs for HME layers, except for the L0 layer */ |
2318 | 0 | for(i = 1; i < n_tot_layers; i++) |
2319 | 0 | { |
2320 | 0 | n_dep_tabs += ihevce_dmgr_get_mem_recs( |
2321 | 0 | &ps_mem_tab[n_dep_tabs], |
2322 | 0 | DEP_MNGR_ROW_ROW_SYNC, |
2323 | 0 | ai4_num_vert_units_in_lyr[i], |
2324 | 0 | 1, /* Number of Col Tiles : Not supported in PreEnc */ |
2325 | 0 | i4_num_proc_thrds, |
2326 | 0 | i4_mem_space); |
2327 | 0 | } |
2328 | |
|
2329 | 0 | ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc()); |
2330 | | |
2331 | 0 | return (n_dep_tabs); |
2332 | 0 | } |
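
The target width and height used above are first padded with SET_CTB_ALIGN(dim, min_cu_size); the macro itself is defined elsewhere in the encoder, but the "dim + SET_CTB_ALIGN(dim, sz)" usage implies rounding the dimension up to a multiple of the minimum CU size. A generic sketch of that rounding under that assumption, with arbitrary example values:

/* Illustrative sketch; not the encoder's macro. */
#include <stdio.h>

/* round dim up to the next multiple of align (align assumed to be a power of two) */
static int round_up_dim(int dim, int align)
{
    return (dim + align - 1) & ~(align - 1);
}

int main(void)
{
    int min_cu_size = 1 << 3; /* e.g. i4_min_log2_cu_size == 3 */

    printf("852  -> %d\n", round_up_dim(852, min_cu_size));  /* 856 */
    printf("1920 -> %d\n", round_up_dim(1920, min_cu_size)); /* 1920, already aligned */
    return 0;
}
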
2333 | | |
2334 | | /** |
2335 | | ******************************************************************************** |
2336 | | * @fn hme_coarse_dep_mngr_init() |
2337 | | * |
2338 | | * @brief Assign memory for HME Dep Mngr |
2339 | | * |
2340 | | * \param[in,out] ps_mem_tab : pointer to memory descriptors table |
2341 | | * \param[in] ps_init_prms : Create time static parameters |
2342 | | * @param[in] pv_ctxt : ME ctxt |
2343 | | * \param[in] pv_osal_handle : Osal handle |
2344 | | * |
2345 | | * @return number of memtabs |
2346 | | ******************************************************************************** |
2347 | | */ |
2348 | | WORD32 hme_coarse_dep_mngr_init( |
2349 | | iv_mem_rec_t *ps_mem_tab, |
2350 | | ihevce_static_cfg_params_t *ps_init_prms, |
2351 | | void *pv_ctxt, |
2352 | | void *pv_osal_handle, |
2353 | | WORD32 i4_num_proc_thrds, |
2354 | | WORD32 i4_resolution_id) |
2355 | 0 | { |
2356 | 0 | WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS]; |
2357 | 0 | WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; |
2358 | 0 | WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS]; |
2359 | 0 | WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i; |
2360 | 0 | WORD32 min_cu_size; |
2361 | |
|
2362 | 0 | coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt; |
2363 | | |
2364 | | /* get the min cu size from config params */ |
2365 | 0 | min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size; |
2366 | |
|
2367 | 0 | min_cu_size = 1 << min_cu_size; |
2368 | | |
2369 | | /* Get the width and heights of different decomp layers */ |
2370 | 0 | *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + |
2371 | 0 | SET_CTB_ALIGN( |
2372 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size); |
2373 | 0 | *a_ht = |
2374 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height + |
2375 | 0 | SET_CTB_ALIGN( |
2376 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size); |
2377 | |
|
2378 | 0 | n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); |
2379 | 0 | ASSERT(n_tot_layers >= 3); |
2380 | | |
2381 | |     /* --- Get the number of vertical units in each layer for dep. mngr -- */ |
2382 | 0 | ihevce_coarse_me_get_lyr_prms_dep_mngr( |
2383 | 0 | n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]); |
2384 | | |
2385 | | /* --- HME sync Dep Mngr Mem init -- */ |
2386 | 0 | for(i = 1; i < n_tot_layers; i++) |
2387 | 0 | { |
2388 | 0 | WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift; |
2389 | |
|
2390 | 0 | if(i == (n_tot_layers - 1)) /* coarsest layer */ |
2391 | 0 | blk_size_shift = 2; |
2392 | 0 | else |
2393 | 0 | blk_size_shift = 3; /* refine layers */ |
2394 | |
|
2395 | 0 | GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic); |
2396 | | |
2397 | | /* Coarsest layer : 1 block extra, since the last block */ |
2398 | 0 | if(i == (n_tot_layers - 1)) /* in a row needs East block */ |
2399 | 0 | num_blks_in_row += 1; |
2400 | | |
2401 | | /* Note : i-1, only for HME layers, L0 is separate */ |
2402 | 0 | ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init( |
2403 | 0 | &ps_mem_tab[n_dep_tabs], |
2404 | 0 | pv_osal_handle, |
2405 | 0 | DEP_MNGR_ROW_ROW_SYNC, |
2406 | 0 | ai4_num_vert_units_in_lyr[i], |
2407 | 0 | num_blks_in_row, |
2408 | 0 | 1, /* Number of Col Tiles : Not supported in PreEnc */ |
2409 | 0 | i4_num_proc_thrds, |
2410 | 0 | 1 /*Sem disabled*/ |
2411 | 0 | ); |
2412 | |
|
2413 | 0 | n_dep_tabs += ihevce_dmgr_get_num_mem_recs(); |
2414 | 0 | } |
2415 | |
|
2416 | 0 | return n_dep_tabs; |
2417 | 0 | } |
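
Each non-encode layer registered above gets a row-row sync dependency manager sized by its block grid: 8x8 blocks for the refine layers, 4x4 blocks for the coarsest layer, with one extra block per row in the coarsest layer so the last block can wait on its East neighbour. A small sketch of those numbers, assuming GET_NUM_BLKS_IN_PIC is a ceiling division by the block size and using example layer dimensions (both are assumptions for illustration):

/* Illustrative sketch only; layer dimensions and the ceiling-division form are assumed. */
#include <stdio.h>

int main(void)
{
    int a_wd[4] = { 1920, 960, 480, 240 };   /* layer 0 .. 3 (assumed) */
    int a_ht[4] = { 1088, 544, 272, 136 };
    int n_tot_layers = 4, i;

    for(i = 1; i < n_tot_layers; i++)        /* L0 is handled separately */
    {
        int blk_size_shift = (i == n_tot_layers - 1) ? 2 : 3;
        int num_blks_in_row = (a_wd[i] + (1 << blk_size_shift) - 1) >> blk_size_shift;
        int num_blks_in_pic = num_blks_in_row *
                              ((a_ht[i] + (1 << blk_size_shift) - 1) >> blk_size_shift);

        if(i == n_tot_layers - 1)
            num_blks_in_row += 1;            /* last block in a row needs its East block */
        printf("layer %d: %d blks/row, %d blks/pic\n", i, num_blks_in_row, num_blks_in_pic);
    }
    /* expected with these assumptions: 120/8160, 60/2040, 61/2040 */
    return 0;
}
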
2418 | | |
2419 | | /** |
2420 | | ******************************************************************************** |
2421 | | * @fn hme_coarse_dep_mngr_reg_sem() |
2422 | | * |
2423 | | * @brief Assign semaphores for HME Dep Mngr |
2424 | | * |
2425 | | * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt |
2426 | | * \param[in] ppv_sem_hdls : Array of semaphore handles |
2427 | | * \param[in] i4_num_proc_thrds : Number of processing threads |
2428 | | * |
2429 | | * @return None |
2430 | | ******************************************************************************** |
2431 | | */ |
2432 | | void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds) |
2433 | 0 | { |
2434 | 0 | WORD32 i; |
2435 | 0 | coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt; |
2436 | 0 | coarse_me_ctxt_t *ps_ctxt = ps_me_ctxt->aps_me_ctxt[0]; |
2437 | | |
2438 | | /* --- HME sync Dep Mngr semaphore init -- */ |
2439 | 0 | for(i = 1; i < ps_ctxt->num_layers; i++) |
2440 | 0 | { |
2441 | 0 | ihevce_dmgr_reg_sem_hdls( |
2442 | 0 | ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1], ppv_sem_hdls, i4_num_proc_thrds); |
2443 | 0 | } |
2444 | |
|
2445 | 0 | return; |
2446 | 0 | } |
2447 | | |
2448 | | /** |
2449 | | ******************************************************************************** |
2450 | | * @fn hme_coarse_dep_mngr_delete() |
2451 | | * |
2452 | | * @brief Destroys the Coarse ME Dep Mngr module |
2453 | | * Note : only destroys the resources allocated in the module, like |
2454 | | * semaphores, etc. Memory is freed separately using memtabs |
2455 | | * |
2456 | | * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt |
2457 | | * \param[in] ps_init_prms : Create time static parameters |
2458 | | * |
2459 | | * @return none |
2460 | | ******************************************************************************** |
2461 | | */ |
2462 | | void hme_coarse_dep_mngr_delete( |
2463 | | void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id) |
2464 | 0 | { |
2465 | 0 | WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; |
2466 | 0 | WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS]; |
2467 | 0 | WORD32 n_enc_layers = 1, n_tot_layers, i; |
2468 | 0 | WORD32 min_cu_size; |
2469 | |
|
2470 | 0 | coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
2471 | | |
2472 | | /* get the min cu size from config params */ |
2473 | 0 | min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size; |
2474 | |
|
2475 | 0 | min_cu_size = 1 << min_cu_size; |
2476 | | |
2477 | | /* Get the width and heights of different decomp layers */ |
2478 | 0 | *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + |
2479 | 0 | SET_CTB_ALIGN( |
2480 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size); |
2481 | 0 | *a_ht = |
2482 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height + |
2483 | 0 | SET_CTB_ALIGN( |
2484 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size); |
2485 | 0 | n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); |
2486 | 0 | ASSERT(n_tot_layers >= 3); |
2487 | | |
2488 | | /* --- HME sync Dep Mngr Delete -- */ |
2489 | 0 | for(i = 1; i < n_tot_layers; i++) |
2490 | 0 | { |
2491 | | /* Note : i-1, only for HME layers, L0 is separate */ |
2492 | 0 | ihevce_dmgr_del(ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1]); |
2493 | 0 | } |
2494 | 0 | } |
2495 | | |
2496 | | /** |
2497 | | ******************************************************************************* |
2498 | | * @fn S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) |
2499 | | * |
2500 | | * @brief Fills up memtabs with memory information details required by HME |
2501 | | * |
2502 | | * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills |
2503 | | * up its requirements of memory |
2504 | | * |
2505 | | * @param[in] ps_prms : Input parameters to module crucial in calculating reqd |
2506 | | * amt of memory |
2507 | | * |
2508 | | * @return Number of memtabs required |
2509 | | ******************************************************************************* |
2510 | | */ |
2511 | | S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel) |
2512 | 0 | { |
2513 | 0 | S32 num, tot, i; |
2514 | | |
2515 | | /* Validation of init params */ |
2516 | 0 | if(-1 == hme_validate_init_prms(ps_prms)) |
2517 | 0 | return (-1); |
2518 | | |
2519 | 0 | num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel); |
2520 | 0 | tot = hme_enc_num_alloc(i4_num_me_frm_pllel); |
2521 | 0 | for(i = num; i < tot; i++) |
2522 | 0 | { |
2523 | 0 | ps_memtabs[i].size = 4; |
2524 | 0 | ps_memtabs[i].align = 4; |
2525 | 0 | ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM; |
2526 | 0 | } |
2527 | 0 | return (tot); |
2528 | 0 | } |
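
A hedged usage sketch of the create-time flow implied by hme_enc_alloc and hme_enc_init: request the memtabs, allocate each one, then initialise. It assumes the encoder's HME headers are on the include path (the include list is abbreviated), the array bound and helper name are hypothetical, alignment and error handling are simplified, and the module handle is taken to be the memory of memtab 0, following the "handle" memtab convention used by the *_alloc_init_mem routines.

/* Usage sketch only; not part of hme_interface.c. */
#include <stdlib.h>
#include "hme_interface.h" /* hme_memtab_t, hme_enc_alloc, hme_enc_init (other headers omitted) */

#define DEMO_MAX_ENC_MEMTABS 256 /* hypothetical upper bound for this sketch */

void *demo_create_enc_me(hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant, WORD32 i4_num_me_frm_pllel)
{
    hme_memtab_t as_memtabs[DEMO_MAX_ENC_MEMTABS];
    S32 i, num;

    num = hme_enc_alloc(as_memtabs, ps_prms, i4_num_me_frm_pllel);
    if(num < 0)
        return NULL; /* init prms validation failed */

    for(i = 0; i < num; i++)
        as_memtabs[i].pu1_mem = (U08 *)malloc(as_memtabs[i].size); /* honour .align in real code */

    if(hme_enc_init(as_memtabs[0].pu1_mem, as_memtabs, ps_prms, ps_rc_quant, i4_num_me_frm_pllel) < 0)
        return NULL;

    return as_memtabs[0].pu1_mem; /* module handle == memtab 0 memory (assumption) */
}
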
2529 | | |
2530 | | /** |
2531 | | ******************************************************************************* |
2532 | | * @fn S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) |
2533 | | * |
2534 | | * @brief Fills up memtabs with memory information details required by Coarse HME |
2535 | | * |
2536 | | * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills |
2537 | | * up its requirements of memory |
2538 | | * |
2539 | | * @param[in] ps_prms : Input parameters to module crucial in calculating reqd |
2540 | | * amt of memory |
2541 | | * |
2542 | | * @return Number of memtabs required |
2543 | | ******************************************************************************* |
2544 | | */ |
2545 | | S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) |
2546 | 0 | { |
2547 | 0 | S32 num, tot, i; |
2548 | | |
2549 | | /* Validation of init params */ |
2550 | 0 | if(-1 == hme_validate_init_prms(ps_prms)) |
2551 | 0 | return (-1); |
2552 | | |
2553 | 0 | num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0); |
2554 | 0 | tot = hme_coarse_num_alloc(); |
2555 | 0 | for(i = num; i < tot; i++) |
2556 | 0 | { |
2557 | 0 | ps_memtabs[i].size = 4; |
2558 | 0 | ps_memtabs[i].align = 4; |
2559 | 0 | ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM; |
2560 | 0 | } |
2561 | 0 | return (tot); |
2562 | 0 | } |
2563 | | |
2564 | | /** |
2565 | | ******************************************************************************* |
2566 | | * @fn hme_coarse_dep_mngr_alloc |
2567 | | * |
2568 | | * @brief Fills up memtabs with memory information details required by Coarse HME |
2569 | | * |
2570 | | * \param[in,out] ps_mem_tab : pointer to memory descriptors table |
2571 | | * \param[in] ps_init_prms : Create time static parameters |
2572 | | * \param[in] i4_mem_space : memspace in which memory request should be done |
2573 | | * |
2574 | | * @return Number of memtabs required |
2575 | | ******************************************************************************* |
2576 | | */ |
2577 | | WORD32 hme_coarse_dep_mngr_alloc( |
2578 | | iv_mem_rec_t *ps_mem_tab, |
2579 | | ihevce_static_cfg_params_t *ps_init_prms, |
2580 | | WORD32 i4_mem_space, |
2581 | | WORD32 i4_num_proc_thrds, |
2582 | | WORD32 i4_resolution_id) |
2583 | 0 | { |
2584 | 0 | S32 num, tot, i; |
2585 | |
|
2586 | 0 | num = hme_coarse_dep_mngr_alloc_mem( |
2587 | 0 | ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id); |
2588 | 0 | tot = hme_coarse_dep_mngr_num_alloc(); |
2589 | 0 | for(i = num; i < tot; i++) |
2590 | 0 | { |
2591 | 0 | ps_mem_tab[i].i4_mem_size = 4; |
2592 | 0 | ps_mem_tab[i].i4_mem_alignment = 4; |
2593 | 0 | ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
2594 | 0 | } |
2595 | 0 | return (tot); |
2596 | 0 | } |
2597 | | |
2598 | | /** |
2599 | | ******************************************************************************** |
2600 | | * @fn hme_coarse_init_ctxt() |
2601 | | * |
2602 | | * @brief initialise context memory |
2603 | | * |
2604 | | * @param[in] ps_prms : init prms |
2605 | | * |
2606 | | * @param[in] pv_ctxt : ME ctxt |
2607 | | * |
2608 | | * @return None |
2609 | | ******************************************************************************** |
2610 | | */ |
2611 | | void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms) |
2612 | 0 | { |
2613 | 0 | S32 i, j, num_thrds; |
2614 | 0 | coarse_me_ctxt_t *ps_ctxt; |
2615 | 0 | S32 num_rows_coarse; |
2616 | | |
2617 | |     /* initialise the parameters into the context of all threads */ |
2618 | 0 | for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
2619 | 0 | { |
2620 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2621 | | |
2622 | | /* Copy the init prms to context */ |
2623 | 0 | ps_ctxt->s_init_prms = *ps_prms; |
2624 | | |
2625 | | /* Initialize some other variables in ctxt */ |
2626 | 0 | ps_ctxt->i4_prev_poc = -1; |
2627 | |
|
2628 | 0 | ps_ctxt->num_b_frms = ps_prms->num_b_frms; |
2629 | |
|
2630 | 0 | ps_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_ctxt->au1_ref_bits_tlu_lc[0][0]; |
2631 | 0 | ps_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_ctxt->au1_ref_bits_tlu_lc[1][0]; |
2632 | | |
2633 | |         /* Initialize the num rows lookup table */ |
2634 | 0 | ps_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1; |
2635 | 0 | num_rows_coarse = ps_ctxt->i4_num_row_bufs; |
2636 | 0 | for(i = 0; i < ((HEVCE_MAX_HEIGHT >> 1) >> 2); i++) |
2637 | 0 | { |
2638 | 0 | ps_ctxt->ai4_row_index[i] = (i % num_rows_coarse); |
2639 | 0 | } |
2640 | 0 | } |
2641 | | |
2642 | | /* since same layer desc pointer is stored in all the threads ctxt */ |
2643 | | /* layer init is done only using 0th thread ctxt */ |
2644 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
2645 | | |
2646 | |     /* Initialize all layer descriptors to have poc = -1, meaning unfilled */ |
2647 | 0 | for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) |
2648 | 0 | { |
2649 | 0 | for(j = 1; j < ps_ctxt->num_layers; j++) |
2650 | 0 | { |
2651 | 0 | layer_ctxt_t *ps_layer; |
2652 | 0 | ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j]; |
2653 | 0 | ps_layer->i4_poc = -1; |
2654 | 0 | ps_layer->ppu1_list_inp = &ps_ctxt->apu1_list_inp[j][0]; |
2655 | 0 | memset( |
2656 | 0 | ps_layer->s_global_mv, 0, sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES); |
2657 | 0 | } |
2658 | 0 | } |
2659 | 0 | } |
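
The ai4_row_index table initialised above simply recycles (i4_num_proc_thrds + 1) row buffers across all coarse-layer rows in round-robin order. A minimal sketch of that mapping, with the thread count below assumed for illustration:

/* Illustrative sketch only; thread count is assumed. */
#include <stdio.h>

int main(void)
{
    int i4_num_row_bufs = 4 + 1; /* e.g. 4 processing threads -> 5 row buffers */
    int row;

    for(row = 0; row < 12; row++)
        printf("coarse row %2d -> row buffer %d\n", row, row % i4_num_row_bufs);
    return 0;
}
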
2660 | | |
2661 | | /** |
2662 | | ******************************************************************************** |
2663 | | * @fn hme_enc_init_ctxt() |
2664 | | * |
2665 | | * @brief initialise context memory |
2666 | | * |
2667 | | * @param[in] ps_prms : init prms |
2668 | | * |
2669 | | * @param[in] pv_ctxt : ME ctxt |
2670 | | * |
2671 | | * @return number of memtabs |
2672 | | ******************************************************************************** |
2673 | | */ |
2674 | | void hme_enc_init_ctxt( |
2675 | | me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt) |
2676 | 0 | { |
2677 | 0 | S32 i, j, num_thrds; |
2678 | 0 | me_ctxt_t *ps_ctxt; |
2679 | 0 | me_frm_ctxt_t *ps_frm_ctxt; |
2680 | | |
2681 | | /* initialise the parameters in context of all threads */ |
2682 | 0 | for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
2683 | 0 | { |
2684 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
2685 | | /* Store Tile params base into ME context */ |
2686 | 0 | ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base; |
2687 | |
|
2688 | 0 | for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) |
2689 | 0 | { |
2690 | 0 | ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i]; |
2691 | | |
2692 | | /* Copy the init prms to context */ |
2693 | 0 | ps_ctxt->s_init_prms = *ps_prms; |
2694 | | |
2695 | | /* Initialize some other variables in ctxt */ |
2696 | 0 | ps_frm_ctxt->i4_prev_poc = INVALID_POC; |
2697 | |
|
2698 | 0 | ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size; |
2699 | |
|
2700 | 0 | ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms; |
2701 | |
|
2702 | 0 | ps_frm_ctxt->i4_is_prev_frame_reference = 0; |
2703 | |
|
2704 | 0 | ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt; |
2705 | | |
2706 | | /* Initialize mv grids for L0 and L1 used in final refinement layer */ |
2707 | 0 | { |
2708 | 0 | hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]); |
2709 | 0 | hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]); |
2710 | 0 | hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]); |
2711 | 0 | hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]); |
2712 | 0 | hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]); |
2713 | 0 | hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]); |
2714 | 0 | } |
2715 | |
|
2716 | 0 | ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0]; |
2717 | 0 | ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0]; |
2718 | 0 | } |
2719 | 0 | } |
2720 | | |
2721 | | /* since same layer desc pointer is stored in all the threads ctxt */ |
2722 | | /* layer init is done only using 0th thread ctxt */ |
2723 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
2724 | |
|
2725 | 0 | ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0]; |
2726 | | |
2727 | | /* Initialize all layer descriptors to have poc = INVALID_POC, meaning unfilled */
2728 | 0 | for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++) |
2729 | 0 | { |
2730 | | /* only encode layer is processed */
2731 | 0 | for(j = 0; j < 1; j++) |
2732 | 0 | { |
2733 | 0 | layer_ctxt_t *ps_layer; |
2734 | 0 | ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j]; |
2735 | 0 | ps_layer->i4_poc = INVALID_POC; |
2736 | 0 | ps_layer->i4_is_free = 1; |
2737 | 0 | ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0]; |
2738 | 0 | ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0]; |
2739 | 0 | ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0]; |
2740 | 0 | ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0]; |
2741 | 0 | ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0]; |
2742 | 0 | ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0]; |
2743 | |
|
2744 | 0 | memset( |
2745 | 0 | ps_layer->s_global_mv, |
2746 | 0 | 0, |
2747 | 0 | sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES); |
2748 | 0 | } |
2749 | 0 | } |
2750 | 0 | } |
2751 | | |
2752 | | /** |
2753 | | ******************************************************************************* |
2754 | | * @fn S32 hme_enc_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms,rc_quant_t *ps_rc_quant_ctxt) |
2755 | | * |
2756 | | * @brief Initialises the Encode Layer HME ctxt |
2757 | | * |
2758 | | * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills |
2759 | | * up its requirements of memory |
2760 | | * |
2761 | | * @param[in] ps_prms : Input parameters to the module, needed to calculate the
2762 | | * required amount of memory
2763 | | * |
2764 | | * @return 0 on success, -1 on failure
2765 | | ******************************************************************************* |
2766 | | */ |
2767 | | S32 hme_enc_init( |
2768 | | void *pv_ctxt, |
2769 | | hme_memtab_t *ps_memtabs, |
2770 | | hme_init_prms_t *ps_prms, |
2771 | | rc_quant_t *ps_rc_quant_ctxt, |
2772 | | WORD32 i4_num_me_frm_pllel) |
2773 | 0 | { |
2774 | 0 | S32 num, tot; |
2775 | 0 | me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt; |
2776 | |
|
2777 | 0 | tot = hme_enc_num_alloc(i4_num_me_frm_pllel); |
2778 | | /* Validation of init params */ |
2779 | 0 | if(-1 == hme_validate_init_prms(ps_prms)) |
2780 | 0 | return (-1); |
2781 | | |
2782 | 0 | num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel); |
2783 | 0 | if(num > tot) |
2784 | 0 | return (-1); |
2785 | | |
2786 | | /* Initialize all enumerations based globals */ |
2787 | | //hme_init_globals(); /* done as part of coarse me */ |
2788 | | |
2789 | | /* Copy the memtabs into the context for returning during free */ |
2790 | 0 | memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot); |
2791 | | |
2792 | | /* initialize the context and related buffers */ |
2793 | 0 | hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt); |
2794 | 0 | return (0); |
2795 | 0 | } |
2796 | | |
2797 | | /** |
2798 | | ******************************************************************************* |
2799 | | * @fn S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) |
2800 | | * |
2801 | | * @brief Initialises the Coarse HME ctxt |
2802 | | * |
2803 | | * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills |
2804 | | * up its requirements of memory |
2805 | | * |
2806 | | * @param[in] ps_prms : Input parameters to the module, needed to calculate the
2807 | | * required amount of memory
2808 | | * |
2809 | | * @return 0 on success, -1 on failure
2810 | | ******************************************************************************* |
2811 | | */ |
2812 | | S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) |
2813 | 0 | { |
2814 | 0 | S32 num, tot; |
2815 | 0 | coarse_me_master_ctxt_t *ps_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt; |
2816 | |
|
2817 | 0 | tot = hme_coarse_num_alloc(); |
2818 | | /* Validation of init params */ |
2819 | 0 | if(-1 == hme_validate_init_prms(ps_prms)) |
2820 | 0 | return (-1); |
2821 | | |
2822 | 0 | num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1); |
2823 | 0 | if(num > tot) |
2824 | 0 | return (-1); |
2825 | | |
2826 | | /* Initialize all enumerations based globals */ |
2827 | 0 | hme_init_globals(); |
2828 | | |
2829 | | /* Copy the memtabs into the context for returning during free */ |
2830 | 0 | memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot); |
2831 | | |
2832 | | /* initialize the context and related buffers */ |
2833 | 0 | hme_coarse_init_ctxt(ps_ctxt, ps_prms); |
2834 | |
|
2835 | 0 | return (0); |
2836 | 0 | } |
2837 | | |
2838 | | /** |
2839 | | ******************************************************************************* |
2840 | | * @fn void hme_set_resolution(void *pv_me_ctxt,
2841 | | * S32 n_enc_layers,
2842 | | * S32 *p_wd,
2843 | | * S32 *p_ht, S32 me_frm_id)
2844 | | * |
2845 | | * @brief Sets up the layers based on resolution information. |
2846 | | * |
2847 | | * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info |
2848 | | * |
2849 | | * @param[in] n_enc_layers : Number of layers encoded |
2850 | | * |
2851 | | * @param[in] p_wd : Pointer to an array having widths for each encode layer |
2852 | | * |
2853 | | * @param[in] p_ht : Pointer to an array having heights for each encode layer |
2854 | | * |
2855 | | * @return void |
2856 | | ******************************************************************************* |
2857 | | */ |
2858 | | |
2859 | | void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id) |
2860 | 0 | { |
2861 | 0 | S32 n_tot_layers, num_layers_explicit_search, i, j; |
2862 | 0 | me_ctxt_t *ps_thrd_ctxt; |
2863 | 0 | me_frm_ctxt_t *ps_ctxt; |
2864 | |
|
2865 | 0 | S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS]; |
2866 | 0 | S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; |
2867 | 0 | memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32)); |
2868 | 0 | memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32)); |
2869 | |
|
2870 | 0 | ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt; |
2871 | |
|
2872 | 0 | ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id]; |
2873 | | |
2874 | | /*************************************************************************/ |
2875 | | /* Derive the number of HME layers, including both encode and non-encode. */
2876 | | /* This function also derives the width and ht of each layer. */ |
2877 | | /*************************************************************************/ |
2878 | 0 | n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); |
2879 | 0 | num_layers_explicit_search = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search; |
2880 | 0 | if(num_layers_explicit_search <= 0) |
2881 | 0 | num_layers_explicit_search = n_tot_layers - 1; |
2882 | |
|
2883 | 0 | num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); |
2884 | 0 | ps_ctxt->num_layers_explicit_search = num_layers_explicit_search; |
2885 | 0 | memset(ps_ctxt->u1_encode, 0, n_tot_layers); |
2886 | 0 | memset(ps_ctxt->u1_encode, 1, n_enc_layers); |
2887 | | |
2888 | | /* only encode layer should be processed */ |
2889 | 0 | ps_ctxt->num_layers = n_tot_layers; |
2890 | |
|
2891 | 0 | ps_ctxt->i4_wd = a_wd[0]; |
2892 | 0 | ps_ctxt->i4_ht = a_ht[0]; |
2893 | | |
2894 | | /* Memtabs : Layers * num-ref + 1 */ |
2895 | 0 | for(i = 0; i < ps_ctxt->max_num_ref + 1; i++) |
2896 | 0 | { |
2897 | 0 | for(j = 0; j < 1; j++) |
2898 | 0 | { |
2899 | 0 | S32 wd, ht; |
2900 | 0 | layer_ctxt_t *ps_layer; |
2901 | 0 | U08 u1_enc = ps_ctxt->u1_encode[j]; |
2902 | 0 | wd = a_wd[j]; |
2903 | 0 | ht = a_ht[j]; |
2904 | 0 | ps_layer = ps_thrd_ctxt->as_ref_descr[i].aps_layers[j]; |
2905 | 0 | hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc); |
2906 | 0 | } |
2907 | 0 | } |
2908 | 0 | } |
2909 | | |
2910 | | /** |
2911 | | ******************************************************************************* |
2912 | | * @fn void hme_coarse_set_resolution(void *pv_me_ctxt,
2913 | | * S32 n_enc_layers,
2914 | | * S32 *p_wd,
2915 | | * S32 *p_ht)
2916 | | * |
2917 | | * @brief Sets up the layers based on resolution information. |
2918 | | * |
2919 | | * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info |
2920 | | * |
2921 | | * @param[in] n_enc_layers : Number of layers encoded |
2922 | | * |
2923 | | * @param[in] p_wd : Pointer to an array having widths for each encode layer |
2924 | | * |
2925 | | * @param[in] p_ht : Pointer to an array having heights for each encode layer |
2926 | | * |
2927 | | * @return void |
2928 | | ******************************************************************************* |
2929 | | */ |
2930 | | |
2931 | | void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht) |
2932 | 0 | { |
2933 | 0 | S32 n_tot_layers, num_layers_explicit_search, i, j; |
2934 | 0 | coarse_me_ctxt_t *ps_ctxt; |
2935 | 0 | S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS]; |
2936 | 0 | S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; |
2937 | 0 | memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32)); |
2938 | 0 | memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32)); |
2939 | |
|
2940 | 0 | ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; |
2941 | | /*************************************************************************/ |
2942 | | /* Derive the number of HME layers, including both encode and non-encode. */
2943 | | /* This function also derives the width and ht of each layer. */ |
2944 | | /*************************************************************************/ |
2945 | 0 | n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); |
2946 | 0 | num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search; |
2947 | 0 | if(num_layers_explicit_search <= 0) |
2948 | 0 | num_layers_explicit_search = n_tot_layers - 1; |
2949 | |
|
2950 | 0 | num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); |
2951 | 0 | ps_ctxt->num_layers_explicit_search = num_layers_explicit_search; |
2952 | 0 | memset(ps_ctxt->u1_encode, 0, n_tot_layers); |
2953 | 0 | memset(ps_ctxt->u1_encode, 1, n_enc_layers); |
2954 | | |
2955 | | /* encode layer should be excluded */ |
2956 | 0 | ps_ctxt->num_layers = n_tot_layers; |
2957 | |
|
2958 | 0 | memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers); |
2959 | 0 | memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers); |
2960 | | |
2961 | | /* Memtabs : Layers * num-ref + 1 */ |
2962 | 0 | for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) |
2963 | 0 | { |
2964 | 0 | for(j = 1; j < n_tot_layers; j++) |
2965 | 0 | { |
2966 | 0 | S32 wd, ht; |
2967 | 0 | layer_ctxt_t *ps_layer; |
2968 | 0 | U08 u1_enc = ps_ctxt->u1_encode[j]; |
2969 | 0 | wd = a_wd[j]; |
2970 | 0 | ht = a_ht[j]; |
2971 | 0 | ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j]; |
2972 | 0 | hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc); |
2973 | 0 | } |
2974 | 0 | } |
2975 | 0 | } |
2976 | | |
2977 | | S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel) |
2978 | 0 | { |
2979 | 0 | S32 i; |
2980 | |
|
2981 | 0 | for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) |
2982 | 0 | { |
2983 | 0 | if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc == i4_poc && |
2984 | 0 | ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num == i4_idr_gop_num) |
2985 | 0 | return i; |
2986 | 0 | } |
2987 | | /* Should not come here */ |
2988 | 0 | ASSERT(0); |
2989 | 0 | return (-1); |
2990 | 0 | } |
2991 | | |
2992 | | S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc) |
2993 | 0 | { |
2994 | 0 | S32 i; |
2995 | |
|
2996 | 0 | for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) |
2997 | 0 | { |
2998 | 0 | if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == i4_poc) |
2999 | 0 | return i; |
3000 | 0 | } |
3001 | | /* Should not come here */ |
3002 | 0 | ASSERT(0); |
3003 | 0 | return (-1); |
3004 | 0 | } |
3005 | | |
3006 | | S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel) |
3007 | 0 | { |
3008 | 0 | S32 i; |
3009 | |
|
3010 | 0 | for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) |
3011 | 0 | { |
3012 | 0 | if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free == 1) |
3013 | 0 | { |
3014 | 0 | ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free = 0; |
3015 | 0 | return i; |
3016 | 0 | } |
3017 | 0 | } |
3018 | | /* Should not come here */ |
3019 | 0 | ASSERT(0); |
3020 | 0 | return (-1); |
3021 | 0 | } |
3022 | | |
3023 | | S32 hme_coarse_find_free_descr_idx(void *pv_ctxt) |
3024 | 0 | { |
3025 | 0 | S32 i; |
3026 | |
|
3027 | 0 | coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt; |
3028 | |
|
3029 | 0 | for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) |
3030 | 0 | { |
3031 | 0 | if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1) |
3032 | 0 | return i; |
3033 | 0 | } |
3034 | | /* Should not come here */ |
3035 | 0 | ASSERT(0); |
3036 | 0 | return (-1); |
3037 | 0 | } |
3038 | | |
3039 | | void hme_discard_frm( |
3040 | | void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel) |
3041 | 0 | { |
3042 | 0 | me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt; |
3043 | 0 | S32 count = 0, idx, i; |
3044 | 0 | layers_descr_t *ps_descr; |
3045 | | |
3046 | | /* Search for the id of the layer descriptor that has this poc */ |
3047 | 0 | while(p_pocs_to_remove[count] != INVALID_POC) |
3048 | 0 | { |
3049 | 0 | ASSERT(count == 0); |
3050 | 0 | idx = hme_find_descr_idx( |
3051 | 0 | ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel); |
3052 | 0 | ps_descr = &ps_ctxt->as_ref_descr[idx]; |
3053 | | /*********************************************************************/ |
3054 | | /* Setting i4_is_free = 1 in all layers invalidates this layer ctxt */ |
3055 | | /* Now this can be used for a fresh picture. */ |
3056 | | /*********************************************************************/ |
3057 | 0 | for(i = 0; i < 1; i++) |
3058 | 0 | { |
3059 | 0 | ps_descr->aps_layers[i]->i4_is_free = 1; |
3060 | 0 | } |
3061 | 0 | count++; |
3062 | 0 | } |
3063 | 0 | } |
3064 | | |
3065 | | void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove) |
3066 | 0 | { |
3067 | 0 | coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; |
3068 | 0 | S32 count = 0, idx, i; |
3069 | 0 | layers_descr_t *ps_descr; |
3070 | | |
3071 | | /* Search for the id of the layer descriptor that has this poc */ |
3072 | 0 | while(p_pocs_to_remove[count] != -1) |
3073 | 0 | { |
3074 | 0 | idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[count]); |
3075 | 0 | ps_descr = &ps_ctxt->as_ref_descr[idx]; |
3076 | | /*********************************************************************/ |
3077 | | /* Setting poc = -1 in all layers invalidates this layer ctxt */ |
3078 | | /* Now this can be used for a fresh picture. */ |
3079 | | /*********************************************************************/ |
3080 | 0 | for(i = 1; i < ps_ctxt->num_layers; i++) |
3081 | 0 | { |
3082 | 0 | ps_descr->aps_layers[i]->i4_poc = -1; |
3083 | 0 | } |
3084 | 0 | count++; |
3085 | 0 | } |
3086 | 0 | } |
3087 | | |
3088 | | void hme_update_layer_desc( |
3089 | | layers_descr_t *ps_layers_desc, |
3090 | | hme_ref_desc_t *ps_ref_desc, |
3091 | | S32 start_lyr_id, |
3092 | | S32 num_layers, |
3093 | | layers_descr_t *ps_curr_desc) |
3094 | 0 | { |
3095 | 0 | layer_ctxt_t *ps_layer_ctxt, *ps_curr_layer; |
3096 | 0 | S32 i; |
3097 | 0 | for(i = start_lyr_id; i < num_layers; i++) |
3098 | 0 | { |
3099 | 0 | ps_layer_ctxt = ps_layers_desc->aps_layers[i]; |
3100 | 0 | ps_curr_layer = ps_curr_desc->aps_layers[i]; |
3101 | |
|
3102 | 0 | ps_layer_ctxt->i4_poc = ps_ref_desc->i4_poc; |
3103 | 0 | ps_layer_ctxt->i4_idr_gop_num = ps_ref_desc->i4_GOP_num; |
3104 | | |
3105 | | /* Copy the recon planes for the given reference pic at given layer */ |
3106 | 0 | ps_layer_ctxt->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i].pu1_rec_fxfy; |
3107 | 0 | ps_layer_ctxt->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i].pu1_rec_hxfy; |
3108 | 0 | ps_layer_ctxt->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i].pu1_rec_fxhy; |
3109 | 0 | ps_layer_ctxt->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i].pu1_rec_hxhy; |
3110 | | |
3111 | | /*********************************************************************/ |
3112 | | /* reconstruction strides, offsets and padding info are copied for */ |
3113 | | /* this reference pic. It is assumed that these will be the same across */
3114 | | /* pics, so even the current pic has this info updated, though the */ |
3115 | | /* current pic still does not have valid recon pointers. */ |
3116 | | /*********************************************************************/ |
3117 | 0 | ps_layer_ctxt->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride; |
3118 | 0 | ps_layer_ctxt->i4_rec_offset = ps_ref_desc->as_ref_info[i].luma_offset; |
3119 | 0 | ps_layer_ctxt->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x; |
3120 | 0 | ps_layer_ctxt->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y; |
3121 | |
|
3122 | 0 | ps_curr_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride; |
3123 | 0 | ps_curr_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x; |
3124 | 0 | ps_curr_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y; |
3125 | 0 | } |
3126 | 0 | } |
3127 | | |
3128 | | void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id) |
3129 | 0 | { |
3130 | 0 | layers_descr_t *ps_desc; |
3131 | 0 | layer_ctxt_t *ps_layer_ctxt; |
3132 | 0 | me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt; |
3133 | 0 | me_ctxt_t *ps_thrd_ctxt; |
3134 | 0 | me_frm_ctxt_t *ps_ctxt; |
3135 | |
|
3136 | 0 | hme_inp_buf_attr_t *ps_attr; |
3137 | 0 | S32 i4_poc, idx, i, i4_prev_poc; |
3138 | 0 | S32 num_thrds, prev_me_frm_id; |
3139 | 0 | S32 i4_idr_gop_num, i4_is_reference; |
3140 | | |
3141 | | /* since same layer desc pointer is stored in all thread ctxt */ |
3142 | | /* a free idx is obtained using 0th thread ctxt pointer */ |
3143 | |
|
3144 | 0 | ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id]; |
3145 | |
|
3146 | 0 | ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id]; |
3147 | | |
3148 | | /* Deriving the previous poc from previous frames context */ |
3149 | 0 | if(me_frm_id == 0) |
3150 | 0 | prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1); |
3151 | 0 | else |
3152 | 0 | prev_me_frm_id = me_frm_id - 1; |
3153 | |
|
3154 | 0 | i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc; |
3155 | | |
3156 | | /* Obtain an empty layer descriptor */ |
3157 | 0 | idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel); |
3158 | 0 | ps_desc = &ps_thrd_ctxt->as_ref_descr[idx]; |
3159 | | |
3160 | | /* initialise the parameters for all the threads */ |
3161 | 0 | for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
3162 | 0 | { |
3163 | 0 | me_frm_ctxt_t *ps_tmp_frm_ctxt; |
3164 | |
|
3165 | 0 | ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
3166 | 0 | ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id]; |
3167 | |
|
3168 | 0 | ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx]; |
3169 | | |
3170 | | /* Do the initialization for the first thread alone */ |
3171 | 0 | i4_poc = ps_inp_desc->i4_poc; |
3172 | 0 | i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num; |
3173 | 0 | i4_is_reference = ps_inp_desc->i4_is_reference; |
3174 | | /*Update poc id of previously encoded frm and curr frm */ |
3175 | 0 | ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc; |
3176 | 0 | ps_tmp_frm_ctxt->i4_curr_poc = i4_poc; |
3177 | 0 | } |
3178 | | |
3179 | | /* since same layer desc pointer is stored in all thread ctxt */ |
3180 | | /* following processing is done using 0th thread ctxt pointer */ |
3181 | 0 | ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
3182 | | |
3183 | | /* only encode layer */ |
3184 | 0 | for(i = 0; i < 1; i++) |
3185 | 0 | { |
3186 | 0 | ps_layer_ctxt = ps_desc->aps_layers[i]; |
3187 | 0 | ps_attr = &ps_inp_desc->s_layer_desc[i]; |
3188 | |
|
3189 | 0 | ps_layer_ctxt->i4_poc = i4_poc; |
3190 | 0 | ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num; |
3191 | 0 | ps_layer_ctxt->i4_is_reference = i4_is_reference; |
3192 | 0 | ps_layer_ctxt->i4_non_ref_free = 0; |
3193 | | |
3194 | | /* If this layer is encoded, copy input attributes */ |
3195 | 0 | if(ps_ctxt->u1_encode[i]) |
3196 | 0 | { |
3197 | 0 | ps_layer_ctxt->pu1_inp = ps_attr->pu1_y; |
3198 | 0 | ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride; |
3199 | 0 | ps_layer_ctxt->i4_pad_x_inp = 0; |
3200 | 0 | ps_layer_ctxt->i4_pad_y_inp = 0; |
3201 | 0 | } |
3202 | 0 | else |
3203 | 0 | { |
3204 | | /* If not encoded, then ME owns the buffer.*/ |
3205 | 0 | S32 wd, dst_stride; |
3206 | |
|
3207 | 0 | ASSERT(i != 0); |
3208 | | |
3209 | 0 | wd = ps_ctxt->i4_wd; |
3210 | | |
3211 | | /* destination has padding of 16 on either side */
3212 | 0 | dst_stride = CEIL16((wd >> 1)) + 32 + 4; |
3213 | 0 | ps_layer_ctxt->i4_inp_stride = dst_stride; |
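/* Worked example with an assumed encode-layer width of 1920: the next coarser
 * layer is 960 wide, CEIL16(960) = 960, and adding 16 pixels of padding on
 * either side plus an extra 4 gives dst_stride = 960 + 32 + 4 = 996. */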
3214 | 0 | } |
3215 | 0 | } |
3216 | | |
3217 | 0 | return; |
3218 | 0 | } |
3219 | | |
3220 | | void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx) |
3221 | 0 | { |
3222 | 0 | layers_descr_t *ps_desc; |
3223 | 0 | layer_ctxt_t *ps_layer_ctxt; |
3224 | 0 | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
3225 | 0 | coarse_me_ctxt_t *ps_ctxt; |
3226 | 0 | hme_inp_buf_attr_t *ps_attr; |
3227 | 0 | S32 i4_poc, i; |
3228 | 0 | S32 num_thrds; |
3229 | | |
3230 | | /* since same layer desc pointer is stored in all thread ctxt */ |
3231 | | /* a free idx is obtained using 0th thread ctxt pointer */ |
3232 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
3233 | |
|
3234 | 0 | ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx]; |
3235 | | |
3236 | | /* initialise the parameters for all the threads */ |
3237 | 0 | for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
3238 | 0 | { |
3239 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
3240 | 0 | ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx]; |
3241 | 0 | i4_poc = ps_inp_desc->i4_poc; |
3242 | | |
3243 | | /*Update poc id of previously encoded frm and curr frm */ |
3244 | 0 | ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc; |
3245 | 0 | ps_ctxt->i4_curr_poc = i4_poc; |
3246 | 0 | } |
3247 | | |
3248 | | /* since same layer desc pointer is stored in all thread ctxt */ |
3249 | | /* following processing is done using 0th thread ctxt pointer */ |
3250 | 0 | ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
3251 | | |
3252 | | /* only non-encode layers */
3253 | 0 | for(i = 1; i < ps_ctxt->num_layers; i++) |
3254 | 0 | { |
3255 | 0 | ps_layer_ctxt = ps_desc->aps_layers[i]; |
3256 | 0 | ps_attr = &ps_inp_desc->s_layer_desc[i]; |
3257 | |
|
3258 | 0 | ps_layer_ctxt->i4_poc = i4_poc; |
3259 | | /* If this layer is encoded, copy input attributes */ |
3260 | 0 | if(ps_ctxt->u1_encode[i]) |
3261 | 0 | { |
3262 | 0 | ps_layer_ctxt->pu1_inp = ps_attr->pu1_y; |
3263 | 0 | ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride; |
3264 | 0 | ps_layer_ctxt->i4_pad_x_inp = 0; |
3265 | 0 | ps_layer_ctxt->i4_pad_y_inp = 0; |
3266 | 0 | } |
3267 | 0 | else |
3268 | 0 | { |
3269 | | /* If not encoded, then ME owns the buffer. */ |
3270 | | /* decomp of lower layers happens on a separate pass */
3271 | | /* Coarse Me should export the pointers to the caller */ |
3272 | 0 | S32 wd, dst_stride; |
3273 | |
|
3274 | 0 | ASSERT(i != 0); |
3275 | | |
3276 | 0 | wd = ps_ctxt->a_wd[i - 1]; |
3277 | | |
3278 | | /* destination has padding of 16 on either side */
3279 | 0 | dst_stride = CEIL16((wd >> 1)) + 32 + 4; |
3280 | 0 | ps_layer_ctxt->i4_inp_stride = dst_stride; |
3281 | 0 | } |
3282 | 0 | } |
3283 | 0 | } |
3284 | | |
3285 | | static __inline U08 hme_determine_num_results_per_part( |
3286 | | U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset) |
3287 | 0 | { |
3288 | 0 | U08 u1_num_results_per_part = MAX_RESULTS_PER_PART; |
3289 | |
|
3290 | 0 | if((u1_layer_id == 0) && !!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1) |
3291 | 0 | { |
3292 | 0 | switch(e_quality_preset) |
3293 | 0 | { |
3294 | 0 | case ME_XTREME_SPEED_25: |
3295 | 0 | case ME_XTREME_SPEED: |
3296 | 0 | case ME_HIGH_SPEED: |
3297 | 0 | case ME_MEDIUM_SPEED: |
3298 | 0 | case ME_HIGH_QUALITY: |
3299 | 0 | case ME_PRISTINE_QUALITY: |
3300 | 0 | { |
3301 | 0 | u1_num_results_per_part = 1; |
3302 | |
|
3303 | 0 | break; |
3304 | 0 | } |
3305 | 0 | default: |
3306 | 0 | { |
3307 | 0 | u1_num_results_per_part = MAX_RESULTS_PER_PART; |
3308 | |
|
3309 | 0 | break; |
3310 | 0 | } |
3311 | 0 | } |
3312 | 0 | } |
3313 | 0 | else if((u1_layer_id == 1) && !!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1) |
3314 | 0 | { |
3315 | 0 | switch(e_quality_preset) |
3316 | 0 | { |
3317 | 0 | case ME_XTREME_SPEED_25: |
3318 | 0 | case ME_HIGH_QUALITY: |
3319 | 0 | case ME_PRISTINE_QUALITY: |
3320 | 0 | { |
3321 | 0 | u1_num_results_per_part = 1; |
3322 | |
|
3323 | 0 | break; |
3324 | 0 | } |
3325 | 0 | default: |
3326 | 0 | { |
3327 | 0 | u1_num_results_per_part = MAX_RESULTS_PER_PART; |
3328 | |
|
3329 | 0 | break; |
3330 | 0 | } |
3331 | 0 | } |
3332 | 0 | } |
3333 | 0 | else if((u1_layer_id == 2) && (u1_num_layers > 3) && !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1) |
3334 | 0 | { |
3335 | 0 | switch(e_quality_preset) |
3336 | 0 | { |
3337 | 0 | case ME_XTREME_SPEED_25: |
3338 | 0 | case ME_XTREME_SPEED: |
3339 | 0 | case ME_HIGH_SPEED: |
3340 | 0 | case ME_MEDIUM_SPEED: |
3341 | 0 | { |
3342 | 0 | u1_num_results_per_part = 1; |
3343 | |
|
3344 | 0 | break; |
3345 | 0 | } |
3346 | 0 | default: |
3347 | 0 | { |
3348 | 0 | u1_num_results_per_part = MAX_RESULTS_PER_PART; |
3349 | |
|
3350 | 0 | break; |
3351 | 0 | } |
3352 | 0 | } |
3353 | 0 | } |
3354 | | |
3355 | 0 | return u1_num_results_per_part; |
3356 | 0 | } |
3357 | | |
3358 | | static __inline void hme_max_search_cands_per_search_cand_loc_populator( |
3359 | | hme_frm_prms_t *ps_frm_prms, |
3360 | | U08 *pu1_num_fpel_search_cands, |
3361 | | U08 u1_layer_id, |
3362 | | ME_QUALITY_PRESETS_T e_quality_preset) |
3363 | 0 | { |
3364 | 0 | if(0 == u1_layer_id) |
3365 | 0 | { |
3366 | 0 | S32 i; |
3367 | |
|
3368 | 0 | for(i = 0; i < NUM_SEARCH_CAND_LOCATIONS; i++) |
3369 | 0 | { |
3370 | 0 | switch(e_quality_preset) |
3371 | 0 | { |
3372 | 0 | #if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC |
3373 | 0 | case ME_XTREME_SPEED_25: |
3374 | 0 | case ME_XTREME_SPEED: |
3375 | 0 | case ME_HIGH_SPEED: |
3376 | 0 | case ME_MEDIUM_SPEED: |
3377 | 0 | { |
3378 | 0 | pu1_num_fpel_search_cands[i] = 1; |
3379 | |
|
3380 | 0 | break; |
3381 | 0 | } |
3382 | 0 | #endif |
3383 | 0 | default: |
3384 | 0 | { |
3385 | 0 | pu1_num_fpel_search_cands[i] = |
3386 | 0 | MAX(2, |
3387 | 0 | MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) * |
3388 | 0 | ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)i) + 1)); |
3389 | |
|
3390 | 0 | break; |
3391 | 0 | } |
3392 | 0 | } |
3393 | 0 | } |
3394 | 0 | } |
3395 | 0 | } |
3396 | | |
3397 | | static __inline U08 |
3398 | | hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset) |
3399 | 0 | { |
3400 | 0 | U08 u1_num_cands = 2; |
3401 | |
|
3402 | 0 | if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS) |
3403 | 0 | { |
3404 | 0 | switch(e_quality_preset) |
3405 | 0 | { |
3406 | 0 | case ME_XTREME_SPEED_25: |
3407 | 0 | case ME_XTREME_SPEED: |
3408 | 0 | case ME_HIGH_SPEED: |
3409 | 0 | case ME_MEDIUM_SPEED: |
3410 | 0 | { |
3411 | 0 | u1_num_cands = 1; |
3412 | |
|
3413 | 0 | break; |
3414 | 0 | } |
3415 | 0 | default: |
3416 | 0 | { |
3417 | 0 | u1_num_cands = 2; |
3418 | |
|
3419 | 0 | break; |
3420 | 0 | } |
3421 | 0 | } |
3422 | 0 | } |
3423 | | |
3424 | 0 | return u1_num_cands; |
3425 | 0 | } |
3426 | | |
3427 | | static __inline U08 |
3428 | | hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset) |
3429 | 0 | { |
3430 | 0 | U08 i; |
3431 | |
|
3432 | 0 | U08 u1_num_centers = 0; |
3433 | |
|
3434 | 0 | if(0 == u1_layer_id) |
3435 | 0 | { |
3436 | 0 | switch(e_quality_preset) |
3437 | 0 | { |
3438 | 0 | case ME_XTREME_SPEED_25: |
3439 | 0 | { |
3440 | 0 | for(i = 0; i < TOT_NUM_PARTS; i++) |
3441 | 0 | { |
3442 | 0 | u1_num_centers += gau1_num_best_results_XS25[i]; |
3443 | 0 | } |
3444 | |
|
3445 | 0 | break; |
3446 | 0 | } |
3447 | 0 | case ME_XTREME_SPEED: |
3448 | 0 | { |
3449 | 0 | for(i = 0; i < TOT_NUM_PARTS; i++) |
3450 | 0 | { |
3451 | 0 | u1_num_centers += gau1_num_best_results_XS[i]; |
3452 | 0 | } |
3453 | |
|
3454 | 0 | break; |
3455 | 0 | } |
3456 | 0 | case ME_HIGH_SPEED: |
3457 | 0 | { |
3458 | 0 | for(i = 0; i < TOT_NUM_PARTS; i++) |
3459 | 0 | { |
3460 | 0 | u1_num_centers += gau1_num_best_results_HS[i]; |
3461 | 0 | } |
3462 | |
|
3463 | 0 | break; |
3464 | 0 | } |
3465 | 0 | case ME_MEDIUM_SPEED: |
3466 | 0 | { |
3467 | 0 | for(i = 0; i < TOT_NUM_PARTS; i++) |
3468 | 0 | { |
3469 | 0 | u1_num_centers += gau1_num_best_results_MS[i]; |
3470 | 0 | } |
3471 | |
|
3472 | 0 | break; |
3473 | 0 | } |
3474 | 0 | case ME_HIGH_QUALITY: |
3475 | 0 | { |
3476 | 0 | for(i = 0; i < TOT_NUM_PARTS; i++) |
3477 | 0 | { |
3478 | 0 | u1_num_centers += gau1_num_best_results_HQ[i]; |
3479 | 0 | } |
3480 | |
|
3481 | 0 | break; |
3482 | 0 | } |
3483 | 0 | case ME_PRISTINE_QUALITY: |
3484 | 0 | { |
3485 | 0 | for(i = 0; i < TOT_NUM_PARTS; i++) |
3486 | 0 | { |
3487 | 0 | u1_num_centers += gau1_num_best_results_PQ[i]; |
3488 | 0 | } |
3489 | |
|
3490 | 0 | break; |
3491 | 0 | } |
3492 | 0 | } |
3493 | 0 | } |
3494 | | |
3495 | 0 | return u1_num_centers; |
3496 | 0 | } |
3497 | | |
3498 | | static __inline U08 hme_determine_max_num_subpel_refine_centers( |
3499 | | U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands) |
3500 | 0 | { |
3501 | 0 | U08 u1_num_centers = 0; |
3502 | |
|
3503 | 0 | if(0 == u1_layer_id) |
3504 | 0 | { |
3505 | 0 | u1_num_centers += u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands; |
3506 | 0 | } |
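/* For instance, with the ME_PRISTINE_QUALITY settings used further below
 * (u1_max_subpel_candts_2Nx2N = 2, u1_max_subpel_candts_NxN = 1), the encode
 * layer gets 2 + 4 * 1 = 6 subpel refinement centres. */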
3507 | |
|
3508 | 0 | return u1_num_centers; |
3509 | 0 | } |
3510 | | |
3511 | | void hme_set_refine_prms( |
3512 | | void *pv_refine_prms, |
3513 | | U08 u1_encode, |
3514 | | S32 num_ref, |
3515 | | S32 layer_id, |
3516 | | S32 num_layers, |
3517 | | S32 num_layers_explicit_search, |
3518 | | S32 use_4x4, |
3519 | | hme_frm_prms_t *ps_frm_prms, |
3520 | | double **ppd_intra_costs, |
3521 | | me_coding_params_t *ps_me_coding_tools) |
3522 | 0 | { |
3523 | 0 | refine_prms_t *ps_refine_prms = (refine_prms_t *)pv_refine_prms; |
3524 | |
|
3525 | 0 | ps_refine_prms->i4_encode = u1_encode; |
3526 | 0 | ps_refine_prms->bidir_enabled = ps_frm_prms->bidir_enabled; |
3527 | 0 | ps_refine_prms->i4_layer_id = layer_id; |
3528 | | /*************************************************************************/ |
3529 | | /* Refinement layers have two lambdas, one for closed loop, another for */ |
3530 | | /* open loop. Non encode layers use only open loop lambda. */ |
3531 | | /*************************************************************************/ |
3532 | 0 | ps_refine_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf; |
3533 | 0 | ps_refine_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf; |
3534 | 0 | ps_refine_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift; |
3535 | 0 | ps_refine_prms->lambda_inp = |
3536 | 0 | ((float)ps_refine_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f; |
3537 | 0 | ps_refine_prms->lambda_recon = |
3538 | 0 | ((float)ps_refine_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f; |
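/* As an example, assuming ME_LAMBDA_DISCOUNT is 25 (its value is defined
 * elsewhere), an open-loop lambda of 1000 would be scaled to
 * 1000 * (100 - 25) / 100 = 750; the closed-loop lambda gets the same
 * discount. */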
3539 | |
|
3540 | 0 | if((u1_encode) && (NULL != ppd_intra_costs)) |
3541 | 0 | { |
3542 | 0 | ps_refine_prms->pd_intra_costs = ppd_intra_costs[layer_id]; |
3543 | 0 | } |
3544 | | |
3545 | | /* Explicit or implicit search depends on the number of layers having explicit search */
3546 | 0 | if((layer_id == 0) || (num_layers - layer_id > num_layers_explicit_search)) |
3547 | 0 | { |
3548 | 0 | ps_refine_prms->explicit_ref = 0; |
3549 | 0 | ps_refine_prms->i4_num_ref_fpel = MIN(2, num_ref); |
3550 | 0 | } |
3551 | 0 | else |
3552 | 0 | { |
3553 | 0 | ps_refine_prms->explicit_ref = 1; |
3554 | 0 | ps_refine_prms->i4_num_ref_fpel = num_ref; |
3555 | 0 | } |
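/* Illustration with example counts: for num_layers = 4 and
 * num_layers_explicit_search = 2, the encode layer (layer 0) and layer 1
 * (since 4 - 1 = 3 > 2) take the implicit path and search at most 2
 * references, while layers 2 and 3 search all num_ref references
 * explicitly. */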
3556 | |
|
3557 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_HIGH; |
3558 | |
|
3559 | 0 | ps_refine_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine; |
3560 | 0 | ps_refine_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine; |
3561 | |
|
3562 | 0 | if(u1_encode) |
3563 | 0 | { |
3564 | 0 | ps_refine_prms->i4_num_mvbank_results = 1; |
3565 | 0 | ps_refine_prms->i4_use_rec_in_fpel = 1; |
3566 | 0 | ps_refine_prms->i4_num_steps_fpel_refine = 1; |
3567 | |
|
3568 | 0 | if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY) |
3569 | 0 | { |
3570 | 0 | ps_refine_prms->i4_num_fpel_results = 4; |
3571 | 0 | ps_refine_prms->i4_num_32x32_merge_results = 4; |
3572 | 0 | ps_refine_prms->i4_num_64x64_merge_results = 4; |
3573 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 3; |
3574 | 0 | ps_refine_prms->i4_use_satd_subpel = 1; |
3575 | 0 | ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2; |
3576 | 0 | ps_refine_prms->u1_max_subpel_candts_NxN = 1; |
3577 | 0 | ps_refine_prms->u1_subpel_candt_threshold = 1; |
3578 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_MED; |
3579 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ; |
3580 | 0 | ps_refine_prms->limit_active_partitions = 0; |
3581 | 0 | } |
3582 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY) |
3583 | 0 | { |
3584 | 0 | ps_refine_prms->i4_num_fpel_results = 4; |
3585 | 0 | ps_refine_prms->i4_num_32x32_merge_results = 4; |
3586 | 0 | ps_refine_prms->i4_num_64x64_merge_results = 4; |
3587 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 3; |
3588 | 0 | ps_refine_prms->i4_use_satd_subpel = 1; |
3589 | 0 | ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2; |
3590 | 0 | ps_refine_prms->u1_max_subpel_candts_NxN = 1; |
3591 | 0 | ps_refine_prms->u1_subpel_candt_threshold = 2; |
3592 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_MED; |
3593 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ; |
3594 | 0 | ps_refine_prms->limit_active_partitions = 0; |
3595 | 0 | } |
3596 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED) |
3597 | 0 | { |
3598 | 0 | ps_refine_prms->i4_num_fpel_results = 1; |
3599 | 0 | ps_refine_prms->i4_num_32x32_merge_results = 2; |
3600 | 0 | ps_refine_prms->i4_num_64x64_merge_results = 2; |
3601 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3602 | 0 | ps_refine_prms->i4_use_satd_subpel = 1; |
3603 | 0 | ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2; |
3604 | 0 | ps_refine_prms->u1_max_subpel_candts_NxN = 1; |
3605 | 0 | ps_refine_prms->u1_subpel_candt_threshold = 3; |
3606 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_MED; |
3607 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS; |
3608 | 0 | ps_refine_prms->limit_active_partitions = 1; |
3609 | 0 | } |
3610 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED) |
3611 | 0 | { |
3612 | 0 | ps_refine_prms->i4_num_fpel_results = 1; |
3613 | 0 | ps_refine_prms->i4_num_32x32_merge_results = 2; |
3614 | 0 | ps_refine_prms->i4_num_64x64_merge_results = 2; |
3615 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3616 | 0 | ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1; |
3617 | 0 | ps_refine_prms->u1_max_subpel_candts_NxN = 1; |
3618 | 0 | ps_refine_prms->i4_use_satd_subpel = 0; |
3619 | 0 | ps_refine_prms->u1_subpel_candt_threshold = 0; |
3620 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_MED; |
3621 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS; |
3622 | 0 | ps_refine_prms->limit_active_partitions = 1; |
3623 | 0 | } |
3624 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED) |
3625 | 0 | { |
3626 | 0 | ps_refine_prms->i4_num_fpel_results = 1; |
3627 | 0 | ps_refine_prms->i4_num_32x32_merge_results = 2; |
3628 | 0 | ps_refine_prms->i4_num_64x64_merge_results = 2; |
3629 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3630 | 0 | ps_refine_prms->i4_use_satd_subpel = 0; |
3631 | 0 | ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1; |
3632 | 0 | ps_refine_prms->u1_max_subpel_candts_NxN = 0; |
3633 | 0 | ps_refine_prms->u1_subpel_candt_threshold = 0; |
3634 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_MED; |
3635 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS; |
3636 | 0 | ps_refine_prms->limit_active_partitions = 1; |
3637 | 0 | } |
3638 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25) |
3639 | 0 | { |
3640 | 0 | ps_refine_prms->i4_num_fpel_results = 1; |
3641 | 0 | ps_refine_prms->i4_num_32x32_merge_results = 2; |
3642 | 0 | ps_refine_prms->i4_num_64x64_merge_results = 2; |
3643 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3644 | 0 | ps_refine_prms->i4_use_satd_subpel = 0; |
3645 | 0 | ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1; |
3646 | 0 | ps_refine_prms->u1_max_subpel_candts_NxN = 0; |
3647 | 0 | ps_refine_prms->u1_subpel_candt_threshold = 0; |
3648 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; |
3649 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25; |
3650 | 0 | ps_refine_prms->limit_active_partitions = 1; |
3651 | 0 | } |
3652 | 0 | } |
3653 | 0 | else |
3654 | 0 | { |
3655 | 0 | ps_refine_prms->i4_num_fpel_results = 2; |
3656 | 0 | ps_refine_prms->i4_use_rec_in_fpel = 0; |
3657 | 0 | ps_refine_prms->i4_num_steps_fpel_refine = 1; |
3658 | 0 | ps_refine_prms->i4_num_steps_hpel_refine = 0; |
3659 | 0 | ps_refine_prms->i4_num_steps_qpel_refine = 0; |
3660 | |
|
3661 | 0 | if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED) |
3662 | 0 | { |
3663 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3664 | 0 | ps_refine_prms->i4_use_satd_subpel = 1; |
3665 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; |
3666 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS; |
3667 | 0 | } |
3668 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED) |
3669 | 0 | { |
3670 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3671 | 0 | ps_refine_prms->i4_use_satd_subpel = 0; |
3672 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; |
3673 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS; |
3674 | 0 | } |
3675 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25) |
3676 | 0 | { |
3677 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3678 | 0 | ps_refine_prms->i4_use_satd_subpel = 0; |
3679 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; |
3680 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25; |
3681 | 0 | } |
3682 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY) |
3683 | 0 | { |
3684 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 2; |
3685 | 0 | ps_refine_prms->i4_use_satd_subpel = 1; |
3686 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_MED; |
3687 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ; |
3688 | 0 | } |
3689 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY) |
3690 | 0 | { |
3691 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 2; |
3692 | 0 | ps_refine_prms->i4_use_satd_subpel = 1; |
3693 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_MED; |
3694 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ; |
3695 | 0 | } |
3696 | 0 | else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED) |
3697 | 0 | { |
3698 | 0 | ps_refine_prms->i4_num_steps_post_refine_fpel = 0; |
3699 | 0 | ps_refine_prms->i4_use_satd_subpel = 1; |
3700 | 0 | ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; |
3701 | 0 | ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS; |
3702 | 0 | } |
3703 | | |
3704 | | /* Following fields unused in the non-encode layers */ |
3705 | | /* But setting the same to default values */ |
3706 | 0 | ps_refine_prms->i4_num_32x32_merge_results = 4; |
3707 | 0 | ps_refine_prms->i4_num_64x64_merge_results = 4; |
3708 | |
|
3709 | 0 | if(!ps_frm_prms->bidir_enabled) |
3710 | 0 | { |
3711 | 0 | ps_refine_prms->limit_active_partitions = 0; |
3712 | 0 | } |
3713 | 0 | else |
3714 | 0 | { |
3715 | 0 | ps_refine_prms->limit_active_partitions = 1; |
3716 | 0 | } |
3717 | 0 | } |
3718 | |
|
3719 | 0 | ps_refine_prms->i4_enable_4x4_part = |
3720 | 0 | hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode); |
3721 | |
|
3722 | 0 | if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line) |
3723 | 0 | { |
3724 | 0 | ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part( |
3725 | 0 | layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets); |
3726 | |
|
3727 | 0 | hme_max_search_cands_per_search_cand_loc_populator( |
3728 | 0 | ps_frm_prms, |
3729 | 0 | ps_refine_prms->au1_num_fpel_search_cands, |
3730 | 0 | layer_id, |
3731 | 0 | ps_me_coding_tools->e_me_quality_presets); |
3732 | |
|
3733 | 0 | ps_refine_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands( |
3734 | 0 | layer_id, ps_me_coding_tools->e_me_quality_presets); |
3735 | |
|
3736 | 0 | ps_refine_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers( |
3737 | 0 | layer_id, ps_me_coding_tools->e_me_quality_presets); |
3738 | |
|
3739 | 0 | ps_refine_prms->u1_max_num_subpel_refine_centers = |
3740 | 0 | hme_determine_max_num_subpel_refine_centers( |
3741 | 0 | layer_id, |
3742 | 0 | ps_refine_prms->u1_max_subpel_candts_2Nx2N, |
3743 | 0 | ps_refine_prms->u1_max_subpel_candts_NxN); |
3744 | 0 | } |
3745 | 0 | else |
3746 | 0 | { |
3747 | 0 | if(0 == layer_id) |
3748 | 0 | { |
3749 | 0 | ps_refine_prms->i4_num_results_per_part = |
3750 | 0 | ps_me_coding_tools->u1_num_results_per_part_in_l0me; |
3751 | 0 | } |
3752 | 0 | else if(1 == layer_id) |
3753 | 0 | { |
3754 | 0 | ps_refine_prms->i4_num_results_per_part = |
3755 | 0 | ps_me_coding_tools->u1_num_results_per_part_in_l1me; |
3756 | 0 | } |
3757 | 0 | else if((2 == layer_id) && (num_layers > 3)) |
3758 | 0 | { |
3759 | 0 | ps_refine_prms->i4_num_results_per_part = |
3760 | 0 | ps_me_coding_tools->u1_num_results_per_part_in_l2me; |
3761 | 0 | } |
3762 | 0 | else |
3763 | 0 | { |
3764 | 0 | ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part( |
3765 | 0 | layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets); |
3766 | 0 | } |
3767 | |
|
3768 | 0 | memset( |
3769 | 0 | ps_refine_prms->au1_num_fpel_search_cands, |
3770 | 0 | ps_me_coding_tools->u1_max_num_coloc_cands, |
3771 | 0 | sizeof(ps_refine_prms->au1_num_fpel_search_cands)); |
3772 | |
|
3773 | 0 | ps_refine_prms->u1_max_2nx2n_tu_recur_cands = |
3774 | 0 | ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands; |
3775 | |
|
3776 | 0 | ps_refine_prms->u1_max_num_fpel_refine_centers = |
3777 | 0 | ps_me_coding_tools->u1_max_num_fpel_refine_centers; |
3778 | |
|
3779 | 0 | ps_refine_prms->u1_max_num_subpel_refine_centers = |
3780 | 0 | ps_me_coding_tools->u1_max_num_subpel_refine_centers; |
3781 | 0 | } |
3782 | |
|
3783 | 0 | if(layer_id != 0) |
3784 | 0 | { |
3785 | 0 | ps_refine_prms->i4_num_mvbank_results = ps_refine_prms->i4_num_results_per_part; |
3786 | 0 | } |
3787 | | |
3788 | | /* 4 * lambda */ |
3789 | 0 | ps_refine_prms->sdi_threshold = |
3790 | 0 | (ps_refine_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >> |
3791 | 0 | (ps_frm_prms->lambda_q_shift - 2); |
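/* The expression above is roughly 4 * lambda in real terms: lambda_recon is
 * in Q(lambda_q_shift) format and (1 << (lambda_q_shift - 1)) acts as a
 * rounding offset. With example values lambda_recon = 3000 and
 * lambda_q_shift = 8, (3000 + 128) >> 6 = 48, close to 4 * 3000 / 256 = 46.9. */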
3792 | |
|
3793 | 0 | ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb = |
3794 | 0 | MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled; |
3795 | 0 | } |
3796 | | |
3797 | | void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert) |
3798 | 0 | { |
3799 | 0 | S32 cu_16x16_valid_flag = 0, merge_pattern_x, merge_pattern_y; |
3800 | 0 | S32 blk, blk_x, blk_y; |
3801 | 0 | S32 num_16x16_horz, num_16x16_vert; |
3802 | 0 | blk_ctb_attrs_t *ps_blk_attrs = &ps_attrs->as_blk_attrs[0]; |
3803 | |
|
3804 | 0 | num_16x16_horz = (num_8x8_horz + 1) >> 1; |
3805 | 0 | num_16x16_vert = (num_8x8_vert + 1) >> 1; |
3806 | 0 | ps_attrs->u1_num_blks_in_ctb = (U08)(num_16x16_horz * num_16x16_vert); |
3807 | | |
3808 | | /*************************************************************************/ |
3809 | | /* Run through each blk assuming all 16x16 CUs valid. The order would be */ |
3810 | | /* 0 1 4 5 */ |
3811 | | /* 2 3 6 7 */ |
3812 | | /* 8 9 12 13 */ |
3813 | | /* 10 11 14 15 */ |
3814 | | /* Out of these some may not be valid. For example, if num_16x16_horz is */ |
3815 | | /* 2 and num_16x16_vert is 4, then right 2 columns not valid. In this */ |
3816 | | /* case, blks 8-11 get encoding number of 4-7. Further, the variable */ |
3817 | | /* cu_16x16_valid_flag will be 1111 0000 1111 0000. Also, the variable */ |
3818 | | /* u1_merge_to_32x32_flag will be 1010, and u1_merge_to_64x64_flag 0 */ |
3819 | | /*************************************************************************/ |
3820 | 0 | for(blk = 0; blk < 16; blk++) |
3821 | 0 | { |
3822 | 0 | U08 u1_blk_8x8_mask = 0xF; |
3823 | 0 | blk_x = gau1_encode_to_raster_x[blk]; |
3824 | 0 | blk_y = gau1_encode_to_raster_y[blk]; |
3825 | 0 | if((blk_x >= num_16x16_horz) || (blk_y >= num_16x16_vert)) |
3826 | 0 | { |
3827 | 0 | continue; |
3828 | 0 | } |
3829 | | |
3830 | | /* The CU at encode location blk is valid */ |
3831 | 0 | cu_16x16_valid_flag |= (1 << blk); |
3832 | 0 | ps_blk_attrs->u1_blk_id_in_full_ctb = blk; |
3833 | 0 | ps_blk_attrs->u1_blk_x = blk_x; |
3834 | 0 | ps_blk_attrs->u1_blk_y = blk_y; |
3835 | | |
3836 | | /* Disable blks 1 and 3 if the 16x16 blk overshoots on rt border */ |
3837 | 0 | if(((blk_x << 1) + 2) > num_8x8_horz) |
3838 | 0 | u1_blk_8x8_mask &= 0x5; |
3839 | | /* Disable blks 2 and 3 if the 16x16 blk overshoots on bot border */ |
3840 | 0 | if(((blk_y << 1) + 2) > num_8x8_vert) |
3841 | 0 | u1_blk_8x8_mask &= 0x3; |
3842 | 0 | ps_blk_attrs->u1_blk_8x8_mask = u1_blk_8x8_mask; |
3843 | 0 | ps_blk_attrs++; |
3844 | 0 | } |
3845 | |
|
3846 | 0 | ps_attrs->cu_16x16_valid_flag = cu_16x16_valid_flag; |
3847 | | |
3848 | | /* 32x32 merge is logical combination of what merge is possible */ |
3849 | | /* horizontally as well as vertically. */ |
3850 | 0 | if(num_8x8_horz < 4) |
3851 | 0 | merge_pattern_x = 0x0; |
3852 | 0 | else if(num_8x8_horz < 8) |
3853 | 0 | merge_pattern_x = 0x5; |
3854 | 0 | else |
3855 | 0 | merge_pattern_x = 0xF; |
3856 | |
|
3857 | 0 | if(num_8x8_vert < 4) |
3858 | 0 | merge_pattern_y = 0x0; |
3859 | 0 | else if(num_8x8_vert < 8) |
3860 | 0 | merge_pattern_y = 0x3; |
3861 | 0 | else |
3862 | 0 | merge_pattern_y = 0xF; |
3863 | |
|
3864 | 0 | ps_attrs->u1_merge_to_32x32_flag = (U08)(merge_pattern_x & merge_pattern_y); |
3865 | | |
3866 | | /* Do not attempt 64x64 merge if any blk invalid */ |
3867 | 0 | if(ps_attrs->u1_merge_to_32x32_flag != 0xF) |
3868 | 0 | ps_attrs->u1_merge_to_64x64_flag = 0; |
3869 | 0 | else |
3870 | 0 | ps_attrs->u1_merge_to_64x64_flag = 1; |
3871 | 0 | } |
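/* Worked example with illustrative sizes: for a bottom-right CTB covering
 * 24x40 pixels, num_8x8_horz = 3 and num_8x8_vert = 5, giving
 * num_16x16_horz = 2, num_16x16_vert = 3 and u1_num_blks_in_ctb = 6. A blk in
 * the second 16x16 column spills past the 3 valid 8x8 columns, so its right
 * 8x8s are masked off (mask 0x5); a blk in the third 16x16 row spills past
 * the 5 valid 8x8 rows, so its bottom 8x8s are masked off (mask 0x3); the
 * corner blk keeps only 0x1. Since num_8x8_horz < 4, merge_pattern_x = 0, so
 * both u1_merge_to_32x32_flag and u1_merge_to_64x64_flag end up 0. */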
3872 | | |
3873 | | void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht) |
3874 | 0 | { |
3875 | 0 | S32 is_cropped_rt, is_cropped_bot; |
3876 | |
|
3877 | 0 | is_cropped_rt = ((wd & 63) != 0) ? 1 : 0; |
3878 | 0 | is_cropped_bot = ((ht & 63) != 0) ? 1 : 0; |
3879 | |
|
3880 | 0 | if(is_cropped_rt) |
3881 | 0 | { |
3882 | 0 | hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], (wd & 63) >> 3, 8); |
3883 | 0 | } |
3884 | 0 | if(is_cropped_bot) |
3885 | 0 | { |
3886 | 0 | hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, (ht & 63) >> 3); |
3887 | 0 | } |
3888 | 0 | if(is_cropped_rt & is_cropped_bot) |
3889 | 0 | { |
3890 | 0 | hme_set_ctb_boundary_attrs( |
3891 | 0 | &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], (wd & 63) >> 3, (ht & 63) >> 3); |
3892 | 0 | } |
3893 | 0 | hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8); |
3894 | 0 | } |
3895 | | |
3896 | | /** |
3897 | | ******************************************************************************** |
3898 | | * @fn hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to) |
3899 | | * |
3900 | | * @brief When we have an mv with ref id "poc_to" for which a predictor is to be
3901 | | * computed, and the predictor has ref id "poc_from", this function returns
3902 | | * the scale factor in Q8 format for that purpose
3903 | | * |
3904 | | * @param[in] curr_poc : input picture poc |
3905 | | * |
3906 | | * @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled |
3907 | | * |
3908 | | * @param[in] poc_to : POC of the pic, pointed to by ref id to be scaled to
3909 | | * |
3910 | | * @return Scale factor in Q8 format |
3911 | | ******************************************************************************** |
3912 | | */ |
3913 | | S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to) |
3914 | 0 | { |
3915 | 0 | S32 td, tx, tb; |
3916 | 0 | S16 i2_scf; |
3917 | | /*************************************************************************/ |
3918 | | /* Approximate scale factor: 256 * num / denom */ |
3919 | | /* num = curr_poc - poc_to, denom = curr_poc - poc_from */ |
3920 | | /* Exact implementation as per standard. */ |
3921 | | /*************************************************************************/ |
3922 | |
|
3923 | 0 | tb = HME_CLIP((curr_poc - poc_to), -128, 127); |
3924 | 0 | td = HME_CLIP((curr_poc - poc_from), -128, 127); |
3925 | |
|
3926 | 0 | tx = (16384 + (ABS(td) >> 1)) / td; |
3927 | | //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127); |
3928 | 0 | i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095); |
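/* Worked example with illustrative POCs: curr_poc = 8, poc_from = 0,
 * poc_to = 4 gives tb = 4, td = 8, tx = (16384 + 4) / 8 = 2048 and
 * i2_scf = ((4 * 2048) + 32) >> 6 = 128, i.e. 0.5 in Q8, matching the POC
 * delta ratio (8 - 4) / (8 - 0). */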
3929 | |
|
3930 | 0 | return (i2_scf); |
3931 | 0 | } |
3932 | | |
3933 | | /** |
3934 | | ******************************************************************************** |
3935 | | * @fn hme_process_frm_init |
3936 | | * |
3937 | | * @brief HME frame level initialisation processing function
3938 | | * |
3939 | | * @param[in] pv_me_ctxt : ME ctxt pointer |
3940 | | * |
3941 | | * @param[in] ps_ref_map : Reference map prms pointer |
3942 | | * |
3943 | | * @param[in] ps_frm_prms :Pointer to frame params |
3944 | | * |
3945 | | * called only for encode layer |
3946 | | * |
3947 | | * @return None
3948 | | ******************************************************************************** |
3949 | | */ |
3950 | | void hme_process_frm_init( |
3951 | | void *pv_me_ctxt, |
3952 | | hme_ref_map_t *ps_ref_map, |
3953 | | hme_frm_prms_t *ps_frm_prms, |
3954 | | WORD32 i4_me_frm_id, |
3955 | | WORD32 i4_num_me_frm_pllel) |
3956 | 0 | { |
3957 | 0 | me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt; |
3958 | 0 | me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id]; |
3959 | |
|
3960 | 0 | S32 i, j, desc_idx; |
3961 | 0 | S16 i2_max_x = 0, i2_max_y = 0; |
3962 | | |
3963 | | /* Set the Qp of current frm passed by caller. Required for intra cost */ |
3964 | 0 | ps_ctxt->frm_qstep = ps_frm_prms->qstep; |
3965 | 0 | ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8; |
3966 | | |
3967 | | /* Bidir enabled or not */ |
3968 | 0 | ps_ctxt->s_frm_prms = *ps_frm_prms; |
3969 | | |
3970 | | /*************************************************************************/ |
3971 | | /* Set up the ref pic parameters across all layers. For this, we do the */ |
3972 | | /* following: the application has given us a ref pic list, we go index */ |
3973 | | /* by index and pick up the picture. A picture can be uniquely mapped */
3974 | | /* to a POC. So we search the layer descriptor array to find that POC. */
3975 | | /* Once found, we update all attributes in this descriptor. */ |
3976 | | /* During this update process we also create an index of descriptor id */
3977 | | /* to ref id mapping. It is important to find the same POC in the layers */ |
3978 | | /* descr structure since it holds the pyramid inputs for non-encode layers*/
3979 | | /* Apart from this, we also update the array containing the descr index. */
3980 | | /* For ease of access during processing, each layer has a pointer to an array*/
3981 | | /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */ |
3982 | | /* we update this too. */ |
3983 | | /*************************************************************************/ |
3984 | 0 | ps_ctxt->num_ref_past = 0; |
3985 | 0 | ps_ctxt->num_ref_future = 0; |
3986 | 0 | for(i = 0; i < ps_ref_map->i4_num_ref; i++) |
3987 | 0 | { |
3988 | 0 | S32 ref_id_lc, idx; |
3989 | 0 | hme_ref_desc_t *ps_ref_desc; |
3990 | |
|
3991 | 0 | ps_ref_desc = &ps_ref_map->as_ref_desc[i]; |
3992 | 0 | ref_id_lc = ps_ref_desc->i1_ref_id_lc; |
3993 | | /* Obtain the id of descriptor that contains this POC */ |
3994 | 0 | idx = hme_find_descr_idx( |
3995 | 0 | ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel); |
3996 | | |
3997 | | /* Update all layers in this descr with the reference attributes */ |
3998 | 0 | hme_update_layer_desc( |
3999 | 0 | &ps_thrd_ctxt->as_ref_descr[idx], |
4000 | 0 | ps_ref_desc, |
4001 | 0 | 0, |
4002 | 0 | 1, //ps_ctxt->num_layers, |
4003 | 0 | ps_ctxt->ps_curr_descr); |
4004 | | |
4005 | | /* Update the pointer holder for the recon planes */ |
4006 | 0 | ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0]; |
4007 | 0 | ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy = |
4008 | 0 | &ps_ctxt->apu1_list_rec_fxfy[0][0]; |
4009 | 0 | ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy = |
4010 | 0 | &ps_ctxt->apu1_list_rec_hxfy[0][0]; |
4011 | 0 | ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy = |
4012 | 0 | &ps_ctxt->apu1_list_rec_fxhy[0][0]; |
4013 | 0 | ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy = |
4014 | 0 | &ps_ctxt->apu1_list_rec_hxhy[0][0]; |
4015 | 0 | ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon = |
4016 | 0 | &ps_ctxt->apv_list_dep_mngr[0][0]; |
4017 | | |
4018 | | /* Update the array having ref id lc to descr id mapping */ |
4019 | 0 | ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx; |
4020 | | |
4021 | | /* From ref id lc we need to work out the POC, So update this array */ |
4022 | 0 | ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc; |
4023 | | |
4024 | | /* When computing costs in L0 and L1 directions, we need the */ |
4025 | | /* respective ref id L0 and L1, so update this mapping */ |
4026 | 0 | ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0; |
4027 | 0 | ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1; |
4028 | 0 | if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0) |
4029 | 0 | { |
4030 | 0 | ps_ctxt->au1_is_past[ref_id_lc] = 1; |
4031 | 0 | ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc; |
4032 | 0 | ps_ctxt->num_ref_past++; |
4033 | 0 | } |
4034 | 0 | else |
4035 | 0 | { |
4036 | 0 | ps_ctxt->au1_is_past[ref_id_lc] = 0; |
4037 | 0 | ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc; |
4038 | 0 | ps_ctxt->num_ref_future++; |
4039 | 0 | } |
4040 | |
|
4041 | 0 | if(1 == ps_ctxt->i4_wt_pred_enable_flag) |
4042 | 0 | { |
4043 | | /* copy the weight and offsets from current ref desc */ |
4044 | 0 | ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight; |
4045 | | |
4046 | | /* inv weight is stored in Q15 format */ |
4047 | 0 | ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = |
4048 | 0 | ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight; |
4049 | 0 | ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset; |
4050 | 0 | } |
4051 | 0 | else |
4052 | 0 | { |
4053 | | /* store default wt and offset*/ |
4054 | 0 | ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT; |
4055 | | |
4056 | | /* inv weight is stored in Q15 format */ |
4057 | 0 | ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = |
4058 | 0 | ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; |
4059 | |
4060 | 0 | ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0; |
4061 | 0 | } |
4062 | 0 | } |
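The Q15 inverse weight stored above replaces a per-sample division by the luma weight with a multiply and a right shift in later cost computations. A minimal standalone sketch of that arithmetic, with made-up weight and sample values (the rounding term (w >> 1) matches the expression in the code):

    #include <stdio.h>

    int main(void)
    {
        int w       = 40;                          /* example luma weight          */
        int inv_q15 = ((1 << 15) + (w >> 1)) / w;  /* rounded reciprocal of w, Q15 */
        int x       = 517;                         /* some weighted quantity       */

        /* the multiply-shift approximates x / w; it can differ by 1 for some x */
        printf("inv_q15 = %d, x/w = %d, (x*inv_q15)>>15 = %d\n",
               inv_q15, x / w, (x * inv_q15) >> 15);
        return 0;
    }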
4063 | |
4064 | 0 | ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1; |
4065 | 0 | ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1; |
4066 | | |
4067 | | /*************************************************************************/ |
4068 | | /* Preparation of the TLU for bits for reference indices. */ |
4069 | | /* Special case is that of numref = 2. (TEV) */ |
4070 | | /* Other cases use UEV */
4071 | | /*************************************************************************/ |
4072 | 0 | for(i = 0; i < MAX_NUM_REF; i++) |
4073 | 0 | { |
4074 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0; |
4075 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0; |
4076 | 0 | } |
4077 | |
4078 | 0 | if(ps_ref_map->i4_num_ref == 2) |
4079 | 0 | { |
4080 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1; |
4081 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1; |
4082 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1; |
4083 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1; |
4084 | 0 | } |
4085 | 0 | else if(ps_ref_map->i4_num_ref > 2) |
4086 | 0 | { |
4087 | 0 | for(i = 0; i < ps_ref_map->i4_num_ref; i++) |
4088 | 0 | { |
4089 | 0 | S32 l0, l1; |
4090 | 0 | l0 = ps_ctxt->a_ref_idx_lc_to_l0[i]; |
4091 | 0 | l1 = ps_ctxt->a_ref_idx_lc_to_l1[i]; |
4092 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0]; |
4093 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1]; |
4094 | 0 | } |
4095 | 0 | } |
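For exactly two references a single te(v) bit is enough, hence the table of ones; with more references the cost is looked up from gau1_ref_bits via the mapped L0/L1 index. Assuming that table follows ue(v) code lengths, as the comment above suggests, the per-index bit count could be derived as below (a sketch, not the actual table construction):

    /* number of bits in an Exp-Golomb ue(v) codeword for a given code number */
    static int uev_bits(int code_num)
    {
        int leading_zeros = 0;

        while((code_num + 1) >> (leading_zeros + 1))
            leading_zeros++;
        return 2 * leading_zeros + 1;   /* prefix zeros + marker bit + suffix */
    }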
4096 | | |
4097 | | /*************************************************************************/ |
4098 | | /* Preparation of the scaling factors for reference indices. The scale */ |
4099 | | /* factor depends on distance of the two ref indices from current input */ |
4100 | | /* in terms of poc delta. */ |
4101 | | /*************************************************************************/ |
4102 | 0 | for(i = 0; i < ps_ref_map->i4_num_ref; i++) |
4103 | 0 | { |
4104 | 0 | for(j = 0; j < ps_ref_map->i4_num_ref; j++) |
4105 | 0 | { |
4106 | 0 | S16 i2_scf_q8; |
4107 | 0 | S32 poc_from, poc_to; |
4108 | |
4109 | 0 | poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j]; |
4110 | 0 | poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i]; |
4111 | |
4112 | 0 | i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to); |
4113 | 0 | ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8; |
4114 | 0 | } |
4115 | 0 | } |
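Conceptually, the Q8 entry scales a motion vector that points at poc_from so that it points at poc_to, using the ratio of the two temporal distances from the current POC. The sketch below only illustrates that idea; the exact rounding and clipping inside hme_scale_for_ref_idx may differ, and the helper name is hypothetical:

    /* illustrative Q8 temporal scale: mv_to is roughly (mv_from * scf_q8) >> 8 */
    static short scale_q8_sketch(int curr_poc, int poc_from, int poc_to)
    {
        int td = curr_poc - poc_from;   /* distance of the MV's own reference  */
        int tb = curr_poc - poc_to;     /* distance of the target reference    */
        int scf_q8;

        if(td == 0)
            return 1 << 8;              /* degenerate case: identity scale     */

        scf_q8 = (tb * 256) / td;
        if(scf_q8 > 4095)
            scf_q8 = 4095;              /* keep the result within a sane range */
        if(scf_q8 < -4096)
            scf_q8 = -4096;
        return (short)scf_q8;
    }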
4116 | | |
4117 | | /*************************************************************************/ |
4118 | | /* We store simplified look ups for 4 hpel planes and inp y plane for */ |
4119 | | /* every layer and for every ref id in the layer. So update these lookups*/ |
4120 | | /*************************************************************************/ |
4121 | 0 | for(i = 0; i < 1; i++) |
4122 | 0 | { |
4123 | 0 | U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy; |
4124 | 0 | U08 **ppu1_inp; |
4125 | 0 | void **ppvlist_dep_mngr; |
4126 | 0 | layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i]; |
4127 | |
4128 | 0 | ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0]; |
4129 | 0 | ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0]; |
4130 | 0 | ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0]; |
4131 | 0 | ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0]; |
4132 | 0 | ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0]; |
4133 | 0 | ppu1_inp = &ps_ctxt->apu1_list_inp[i][0]; |
4134 | 0 | for(j = 0; j < ps_ref_map->i4_num_ref; j++) |
4135 | 0 | { |
4136 | 0 | hme_ref_desc_t *ps_ref_desc; |
4137 | 0 | hme_ref_buf_info_t *ps_buf_info; |
4138 | 0 | layer_ctxt_t *ps_layer; |
4139 | 0 | S32 ref_id_lc; |
4140 | |
4141 | 0 | ps_ref_desc = &ps_ref_map->as_ref_desc[j]; |
4142 | 0 | ps_buf_info = &ps_ref_desc->as_ref_info[i]; |
4143 | 0 | ref_id_lc = ps_ref_desc->i1_ref_id_lc; |
4144 | |
4145 | 0 | desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc]; |
4146 | 0 | ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i]; |
4147 | |
4148 | 0 | ppu1_inp[j] = ps_buf_info->pu1_ref_src; |
4149 | 0 | ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy; |
4150 | 0 | ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy; |
4151 | 0 | ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy; |
4152 | 0 | ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy; |
4153 | 0 | ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr; |
4154 | | |
4155 | | /* Update the curr descriptors reference pointers here */ |
4156 | 0 | ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src; |
4157 | 0 | ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy; |
4158 | 0 | ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy; |
4159 | 0 | ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy; |
4160 | 0 | ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy; |
4161 | 0 | } |
4162 | 0 | } |
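The four recon pointers per reference are the pre-interpolated half-pel planes (full/half phase in x and y). A common way such planes are consumed, assuming motion vectors expressed in half-pel units, is to select the plane from the MV parity and the sample address from the remaining bits; this sketch shows only the layout idea, not the library's actual access routine:

    /* pick the half-pel plane and sample address for an MV in half-pel units */
    static const unsigned char *hpel_ptr_sketch(
        const unsigned char *fxfy, const unsigned char *hxfy,
        const unsigned char *fxhy, const unsigned char *hxhy,
        int stride, int mv_x, int mv_y)
    {
        /* planes[y phase][x phase] */
        const unsigned char *planes[2][2] = { { fxfy, hxfy }, { fxhy, hxhy } };
        const unsigned char *base = planes[mv_y & 1][mv_x & 1];

        return base + (mv_y >> 1) * stride + (mv_x >> 1);
    }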
4163 | | /*************************************************************************/ |
4164 | | /* The mv range for each layer is computed. For dyadic layers it will */ |
4165 | | /* keep shrinking by 2; for non dyadic it will shrink by the ratio of wd */
4166 | | /* and ht. In general the scale is the ratio of wd for x and of ht for y */
4167 | | /*************************************************************************/ |
4168 | 0 | for(i = 0; i < 1; i++) |
4169 | 0 | { |
4170 | 0 | layer_ctxt_t *ps_layer_ctxt; |
4171 | 0 | if(i == 0) |
4172 | 0 | { |
4173 | 0 | i2_max_x = ps_frm_prms->i2_mv_range_x; |
4174 | 0 | i2_max_y = ps_frm_prms->i2_mv_range_y; |
4175 | 0 | } |
4176 | 0 | else |
4177 | 0 | { |
4178 | 0 | i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd)); |
4179 | 0 | i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht)); |
4180 | 0 | } |
4181 | 0 | ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i]; |
4182 | 0 | ps_layer_ctxt->i2_max_mv_x = i2_max_x; |
4183 | 0 | ps_layer_ctxt->i2_max_mv_y = i2_max_y; |
4184 | | |
4185 | | /*********************************************************************/ |
4186 | | /* Every layer maintains a reference id lc to POC mapping. This is */ |
4187 | | /* because the mapping is unique for every frm. Also, in next frm, */ |
4188 | | /* we require colocated mvs which means scaling according to temporal*/ |
4189 | | /* distance. Hence this mapping needs to be maintained in every */
4190 | | /* layer ctxt */ |
4191 | | /*********************************************************************/ |
4192 | 0 | memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref); |
4193 | 0 | if(ps_ref_map->i4_num_ref) |
4194 | 0 | { |
4195 | 0 | memcpy( |
4196 | 0 | ps_layer_ctxt->ai4_ref_id_to_poc_lc, |
4197 | 0 | ps_ctxt->ai4_ref_idx_to_poc_lc, |
4198 | 0 | ps_ref_map->i4_num_ref * sizeof(S32)); |
4199 | 0 | } |
4200 | 0 | } |
4201 | |
4202 | 0 | return; |
4203 | 0 | } |
4204 | | |
4205 | | /** |
4206 | | ******************************************************************************** |
4207 | | * @fn hme_coarse_process_frm_init |
4208 | | * |
4209 | | * @brief HME frame level initialisation processing function
4210 | | *
4211 | | * @param[in] pv_me_ctxt : ME ctxt pointer
4212 | | *
4213 | | * @param[in] ps_ref_map : Reference map prms pointer
4214 | | *
4215 | | * @param[in] ps_frm_prms : Pointer to frame params
4216 | | *
4217 | | * @return None
4218 | | ******************************************************************************** |
4219 | | */ |
4220 | | void hme_coarse_process_frm_init( |
4221 | | void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms) |
4222 | 0 | { |
4223 | 0 | coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; |
4224 | 0 | S32 i, j, desc_idx; |
4225 | 0 | S16 i2_max_x = 0, i2_max_y = 0; |
4226 | | |
4227 | | /* Set the Qp of current frm passed by caller. Required for intra cost */ |
4228 | 0 | ps_ctxt->frm_qstep = ps_frm_prms->qstep; |
4229 | | |
4230 | | /* Bidir enabled or not */ |
4231 | 0 | ps_ctxt->s_frm_prms = *ps_frm_prms; |
4232 | | |
4233 | | /*************************************************************************/ |
4234 | | /* Set up the ref pic parameters across all layers. For this, we do the */ |
4235 | | /* following: the application has given us a ref pic list, we go index */ |
4236 | | /* by index and pick up the picture. A picture can be uniquely mapped */
4237 | | /* to a POC. So we search the layer descriptor arrays to find that POC */
4238 | | /* Once found, we update all attributes in this descriptor. */
4239 | | /* During this update process we also create an index of descriptor id */
4240 | | /* to ref id mapping. It is important to find the same POC in the layers */
4241 | | /* descr structure since it holds the pyramid inputs for non encode layers */
4242 | | /* Apart from this, we also update the array containing the index of the descr */
4243 | | /* During processing, for ease of access, each layer has a pointer to an array */
4244 | | /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */ |
4245 | | /* we update this too. */ |
4246 | | /*************************************************************************/ |
4247 | 0 | ps_ctxt->num_ref_past = 0; |
4248 | 0 | ps_ctxt->num_ref_future = 0; |
4249 | 0 | for(i = 0; i < ps_ref_map->i4_num_ref; i++) |
4250 | 0 | { |
4251 | 0 | S32 ref_id_lc, idx; |
4252 | 0 | hme_ref_desc_t *ps_ref_desc; |
4253 | |
4254 | 0 | ps_ref_desc = &ps_ref_map->as_ref_desc[i]; |
4255 | 0 | ref_id_lc = ps_ref_desc->i1_ref_id_lc; |
4256 | | /* Obtain the id of descriptor that contains this POC */ |
4257 | 0 | idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc); |
4258 | | |
4259 | | /* Update all layers in this descr with the reference attributes */ |
4260 | 0 | hme_update_layer_desc( |
4261 | 0 | &ps_ctxt->as_ref_descr[idx], |
4262 | 0 | ps_ref_desc, |
4263 | 0 | 1, |
4264 | 0 | ps_ctxt->num_layers - 1, |
4265 | 0 | ps_ctxt->ps_curr_descr); |
4266 | | |
4267 | | /* Update the array having ref id lc to descr id mapping */ |
4268 | 0 | ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx; |
4269 | | |
4270 | | /* From ref id lc we need to work out the POC, so update this array */
4271 | 0 | ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc; |
4272 | | |
4273 | | /* From ref id lc we need to work out the display num, so update this array */
4274 | 0 | ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num; |
4275 | | |
4276 | | /* When computing costs in L0 and L1 directions, we need the */ |
4277 | | /* respective ref id L0 and L1, so update this mapping */ |
4278 | 0 | ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0; |
4279 | 0 | ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1; |
4280 | 0 | if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0) |
4281 | 0 | { |
4282 | 0 | ps_ctxt->au1_is_past[ref_id_lc] = 1; |
4283 | 0 | ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc; |
4284 | 0 | ps_ctxt->num_ref_past++; |
4285 | 0 | } |
4286 | 0 | else |
4287 | 0 | { |
4288 | 0 | ps_ctxt->au1_is_past[ref_id_lc] = 0; |
4289 | 0 | ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc; |
4290 | 0 | ps_ctxt->num_ref_future++; |
4291 | 0 | } |
4292 | 0 | if(1 == ps_ctxt->i4_wt_pred_enable_flag) |
4293 | 0 | { |
4294 | | /* copy the weight and offsets from current ref desc */ |
4295 | 0 | ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight; |
4296 | | |
4297 | | /* inv weight is stored in Q15 format */ |
4298 | 0 | ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = |
4299 | 0 | ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight; |
4300 | |
4301 | 0 | ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset; |
4302 | 0 | } |
4303 | 0 | else |
4304 | 0 | { |
4305 | | /* store default wt and offset*/ |
4306 | 0 | ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT; |
4307 | | |
4308 | | /* inv weight is stored in Q15 format */ |
4309 | 0 | ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = |
4310 | 0 | ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; |
4311 | |
4312 | 0 | ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0; |
4313 | 0 | } |
4314 | 0 | } |
4315 | |
4316 | 0 | ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1; |
4317 | 0 | ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1; |
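Both lists filled in the loop above are closed with a -1 sentinel, so code downstream can walk them without carrying num_ref_past / num_ref_future around. A small illustrative helper (the function name is hypothetical, not part of the library):

    /* count entries of a -1 terminated ref id list such as ai1_past_list */
    static int count_ref_list(const signed char *pi1_list)
    {
        int n = 0;

        while(pi1_list[n] != -1)
            n++;
        return n;
    }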
4318 | | |
4319 | | /*************************************************************************/ |
4320 | | /* Preparation of the TLU for bits for reference indices. */ |
4321 | | /* Special case is that of numref = 2. (TEV) */ |
4322 | | /* Other cases use UEV */
4323 | | /*************************************************************************/ |
4324 | 0 | for(i = 0; i < MAX_NUM_REF; i++) |
4325 | 0 | { |
4326 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0; |
4327 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0; |
4328 | 0 | } |
4329 | |
4330 | 0 | if(ps_ref_map->i4_num_ref == 2) |
4331 | 0 | { |
4332 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1; |
4333 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1; |
4334 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1; |
4335 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1; |
4336 | 0 | } |
4337 | 0 | else if(ps_ref_map->i4_num_ref > 2) |
4338 | 0 | { |
4339 | 0 | for(i = 0; i < ps_ref_map->i4_num_ref; i++) |
4340 | 0 | { |
4341 | 0 | S32 l0, l1; |
4342 | 0 | l0 = ps_ctxt->a_ref_idx_lc_to_l0[i]; |
4343 | 0 | l1 = ps_ctxt->a_ref_idx_lc_to_l1[i]; |
4344 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0]; |
4345 | 0 | ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1]; |
4346 | 0 | } |
4347 | 0 | } |
4348 | | |
4349 | | /*************************************************************************/ |
4350 | | /* Preparation of the scaling factors for reference indices. The scale */ |
4351 | | /* factor depends on distance of the two ref indices from current input */ |
4352 | | /* in terms of poc delta. */ |
4353 | | /*************************************************************************/ |
4354 | 0 | for(i = 0; i < ps_ref_map->i4_num_ref; i++) |
4355 | 0 | { |
4356 | 0 | for(j = 0; j < ps_ref_map->i4_num_ref; j++) |
4357 | 0 | { |
4358 | 0 | S16 i2_scf_q8; |
4359 | 0 | S32 poc_from, poc_to; |
4360 | |
4361 | 0 | poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j]; |
4362 | 0 | poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i]; |
4363 | |
4364 | 0 | i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to); |
4365 | 0 | ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8; |
4366 | 0 | } |
4367 | 0 | } |
4368 | | |
4369 | | /*************************************************************************/ |
4370 | | /* We store simplified look ups for inp y plane for */ |
4371 | | /* every layer and for every ref id in the layer. */ |
4372 | | /*************************************************************************/ |
4373 | 0 | for(i = 1; i < ps_ctxt->num_layers; i++) |
4374 | 0 | { |
4375 | 0 | U08 **ppu1_inp; |
4376 | |
4377 | 0 | ppu1_inp = &ps_ctxt->apu1_list_inp[i][0]; |
4378 | 0 | for(j = 0; j < ps_ref_map->i4_num_ref; j++) |
4379 | 0 | { |
4380 | 0 | hme_ref_desc_t *ps_ref_desc; |
4381 | 0 | hme_ref_buf_info_t *ps_buf_info; |
4382 | 0 | layer_ctxt_t *ps_layer; |
4383 | 0 | S32 ref_id_lc; |
4384 | |
4385 | 0 | ps_ref_desc = &ps_ref_map->as_ref_desc[j]; |
4386 | 0 | ps_buf_info = &ps_ref_desc->as_ref_info[i]; |
4387 | 0 | ref_id_lc = ps_ref_desc->i1_ref_id_lc; |
4388 | |
4389 | 0 | desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc]; |
4390 | 0 | ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i]; |
4391 | |
4392 | 0 | ppu1_inp[j] = ps_layer->pu1_inp; |
4393 | 0 | } |
4394 | 0 | } |
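Non-encode layers wire up only the downscaled source planes (pu1_inp) because coarse ME is open loop: it searches the reference frame's pyramid input rather than its reconstruction. The matching cost is then a plain SAD over small blocks, 4x4 at the coarsest layer; a minimal sketch with assumed strides and block size:

    #include <stdlib.h>

    /* SAD between a 4x4 block of the current layer input and a candidate
       position in a reference layer input plane */
    static int sad_4x4(const unsigned char *pu1_cur, int cur_stride,
                       const unsigned char *pu1_ref, int ref_stride)
    {
        int i, j, sad = 0;

        for(i = 0; i < 4; i++)
        {
            for(j = 0; j < 4; j++)
                sad += abs(pu1_cur[j] - pu1_ref[j]);
            pu1_cur += cur_stride;
            pu1_ref += ref_stride;
        }
        return sad;
    }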
4395 | | /*************************************************************************/ |
4396 | | /* The mv range for each layer is computed. For dyadic layers it will */ |
4397 | | /* keep shrinking by 2; for non dyadic it will shrink by the ratio of wd */
4398 | | /* and ht. In general the scale is the ratio of wd for x and of ht for y */
4399 | | /*************************************************************************/ |
4400 | | |
4401 | | /* set to layer 0 search range params */ |
4402 | 0 | i2_max_x = ps_frm_prms->i2_mv_range_x; |
4403 | 0 | i2_max_y = ps_frm_prms->i2_mv_range_y; |
4404 | |
4405 | 0 | for(i = 1; i < ps_ctxt->num_layers; i++) |
4406 | 0 | { |
4407 | 0 | layer_ctxt_t *ps_layer_ctxt; |
4408 | |
4409 | 0 | { |
4410 | 0 | i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1])); |
4411 | 0 | i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1])); |
4412 | 0 | } |
4413 | 0 | ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i]; |
4414 | 0 | ps_layer_ctxt->i2_max_mv_x = i2_max_x; |
4415 | 0 | ps_layer_ctxt->i2_max_mv_y = i2_max_y; |
4416 | | |
4417 | | /*********************************************************************/ |
4418 | | /* Every layer maintains a reference id lc to POC mapping. This is */ |
4419 | | /* because the mapping is unique for every frm. Also, in next frm, */ |
4420 | | /* we require colocated mvs which means scaling according to temporal*/ |
4421 | | /* distance. Hence this mapping needs to be maintained in every */
4422 | | /* layer ctxt */ |
4423 | | /*********************************************************************/ |
4424 | 0 | memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref); |
4425 | 0 | if(ps_ref_map->i4_num_ref) |
4426 | 0 | { |
4427 | 0 | memcpy( |
4428 | 0 | ps_layer_ctxt->ai4_ref_id_to_poc_lc, |
4429 | 0 | ps_ctxt->ai4_ref_idx_to_poc_lc, |
4430 | 0 | ps_ref_map->i4_num_ref * sizeof(S32)); |
4431 | 0 | memcpy( |
4432 | 0 | ps_layer_ctxt->ai4_ref_id_to_disp_num, |
4433 | 0 | ps_ctxt->ai4_ref_idx_to_disp_num, |
4434 | 0 | ps_ref_map->i4_num_ref * sizeof(S32)); |
4435 | 0 | } |
4436 | 0 | } |
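To make the per-layer range computation concrete: assuming FLOOR8 rounds its argument down to a multiple of 8 and the pyramid is dyadic, a layer-0 range of 512 shrinks to 256, 128 and 64 on the next three layers. The widths below are invented purely for illustration:

    #include <stdio.h>

    #define FLOOR8_SKETCH(x) ((x) & ~7)   /* assumed behaviour of FLOOR8 */

    int main(void)
    {
        int a_wd[4] = { 1920, 960, 480, 240 };   /* hypothetical layer widths */
        int range_x = 512;                       /* layer-0 horizontal range  */
        int i;

        for(i = 1; i < 4; i++)
        {
            range_x = FLOOR8_SKETCH((range_x * a_wd[i]) / a_wd[i - 1]);
            printf("layer %d: mv range x = %d\n", i, range_x);
        }
        return 0;
    }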
4437 | |
4438 | 0 | return; |
4439 | 0 | } |
4440 | | |
4441 | | /** |
4442 | | ******************************************************************************** |
4443 | | * @fn hme_process_frm |
4444 | | * |
4445 | | * @brief HME frame level processing function |
4446 | | * |
4447 | | * @param[in] pv_me_ctxt : ME ctxt pointer |
4448 | | * |
4449 | | * @param[in] ps_ref_map : Reference map prms pointer |
4450 | | * |
4451 | | * @param[in] ppd_intra_costs : pointer to array of intra cost buffers for each layer
4452 | | *
4453 | | * @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4454 | | *
4455 | | * @param[in] pf_ext_update_fxn : function pointer to update CTB results
4456 | | * |
4457 | | * @param[in] pv_coarse_layer : pointer to the coarse layer ctxt
4458 | | *
4459 | | * @param[in] pv_multi_thrd_ctxt : pointer to the multi thread ctxt
4460 | | *
4461 | | * @return None
4462 | | ******************************************************************************** |
4463 | | */ |
4464 | | |
4465 | | void hme_process_frm( |
4466 | | void *pv_me_ctxt, |
4467 | | pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input, |
4468 | | hme_ref_map_t *ps_ref_map, |
4469 | | double **ppd_intra_costs, |
4470 | | hme_frm_prms_t *ps_frm_prms, |
4471 | | PF_EXT_UPDATE_FXN_T pf_ext_update_fxn, |
4472 | | void *pv_coarse_layer, |
4473 | | void *pv_multi_thrd_ctxt, |
4474 | | S32 i4_frame_parallelism_level, |
4475 | | S32 thrd_id, |
4476 | | S32 i4_me_frm_id) |
4477 | 0 | { |
4478 | 0 | refine_prms_t s_refine_prms; |
4479 | 0 | me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt; |
4480 | 0 | me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id]; |
4481 | |
4482 | 0 | S32 lyr_job_type; |
4483 | 0 | multi_thrd_ctxt_t *ps_multi_thrd_ctxt; |
4484 | 0 | layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer; |
4485 | |
4486 | 0 | ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
4487 | |
4488 | 0 | lyr_job_type = ME_JOB_ENC_LYR; |
4489 | | /*************************************************************************/ |
4490 | | /* Final L0 layer ME call */ |
4491 | | /*************************************************************************/ |
4492 | 0 | { |
4493 | | /* Set the CTB attributes depending on corner/rt edge/bot edge/center */
4494 | 0 | hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht); |
4495 | |
4496 | 0 | hme_set_refine_prms( |
4497 | 0 | &s_refine_prms, |
4498 | 0 | ps_ctxt->u1_encode[0], |
4499 | 0 | ps_ref_map->i4_num_ref, |
4500 | 0 | 0, |
4501 | 0 | ps_ctxt->num_layers, |
4502 | 0 | ps_ctxt->num_layers_explicit_search, |
4503 | 0 | ps_thrd_ctxt->s_init_prms.use_4x4, |
4504 | 0 | ps_frm_prms, |
4505 | 0 | ppd_intra_costs, |
4506 | 0 | &ps_thrd_ctxt->s_init_prms.s_me_coding_tools); |
4507 | |
4508 | 0 | hme_refine( |
4509 | 0 | ps_thrd_ctxt, |
4510 | 0 | &s_refine_prms, |
4511 | 0 | pf_ext_update_fxn, |
4512 | 0 | ps_coarse_layer, |
4513 | 0 | ps_multi_thrd_ctxt, |
4514 | 0 | lyr_job_type, |
4515 | 0 | thrd_id, |
4516 | 0 | i4_me_frm_id, |
4517 | 0 | ps_l0_ipe_input); |
4518 | | |
4519 | | /* Set current ref pic status which will be used as prev frame ref pic */
4520 | 0 | if(i4_frame_parallelism_level) |
4521 | 0 | { |
4522 | 0 | ps_ctxt->i4_is_prev_frame_reference = 0; |
4523 | 0 | } |
4524 | 0 | else |
4525 | 0 | { |
4526 | 0 | ps_ctxt->i4_is_prev_frame_reference = |
4527 | 0 | ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id] |
4528 | 0 | ->ps_curr_inp->s_lap_out.i4_is_ref_pic; |
4529 | 0 | } |
4530 | 0 | } |
4531 | |
4532 | 0 | return; |
4533 | 0 | } |
4534 | | |
4535 | | /** |
4536 | | ******************************************************************************** |
4537 | | * @fn hme_coarse_process_frm |
4538 | | * |
4539 | | * @brief HME frame level processing function (coarse + refine) |
4540 | | * |
4541 | | * @param[in] pv_me_ctxt : ME ctxt pointer |
4542 | | * |
4543 | | * @param[in] ps_ref_map : Reference map prms pointer |
4544 | | * |
4545 | | * @param[in] ps_frm_prms : pointer to Frame level parameters of HME |
4546 | | * |
4547 | | * @param[in] pv_multi_thrd_ctxt : Multi thread related ctxt
4548 | | *
4549 | | * @return None
4550 | | ******************************************************************************** |
4551 | | */ |
4552 | | |
4553 | | void hme_coarse_process_frm( |
4554 | | void *pv_me_ctxt, |
4555 | | hme_ref_map_t *ps_ref_map, |
4556 | | hme_frm_prms_t *ps_frm_prms, |
4557 | | void *pv_multi_thrd_ctxt, |
4558 | | WORD32 i4_ping_pong, |
4559 | | void **ppv_dep_mngr_hme_sync) |
4560 | 0 | { |
4561 | 0 | S16 i2_max; |
4562 | 0 | S32 layer_id; |
4563 | 0 | coarse_prms_t s_coarse_prms; |
4564 | 0 | refine_prms_t s_refine_prms; |
4565 | 0 | coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; |
4566 | 0 | S32 lyr_job_type; |
4567 | 0 | multi_thrd_ctxt_t *ps_multi_thrd_ctxt; |
4568 | |
4569 | 0 | ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
4570 | | /*************************************************************************/ |
4571 | | /* Fire processing of all layers, starting with coarsest layer. */ |
4572 | | /*************************************************************************/ |
4573 | 0 | layer_id = ps_ctxt->num_layers - 1; |
4574 | 0 | i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x; |
4575 | 0 | i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y); |
4576 | 0 | s_coarse_prms.i4_layer_id = layer_id; |
4577 | 0 | { |
4578 | 0 | S32 log_start_step; |
4579 | | /* Based on Preset, set the starting step size for Refinement */ |
4580 | 0 | if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets) |
4581 | 0 | { |
4582 | 0 | log_start_step = 0; |
4583 | 0 | } |
4584 | 0 | else |
4585 | 0 | { |
4586 | 0 | log_start_step = 1; |
4587 | 0 | } |
4588 | |
4589 | 0 | s_coarse_prms.i4_max_iters = i2_max >> log_start_step; |
4590 | 0 | s_coarse_prms.i4_start_step = 1 << log_start_step; |
4591 | 0 | } |
4592 | 0 | s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref; |
4593 | 0 | s_coarse_prms.do_full_search = 1; |
4594 | 0 | if(s_coarse_prms.do_full_search) |
4595 | 0 | { |
4596 | | /* Set to 2 or 4 */ |
4597 | 0 | if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED) |
4598 | 0 | s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY; |
4599 | 0 | else if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets >= ME_MEDIUM_SPEED) |
4600 | 0 | s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED; |
4601 | 0 | } |
4602 | 0 | s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse; |
4603 | | |
4604 | | /* Coarse layer uses only 1 lambda, i.e. the one for open loop ME */ |
4605 | 0 | s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf; |
4606 | 0 | s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift; |
4607 | 0 | s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0); |
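Putting the coarse parameters together: the coarsest layer runs a stepped full search over the clamped MV range, and the discounted open-loop lambda enters the cost in fixed point through lambda_q_shift. The skeleton below is only an illustration of how such parameters typically combine; the rate proxy, the sad_at callback and the concrete step values behind the HME_COARSE_STEP_SIZE_* macros are placeholders, not the library's code:

    #include <stdlib.h>

    /* stepped full-search skeleton; sad_at() stands in for the real block SAD */
    static void coarse_search_sketch(int range, int step, int lambda, int q_shift,
                                     int (*sad_at)(int, int),
                                     int *best_x, int *best_y)
    {
        int mvx, mvy, best_cost = 1 << 30;

        for(mvy = -range; mvy <= range; mvy += step)
        {
            for(mvx = -range; mvx <= range; mvx += step)
            {
                int mv_bits = abs(mvx) + abs(mvy);   /* crude rate proxy */
                int cost = sad_at(mvx, mvy) + ((mv_bits * lambda) >> q_shift);

                if(cost < best_cost)
                {
                    best_cost = cost;
                    *best_x = mvx;
                    *best_y = mvy;
                }
            }
        }
    }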
4608 | |
4609 | 0 | hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync); |
4610 | | |
4611 | | /* all refinement layers are processed in the loop below */
4612 | 0 | layer_id--; |
4613 | 0 | lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1; |
4614 | | |
4615 | | /*************************************************************************/ |
4616 | | /* This loop will run for all refine layers (non- encode layers) */ |
4617 | | /* This loop will run for all refine layers (non-encode layers) */
4618 | 0 | while(layer_id > 0) |
4619 | 0 | { |
4620 | 0 | hme_set_refine_prms( |
4621 | 0 | &s_refine_prms, |
4622 | 0 | ps_ctxt->u1_encode[layer_id], |
4623 | 0 | ps_ref_map->i4_num_ref, |
4624 | 0 | layer_id, |
4625 | 0 | ps_ctxt->num_layers, |
4626 | 0 | ps_ctxt->num_layers_explicit_search, |
4627 | 0 | ps_ctxt->s_init_prms.use_4x4, |
4628 | 0 | ps_frm_prms, |
4629 | 0 | NULL, |
4630 | 0 | &ps_ctxt->s_init_prms.s_me_coding_tools); |
4631 | |
4632 | 0 | hme_refine_no_encode( |
4633 | 0 | ps_ctxt, |
4634 | 0 | &s_refine_prms, |
4635 | 0 | ps_multi_thrd_ctxt, |
4636 | 0 | lyr_job_type, |
4637 | 0 | i4_ping_pong, |
4638 | 0 | ppv_dep_mngr_hme_sync); |
4639 | |
4640 | 0 | layer_id--; |
4641 | 0 | lyr_job_type++; |
4642 | 0 | } |
4643 | 0 | } |
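Each refinement pass above is seeded by the winners of the layer just searched; for dyadic layers that essentially means doubling the coarser-layer MVs before searching a small window around them. The projection below is a deliberately simplified sketch of that seeding step (the type and function names are hypothetical, and the real hme_refine_no_encode also handles non-dyadic ratios, temporal scaling and clipping):

    typedef struct { short i2_mvx, i2_mvy; } mv_sketch_t;

    /* seed a finer-layer candidate from the colocated coarser-layer winner */
    static mv_sketch_t project_mv_dyadic(mv_sketch_t s_coarse_mv)
    {
        mv_sketch_t s_seed;

        s_seed.i2_mvx = (short)(s_coarse_mv.i2_mvx << 1);
        s_seed.i2_mvy = (short)(s_coarse_mv.i2_mvy << 1);
        return s_seed;
    }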
4644 | | /** |
4645 | | ******************************************************************************** |
4646 | | * @fn hme_fill_neighbour_mvs |
4647 | | * |
4648 | | * @brief HME neighbour MV population function |
4649 | | * |
4650 | | * @param[in] pps_mv_grid : MV grid array pointer |
4651 | | * |
4652 | | * @param[in] i4_ctb_x : CTB pos X |
4653 | | |
4654 | | * @param[in] i4_ctb_y : CTB pos Y |
4655 | | * |
4656 | | * @remarks : Needs to be populated for proper implementation of cost fxn |
4657 | | * |
4658 | | * @return None
4659 | | ******************************************************************************** |
4660 | | */ |
4661 | | void hme_fill_neighbour_mvs( |
4662 | | mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt) |
4663 | 0 | { |
4664 | | /* TODO : Needs to be populated for proper implementation of cost fxn */ |
4665 | 0 | ARG_NOT_USED(pps_mv_grid); |
4666 | 0 | ARG_NOT_USED(i4_ctb_x); |
4667 | 0 | ARG_NOT_USED(i4_ctb_y); |
4668 | 0 | ARG_NOT_USED(i4_num_ref); |
4669 | 0 | ARG_NOT_USED(pv_ctxt); |
4670 | 0 | } |
4671 | | |
4672 | | /** |
4673 | | ******************************************************************************* |
4674 | | * @fn WORD32 hme_get_active_pocs_list(void *pv_me_ctxt,
4675 | | *                                     S32 i4_num_me_frm_pllel)
4676 | | *
4677 | | * @brief Checks whether every ME reference descriptor holds an active
4678 | | *        entry (not free and with a valid POC)
4679 | | *
4680 | | * @param[in] pv_me_ctxt : handle to ME context
4681 | | *
4682 | | * @param[in] i4_num_me_frm_pllel : number of ME frames run in parallel
4683 | | *
4684 | | * @return 1 if all descriptors hold valid POCs, else 0
4685 | | ******************************************************************************* |
4686 | | */ |
4687 | | WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel) |
4688 | 0 | { |
4689 | 0 | me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt; |
4690 | 0 | S32 i, count = 0; |
4691 | |
4692 | 0 | for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) |
4693 | 0 | { |
4694 | 0 | S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc; |
4695 | 0 | S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free; |
4696 | |
4697 | 0 | if((i4_is_free == 0) && (poc != INVALID_POC)) |
4698 | 0 | { |
4699 | 0 | count++; |
4700 | 0 | } |
4701 | 0 | } |
4702 | 0 | if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1) |
4703 | 0 | { |
4704 | 0 | return 1; |
4705 | 0 | } |
4706 | 0 | else |
4707 | 0 | { |
4708 | 0 | return 0; |
4709 | 0 | } |
4710 | 0 | } |
4711 | | |
4712 | | /** |
4713 | | ******************************************************************************* |
4714 | | * @fn void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, |
4715 | | * S32 *p_pocs_buffered_in_me) |
4716 | | * |
4717 | | * @brief Returns the list of active POCs in ME ctxt |
4718 | | * |
4719 | | * @param[in] pv_me_ctxt : handle to ME context |
4720 | | * |
4721 | | * @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn |
4722 | | * populates with pocs active |
4723 | | * |
4724 | | * @return void |
4725 | | ******************************************************************************* |
4726 | | */ |
4727 | | void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me) |
4728 | 0 | { |
4729 | 0 | coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; |
4730 | 0 | S32 i, count = 0; |
4731 | |
4732 | 0 | for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) |
4733 | 0 | { |
4734 | 0 | S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc; |
4735 | |
4736 | 0 | if(poc != -1) |
4737 | 0 | { |
4738 | 0 | p_pocs_buffered_in_me[count] = poc; |
4739 | 0 | count++; |
4740 | 0 | } |
4741 | 0 | } |
4742 | 0 | p_pocs_buffered_in_me[count] = -1; |
4743 | 0 | } |
4744 | | |
4745 | | S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode) |
4746 | 0 | { |
4747 | | /* coarsest layer uses 4x4 blks, lowermost layer/encode layer uses 16x16 */ |
4748 | 0 | if(layer_id == n_layers - 1) |
4749 | 0 | return 4; |
4750 | 0 | else if((layer_id == 0) || (encode)) |
4751 | 0 | return 16; |
4752 | | |
4753 | | /* Intermediate non encode layers use 8 */ |
4754 | 0 | return 8; |
4755 | 0 | } |
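For a hypothetical four-layer pyramid with no intermediate encode layers, the mapping above gives 4x4 blocks at layer 3, 8x8 at layers 2 and 1, and 16x16 at layer 0. A tiny standalone driver that reproduces the same mapping (the use_4x4 flag is ignored here, as it is by the branch structure above):

    #include <stdio.h>

    static int blk_size_sketch(int layer_id, int n_layers, int encode)
    {
        if(layer_id == n_layers - 1)
            return 4;                    /* coarsest layer             */
        if((layer_id == 0) || encode)
            return 16;                   /* encode / lowermost layer   */
        return 8;                        /* intermediate refine layers */
    }

    int main(void)
    {
        int layer;

        for(layer = 3; layer >= 0; layer--)
            printf("layer %d -> %dx%d\n", layer,
                   blk_size_sketch(layer, 4, 0), blk_size_sketch(layer, 4, 0));
        return 0;
    }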