/src/libavc/encoder/svc/isvce_ibl_eval.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2022 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ******************************************************************************* |
23 | | * @file |
24 | | * isvce_ibl_eval.c |
25 | | * |
26 | | * @brief |
27 | | * Contains functions used for SVC intra prediction |
28 | | * |
29 | | ******************************************************************************* |
30 | | */ |
31 | | #include <math.h> |
32 | | #include <limits.h> |
33 | | #include <stdbool.h> |
34 | | |
35 | | #include "ih264_typedefs.h" |
36 | | #include "iv2.h" |
37 | | #include "isvc_macros.h" |
38 | | #include "ih264_debug.h" |
39 | | #include "ih264_padding.h" |
40 | | #include "isvce_defs.h" |
41 | | #include "isvce_ibl_private_defs.h" |
42 | | #include "isvce_ibl_eval.h" |
43 | | #include "isvce_utils.h" |
44 | | #include "isvc_intra_resample.h" |
45 | | #include "isvc_defs.h" |
46 | | |
47 | | static FORCEINLINE WORD32 isvce_get_num_mb_states(UWORD32 u4_wd, UWORD32 u4_ht) |
48 | 123k | { |
49 | 123k | return (u4_wd / MB_SIZE) * (u4_ht / MB_SIZE); |
50 | 123k | } |
51 | | |
52 | | static FORCEINLINE WORD32 isvce_get_phase_array_size(DOUBLE d_spatial_res_ratio, bool b_is_chroma) |
53 | 89.6k | { |
54 | 89.6k | return (2 == d_spatial_res_ratio) ? (b_is_chroma ? 3 : 0) : 5; |
55 | 89.6k | } |
56 | | |
57 | | /** |
58 | | ******************************************************************************* |
59 | | * |
60 | | * @brief |
61 | | * Returns size of buffers for storing residual pred ctxt |
62 | | * |
63 | | * @param[in] u1_num_spatial_layers |
64 | | * Num Spatial Layers |
65 | | * |
66 | | * @param[in] d_spatial_res_ratio |
67 | | * Resolution Ratio b/w spatial layers |
68 | | * |
69 | | * @param[in] u4_wd |
70 | | * Input Width |
71 | | * |
72 | | * @param[in] u4_ht |
73 | | * Input Height |
74 | | * |
75 | | * @returns Size of buffers |
76 | | * |
77 | | ******************************************************************************* |
78 | | */ |
79 | | UWORD32 isvce_get_svc_intra_pred_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, |
80 | | UWORD32 u4_wd, UWORD32 u4_ht) |
81 | 15.6k | { |
82 | 15.6k | WORD32 i, j; |
83 | | |
84 | 15.6k | UWORD32 u4_size = 0; |
85 | | |
86 | 15.6k | if(u1_num_spatial_layers > 1) |
87 | 12.9k | { |
88 | 12.9k | u4_size += MAX_PROCESS_CTXT * sizeof(svc_intra_pred_ctxt_t); |
89 | 12.9k | u4_size += MAX_PROCESS_CTXT * sizeof(intra_pred_state_t); |
90 | 12.9k | u4_size += MAX_PROCESS_CTXT * u1_num_spatial_layers * sizeof(intra_pred_layer_state_t); |
91 | | |
92 | 46.5k | for(i = u1_num_spatial_layers - 1; i >= 0; i--) |
93 | 33.6k | { |
94 | 33.6k | WORD32 i4_layer_luma_wd = |
95 | 33.6k | (WORD32) ((DOUBLE) u4_wd / |
96 | 33.6k | pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + |
97 | 33.6k | 0.99; |
98 | 33.6k | WORD32 i4_layer_luma_ht = |
99 | 33.6k | ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; |
100 | 33.6k | WORD32 i4_layer_wd_mbs = i4_layer_luma_wd / MB_SIZE; |
101 | 33.6k | WORD32 i4_layer_ht_mbs = i4_layer_luma_ht / MB_SIZE; |
102 | | /*Add PAD Mbs */ |
103 | 33.6k | WORD32 i4_layer_luma_mbs = |
104 | 33.6k | ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); |
105 | 33.6k | WORD32 i4_num_mb_states = isvce_get_num_mb_states(i4_layer_luma_wd, i4_layer_luma_ht); |
106 | | |
107 | 100k | for(j = 0; j < NUM_SP_COMPONENTS; j++) |
108 | 67.2k | { |
109 | 67.2k | bool b_is_chroma = ((COMPONENT_TYPE) j) != Y; |
110 | | |
111 | 67.2k | u4_size += i4_num_mb_states * sizeof(intra_pred_mb_state_t); |
112 | | |
113 | | /* pi4_ref_array_positions_x */ |
114 | 67.2k | u4_size += MAX_REF_ARR_WD_HT * i4_layer_wd_mbs * sizeof(WORD32); |
115 | | |
116 | | /* pi4_ref_array_positions_y */ |
117 | 67.2k | u4_size += (i4_layer_ht_mbs >> b_is_chroma) * i4_layer_ht_mbs * sizeof(WORD32); |
118 | | |
119 | | /* ps_ref_array_phases */ |
120 | 67.2k | u4_size += isvce_get_phase_array_size(d_spatial_res_ratio, b_is_chroma) * |
121 | 67.2k | sizeof(coordinates_t); |
122 | 67.2k | } |
123 | | |
124 | | /* pi1_mb_mode */ |
125 | 33.6k | u4_size += i4_layer_luma_mbs * sizeof(WORD8); |
126 | | |
127 | | /* pu1_refarray_buffer */ |
128 | 33.6k | u4_size += MAX_PROCESS_CTXT * TEMP_BUF_SIZE_LUMA * sizeof(UWORD8); |
129 | | |
130 | | /* pu1_refarray_cb, pu1_refarray_cr */ |
131 | 33.6k | u4_size += MAX_PROCESS_CTXT * (TEMP_BUF_SIZE_CB + TEMP_BUF_SIZE_CR) * sizeof(UWORD8); |
132 | | |
133 | | /* pi4_temp_interpolation_buffer */ |
134 | 33.6k | u4_size += MAX_PROCESS_CTXT * TEMP_INTERPOLATION_BUF_SIZE * sizeof(WORD32); |
135 | 33.6k | } |
136 | | |
137 | | /* intra_pred_outputs_t.s_pred_buf */ |
138 | 12.9k | u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(UWORD8); |
139 | | |
140 | 12.9k | u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(UWORD8); |
141 | 12.9k | } |
142 | | |
143 | 15.6k | return u4_size; |
144 | 15.6k | } |
145 | | |
146 | | static FORCEINLINE WORD32 isvce_get_scaled_pixel_pos(layer_resampler_props_t *ps_layer_props, |
147 | | WORD32 i4_pixel_pos, UWORD8 u1_dim_id) |
148 | 60.7M | { |
149 | 60.7M | if(1 == u1_dim_id) |
150 | 28.6M | { |
151 | 28.6M | return (((i4_pixel_pos - ps_layer_props->i4_offset_y) * |
152 | 28.6M | ((WORD64) ps_layer_props->u4_scale_y) + |
153 | 28.6M | ps_layer_props->i4_add_y) >> |
154 | 28.6M | (ps_layer_props->u4_shift_y - 4)) - |
155 | 28.6M | ps_layer_props->i4_delta_y; |
156 | 28.6M | } |
157 | 32.0M | else |
158 | 32.0M | { |
159 | 32.0M | return (((i4_pixel_pos - ps_layer_props->i4_offset_x) * |
160 | 32.0M | ((WORD64) ps_layer_props->u4_scale_x) + |
161 | 32.0M | ps_layer_props->i4_add_x) >> |
162 | 32.0M | (ps_layer_props->u4_shift_x - 4)) - |
163 | 32.0M | ps_layer_props->i4_delta_x; |
164 | 32.0M | } |
165 | 60.7M | } |
166 | | |
167 | | static FORCEINLINE void isvce_ref_array_pos_init( |
168 | | layer_resampler_props_t *ps_layer_props, intra_pred_mb_state_t *ps_mb_state, |
169 | | coordinates_t *ps_mb_pos, DOUBLE d_spatial_res_ratio, UWORD8 u1_frame_mbs_only_flag, |
170 | | UWORD8 u1_field_mb_flag, UWORD8 u1_ref_layer_frame_mbs_only_flag) |
171 | 130k | { |
172 | 130k | if(1.5 == d_spatial_res_ratio) |
173 | 125k | { |
174 | 125k | UWORD32 i; |
175 | | |
176 | 125k | WORD32 *pi4_ref_array_positions_x = ps_mb_state->pi4_ref_array_positions_x; |
177 | 125k | WORD32 *pi4_ref_array_positions_y = ps_mb_state->pi4_ref_array_positions_y; |
178 | 125k | WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; |
179 | 125k | WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; |
180 | | |
181 | 125k | if(0 == ps_mb_pos->i4_abscissa) |
182 | 49.3k | { |
183 | 641k | for(i = 0; i < ps_layer_props->u4_mb_ht; i++) |
184 | 592k | { |
185 | 592k | WORD32 i4_y_ref16; |
186 | | |
187 | 592k | WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; |
188 | | |
189 | 592k | if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) |
190 | 0 | { |
191 | 0 | i4_yc = i4_yc >> (1 - u1_field_mb_flag); |
192 | 0 | } |
193 | | |
194 | 592k | i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); |
195 | | |
196 | 592k | pi4_ref_array_positions_y[i] = (i4_y_ref16 >> 4) - i4_y_offset; |
197 | 592k | } |
198 | 49.3k | } |
199 | | |
200 | 125k | if(0 == ps_mb_pos->i4_ordinate) |
201 | 78.4k | { |
202 | 3.84M | for(i = 0; i < MAX_REF_ARR_WD_HT; i++) |
203 | 3.76M | { |
204 | 3.76M | WORD32 i4_x_ref16; |
205 | | |
206 | 3.76M | WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + i; |
207 | | |
208 | 3.76M | i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); |
209 | | |
210 | 3.76M | pi4_ref_array_positions_x[i] = (i4_x_ref16 >> 4) - i4_x_offset; |
211 | 3.76M | } |
212 | 78.4k | } |
213 | 125k | } |
214 | 130k | } |
215 | | |
216 | | static FORCEINLINE void isvce_ref_array_phase_init( |
217 | | layer_resampler_props_t *ps_layer_props, intra_pred_mb_state_t *ps_mb_state, |
218 | | coordinates_t *ps_mb_pos, DOUBLE d_spatial_res_ratio, UWORD8 u1_frame_mbs_only_flag, |
219 | | UWORD8 u1_field_mb_flag, UWORD8 u1_ref_layer_frame_mbs_only_flag) |
220 | 130k | { |
221 | 130k | UWORD32 i, j; |
222 | | |
223 | 130k | coordinates_t *ps_ref_array_phases = ps_mb_state->ps_ref_array_phases; |
224 | | |
225 | 130k | WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; |
226 | 130k | WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; |
227 | 130k | UWORD32 u4_phase_array_idx = 0; |
228 | | |
229 | 130k | if(1.5 == d_spatial_res_ratio) |
230 | 125k | { |
231 | 500k | for(i = 0; i < 3; i++) |
232 | 375k | { |
233 | 375k | WORD32 i4_y_ref16; |
234 | | |
235 | 375k | WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; |
236 | | |
237 | 375k | if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) |
238 | 0 | { |
239 | 0 | i4_yc = i4_yc >> (1 - u1_field_mb_flag); |
240 | 0 | } |
241 | | |
242 | 375k | i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); |
243 | | |
244 | 1.00M | for(j = 0; j < ((0 == i) ? 3 : 1); j++) |
245 | 625k | { |
246 | 625k | WORD32 i4_x_ref16; |
247 | | |
248 | 625k | WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; |
249 | | |
250 | 625k | i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); |
251 | | |
252 | 625k | ps_ref_array_phases[u4_phase_array_idx].i4_abscissa = i4_x_ref16 & 15; |
253 | 625k | ps_ref_array_phases[u4_phase_array_idx].i4_ordinate = i4_y_ref16 & 15; |
254 | | |
255 | 625k | u4_phase_array_idx++; |
256 | 625k | } |
257 | 375k | } |
258 | 125k | } |
259 | 5.53k | else |
260 | 5.53k | { |
261 | 16.5k | for(i = 0; i < 2; i++) |
262 | 11.0k | { |
263 | 11.0k | WORD32 i4_y_ref16; |
264 | | |
265 | 11.0k | WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; |
266 | | |
267 | 11.0k | if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) |
268 | 0 | { |
269 | 0 | i4_yc = i4_yc >> (1 - u1_field_mb_flag); |
270 | 0 | } |
271 | | |
272 | 11.0k | i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); |
273 | | |
274 | 27.6k | for(j = 0; j < ((0 == i) ? 2 : 1); j++) |
275 | 16.5k | { |
276 | 16.5k | WORD32 i4_x_ref16; |
277 | | |
278 | 16.5k | WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; |
279 | | |
280 | 16.5k | i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); |
281 | | |
282 | 16.5k | ps_ref_array_phases[u4_phase_array_idx].i4_abscissa = |
283 | 16.5k | (i4_x_ref16 - (16 * i4_x_offset)) & 15; |
284 | 16.5k | ps_ref_array_phases[u4_phase_array_idx].i4_ordinate = |
285 | 16.5k | (i4_y_ref16 - (16 * i4_y_offset)) & 15; |
286 | | |
287 | 16.5k | u4_phase_array_idx++; |
288 | 16.5k | } |
289 | 11.0k | } |
290 | 5.53k | } |
291 | 130k | } |
292 | | |
293 | | static FORCEINLINE void isvce_set_mb_states(layer_resampler_props_t *ps_layer_props, |
294 | | intra_pred_mb_state_t *ps_mb_states, |
295 | | coordinates_t *ps_mb_pos, DOUBLE d_spatial_res_ratio, |
296 | | UWORD32 u4_wd_in_mbs, bool b_is_chroma) |
297 | 13.8M | { |
298 | 13.8M | WORD32 i4_x_refmin16; |
299 | 13.8M | WORD32 i4_x_refmax16; |
300 | 13.8M | WORD32 i4_y_refmin16; |
301 | 13.8M | WORD32 i4_y_refmax16; |
302 | 13.8M | WORD32 i4_x_offset, i4_y_offset; |
303 | | |
304 | 13.8M | const UWORD8 u1_frame_mbs_only_flag = 1; |
305 | 13.8M | const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1; |
306 | 13.8M | const UWORD8 u1_field_mb_flag = 0; |
307 | | |
308 | 13.8M | i4_x_refmin16 = isvce_get_scaled_pixel_pos( |
309 | 13.8M | ps_layer_props, ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd, 0); |
310 | 13.8M | i4_x_refmax16 = isvce_get_scaled_pixel_pos( |
311 | 13.8M | ps_layer_props, |
312 | 13.8M | ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + ps_layer_props->u4_mb_wd - 1, 0); |
313 | | |
314 | 13.8M | i4_y_refmin16 = isvce_get_scaled_pixel_pos( |
315 | 13.8M | ps_layer_props, ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht, 1); |
316 | 13.8M | i4_y_refmax16 = isvce_get_scaled_pixel_pos( |
317 | 13.8M | ps_layer_props, |
318 | 13.8M | ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + ps_layer_props->u4_mb_ht - 1, 1); |
319 | | |
320 | 13.8M | i4_x_offset = (i4_x_refmin16 >> 4); |
321 | 13.8M | i4_y_offset = (i4_y_refmin16 >> 4); |
322 | | |
323 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
324 | 13.8M | .s_offsets.i4_abscissa = i4_x_offset; |
325 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
326 | 13.8M | .s_offsets.i4_ordinate = i4_y_offset; |
327 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
328 | 13.8M | .s_ref_array_dims.i4_abscissa = (((i4_x_refmax16 + 15) >> 8) << 4) + |
329 | 13.8M | ((WORD32) (ps_layer_props->u4_mb_wd >> 1)) - i4_x_offset + |
330 | 13.8M | 16; |
331 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
332 | 13.8M | .s_ref_array_dims.i4_ordinate = (((i4_y_refmax16 + 15) >> 8) << 4) + |
333 | 13.8M | ((WORD32) (ps_layer_props->u4_mb_ht >> 1)) - i4_y_offset + |
334 | 13.8M | 16; |
335 | | |
336 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
337 | 13.8M | .s_max_pos.i4_abscissa = ((i4_x_refmax16 + 15) >> 4) - i4_x_offset; |
338 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
339 | 13.8M | .s_max_pos.i4_ordinate = ((i4_y_refmax16 + 15) >> 4) - i4_y_offset; |
340 | | |
341 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
342 | 13.8M | .s_min_pos.i4_abscissa = (i4_x_refmin16 >> 4) - i4_x_offset; |
343 | 13.8M | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
344 | 13.8M | .s_min_pos.i4_ordinate = (i4_y_refmin16 >> 4) - i4_y_offset; |
345 | | |
346 | 13.8M | if((1.5 == d_spatial_res_ratio) && |
347 | 2.36M | ((0 == ps_mb_pos->i4_abscissa) || (0 == ps_mb_pos->i4_ordinate))) |
348 | 125k | { |
349 | 125k | WORD32 i4_min, i4_max, i4_xr_index, i4_yr_index, i4_ref_array_wd, i4_ref_array_ht; |
350 | | |
351 | 125k | i4_x_offset = i4_x_offset - 2; |
352 | 125k | i4_ref_array_wd = ((i4_x_refmax16 + 15) >> 4) - (i4_x_refmin16 >> 4) + 1 + 4; |
353 | | |
354 | 125k | i4_min = i4_x_offset; |
355 | 125k | i4_xr_index = i4_min - ((i4_min / (WORD32) ps_layer_props->u4_mb_wd) * |
356 | 125k | (WORD32) ps_layer_props->u4_mb_wd); |
357 | | |
358 | 125k | if(i4_xr_index < (WORD32) (ps_layer_props->u4_mb_wd >> 1)) |
359 | 88.5k | { |
360 | 88.5k | i4_ref_array_wd = i4_ref_array_wd + (ps_layer_props->u4_mb_wd >> 1); |
361 | 88.5k | i4_x_offset = i4_x_offset - ((WORD32) (ps_layer_props->u4_mb_wd >> 1)); |
362 | 88.5k | } |
363 | | |
364 | 125k | i4_max = ((i4_x_refmax16 + 15) >> 4) + 2; |
365 | 125k | i4_xr_index = i4_max - ((i4_max / (WORD32) ps_layer_props->u4_mb_wd) * |
366 | 125k | (WORD32) ps_layer_props->u4_mb_wd); |
367 | | |
368 | 125k | if(i4_xr_index >= (WORD32) (ps_layer_props->u4_mb_wd >> 1)) |
369 | 85.9k | { |
370 | 85.9k | i4_ref_array_wd = i4_ref_array_wd + (ps_layer_props->u4_mb_wd >> 1); |
371 | 85.9k | } |
372 | | |
373 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
374 | 125k | .s_ref_array_dims.i4_abscissa = i4_ref_array_wd; |
375 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
376 | 125k | .s_offsets.i4_abscissa = i4_x_offset; |
377 | | |
378 | 125k | i4_ref_array_ht = ((i4_y_refmax16 + 15) >> 4) - (i4_y_refmin16 >> 4) + 1 + 4; |
379 | | |
380 | 125k | i4_y_offset = (i4_y_refmin16 >> 4) - 2; |
381 | | |
382 | 125k | i4_min = i4_y_offset; |
383 | | |
384 | 125k | i4_yr_index = i4_min - ((i4_min / (WORD32) ps_layer_props->u4_mb_ht) * |
385 | 125k | (WORD32) ps_layer_props->u4_mb_ht); |
386 | | |
387 | 125k | if(i4_yr_index < (WORD32) (ps_layer_props->u4_mb_ht >> 1)) |
388 | 103k | { |
389 | 103k | i4_ref_array_ht = i4_ref_array_ht + (ps_layer_props->u4_mb_ht >> 1); |
390 | 103k | i4_y_offset = i4_y_offset - ((WORD32) (ps_layer_props->u4_mb_ht >> 1)); |
391 | 103k | } |
392 | | |
393 | 125k | i4_max = ((i4_y_refmax16 + 15) >> 4) + 2; |
394 | 125k | i4_yr_index = i4_max - ((i4_max / (WORD32) ps_layer_props->u4_mb_ht) * |
395 | 125k | (WORD32) ps_layer_props->u4_mb_ht); |
396 | | |
397 | 125k | if(i4_yr_index >= (WORD32) (ps_layer_props->u4_mb_ht >> 1)) |
398 | 100k | { |
399 | 100k | i4_ref_array_ht = i4_ref_array_ht + (ps_layer_props->u4_mb_ht >> 1); |
400 | 100k | } |
401 | | |
402 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
403 | 125k | .s_ref_array_dims.i4_ordinate = i4_ref_array_ht; |
404 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
405 | 125k | .s_offsets.i4_ordinate = i4_y_offset; |
406 | | |
407 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
408 | 125k | .s_max_pos.i4_abscissa = ((i4_x_refmax16 + 15) >> 4) - i4_x_offset; |
409 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
410 | 125k | .s_max_pos.i4_ordinate = ((i4_y_refmax16 + 15) >> 4) - i4_y_offset; |
411 | | |
412 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
413 | 125k | .s_min_pos.i4_abscissa = (i4_x_refmin16 >> 4) - i4_x_offset; |
414 | 125k | ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] |
415 | 125k | .s_min_pos.i4_ordinate = (i4_y_refmin16 >> 4) - i4_y_offset; |
416 | | |
417 | 125k | isvce_ref_array_pos_init( |
418 | 125k | ps_layer_props, |
419 | 125k | &ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs], |
420 | 125k | ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, |
421 | 125k | u1_ref_layer_frame_mbs_only_flag); |
422 | | |
423 | 125k | isvce_ref_array_phase_init( |
424 | 125k | ps_layer_props, |
425 | 125k | &ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs], |
426 | 125k | ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, |
427 | 125k | u1_ref_layer_frame_mbs_only_flag); |
428 | 125k | } |
429 | 13.7M | else if((2. == d_spatial_res_ratio) && |
430 | 11.4M | ((0 == ps_mb_pos->i4_abscissa) && (0 == ps_mb_pos->i4_ordinate) && b_is_chroma)) |
431 | 5.53k | { |
432 | 5.53k | isvce_ref_array_pos_init( |
433 | 5.53k | ps_layer_props, |
434 | 5.53k | &ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs], |
435 | 5.53k | ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, |
436 | 5.53k | u1_ref_layer_frame_mbs_only_flag); |
437 | | |
438 | 5.53k | isvce_ref_array_phase_init( |
439 | 5.53k | ps_layer_props, |
440 | 5.53k | &ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs], |
441 | 5.53k | ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, |
442 | 5.53k | u1_ref_layer_frame_mbs_only_flag); |
443 | 5.53k | } |
444 | 13.8M | } |
445 | | |
446 | | static void isvce_ibl_layer_state_init(intra_pred_layer_state_t *ps_layer_state, |
447 | | DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht, |
448 | | UWORD8 u1_level_idc, IV_COLOR_FORMAT_T e_color_format) |
449 | 6.87k | { |
450 | 6.87k | UWORD32 i, j, k; |
451 | | |
452 | 6.87k | const UWORD8 u1_ref_layer_field_pic_flag = 0; |
453 | 6.87k | const UWORD8 u1_field_pic_flag = 0; |
454 | 6.87k | const UWORD8 u1_frame_mbs_only_flag = 1; |
455 | 6.87k | const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1; |
456 | 6.87k | const UWORD8 u1_bot_field_flag = 0; |
457 | 6.87k | const WORD32 i4_scaled_ref_layer_left_offset = 0; |
458 | 6.87k | const WORD32 i4_scaled_ref_layer_top_offset = 0; |
459 | 6.87k | const WORD32 i4_ref_layer_chroma_phase_x_plus1 = 1; |
460 | 6.87k | const WORD32 i4_ref_layer_chroma_phase_y_plus1 = 1; |
461 | 6.87k | const WORD32 i4_chroma_phase_x_plus1 = 1; |
462 | 6.87k | const WORD32 i4_chroma_phase_y_plus1 = 1; |
463 | 6.87k | const WORD32 i4_sub_wd_chroma = 2; |
464 | 6.87k | const WORD32 i4_sub_ht_chroma = 2; |
465 | | |
466 | 6.87k | ASSERT((IV_YUV_420P == e_color_format) || (IV_YUV_420SP_UV == e_color_format)); |
467 | | |
468 | 6.87k | UNUSED(e_color_format); |
469 | | |
470 | 20.6k | for(i = 0; i < NUM_SP_COMPONENTS; i++) |
471 | 13.7k | { |
472 | 13.7k | intra_pred_mb_state_t *ps_mb_states; |
473 | 13.7k | layer_resampler_props_t *ps_layer_props; |
474 | | |
475 | 13.7k | UWORD32 u4_wd_in_mbs; |
476 | 13.7k | UWORD32 u4_ht_in_mbs; |
477 | | |
478 | 13.7k | UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i)); |
479 | 13.7k | UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio); |
480 | 13.7k | UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag); |
481 | 13.7k | UWORD32 u4_scaled_wd = u4_wd; |
482 | 13.7k | UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag); |
483 | | |
484 | 13.7k | ps_mb_states = |
485 | 13.7k | u1_is_chroma ? ps_layer_state->ps_chroma_mb_states : ps_layer_state->ps_luma_mb_states; |
486 | 13.7k | ps_layer_props = |
487 | 13.7k | u1_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props; |
488 | | |
489 | 13.7k | u4_ref_wd = u4_ref_wd >> u1_is_chroma; |
490 | 13.7k | u4_ref_ht = u4_ref_ht >> u1_is_chroma; |
491 | 13.7k | u4_scaled_wd = u4_scaled_wd >> u1_is_chroma; |
492 | 13.7k | u4_scaled_ht = u4_scaled_ht >> u1_is_chroma; |
493 | | |
494 | 13.7k | if(u1_is_chroma) |
495 | 6.87k | { |
496 | 6.87k | ps_layer_props->i4_refphase_x = i4_ref_layer_chroma_phase_x_plus1 - 1; |
497 | 6.87k | ps_layer_props->i4_refphase_y = i4_ref_layer_chroma_phase_y_plus1 - 1; |
498 | 6.87k | ps_layer_props->i4_phase_x = i4_chroma_phase_x_plus1 - 1; |
499 | 6.87k | ps_layer_props->i4_phase_y = i4_chroma_phase_y_plus1 - 1; |
500 | 6.87k | ps_layer_props->u4_sub_wd = i4_sub_wd_chroma; |
501 | 6.87k | ps_layer_props->u4_sub_ht = i4_sub_ht_chroma; |
502 | 6.87k | ps_layer_props->u4_mb_wd = MB_SIZE >> 1; |
503 | 6.87k | ps_layer_props->u4_mb_ht = MB_SIZE >> 1; |
504 | 6.87k | } |
505 | 6.87k | else |
506 | 6.87k | { |
507 | 6.87k | ps_layer_props->i4_refphase_x = 0; |
508 | 6.87k | ps_layer_props->i4_refphase_y = 0; |
509 | 6.87k | ps_layer_props->i4_phase_x = 0; |
510 | 6.87k | ps_layer_props->i4_phase_y = 0; |
511 | 6.87k | ps_layer_props->u4_sub_wd = 1; |
512 | 6.87k | ps_layer_props->u4_sub_ht = 1; |
513 | 6.87k | ps_layer_props->u4_mb_wd = MB_SIZE; |
514 | 6.87k | ps_layer_props->u4_mb_ht = MB_SIZE; |
515 | 6.87k | } |
516 | | |
517 | 13.7k | u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd; |
518 | 13.7k | u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht; |
519 | | |
520 | 13.7k | if(u1_level_idc <= 30) |
521 | 8.29k | { |
522 | 8.29k | ps_layer_props->u4_shift_x = 16; |
523 | 8.29k | ps_layer_props->u4_shift_y = 16; |
524 | 8.29k | } |
525 | 5.45k | else |
526 | 5.45k | { |
527 | 5.45k | ps_layer_props->u4_shift_x = 31 - isvcd_get_ceil_log2(u4_ref_wd); |
528 | 5.45k | ps_layer_props->u4_shift_y = 31 - isvcd_get_ceil_log2(u4_ref_ht); |
529 | 5.45k | } |
530 | | |
531 | 13.7k | if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) |
532 | 0 | { |
533 | 0 | if(1 == u1_ref_layer_frame_mbs_only_flag) |
534 | 0 | { |
535 | 0 | ps_layer_props->i4_phase_y = ps_layer_props->i4_phase_y + (4 * u1_bot_field_flag) + |
536 | 0 | 3 - ps_layer_props->u4_sub_ht; |
537 | 0 | ps_layer_props->i4_refphase_y = (2 * ps_layer_props->i4_refphase_y) + 2; |
538 | 0 | } |
539 | 0 | else |
540 | 0 | { |
541 | 0 | ps_layer_props->i4_phase_y = ps_layer_props->i4_phase_y + 4 * u1_bot_field_flag; |
542 | 0 | ps_layer_props->i4_refphase_y = |
543 | 0 | ps_layer_props->i4_refphase_y + (4 * u1_bot_field_flag); |
544 | 0 | } |
545 | 0 | } |
546 | | |
547 | 13.7k | ps_layer_props->u4_scale_x = |
548 | 13.7k | ((u4_ref_wd << ps_layer_props->u4_shift_x) + (u4_scaled_wd >> 1)) / (u4_scaled_wd); |
549 | 13.7k | ps_layer_props->u4_scale_y = |
550 | 13.7k | ((u4_ref_ht << ps_layer_props->u4_shift_y) + (u4_scaled_ht >> 1)) / (u4_scaled_ht); |
551 | | |
552 | 13.7k | ps_layer_props->i4_offset_x = i4_scaled_ref_layer_left_offset / ps_layer_props->u4_sub_wd; |
553 | 13.7k | ps_layer_props->i4_add_x = |
554 | 13.7k | (((u4_ref_wd * (2 + ps_layer_props->i4_phase_x)) << (ps_layer_props->u4_shift_x - 2)) + |
555 | 13.7k | (u4_scaled_wd >> 1)) / |
556 | 13.7k | u4_scaled_wd + |
557 | 13.7k | (1 << (ps_layer_props->u4_shift_x - 5)); |
558 | 13.7k | ps_layer_props->i4_delta_x = 4 * (2 + ps_layer_props->i4_refphase_x); |
559 | | |
560 | 13.7k | if((1 == u1_frame_mbs_only_flag) && (1 == u1_ref_layer_frame_mbs_only_flag)) |
561 | 13.7k | { |
562 | 13.7k | ps_layer_props->i4_offset_y = |
563 | 13.7k | i4_scaled_ref_layer_top_offset / ps_layer_props->u4_sub_ht; |
564 | 13.7k | ps_layer_props->i4_add_y = (((u4_ref_ht * (2 + ps_layer_props->i4_phase_y)) |
565 | 13.7k | << (ps_layer_props->u4_shift_y - 2)) + |
566 | 13.7k | (u4_scaled_ht >> 1)) / |
567 | 13.7k | u4_scaled_ht + |
568 | 13.7k | (1 << (ps_layer_props->u4_shift_y - 5)); |
569 | 13.7k | ps_layer_props->i4_delta_y = 4 * (2 + ps_layer_props->i4_refphase_y); |
570 | 13.7k | } |
571 | 0 | else |
572 | 0 | { |
573 | 0 | ps_layer_props->i4_offset_y = |
574 | 0 | i4_scaled_ref_layer_top_offset / (2 * ps_layer_props->u4_sub_ht); |
575 | 0 | ps_layer_props->i4_add_y = (((u4_ref_ht * (2 + ps_layer_props->i4_phase_y)) |
576 | 0 | << (ps_layer_props->u4_shift_y - 3)) + |
577 | 0 | (u4_scaled_ht >> 1)) / |
578 | 0 | u4_scaled_ht + |
579 | 0 | (1 << (ps_layer_props->u4_shift_y - 5)); |
580 | 0 | ps_layer_props->i4_delta_y = 2 * (2 + ps_layer_props->i4_refphase_y); |
581 | 0 | } |
582 | | |
583 | 273k | for(j = 0; j < u4_ht_in_mbs; j++) |
584 | 259k | { |
585 | 14.0M | for(k = 0; k < u4_wd_in_mbs; k++) |
586 | 13.8M | { |
587 | 13.8M | coordinates_t s_mb_pos = {k, j}; |
588 | | |
589 | 13.8M | isvce_set_mb_states(ps_layer_props, ps_mb_states, &s_mb_pos, d_spatial_res_ratio, |
590 | 13.8M | u4_wd_in_mbs, u1_is_chroma); |
591 | 13.8M | } |
592 | 259k | } |
593 | 13.7k | } |
594 | 6.87k | } |
595 | | |
596 | | /** |
597 | | ******************************************************************************* |
598 | | * |
599 | | * @brief |
600 | | * Function to initialize svc ilp buffers |
601 | | * |
602 | | * @param[in] ps_codec |
603 | | * Pointer to codec context |
604 | | * |
605 | | * @param[in] ps_mem_rec |
606 | | * Pointer to memory allocated for input buffers |
607 | | * |
608 | | ******************************************************************************* |
609 | | */ |
610 | | void isvce_intra_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) |
611 | 5.21k | { |
612 | 5.21k | intra_pred_state_t *ps_intra_pred_state; |
613 | 5.21k | svc_intra_pred_ctxt_t *ps_intra_pred_ctxt; |
614 | 5.21k | intra_pred_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS]; |
615 | 5.21k | intra_pred_mb_state_t *aps_chroma_mb_states[MAX_NUM_SPATIAL_LAYERS]; |
616 | | |
617 | 5.21k | WORD32 i, j, k, l, m; |
618 | 5.21k | WORD8 *api4_mb_modes[MAX_NUM_SPATIAL_LAYERS]; |
619 | | |
620 | 5.21k | isvce_process_ctxt_t *ps_proc = ps_codec->as_process; |
621 | | |
622 | 5.21k | const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]); |
623 | 5.21k | DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; |
624 | 5.21k | UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; |
625 | 5.21k | UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; |
626 | 5.21k | UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; |
627 | 5.21k | UWORD8 *pu1_buf = ps_mem_rec->pv_base; |
628 | 5.21k | WORD64 i8_alloc_mem_size = isvce_get_svc_intra_pred_ctxt_size( |
629 | 5.21k | u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); |
630 | | |
631 | 5.21k | if(u1_num_spatial_layers > 1) |
632 | 4.32k | { |
633 | 38.9k | for(j = 0; j < i4_num_proc_ctxts; j++) |
634 | 34.5k | { |
635 | 34.5k | ps_proc = &ps_codec->as_process[j]; |
636 | 34.5k | ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt = (svc_intra_pred_ctxt_t *) pu1_buf; |
637 | 34.5k | pu1_buf += sizeof(svc_intra_pred_ctxt_t); |
638 | 34.5k | i8_alloc_mem_size -= sizeof(svc_intra_pred_ctxt_t); |
639 | | |
640 | 34.5k | ps_intra_pred_ctxt->s_intra_pred_constants.pv_state = pu1_buf; |
641 | 34.5k | ps_intra_pred_state = (intra_pred_state_t *) pu1_buf; |
642 | 34.5k | pu1_buf += sizeof(intra_pred_state_t); |
643 | 34.5k | i8_alloc_mem_size -= sizeof(intra_pred_state_t); |
644 | | |
645 | 34.5k | ps_intra_pred_state->ps_layer_state = (intra_pred_layer_state_t *) pu1_buf; |
646 | 34.5k | pu1_buf += u1_num_spatial_layers * sizeof(ps_intra_pred_state->ps_layer_state[0]); |
647 | 34.5k | i8_alloc_mem_size -= |
648 | 34.5k | u1_num_spatial_layers * sizeof(ps_intra_pred_state->ps_layer_state[0]); |
649 | | |
650 | 34.5k | ASSERT(i8_alloc_mem_size >= 0); |
651 | | |
652 | 124k | for(i = u1_num_spatial_layers - 1; i >= 0; i--) |
653 | 89.5k | { |
654 | 89.5k | intra_pred_layer_state_t *ps_layer_state = &ps_intra_pred_state->ps_layer_state[i]; |
655 | | |
656 | 89.5k | WORD32 i4_layer_luma_wd = |
657 | 89.5k | ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + |
658 | 89.5k | 0.99; |
659 | 89.5k | WORD32 i4_layer_luma_ht = |
660 | 89.5k | ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + |
661 | 89.5k | 0.99; |
662 | 89.5k | WORD32 i4_layer_wd_mbs = i4_layer_luma_wd / MB_SIZE; |
663 | 89.5k | WORD32 i4_layer_ht_mbs = i4_layer_luma_ht / MB_SIZE; |
664 | | /* Add PAD MBs on all directions */ |
665 | 89.5k | WORD32 i4_layer_luma_mbs = |
666 | 89.5k | ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); |
667 | 89.5k | WORD32 i4_num_mb_states = |
668 | 89.5k | isvce_get_num_mb_states(i4_layer_luma_wd, i4_layer_luma_ht); |
669 | | |
670 | 89.5k | if(0 == j) |
671 | 11.1k | { |
672 | 11.1k | UWORD32 au4_ref_xpos_array_size[NUM_SP_COMPONENTS]; |
673 | 11.1k | UWORD32 au4_ref_ypos_array_size[NUM_SP_COMPONENTS]; |
674 | 11.1k | UWORD32 au4_ref_phase_array_size[NUM_SP_COMPONENTS]; |
675 | | |
676 | 33.5k | for(k = 0; k < NUM_SP_COMPONENTS; k++) |
677 | 22.3k | { |
678 | 22.3k | bool b_is_chroma = ((COMPONENT_TYPE) k) != Y; |
679 | | |
680 | 22.3k | au4_ref_xpos_array_size[k] = MAX_REF_ARR_WD_HT; |
681 | 22.3k | au4_ref_ypos_array_size[k] = (i4_layer_ht_mbs >> b_is_chroma); |
682 | 22.3k | au4_ref_phase_array_size[k] = |
683 | 22.3k | isvce_get_phase_array_size(d_spatial_res_ratio, b_is_chroma); |
684 | 22.3k | } |
685 | | |
686 | 11.1k | ps_layer_state->ps_luma_mb_states = (intra_pred_mb_state_t *) pu1_buf; |
687 | 11.1k | aps_luma_mb_states[i] = ps_layer_state->ps_luma_mb_states; |
688 | 11.1k | pu1_buf += i4_num_mb_states * sizeof(ps_layer_state->ps_luma_mb_states[0]); |
689 | 11.1k | i8_alloc_mem_size -= |
690 | 11.1k | i4_num_mb_states * sizeof(ps_layer_state->ps_luma_mb_states[0]); |
691 | | |
692 | 11.1k | ps_layer_state->ps_chroma_mb_states = (intra_pred_mb_state_t *) pu1_buf; |
693 | 11.1k | aps_chroma_mb_states[i] = ps_layer_state->ps_chroma_mb_states; |
694 | 11.1k | pu1_buf += i4_num_mb_states * sizeof(ps_layer_state->ps_chroma_mb_states[0]); |
695 | 11.1k | i8_alloc_mem_size -= |
696 | 11.1k | i4_num_mb_states * sizeof(ps_layer_state->ps_chroma_mb_states[0]); |
697 | | |
698 | 11.1k | if(1.5 == d_spatial_res_ratio) |
699 | 2.68k | { |
700 | 8.05k | for(k = 0; k < NUM_SP_COMPONENTS; k++) |
701 | 5.37k | { |
702 | 5.37k | bool b_is_chroma = ((COMPONENT_TYPE) k) != Y; |
703 | | |
704 | 5.37k | WORD32 *pi4_ref_array_positions_x = (WORD32 *) pu1_buf; |
705 | 5.37k | WORD32 *pi4_ref_array_positions_y = |
706 | 5.37k | pi4_ref_array_positions_x + MAX_REF_ARR_WD_HT * i4_layer_wd_mbs; |
707 | 5.37k | coordinates_t *ps_ref_array_phases = |
708 | 5.37k | (coordinates_t *) (pi4_ref_array_positions_y + |
709 | 5.37k | (i4_layer_ht_mbs >> b_is_chroma) * |
710 | 5.37k | i4_layer_ht_mbs); |
711 | 5.37k | intra_pred_mb_state_t *ps_mb_state = |
712 | 5.37k | b_is_chroma ? ps_layer_state->ps_chroma_mb_states |
713 | 5.37k | : ps_layer_state->ps_luma_mb_states; |
714 | | |
715 | 87.6k | for(l = 0; l < i4_layer_ht_mbs; l++) |
716 | 82.2k | { |
717 | 3.49M | for(m = 0; m < i4_layer_wd_mbs; m++) |
718 | 3.41M | { |
719 | 3.41M | ps_mb_state[l * i4_layer_wd_mbs + m].pi4_ref_array_positions_x = |
720 | 3.41M | pi4_ref_array_positions_x + m * au4_ref_xpos_array_size[k]; |
721 | 3.41M | ps_mb_state[l * i4_layer_wd_mbs + m].pi4_ref_array_positions_y = |
722 | 3.41M | pi4_ref_array_positions_y + l * au4_ref_ypos_array_size[k]; |
723 | | |
724 | 3.41M | ps_mb_state[l * i4_layer_wd_mbs + m].ps_ref_array_phases = |
725 | 3.41M | ps_ref_array_phases; |
726 | 3.41M | } |
727 | 82.2k | } |
728 | | |
729 | 5.37k | pu1_buf += i4_layer_wd_mbs * au4_ref_xpos_array_size[k] * |
730 | 5.37k | sizeof(pi4_ref_array_positions_x[0]); |
731 | 5.37k | pu1_buf += i4_layer_ht_mbs * au4_ref_ypos_array_size[k] * |
732 | 5.37k | sizeof(pi4_ref_array_positions_y[0]); |
733 | 5.37k | pu1_buf += au4_ref_phase_array_size[k] * sizeof(ps_ref_array_phases[0]); |
734 | 5.37k | i8_alloc_mem_size -= i4_layer_wd_mbs * au4_ref_xpos_array_size[k] * |
735 | 5.37k | sizeof(pi4_ref_array_positions_x[0]); |
736 | 5.37k | i8_alloc_mem_size -= i4_layer_ht_mbs * au4_ref_ypos_array_size[k] * |
737 | 5.37k | sizeof(pi4_ref_array_positions_y[0]); |
738 | 5.37k | i8_alloc_mem_size -= |
739 | 5.37k | au4_ref_phase_array_size[k] * sizeof(ps_ref_array_phases[0]); |
740 | 5.37k | } |
741 | 2.68k | } |
742 | 8.51k | else |
743 | 8.51k | { |
744 | 8.51k | intra_pred_mb_state_t *ps_mb_state; |
745 | 8.51k | coordinates_t *ps_ref_array_phases; |
746 | | |
747 | 25.5k | for(k = 0; k < NUM_SP_COMPONENTS; k++) |
748 | 17.0k | { |
749 | 17.0k | bool b_is_chroma = ((COMPONENT_TYPE) k) != Y; |
750 | | |
751 | 17.0k | ps_mb_state = b_is_chroma ? ps_layer_state->ps_chroma_mb_states |
752 | 17.0k | : ps_layer_state->ps_luma_mb_states; |
753 | 17.0k | ps_ref_array_phases = b_is_chroma ? ((coordinates_t *) pu1_buf) : NULL; |
754 | | |
755 | 12.5M | for(l = 0; l < i4_num_mb_states; l++) |
756 | 12.5M | { |
757 | 12.5M | ps_mb_state[l].pi4_ref_array_positions_x = NULL; |
758 | 12.5M | ps_mb_state[l].pi4_ref_array_positions_y = NULL; |
759 | 12.5M | ps_mb_state[l].ps_ref_array_phases = ps_ref_array_phases; |
760 | 12.5M | } |
761 | 17.0k | } |
762 | | |
763 | 8.51k | pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); |
764 | 8.51k | i8_alloc_mem_size -= |
765 | 8.51k | au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); |
766 | 8.51k | } |
767 | | |
768 | 11.1k | ps_layer_state->i4_mb_mode_stride = (i4_layer_luma_wd / MB_SIZE) + 2; |
769 | 11.1k | ps_layer_state->pi1_mb_mode = (WORD8 *) pu1_buf; |
770 | 11.1k | ps_layer_state->pi1_mb_mode += ps_layer_state->i4_mb_mode_stride + 1; |
771 | 11.1k | api4_mb_modes[i] = ps_layer_state->pi1_mb_mode; |
772 | 11.1k | pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer_state->pi1_mb_mode[0]); |
773 | 11.1k | i8_alloc_mem_size -= |
774 | 11.1k | u1_num_spatial_layers * sizeof(ps_layer_state->pi1_mb_mode[0]); |
775 | 11.1k | memset(ps_layer_state->pi1_mb_mode, -1, i4_layer_luma_mbs); |
776 | | |
777 | 11.1k | if(i > 0) |
778 | 6.87k | { |
779 | | /* Asserts below verify that |
780 | | * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised |
781 | | */ |
782 | 6.87k | ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][i].u4_mb_wd == |
783 | 6.87k | MB_SIZE); |
784 | 6.87k | ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][i].u4_mb_wd == |
785 | 6.87k | (MB_SIZE / 2)); |
786 | | |
787 | 6.87k | ps_layer_state->ps_luma_props = |
788 | 6.87k | &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][i]; |
789 | 6.87k | ps_layer_state->ps_chroma_props = |
790 | 6.87k | &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][i]; |
791 | | |
792 | 6.87k | isvce_ibl_layer_state_init( |
793 | 6.87k | ps_layer_state, d_spatial_res_ratio, i4_layer_luma_wd, i4_layer_luma_ht, |
794 | 6.87k | ps_codec->s_cfg.u4_max_level, ps_codec->s_cfg.e_inp_color_fmt); |
795 | 6.87k | } |
796 | 4.32k | else |
797 | 4.32k | { |
798 | 4.32k | ps_layer_state->ps_luma_props = NULL; |
799 | 4.32k | ps_layer_state->ps_chroma_props = NULL; |
800 | 4.32k | } |
801 | 11.1k | } |
802 | 78.3k | else |
803 | 78.3k | { |
804 | 78.3k | ps_layer_state->ps_luma_mb_states = aps_luma_mb_states[i]; |
805 | 78.3k | ps_layer_state->ps_chroma_mb_states = aps_chroma_mb_states[i]; |
806 | | |
807 | 78.3k | ps_layer_state->i4_mb_mode_stride = (i4_layer_luma_wd / MB_SIZE) + 2; |
808 | 78.3k | ps_layer_state->pi1_mb_mode = api4_mb_modes[i]; |
809 | | |
810 | 78.3k | if(i > 0) |
811 | 48.1k | { |
812 | 48.1k | ps_layer_state->ps_luma_props = |
813 | 48.1k | &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][i]; |
814 | 48.1k | ps_layer_state->ps_chroma_props = |
815 | 48.1k | &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][i]; |
816 | 48.1k | } |
817 | 30.2k | else |
818 | 30.2k | { |
819 | 30.2k | ps_layer_state->ps_luma_props = NULL; |
820 | 30.2k | ps_layer_state->ps_chroma_props = NULL; |
821 | 30.2k | } |
822 | 78.3k | } |
823 | | |
824 | 89.5k | ps_layer_state->pu1_refarray_buffer = (UWORD8 *) pu1_buf; |
825 | 89.5k | memset(ps_layer_state->pu1_refarray_buffer, 0, TEMP_BUF_SIZE_LUMA * sizeof(UWORD8)); |
826 | 89.5k | pu1_buf += TEMP_BUF_SIZE_LUMA * sizeof(UWORD8); |
827 | 89.5k | i8_alloc_mem_size -= TEMP_BUF_SIZE_LUMA * sizeof(UWORD8); |
828 | | |
829 | 89.5k | ps_layer_state->pu1_refarray_cb = (UWORD8 *) pu1_buf; |
830 | 89.5k | memset(ps_layer_state->pu1_refarray_cb, 0, TEMP_BUF_SIZE_CB * sizeof(UWORD8)); |
831 | 89.5k | pu1_buf += TEMP_BUF_SIZE_CB * sizeof(UWORD8); |
832 | 89.5k | i8_alloc_mem_size -= TEMP_BUF_SIZE_CB * sizeof(UWORD8); |
833 | | |
834 | 89.5k | ps_layer_state->pu1_refarray_cr = (UWORD8 *) pu1_buf; |
835 | 89.5k | memset(ps_layer_state->pu1_refarray_cr, 0, TEMP_BUF_SIZE_CR * sizeof(UWORD8)); |
836 | 89.5k | pu1_buf += TEMP_BUF_SIZE_CR * sizeof(UWORD8); |
837 | 89.5k | i8_alloc_mem_size -= TEMP_BUF_SIZE_CR * sizeof(UWORD8); |
838 | | |
839 | 89.5k | ps_layer_state->pi4_temp_interpolation_buffer = (WORD32 *) pu1_buf; |
840 | 89.5k | pu1_buf += (TEMP_INTERPOLATION_BUF_SIZE * sizeof(WORD32)); |
841 | 89.5k | i8_alloc_mem_size -= (TEMP_INTERPOLATION_BUF_SIZE * sizeof(WORD32)); |
842 | | |
843 | 89.5k | ASSERT(i8_alloc_mem_size >= 0); |
844 | 89.5k | } |
845 | 34.5k | } |
846 | | |
847 | 38.9k | for(i = 0; i < i4_num_proc_ctxts; i++) |
848 | 34.5k | { |
849 | 34.5k | isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[i]; |
850 | 34.5k | svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt; |
851 | 34.5k | yuv_buf_props_t *ps_mb_intra_pred_buf = |
852 | 34.5k | &ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf; |
853 | | |
854 | 34.5k | ps_proc->ps_mb_pred_buf = ps_mb_intra_pred_buf; |
855 | | |
856 | 103k | for(j = 0; j < NUM_SP_COMPONENTS; j++) |
857 | 69.1k | { |
858 | 69.1k | buffer_container_t *ps_comp_buf = &ps_mb_intra_pred_buf->as_component_bufs[j]; |
859 | | |
860 | 69.1k | ps_comp_buf->pv_data = pu1_buf; |
861 | 69.1k | ps_comp_buf->i4_data_stride = MB_SIZE; |
862 | 69.1k | pu1_buf += MB_SIZE * MB_SIZE * sizeof(UWORD8); |
863 | 69.1k | i8_alloc_mem_size -= MB_SIZE * MB_SIZE * sizeof(WORD8); |
864 | | |
865 | 69.1k | ASSERT(i8_alloc_mem_size >= 0); |
866 | 69.1k | } |
867 | | |
868 | 34.5k | ps_mb_intra_pred_buf->as_component_bufs[V].pv_data = NULL; |
869 | 34.5k | ps_mb_intra_pred_buf->e_color_format = IV_YUV_420SP_UV; |
870 | 34.5k | ps_mb_intra_pred_buf->u1_bit_depth = 16; |
871 | 34.5k | ps_mb_intra_pred_buf->u4_width = MB_SIZE; |
872 | 34.5k | ps_mb_intra_pred_buf->u4_height = MB_SIZE; |
873 | 34.5k | } |
874 | 4.32k | } |
875 | 889 | else |
876 | 889 | { |
877 | 8.00k | for(i = 0; i < i4_num_proc_ctxts; i++) |
878 | 7.11k | { |
879 | 7.11k | isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[i]; |
880 | | |
881 | 7.11k | ps_proc->ps_intra_pred_ctxt = NULL; |
882 | 7.11k | } |
883 | 889 | } |
884 | 5.21k | } |
885 | | |
886 | | void isvce_intra_sampling_function_selector(intra_sampling_ctxt_t *ps_ctxt, |
887 | | DOUBLE d_spatial_res_ratio, IV_ARCH_T e_arch) |
888 | 11.4M | { |
889 | 11.4M | if(2. == d_spatial_res_ratio) |
890 | 11.3M | { |
891 | 11.3M | switch(e_arch) |
892 | 11.3M | { |
893 | 0 | #if defined(X86) |
894 | 3.24M | case ARCH_X86_SSE42: |
895 | 3.24M | { |
896 | 3.24M | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = |
897 | 3.24M | isvc_horz_interpol_chroma_dyadic_sse42; |
898 | 3.24M | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = |
899 | 3.24M | isvc_vert_interpol_chroma_dyadic_sse42; |
900 | 3.24M | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = |
901 | 3.24M | isvc_interpolate_base_luma_dyadic_sse42; |
902 | | |
903 | 3.24M | break; |
904 | 0 | } |
905 | | #elif defined(ARMV8) |
906 | | case ARCH_ARM_A53: |
907 | | case ARCH_ARM_A57: |
908 | | case ARCH_ARM_V8_NEON: |
909 | | { |
910 | | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = |
911 | | isvc_horz_interpol_chroma_dyadic_neon; |
912 | | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = |
913 | | isvc_vert_interpol_chroma_dyadic_neon; |
914 | | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = |
915 | | isvc_interpolate_base_luma_dyadic_neon; |
916 | | |
917 | | break; |
918 | | } |
919 | | #elif defined(ARM) && !defined(DISABLE_NEON) |
920 | | case ARCH_ARM_A9Q: |
921 | | case ARCH_ARM_A9A: |
922 | | case ARCH_ARM_A9: |
923 | | case ARCH_ARM_A7: |
924 | | case ARCH_ARM_A5: |
925 | | case ARCH_ARM_A15: |
926 | | { |
927 | | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = |
928 | | isvc_horz_interpol_chroma_dyadic_neon; |
929 | | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = |
930 | | isvc_vert_interpol_chroma_dyadic_neon; |
931 | | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = |
932 | | isvc_interpolate_base_luma_dyadic_neon; |
933 | | |
934 | | break; |
935 | | } |
936 | | #endif |
937 | 8.14M | default: |
938 | 8.14M | { |
939 | 8.14M | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = |
940 | 8.14M | isvc_horz_interpol_chroma_dyadic; |
941 | 8.14M | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = |
942 | 8.14M | isvc_vert_interpol_chroma_dyadic; |
943 | 8.14M | ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = |
944 | 8.14M | isvc_interpolate_base_luma_dyadic; |
945 | | |
946 | 8.14M | break; |
947 | 0 | } |
948 | 11.3M | } |
949 | 11.3M | } |
950 | 11.4M | } |
951 | | |
/**
 * Prepares the intra resampling contexts and buffer descriptors for the
 * current MB and hands them to the inter-layer intra resampler.
 *
 * For each of the NUM_SP_COMPONENTS components a local sampling context is
 * filled from the current layer's per-MB state, an output descriptor is pointed
 * at the process context's intra prediction buffer, and an input descriptor is
 * pointed into the reference layer's (layer id - 1) intra recon buffer.
 * 'isvc_intra_samp_mb_dyadic' is then called when the process context's
 * spatial resolution ratio is 2.0, and 'isvc_intra_samp_mb' otherwise.
 *
 * NOTE(review): the function indexes layer 'u1_spatial_layer_id - 1'
 * unconditionally; presumably it is only called for enhancement layers -
 * confirm against callers.
 *
 * @param[in] ps_proc  process context of the MB being evaluated
 */
static void isvce_get_mb_intra_pred(isvce_process_ctxt_t *ps_proc)
{
    mem_element_t s_ref_mb_mode;
    mem_element_t s_inp_luma;
    mem_element_t s_inp_chroma;
    mem_element_t s_out_luma;
    mem_element_t s_out_chroma;

    coordinates_t s_frame_dims;
    coordinates_t s_frame_dims_in_mbs;

    WORD32 i4_cur_stride;
    WORD32 i4_ref_stride;
    WORD32 i;

    intra_sampling_ctxt_t s_intra_samp_ctxt[NUM_SP_COMPONENTS];
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
    svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
    intra_pred_state_t *ps_intra_pred_state =
        (intra_pred_state_t *) (ps_intra_pred_ctxt->s_intra_pred_constants.pv_state);
    intra_pred_layer_state_t *ps_layer_state =
        &ps_intra_pred_state->ps_layer_state[ps_proc->u1_spatial_layer_id];
    /* State of the layer directly below the current one; supplies the MB mode map */
    intra_pred_layer_state_t *ps_ref_layer_state =
        &ps_intra_pred_state->ps_layer_state[ps_proc->u1_spatial_layer_id - 1];

    intra_pred_mb_state_t *ps_luma_mb_state;
    intra_pred_mb_state_t *ps_chroma_mb_state;

    coordinates_t *ps_mb_pos = &ps_intra_pred_ctxt->s_intra_pred_variables.s_mb_pos;
    svc_ilp_data_t *ps_svc_ilp_data = ps_intra_pred_ctxt->s_intra_pred_variables.ps_svc_ilp_data;

    /* Current layer frame size, in pixels and in MBs. */
    /* Only the MB width is used below (as the row stride of the MB state arrays) */
    s_frame_dims.i4_abscissa =
        ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id].u4_width;
    s_frame_dims.i4_ordinate =
        ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id].u4_height;
    s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
    s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;

    /* Per-MB state entry at raster index (mb_x + mb_y * frame width in MBs) */
    ps_luma_mb_state = ps_layer_state->ps_luma_mb_states + ps_mb_pos->i4_abscissa +
                       ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;
    ps_chroma_mb_state = ps_layer_state->ps_chroma_mb_states + ps_mb_pos->i4_abscissa +
                         ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;

    for(i = 0; i < NUM_SP_COMPONENTS; i++)
    {
        UWORD32 u4_ref_wd, u4_ref_ht;

        bool b_is_chroma = (Y != ((COMPONENT_TYPE) i));
        /* 'ps_buf' first refers to the output descriptor; it is re-pointed to the */
        /* input descriptor further down */
        mem_element_t *ps_buf = b_is_chroma ? &s_out_chroma : &s_out_luma;
        intra_pred_mb_state_t *ps_mb_state = b_is_chroma ? ps_chroma_mb_state : ps_luma_mb_state;
        layer_resampler_props_t *ps_layer_props =
            b_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props;

        s_intra_samp_ctxt[i].i4_res_lyr_id = ps_proc->u1_spatial_layer_id;

        s_intra_samp_ctxt[i].i4_refarray_stride = REF_ARRAY_WIDTH;
        s_intra_samp_ctxt[i].i4_ref_width =
            ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width;
        s_intra_samp_ctxt[i].i4_ref_height =
            ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_height;

        /* Must follow the i4_res_lyr_id assignment: the selector writes into */
        /* as_res_lyrs[i4_res_lyr_id] */
        isvce_intra_sampling_function_selector(&s_intra_samp_ctxt[i],
                                               ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio,
                                               ps_codec->s_cfg.e_arch);

        /* Scratch buffers owned by the layer state */
        s_intra_samp_ctxt[i].pu1_refarray_buffer = ps_layer_state->pu1_refarray_buffer;
        s_intra_samp_ctxt[i].pu1_refarray_cb = ps_layer_state->pu1_refarray_cb;
        s_intra_samp_ctxt[i].pu1_refarray_cr = ps_layer_state->pu1_refarray_cr;
        s_intra_samp_ctxt[i].pi4_temp_interpolation_buffer =
            ps_layer_state->pi4_temp_interpolation_buffer;

        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_mb_pos = ps_mb_pos;

        /* Phase is used only by chroma functions */
        /* The values are read from the chroma MB state for both components */
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_x_phase_0 =
            ps_chroma_mb_state->ps_ref_array_phases[0].i4_abscissa;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_x_phase_1 =
            ps_chroma_mb_state->ps_ref_array_phases[1].i4_abscissa;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_y_phase_0 =
            ps_chroma_mb_state->ps_ref_array_phases[0].i4_ordinate;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_y_phase_1 =
            ps_chroma_mb_state->ps_ref_array_phases[2].i4_ordinate;
        s_intra_samp_ctxt[i]
            .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id]
            .i1_constrained_intra_rsmpl_flag = 0;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_ref_width =
            ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_ref_height =
            ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_height;

        /* Copy the component's per-MB reference window and lookup tables */
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_x_min_pos =
            ps_mb_state->s_min_pos.i4_abscissa;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_x_max_pos =
            ps_mb_state->s_max_pos.i4_abscissa;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_y_min_pos =
            ps_mb_state->s_min_pos.i4_ordinate;
        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_y_max_pos =
            ps_mb_state->s_max_pos.i4_ordinate;

        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_phase =
            ps_mb_state->ps_ref_array_phases;

        s_intra_samp_ctxt[i]
            .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id]
            .pi4_ref_array_positions_x = ps_mb_state->pi4_ref_array_positions_x;
        s_intra_samp_ctxt[i]
            .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id]
            .pi4_ref_array_positions_y = ps_mb_state->pi4_ref_array_positions_y;

        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_offsets =
            &ps_mb_state->s_offsets;

        s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_ref_array_dims =
            &ps_mb_state->s_ref_array_dims;

        /* Output descriptor: this component's plane of the MB prediction buffer */
        i4_cur_stride =
            ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[i].i4_data_stride;
        ps_buf->pv_buffer =
            (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[i]
                            .pv_data);

        ps_buf->i4_element_size = 1;
        ps_buf->i4_num_element_stride = i4_cur_stride;

        /* From here on 'ps_buf' is the input (reference layer) descriptor */
        ps_buf = b_is_chroma ? &s_inp_chroma : &s_inp_luma;

        i4_ref_stride = ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1]
                            .as_component_bufs[i]
                            .i4_data_stride;

        u4_ref_wd = ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width;
        u4_ref_ht =
            ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_height;

        /* For chroma, filteringModeFlag=1 */
        /* If filteringModeFlag=1, interpolation requires samples at an offset of -1
         * along both directions */
        if(ps_proc->s_svc_params.d_spatial_res_ratio == 2.0)
        {
            WORD8 i1_x_odd, i1_y_odd;

            /* Start at the MB's reference offset; the horizontal offset is doubled */
            /* for chroma (presumably because U and V are interleaved - confirm) */
            ps_buf->pv_buffer =
                (UWORD8 *) ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1]
                    .as_component_bufs[i]
                    .pv_data +
                (ps_mb_state->s_offsets.i4_abscissa << b_is_chroma) +
                ps_mb_state->s_offsets.i4_ordinate * i4_ref_stride;

            /* Luma only: step back one sample and one row */
            if(!b_is_chroma)
            {
                ps_buf->pv_buffer = ((UWORD8 *) ps_buf->pv_buffer) + -1 + -1 * i4_ref_stride;
            }

            i1_x_odd = (ps_proc->i4_mb_x & 1);
            i1_y_odd = (ps_proc->i4_mb_y & 1);

            /* Odd MB column: move the pointer back by 8 bytes */
            if(i1_x_odd)
            {
                ps_buf->pv_buffer = (UWORD8 *) ps_buf->pv_buffer - 8;
            }
            /* Odd MB row: move the pointer back by 8 rows for luma, 4 for chroma */
            if(i1_y_odd)
            {
                ps_buf->pv_buffer =
                    (UWORD8 *) ps_buf->pv_buffer - ((8 >> b_is_chroma) * i4_ref_stride);
            }
        }
        else
        {
            /* Reference window extents: (max - min) plus 4 for luma or 2 for chroma, */
            /* floored at 0 by the comparisons below */
            WORD32 i4_horz_dim = 0;
            WORD32 i4_vert_dim = 0;
            WORD32 i4_dim =
                (WORD32) (ps_mb_state->s_max_pos.i4_abscissa - ps_mb_state->s_min_pos.i4_abscissa) +
                (4 >> b_is_chroma);

            if(i4_dim > i4_horz_dim)
            {
                i4_horz_dim = i4_dim;
            }

            i4_dim =
                (WORD32) (ps_mb_state->s_max_pos.i4_ordinate - ps_mb_state->s_min_pos.i4_ordinate) +
                (4 >> b_is_chroma);

            if(i4_dim > i4_vert_dim)
            {
                i4_vert_dim = i4_dim;
            }

            /* Build the horizontal and vertical segment lookups for this component */
            isvc_intra_resamp_generate_segment_lookup(
                &(s_intra_samp_ctxt[i]
                      .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id]
                      .as_seg_lookup_horz[0]),
                i4_horz_dim, ps_layer_props->u4_mb_wd, 3);

            isvc_intra_resamp_generate_segment_lookup(
                &(s_intra_samp_ctxt[i]
                      .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id]
                      .as_seg_lookup_vert[0]),
                i4_vert_dim, ps_layer_props->u4_mb_ht, 4);

            /* Input pointer: the MB's reference offsets, clipped to */
            /* [0, ref width - 1] and [0, ref height - 1] */
            ps_buf->pv_buffer =
                (UWORD8 *) ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1]
                    .as_component_bufs[i]
                    .pv_data +
                (CLIP3(0, (WORD32) u4_ref_wd - 1, ps_mb_state->s_offsets.i4_abscissa)
                 << b_is_chroma) +
                CLIP3(0, (WORD32) u4_ref_ht - 1, ps_mb_state->s_offsets.i4_ordinate) *
                    i4_ref_stride;
        }

        ps_buf->i4_element_size = 1;
        ps_buf->i4_num_element_stride = i4_ref_stride;
    }

    /* Reference layer MB mode map; row stride is (reference width >> 4) + 2 */
    s_ref_mb_mode.i4_element_size = 1;
    s_ref_mb_mode.i4_num_element_stride =
        (ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width >> 4) + 2;
    s_ref_mb_mode.pv_buffer = ps_ref_layer_state->pi1_mb_mode;

    /* The dyadic resampler is given only the Y context; the generic one gets Y and UV */
    if(ps_proc->s_svc_params.d_spatial_res_ratio == 2.0)
    {
        isvc_intra_samp_mb_dyadic(&s_intra_samp_ctxt[Y], &s_inp_luma, &s_inp_chroma, &s_ref_mb_mode,
                                  &s_out_luma, &s_out_chroma, ps_proc->i4_mb_x, ps_proc->i4_mb_y, 0,
                                  0);
    }
    else
    {
        isvc_intra_samp_mb(&s_intra_samp_ctxt[Y], &s_intra_samp_ctxt[UV], &s_inp_luma,
                           &s_inp_chroma, &s_ref_mb_mode, &s_out_luma, &s_out_chroma);
    }
}
1183 | | |
1184 | | static FORCEINLINE void isvce_get_sad(UWORD8 *pu1_src, UWORD8 *pu1_pred, UWORD32 src_strd, |
1185 | | UWORD32 pred_strd, WORD32 *pi4_distortion, UWORD32 u4_width, |
1186 | | UWORD32 u4_height) |
1187 | 5.74M | { |
1188 | 5.74M | UWORD32 i, j; |
1189 | 5.74M | *pi4_distortion = 0; |
1190 | 93.2M | for(i = 0; i < u4_width; i++) |
1191 | 87.4M | { |
1192 | 1.48G | for(j = 0; j < u4_height; j++) |
1193 | 1.39G | { |
1194 | 1.39G | *pi4_distortion += ABS(pu1_src[j] - pu1_pred[j]); |
1195 | 1.39G | } |
1196 | 87.4M | pu1_src += src_strd; |
1197 | 87.4M | pu1_pred += pred_strd; |
1198 | 87.4M | } |
1199 | 5.74M | } |
1200 | | |
1201 | | /** |
1202 | | ****************************************************************************** |
1203 | | * |
1204 | | * @brief |
1205 | | * evaluate IBL mode |
1206 | | * |
1207 | | * @par Description |
1208 | | * This function evaluates IBL mode for the macro-block |
1209 | | * |
1210 | | * @param[in] ps_proc_ctxt |
1211 | | * pointer to proc ctxt |
1212 | | * |
1213 | | * @return none |
1214 | | * |
1215 | | ****************************************************************************** |
1216 | | */ |
1217 | | void isvce_evaluate_IBL_mode(isvce_process_ctxt_t *ps_proc) |
1218 | 5.73M | { |
1219 | 5.73M | isvce_codec_t *ps_codec = ps_proc->ps_codec; |
1220 | 5.73M | svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt; |
1221 | | |
1222 | | /* SAD(distortion metric) of a block */ |
1223 | 5.73M | WORD32 i4_mb_distortion_least = INT_MAX; |
1224 | | |
1225 | | /* cost = distortion + lambda*rate */ |
1226 | 5.73M | WORD32 i4_mb_cost_least = INT_MAX; |
1227 | | |
1228 | 5.73M | WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[Y].i4_data_stride; |
1229 | | |
1230 | 5.73M | UWORD8 *pu1_mb_src = (UWORD8 *) (ps_proc->s_src_buf_props.as_component_bufs[Y].pv_data); |
1231 | | |
1232 | 5.73M | WORD32 u4_cur_stride = |
1233 | 5.73M | ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].i4_data_stride; |
1234 | | |
1235 | 5.73M | UWORD8 *pu1_mb_pred = |
1236 | 5.73M | (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y] |
1237 | 5.73M | .pv_data); |
1238 | | |
1239 | 5.73M | ps_intra_pred_ctxt->s_intra_pred_variables.ps_svc_ilp_data = &ps_codec->s_svc_ilp_data; |
1240 | 5.73M | ps_intra_pred_ctxt->s_intra_pred_variables.s_mb_pos.i4_abscissa = ps_proc->i4_mb_x; |
1241 | 5.73M | ps_intra_pred_ctxt->s_intra_pred_variables.s_mb_pos.i4_ordinate = ps_proc->i4_mb_y; |
1242 | 5.73M | ps_intra_pred_ctxt->s_intra_pred_variables.u1_spatial_layer_id = ps_proc->u1_spatial_layer_id; |
1243 | | |
1244 | 5.73M | isvce_get_mb_intra_pred(ps_proc); |
1245 | | |
1246 | | /* Luma cost */ |
1247 | 5.73M | isvce_get_sad(pu1_mb_src, pu1_mb_pred, i4_src_strd, u4_cur_stride, &i4_mb_distortion_least, |
1248 | 5.73M | ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.u4_width, |
1249 | 5.73M | ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.u4_height); |
1250 | | |
1251 | | /* cost = distortion + lambda*rate */ |
1252 | 5.73M | i4_mb_cost_least = i4_mb_distortion_least; |
1253 | | |
1254 | | /* update the type of the mb if necessary */ |
1255 | 5.73M | if(i4_mb_cost_least < ps_proc->i4_mb_cost) |
1256 | 5.73M | { |
1257 | 5.73M | ps_proc->i4_mb_cost = i4_mb_cost_least; |
1258 | 5.73M | ps_proc->i4_mb_distortion = i4_mb_distortion_least; |
1259 | 5.73M | ps_proc->ps_mb_info->i4_mb_distortion = i4_mb_distortion_least; |
1260 | 5.73M | ps_proc->ps_mb_info->u2_mb_type = BASE_MODE; |
1261 | 5.73M | ps_proc->ps_mb_info->u1_base_mode_flag = 1; |
1262 | 5.73M | ps_proc->ps_mb_info->u1_is_intra = 1; |
1263 | 5.73M | } |
1264 | 18.4E | else if(ps_proc->ps_mb_info->u2_mb_type != BASE_MODE) |
1265 | 0 | { |
1266 | 0 | ps_proc->ps_mb_info->u1_base_mode_flag = 0; |
1267 | 0 | } |
1268 | 5.73M | } |
1269 | | |
1270 | | void isvce_update_ibl_info(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt, UWORD8 u1_num_spatial_layers, |
1271 | | UWORD8 u1_spatial_layer_id, UWORD16 u2_mb_type, WORD32 i4_mb_x, |
1272 | | WORD32 i4_mb_y, WORD8 u1_base_mode_flag) |
1273 | 10.3M | { |
1274 | 10.3M | if(u1_num_spatial_layers > 1) |
1275 | 9.46M | { |
1276 | 9.46M | intra_pred_state_t *ps_intra_pred_state = |
1277 | 9.46M | (intra_pred_state_t *) (ps_intra_pred_ctxt->s_intra_pred_constants.pv_state); |
1278 | 9.46M | intra_pred_layer_state_t *ps_layer_state = |
1279 | 9.46M | &ps_intra_pred_state->ps_layer_state[u1_spatial_layer_id]; |
1280 | 9.46M | WORD8 i1_is_intra = (u2_mb_type == I4x4 || u2_mb_type == I16x16 || u2_mb_type == I8x8); |
1281 | | |
1282 | 9.46M | WORD8 *pi1_mb_mode = |
1283 | 9.46M | &ps_layer_state->pi1_mb_mode[i4_mb_x + (i4_mb_y * (ps_layer_state->i4_mb_mode_stride))]; |
1284 | | |
1285 | 9.46M | if(u1_base_mode_flag == 1) |
1286 | 3.07M | { |
1287 | 3.07M | *pi1_mb_mode = SVC_IBL_MB; |
1288 | 3.07M | } |
1289 | 6.39M | else |
1290 | 6.39M | { |
1291 | 6.39M | if(i1_is_intra) |
1292 | 5.42M | { |
1293 | 5.42M | *pi1_mb_mode = SVC_INTRA_MB; |
1294 | 5.42M | } |
1295 | 965k | else |
1296 | 965k | { |
1297 | 965k | *pi1_mb_mode = SVC_INTER_MB; |
1298 | 965k | } |
1299 | 6.39M | } |
1300 | 9.46M | } |
1301 | 10.3M | } |
1302 | | |
1303 | | void isvce_pad_mb_mode_buf(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt, UWORD8 u1_spatial_layer_id, |
1304 | | UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, |
1305 | | UWORD32 u4_ht) |
1306 | 65.4k | { |
1307 | 65.4k | if(u1_num_spatial_layers > 1) |
1308 | 58.2k | { |
1309 | 58.2k | intra_pred_state_t *ps_intra_pred_state = |
1310 | 58.2k | (intra_pred_state_t *) (ps_intra_pred_ctxt->s_intra_pred_constants.pv_state); |
1311 | 58.2k | intra_pred_layer_state_t *ps_layer_state = |
1312 | 58.2k | &ps_intra_pred_state->ps_layer_state[u1_spatial_layer_id]; |
1313 | | |
1314 | 58.2k | WORD32 i4_layer_luma_wd = |
1315 | 58.2k | ((DOUBLE) u4_wd / |
1316 | 58.2k | pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - u1_spatial_layer_id)) + |
1317 | 58.2k | 0.99; |
1318 | 58.2k | WORD32 i4_layer_luma_ht = |
1319 | 58.2k | ((DOUBLE) u4_ht / |
1320 | 58.2k | pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - u1_spatial_layer_id)) + |
1321 | 58.2k | 0.99; |
1322 | | |
1323 | 58.2k | WORD32 row, src_strd; |
1324 | 58.2k | WORD8 *pu1_src; |
1325 | | |
1326 | 58.2k | WORD8 *pi1_mb_mode = ps_layer_state->pi1_mb_mode; |
1327 | 58.2k | WORD32 i4_mb_mode_stride = ps_layer_state->i4_mb_mode_stride; |
1328 | | |
1329 | | /* Add PAD MBs on all directions */ |
1330 | 58.2k | i4_layer_luma_wd /= MB_SIZE; |
1331 | 58.2k | i4_layer_luma_ht /= MB_SIZE; |
1332 | | |
1333 | 58.2k | if(d_spatial_res_ratio == 2.0) |
1334 | 37.3k | { |
1335 | 37.3k | UWORD8 *pu1_mb_mode = (UWORD8 *) pi1_mb_mode; |
1336 | | /* Pad left */ |
1337 | 37.3k | ih264_pad_left_luma(pu1_mb_mode, i4_mb_mode_stride, i4_layer_luma_ht, 1); |
1338 | | |
1339 | | /* Pad right */ |
1340 | 37.3k | ih264_pad_right_luma(pu1_mb_mode + i4_layer_luma_wd, i4_mb_mode_stride, |
1341 | 37.3k | i4_layer_luma_ht, 1); |
1342 | | |
1343 | | /* Pad top */ |
1344 | 37.3k | ih264_pad_top(pu1_mb_mode - 1, i4_mb_mode_stride, i4_layer_luma_wd + 2, 1); |
1345 | | |
1346 | | /* Pad bottom */ |
1347 | 37.3k | ih264_pad_bottom(pu1_mb_mode + (i4_layer_luma_ht * i4_mb_mode_stride) - 1, |
1348 | 37.3k | i4_mb_mode_stride, i4_layer_luma_wd + 2, 1); |
1349 | 37.3k | } |
1350 | 20.9k | else |
1351 | 20.9k | { |
1352 | | /* Pad left */ |
1353 | 20.9k | pu1_src = pi1_mb_mode; |
1354 | 20.9k | src_strd = i4_mb_mode_stride; |
1355 | 129k | for(row = 0; row < i4_layer_luma_ht; row++) |
1356 | 109k | { |
1357 | 109k | memset(pu1_src - 1, -1, 1); |
1358 | 109k | pu1_src += src_strd; |
1359 | 109k | } |
1360 | | |
1361 | | /* Pad right */ |
1362 | 20.9k | pu1_src = pi1_mb_mode + i4_layer_luma_wd; |
1363 | 129k | for(row = 0; row < i4_layer_luma_ht; row++) |
1364 | 109k | { |
1365 | 109k | memset(pu1_src, -1, 1); |
1366 | 109k | pu1_src += src_strd; |
1367 | 109k | } |
1368 | | |
1369 | | /* Pad top */ |
1370 | 20.9k | pu1_src = pi1_mb_mode - 1; |
1371 | 20.9k | memset(pu1_src - src_strd, -1, i4_layer_luma_wd + 2); |
1372 | | |
1373 | | /* Pad bottom */ |
1374 | 20.9k | pu1_src = pi1_mb_mode + (i4_layer_luma_ht * i4_mb_mode_stride) - 1; |
1375 | 20.9k | memset(pu1_src, -1, i4_layer_luma_wd + 2); |
1376 | 20.9k | } |
1377 | 58.2k | } |
1378 | 65.4k | } |