/src/libhevc/encoder/ihevce_me_instr_set_router.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*! |
21 | | ****************************************************************************** |
22 | | * \file ihevce_me_instr_set_router.c |
23 | | * |
24 | | * \brief |
25 | | * This file contains function pointer initialization of me utility |
26 | | * functions |
27 | | * |
28 | | * \date |
29 | | * 15/07/2013 |
30 | | * |
31 | | * \author |
32 | | * Ittiam |
33 | | * |
34 | | * List of Functions |
35 | | * ihevce_me_instr_set_router() |
36 | | * |
37 | | ****************************************************************************** |
38 | | */ |
39 | | |
40 | | /*****************************************************************************/ |
41 | | /* File Includes */ |
42 | | /*****************************************************************************/ |
43 | | /* System include files */ |
44 | | #include <stdio.h> |
45 | | #include <string.h> |
46 | | #include <assert.h> |
47 | | |
48 | | /* User include files */ |
49 | | #include "ihevc_typedefs.h" |
50 | | #include "itt_video_api.h" |
51 | | #include "ihevc_chroma_itrans_recon.h" |
52 | | #include "ihevc_chroma_intra_pred.h" |
53 | | #include "ihevc_debug.h" |
54 | | #include "ihevc_deblk.h" |
55 | | #include "ihevc_defs.h" |
56 | | #include "ihevc_itrans_recon.h" |
57 | | #include "ihevc_intra_pred.h" |
58 | | #include "ihevc_inter_pred.h" |
59 | | #include "ihevc_macros.h" |
60 | | #include "ihevc_mem_fns.h" |
61 | | #include "ihevc_padding.h" |
62 | | #include "ihevc_quant_iquant_ssd.h" |
63 | | #include "ihevc_resi_trans.h" |
64 | | #include "ihevc_sao.h" |
65 | | #include "ihevc_structs.h" |
66 | | #include "ihevc_weighted_pred.h" |
67 | | #include "ihevc_platform_macros.h" |
68 | | |
69 | | #include "rc_cntrl_param.h" |
70 | | #include "rc_frame_info_collector.h" |
71 | | #include "rc_look_ahead_params.h" |
72 | | |
73 | | #include "ihevce_api.h" |
74 | | #include "ihevce_defs.h" |
75 | | #include "ihevce_lap_enc_structs.h" |
76 | | #include "ihevce_multi_thrd_structs.h" |
77 | | #include "ihevce_function_selector.h" |
78 | | #include "ihevce_me_common_defs.h" |
79 | | #include "ihevce_enc_structs.h" |
80 | | #include "ihevce_had_satd.h" |
81 | | #include "ihevce_cmn_utils_instr_set_router.h" |
82 | | |
83 | | #include "hme_datatype.h" |
84 | | #include "hme_common_defs.h" |
85 | | #include "hme_common_utils.h" |
86 | | #include "hme_interface.h" |
87 | | #include "hme_defs.h" |
88 | | #include "hme_err_compute.h" |
89 | | #include "hme_globals.h" |
90 | | |
91 | | #include "ihevce_me_instr_set_router.h" |
92 | | |
93 | | /*****************************************************************************/ |
94 | | /* Globals */ |
95 | | /*****************************************************************************/ |
96 | | static FT_SAD_EVALUATOR *gapf_sad_pt_npu[NUM_BLK_SIZES]; |
97 | | static FT_PART_SADS_EVALUATOR_16X16CU *gpf_part_sads_evaluator_16x16CU; |
98 | | static FT_PART_SADS_EVALUATOR *gpf_part_sads_evaluator_MxM; |
99 | | static FT_SAD_EVALUATOR *gpf_sad_grid_mxn; |
100 | | /* 9 => Number of function types */ |
101 | | /* 2 => Number of results to store */ |
102 | | static FT_CALC_SAD_AND_RESULT *gapf_calc_sad_and_result_fxn[9][2]; |
103 | | |
104 | | static U08 gau1_calc_sad_and_result[2][2][4][TOT_NUM_PARTS] = { |
105 | | //grid flag = 0 |
106 | | { //noise = 0 |
107 | | { //NxN or NxN & SMP |
108 | | { 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 }, |
109 | | //SMP only |
110 | | { 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, |
111 | | //AMP |
112 | | { 1, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, |
113 | | //2Nx2N only, i.e. num_parts = 1 |
114 | | { 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }, |
115 | | //noise = 1 |
116 | | { { 5, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, |
117 | | { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, |
118 | | { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, |
119 | | { 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } }, |
120 | | |
121 | | //grid flag = 1 |
122 | | { //noise = 0 |
123 | | { { 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 }, |
124 | | { 0, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, |
125 | | { 0, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, |
126 | | { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }, |
127 | | //noise = 1 |
128 | | { { 0, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, |
129 | | { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, |
130 | | { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, |
131 | | { 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } } |
132 | | }; |
133 | | |
134 | | /*****************************************************************************/ |
135 | | /* Function Definitions */ |
136 | | /*****************************************************************************/ |
137 | | /*! |
138 | | ****************************************************************************** |
139 | | * \if Function name : ihevce_me_instr_set_router \endif |
140 | | * |
141 | | * \brief |
142 | | * Function pointer initialization of me utils struct |
143 | | * |
144 | | ***************************************************************************** |
145 | | */ |
146 | | void ihevce_me_instr_set_router(ihevce_me_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch) |
147 | 0 | { |
148 | | // clang-format off |
149 | | #ifdef DISABLE_AVX2_INTR |
150 | | e_arch = (e_arch == ARCH_X86_AVX2) ? ARCH_X86_AVX : e_arch; |
151 | | #endif |
152 | |
|
153 | 0 | switch(e_arch) |
154 | 0 | { |
155 | | #ifdef ENABLE_NEON |
156 | | case ARCH_ARM_A9Q: |
157 | | case ARCH_ARM_V8_NEON: |
158 | | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; |
159 | | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; |
160 | | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; |
161 | | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit; |
162 | | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; |
163 | | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; |
164 | | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; |
165 | | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit; |
166 | | ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result; |
167 | | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result; |
168 | | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result; |
169 | | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result; |
170 | | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result; |
171 | | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result; |
172 | | ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts; |
173 | | ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts; |
174 | | ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result_neon; |
175 | | ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result_neon; |
176 | | ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result_neon; |
177 | | ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result_neon; |
178 | | ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result_neon; |
179 | | ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel; |
180 | | ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel_neon; |
181 | | ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel_neon; |
182 | | ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel_neon; |
183 | | ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel_neon; |
184 | | ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality_neon; |
185 | | ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed_neon; |
186 | | ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk_neon; |
187 | | ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN_neon; |
188 | | ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk_neon; |
189 | | ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
190 | | ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
191 | | ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
192 | | ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
193 | | ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
194 | | ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
195 | | ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
196 | | ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
197 | | ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; |
198 | | ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp_8x8_neon; |
199 | | ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp_ctb_neon; |
200 | | ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp; |
201 | | ps_func_list->pf_mv_clipper = hme_mv_clipper; |
202 | | ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt_neon; |
203 | | ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse_neon; |
204 | | ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse_neon; |
205 | | ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg_neon; |
206 | | ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality_neon; |
207 | | ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed_neon; |
208 | | break; |
209 | | #endif |
210 | 0 | default: |
211 | 0 | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; |
212 | 0 | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; |
213 | 0 | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; |
214 | 0 | ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit; |
215 | 0 | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; |
216 | 0 | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; |
217 | 0 | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; |
218 | 0 | ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit; |
219 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result; |
220 | 0 | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result; |
221 | 0 | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result; |
222 | 0 | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result; |
223 | 0 | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result; |
224 | 0 | ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result; |
225 | 0 | ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts; |
226 | 0 | ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts; |
227 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result; |
228 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result; |
229 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result; |
230 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result; |
231 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result; |
232 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel; |
233 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel; |
234 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel; |
235 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel; |
236 | 0 | ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel; |
237 | 0 | ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality; |
238 | 0 | ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed; |
239 | 0 | ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk; |
240 | 0 | ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN; |
241 | 0 | ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk; |
242 | 0 | ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit; |
243 | 0 | ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit; |
244 | 0 | ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit; |
245 | 0 | ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit; |
246 | 0 | ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit; |
247 | 0 | ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit; |
248 | 0 | ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit; |
249 | 0 | ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit; |
250 | 0 | ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit; |
251 | 0 | ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp; |
252 | 0 | ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp; |
253 | 0 | ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp; |
254 | 0 | ps_func_list->pf_mv_clipper = hme_mv_clipper; |
255 | 0 | ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt; |
256 | 0 | ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse; |
257 | 0 | ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse; |
258 | 0 | ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg; |
259 | 0 | ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality; |
260 | 0 | ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed; |
261 | 0 | break; |
262 | 0 | } |
263 | | |
264 | 0 | gapf_sad_pt_npu[BLK_4x4] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit; |
265 | 0 | gapf_sad_pt_npu[BLK_4x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit; |
266 | 0 | gapf_sad_pt_npu[BLK_8x4] = ps_func_list->pf_evalsad_pt_npu_8x4_8bit; |
267 | 0 | gapf_sad_pt_npu[BLK_8x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit; |
268 | 0 | gapf_sad_pt_npu[BLK_4x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit; |
269 | 0 | gapf_sad_pt_npu[BLK_8x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit; |
270 | 0 | gapf_sad_pt_npu[BLK_12x16] = ps_func_list->pf_evalsad_pt_npu_12x16_8bit; |
271 | 0 | gapf_sad_pt_npu[BLK_16x4] = ps_func_list->pf_evalsad_pt_npu_16x4_8bit; |
272 | 0 | gapf_sad_pt_npu[BLK_16x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
273 | 0 | gapf_sad_pt_npu[BLK_16x12] = ps_func_list->pf_evalsad_pt_npu_16x12_8bit; |
274 | 0 | gapf_sad_pt_npu[BLK_16x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
275 | 0 | gapf_sad_pt_npu[BLK_8x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit; |
276 | 0 | gapf_sad_pt_npu[BLK_16x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
277 | 0 | gapf_sad_pt_npu[BLK_24x32] = ps_func_list->pf_evalsad_pt_npu_24x32_8bit; |
278 | 0 | gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
279 | 0 | gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
280 | 0 | gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
281 | 0 | gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
282 | 0 | gapf_sad_pt_npu[BLK_32x24] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
283 | 0 | gapf_sad_pt_npu[BLK_32x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
284 | 0 | gapf_sad_pt_npu[BLK_16x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
285 | 0 | gapf_sad_pt_npu[BLK_32x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
286 | 0 | gapf_sad_pt_npu[BLK_48x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
287 | 0 | gapf_sad_pt_npu[BLK_64x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
288 | 0 | gapf_sad_pt_npu[BLK_64x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
289 | 0 | gapf_sad_pt_npu[BLK_64x48] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
290 | 0 | gapf_sad_pt_npu[BLK_64x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; |
291 | |
|
292 | 0 | gpf_part_sads_evaluator_16x16CU = ps_func_list->pf_compute_4x4_sads_for_16x16_blk; |
293 | 0 | gpf_part_sads_evaluator_MxM = ps_func_list->pf_evalsad_grid_pu_MxM; |
294 | |
|
295 | 0 | gpf_sad_grid_mxn = ps_func_list->pf_evalsad_grid_npu_MxN; |
296 | |
|
297 | 0 | gapf_calc_sad_and_result_fxn[0][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid; |
298 | 0 | gapf_calc_sad_and_result_fxn[1][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1; |
299 | 0 | gapf_calc_sad_and_result_fxn[2][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts; |
300 | 0 | gapf_calc_sad_and_result_fxn[3][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9; |
301 | 0 | gapf_calc_sad_and_result_fxn[4][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17; |
302 | 0 | gapf_calc_sad_and_result_fxn[5][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1; |
303 | 0 | gapf_calc_sad_and_result_fxn[6][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts; |
304 | 0 | gapf_calc_sad_and_result_fxn[7][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9; |
305 | 0 | gapf_calc_sad_and_result_fxn[8][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17; |
306 | 0 | gapf_calc_sad_and_result_fxn[0][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_1_for_grid; |
307 | 0 | gapf_calc_sad_and_result_fxn[1][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_eq_1; |
308 | 0 | gapf_calc_sad_and_result_fxn[2][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_square_parts; |
309 | 0 | gapf_calc_sad_and_result_fxn[3][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_9; |
310 | 0 | gapf_calc_sad_and_result_fxn[4][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_17; |
311 | 0 | gapf_calc_sad_and_result_fxn[5][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_eq_1; |
312 | 0 | gapf_calc_sad_and_result_fxn[6][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_square_parts; |
313 | 0 | gapf_calc_sad_and_result_fxn[7][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_9; |
314 | 0 | gapf_calc_sad_and_result_fxn[8][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_17; |
315 | 0 | } |
316 | | // clang-format on |
317 | | |
318 | | FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_fxn( |
319 | | S08 i1_grid_flag, U08 u1_is_cu_noisy, S32 i4_part_mask, S32 num_parts, S32 num_results) |
320 | 0 | { |
321 | 0 | U08 u1_index; |
322 | |
|
323 | 0 | ASSERT((1 == num_results) || (2 == num_results)); |
324 | | |
325 | 0 | u1_index = |
326 | 0 | gau1_calc_sad_and_result[i1_grid_flag][u1_is_cu_noisy] |
327 | 0 | [(!!(i4_part_mask & (ENABLE_SMP | ENABLE_NxN)) && |
328 | 0 | !(i4_part_mask & ENABLE_AMP)) |
329 | 0 | ? (!!(i4_part_mask & ENABLE_NxN) ? 0 : 1) |
330 | 0 | : (!!(i4_part_mask & ENABLE_AMP) ? 2 : 3)][num_parts - 1]; |
331 | |
|
332 | 0 | return gapf_calc_sad_and_result_fxn[u1_index][2 == num_results]; |
333 | 0 | } |
334 | | |
335 | | void hme_evalsad_grid_pu_MxM(err_prms_t *ps_prms) |
336 | 0 | { |
337 | 0 | grid_ctxt_t s_grid; |
338 | 0 | cand_t as_candt[9]; |
339 | |
|
340 | 0 | S32 *api4_sad_grid[TOT_NUM_PARTS]; |
341 | |
|
342 | 0 | hme_mv_t s_mv = { 0, 0 }; |
343 | |
|
344 | 0 | CU_SIZE_T e_cu_size = (CU_SIZE_T)(hme_get_range(ps_prms->i4_blk_wd) - 4); |
345 | |
|
346 | 0 | S32 i4_ref_idx = 0, i; |
347 | 0 | S32 num_candts = 0; |
348 | |
|
349 | 0 | s_grid.num_grids = 1; |
350 | 0 | s_grid.ref_buf_stride = ps_prms->i4_ref_stride; |
351 | 0 | s_grid.grd_sz_y_x = ((ps_prms->i4_step << 16) | ps_prms->i4_step); |
352 | 0 | s_grid.ppu1_ref_ptr = &ps_prms->pu1_ref; |
353 | 0 | s_grid.pi4_grd_mask = &ps_prms->i4_grid_mask; |
354 | 0 | s_grid.p_mv = &s_mv; |
355 | 0 | s_grid.p_ref_idx = &i4_ref_idx; |
356 | |
|
357 | 0 | for(i = 0; i < 9; i++) |
358 | 0 | { |
359 | 0 | if(s_grid.pi4_grd_mask[0] & (1 << i)) |
360 | 0 | { |
361 | 0 | num_candts++; |
362 | 0 | } |
363 | 0 | } |
364 | |
|
365 | 0 | for(i = 0; i < TOT_NUM_PARTS; i++) |
366 | 0 | { |
367 | 0 | api4_sad_grid[i] = &ps_prms->pi4_sad_grid[i * num_candts]; |
368 | 0 | } |
369 | |
|
370 | 0 | gpf_part_sads_evaluator_MxM( |
371 | 0 | &s_grid, |
372 | 0 | ps_prms->pu1_inp, |
373 | 0 | ps_prms->i4_inp_stride, |
374 | 0 | (WORD32 **)api4_sad_grid, |
375 | 0 | as_candt, |
376 | 0 | &num_candts, |
377 | 0 | e_cu_size); |
378 | 0 | } |
379 | | |
380 | | PF_SAD_FXN_T hme_get_sad_fxn(BLK_SIZE_T e_blk_size, S32 i4_grid_mask, S32 i4_part_mask) |
381 | 0 | { |
382 | 0 | S32 i4_grid_en = ((i4_grid_mask & 0x1fe) != 0); |
383 | |
|
384 | 0 | if(i4_grid_en) |
385 | 0 | { |
386 | 0 | if(i4_part_mask & (i4_part_mask - 1)) |
387 | 0 | { |
388 | 0 | if(BLK_16x16 == e_blk_size) |
389 | 0 | { |
390 | 0 | return hme_evalsad_grid_pu_16x16; |
391 | 0 | } |
392 | 0 | else |
393 | 0 | { |
394 | 0 | return hme_evalsad_grid_pu_MxM; |
395 | 0 | } |
396 | 0 | } |
397 | 0 | else |
398 | 0 | { |
399 | 0 | return gpf_sad_grid_mxn; |
400 | 0 | } |
401 | 0 | } |
402 | 0 | else |
403 | 0 | { |
404 | 0 | if(i4_part_mask & (i4_part_mask - 1)) |
405 | 0 | { |
406 | 0 | if(BLK_16x16 == e_blk_size) |
407 | 0 | { |
408 | 0 | return hme_evalsad_grid_pu_16x16; |
409 | 0 | } |
410 | 0 | else |
411 | 0 | { |
412 | 0 | return hme_evalsad_grid_pu_MxM; |
413 | 0 | } |
414 | 0 | } |
415 | 0 | else |
416 | 0 | { |
417 | 0 | return gapf_sad_pt_npu[e_blk_size]; |
418 | 0 | } |
419 | 0 | } |
420 | 0 | } |
421 | | |
422 | | void ihevce_sifter_sad_fxn_assigner(FT_SAD_EVALUATOR **ppf_evalsad_pt_npu_mxn, IV_ARCH_T e_arch) |
423 | 0 | { |
424 | 0 | switch(e_arch) |
425 | 0 | { |
426 | | #ifdef ENABLE_NEON |
427 | | case ARCH_ARM_A9Q: |
428 | | case ARCH_ARM_V8_NEON: |
429 | | ppf_evalsad_pt_npu_mxn[0] = hme_evalsad_pt_npu_MxN_8bit_neon; |
430 | | break; |
431 | | #endif |
432 | | |
433 | 0 | default: |
434 | 0 | ppf_evalsad_pt_npu_mxn[0] = hme_evalsad_pt_npu_MxN_8bit; |
435 | 0 | break; |
436 | 0 | } |
437 | 0 | } |