/src/libjxl/third_party/highway/hwy/highway.h
Line | Count | Source |
1 | | // Copyright 2020 Google LLC |
2 | | // SPDX-License-Identifier: Apache-2.0 |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | // you may not use this file except in compliance with the License. |
6 | | // You may obtain a copy of the License at |
7 | | // |
8 | | // http://www.apache.org/licenses/LICENSE-2.0 |
9 | | // |
10 | | // Unless required by applicable law or agreed to in writing, software |
11 | | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | // See the License for the specific language governing permissions and |
14 | | // limitations under the License. |
15 | | |
16 | | // Main header required before using vector types. |
17 | | |
18 | | // IWYU pragma: begin_exports |
19 | | #include "hwy/base.h" |
20 | | #include "hwy/detect_compiler_arch.h" |
21 | | #include "hwy/detect_targets.h" |
22 | | #include "hwy/highway_export.h" |
23 | | #include "hwy/targets.h" |
24 | | // IWYU pragma: end_exports |
25 | | |
26 | | #if HWY_CXX_LANG < 201703L |
27 | | #define HWY_DISPATCH_MAP 1 |
28 | | #else |
29 | | #define HWY_DISPATCH_MAP 0 |
30 | | #endif |
31 | | |
32 | | // This include guard is checked by foreach_target, so avoid the usual _H_ |
33 | | // suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included |
34 | | // after/outside this include guard. |
35 | | #ifndef HWY_HIGHWAY_INCLUDED |
36 | | #define HWY_HIGHWAY_INCLUDED |
37 | | |
38 | | namespace hwy { |
39 | | |
40 | | //------------------------------------------------------------------------------ |
41 | | // Shorthand for tags (defined in shared-inl.h) used to select overloads. |
42 | | // Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over |
43 | | // HWY_CAPPED(T, N). |
44 | | |
45 | | // HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of |
46 | | // registers in the group, and is ignored on targets that do not support groups. |
47 | 11.1M | #define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T> |
48 | | #define HWY_FULL2(T, LMUL) \ |
49 | | hwy::HWY_NAMESPACE::ScalableTag<T, hwy::CeilLog2(HWY_MAX(0, LMUL))> |
50 | 11.1M | #define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3 |
51 | | // Workaround for MSVC grouping __VA_ARGS__ into a single argument |
52 | 11.1M | #define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren |
53 | | // Trailing comma avoids -pedantic false alarm |
54 | | #define HWY_CHOOSE_FULL(...) \ |
55 | 11.1M | HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, )) |
56 | 11.1M | #define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__) |
57 | | |
58 | | // Vector of up to MAX_N lanes. It's better to use full vectors where possible. |
59 | 53.4M | #define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N> |
60 | | |
61 | | //------------------------------------------------------------------------------ |
62 | | // Export user functions for static/dynamic dispatch |
63 | | |
64 | | // Evaluates to 0 inside a translation unit if it is generating anything but the |
65 | | // static target (the last one if multiple targets are enabled). Used to prevent |
66 | | // redefinitions of HWY_EXPORT. Unless foreach_target.h is included, we only |
67 | | // compile once anyway, so HWY_ONCE is 1 unless that header is/was included. |
68 | | #ifndef HWY_ONCE |
69 | | #define HWY_ONCE 1 |
70 | | #endif |
71 | | |
72 | | // HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for |
73 | | // HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is |
74 | | // defined), and can be used to deduce the return type of Choose*. |
75 | | #if HWY_STATIC_TARGET == HWY_SCALAR |
76 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME |
77 | | #elif HWY_STATIC_TARGET == HWY_EMU128 |
78 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_EMU128::FUNC_NAME |
79 | | #elif HWY_STATIC_TARGET == HWY_RVV |
80 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME |
81 | | #elif HWY_STATIC_TARGET == HWY_WASM_EMU256 |
82 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM_EMU256::FUNC_NAME |
83 | | #elif HWY_STATIC_TARGET == HWY_WASM |
84 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME |
85 | | #elif HWY_STATIC_TARGET == HWY_NEON_WITHOUT_AES |
86 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_WITHOUT_AES::FUNC_NAME |
87 | | #elif HWY_STATIC_TARGET == HWY_NEON |
88 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME |
89 | | #elif HWY_STATIC_TARGET == HWY_NEON_BF16 |
90 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_BF16::FUNC_NAME |
91 | | #elif HWY_STATIC_TARGET == HWY_SVE |
92 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME |
93 | | #elif HWY_STATIC_TARGET == HWY_SVE2 |
94 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME |
95 | | #elif HWY_STATIC_TARGET == HWY_SVE_256 |
96 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE_256::FUNC_NAME |
97 | | #elif HWY_STATIC_TARGET == HWY_SVE2_128 |
98 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2_128::FUNC_NAME |
99 | | #elif HWY_STATIC_TARGET == HWY_PPC8 |
100 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME |
101 | | #elif HWY_STATIC_TARGET == HWY_PPC9 |
102 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC9::FUNC_NAME |
103 | | #elif HWY_STATIC_TARGET == HWY_PPC10 |
104 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC10::FUNC_NAME |
105 | | #elif HWY_STATIC_TARGET == HWY_Z14 |
106 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_Z14::FUNC_NAME |
107 | | #elif HWY_STATIC_TARGET == HWY_Z15 |
108 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_Z15::FUNC_NAME |
109 | | #elif HWY_STATIC_TARGET == HWY_SSE2 |
110 | 94.0k | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE2::FUNC_NAME |
111 | | #elif HWY_STATIC_TARGET == HWY_SSSE3 |
112 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME |
113 | | #elif HWY_STATIC_TARGET == HWY_SSE4 |
114 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME |
115 | | #elif HWY_STATIC_TARGET == HWY_AVX2 |
116 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME |
117 | | #elif HWY_STATIC_TARGET == HWY_AVX3 |
118 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME |
119 | | #elif HWY_STATIC_TARGET == HWY_AVX3_DL |
120 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME |
121 | | #elif HWY_STATIC_TARGET == HWY_AVX3_ZEN4 |
122 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_ZEN4::FUNC_NAME |
123 | | #elif HWY_STATIC_TARGET == HWY_AVX3_SPR |
124 | | #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_SPR::FUNC_NAME |
125 | | #endif |
126 | | |
127 | | // HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or |
128 | | // nullptr if that target was not compiled. |
129 | | #if HWY_TARGETS & HWY_EMU128 |
130 | | #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME |
131 | | #elif HWY_TARGETS & HWY_SCALAR |
132 | | #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME |
133 | | #else |
134 | | // When HWY_SCALAR/HWY_EMU128 are not present and other targets were disabled at |
135 | | // runtime, fall back to the baseline with HWY_STATIC_DISPATCH(). |
136 | | #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME) |
137 | | #endif |
138 | | |
139 | | #if HWY_TARGETS & HWY_WASM_EMU256 |
140 | | #define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) &N_WASM_EMU256::FUNC_NAME |
141 | | #else |
142 | | #define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) nullptr |
143 | | #endif |
144 | | |
145 | | #if HWY_TARGETS & HWY_WASM |
146 | | #define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME |
147 | | #else |
148 | | #define HWY_CHOOSE_WASM(FUNC_NAME) nullptr |
149 | | #endif |
150 | | |
151 | | #if HWY_TARGETS & HWY_RVV |
152 | | #define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME |
153 | | #else |
154 | | #define HWY_CHOOSE_RVV(FUNC_NAME) nullptr |
155 | | #endif |
156 | | |
157 | | #if HWY_TARGETS & HWY_NEON_WITHOUT_AES |
158 | | #define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) &N_NEON_WITHOUT_AES::FUNC_NAME |
159 | | #else |
160 | | #define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) nullptr |
161 | | #endif |
162 | | |
163 | | #if HWY_TARGETS & HWY_NEON |
164 | | #define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME |
165 | | #else |
166 | | #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr |
167 | | #endif |
168 | | |
169 | | #if HWY_TARGETS & HWY_NEON_BF16 |
170 | | #define HWY_CHOOSE_NEON_BF16(FUNC_NAME) &N_NEON_BF16::FUNC_NAME |
171 | | #else |
172 | | #define HWY_CHOOSE_NEON_BF16(FUNC_NAME) nullptr |
173 | | #endif |
174 | | |
175 | | #if HWY_TARGETS & HWY_SVE |
176 | | #define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME |
177 | | #else |
178 | | #define HWY_CHOOSE_SVE(FUNC_NAME) nullptr |
179 | | #endif |
180 | | |
181 | | #if HWY_TARGETS & HWY_SVE2 |
182 | | #define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME |
183 | | #else |
184 | | #define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr |
185 | | #endif |
186 | | |
187 | | #if HWY_TARGETS & HWY_SVE_256 |
188 | | #define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME |
189 | | #else |
190 | | #define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr |
191 | | #endif |
192 | | |
193 | | #if HWY_TARGETS & HWY_SVE2_128 |
194 | | #define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME |
195 | | #else |
196 | | #define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr |
197 | | #endif |
198 | | |
199 | | #if HWY_TARGETS & HWY_PPC8 |
200 | | #define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME |
201 | | #else |
202 | | #define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr |
203 | | #endif |
204 | | |
205 | | #if HWY_TARGETS & HWY_PPC9 |
206 | | #define HWY_CHOOSE_PPC9(FUNC_NAME) &N_PPC9::FUNC_NAME |
207 | | #else |
208 | | #define HWY_CHOOSE_PPC9(FUNC_NAME) nullptr |
209 | | #endif |
210 | | |
211 | | #if HWY_TARGETS & HWY_PPC10 |
212 | | #define HWY_CHOOSE_PPC10(FUNC_NAME) &N_PPC10::FUNC_NAME |
213 | | #else |
214 | | #define HWY_CHOOSE_PPC10(FUNC_NAME) nullptr |
215 | | #endif |
216 | | |
217 | | #if HWY_TARGETS & HWY_Z14 |
218 | | #define HWY_CHOOSE_Z14(FUNC_NAME) &N_Z14::FUNC_NAME |
219 | | #else |
220 | | #define HWY_CHOOSE_Z14(FUNC_NAME) nullptr |
221 | | #endif |
222 | | |
223 | | #if HWY_TARGETS & HWY_Z15 |
224 | | #define HWY_CHOOSE_Z15(FUNC_NAME) &N_Z15::FUNC_NAME |
225 | | #else |
226 | | #define HWY_CHOOSE_Z15(FUNC_NAME) nullptr |
227 | | #endif |
228 | | |
229 | | #if HWY_TARGETS & HWY_SSE2 |
230 | | #define HWY_CHOOSE_SSE2(FUNC_NAME) &N_SSE2::FUNC_NAME |
231 | | #else |
232 | | #define HWY_CHOOSE_SSE2(FUNC_NAME) nullptr |
233 | | #endif |
234 | | |
235 | | #if HWY_TARGETS & HWY_SSSE3 |
236 | | #define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME |
237 | | #else |
238 | | #define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr |
239 | | #endif |
240 | | |
241 | | #if HWY_TARGETS & HWY_SSE4 |
242 | | #define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME |
243 | | #else |
244 | | #define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr |
245 | | #endif |
246 | | |
247 | | #if HWY_TARGETS & HWY_AVX2 |
248 | | #define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME |
249 | | #else |
250 | | #define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr |
251 | | #endif |
252 | | |
253 | | #if HWY_TARGETS & HWY_AVX3 |
254 | | #define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME |
255 | | #else |
256 | | #define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr |
257 | | #endif |
258 | | |
259 | | #if HWY_TARGETS & HWY_AVX3_DL |
260 | | #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME |
261 | | #else |
262 | | #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr |
263 | | #endif |
264 | | |
265 | | #if HWY_TARGETS & HWY_AVX3_ZEN4 |
266 | | #define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) &N_AVX3_ZEN4::FUNC_NAME |
267 | | #else |
268 | | #define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) nullptr |
269 | | #endif |
270 | | |
271 | | #if HWY_TARGETS & HWY_AVX3_SPR |
272 | | #define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) &N_AVX3_SPR::FUNC_NAME |
273 | | #else |
274 | | #define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) nullptr |
275 | | #endif |
276 | | |
277 | | // MSVC 2017 workaround (also enabled for HWY_COMPILER_GCC_ACTUAL < 700): the |
278 | | // non-type template parameter to ChooseAndCall apparently cannot be an array. |
279 | | // Use a function pointer instead, which has the disadvantage that we call the |
280 | | // static (not best) target on the first call to any HWY_DYNAMIC_DISPATCH. |
281 | | #if (HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915) || \ |
282 | | (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700) |
283 | | #define HWY_DISPATCH_WORKAROUND 1 |
284 | | #else |
285 | | #define HWY_DISPATCH_WORKAROUND 0 |
286 | | #endif |
287 | | |
288 | | #if HWY_DISPATCH_MAP |
289 | | struct AllExports { |  // Holder for dispatch-table pointers; only compiled when HWY_DISPATCH_MAP is nonzero.
290 | | template <class FuncPtr, class ExportsKey, uint64_t kHash> |
291 | | static const FuncPtr*& GetRefToExportsPtr() { |  // Returns a mutable reference to the pointer slot of this <FuncPtr, ExportsKey, kHash> instantiation.
292 | | static const FuncPtr* s_exports = nullptr; |  // Function-local static: one slot per instantiation, nullptr until assigned through the returned reference.
293 | | return s_exports; |
294 | | } |
295 | | }; |
296 | | #endif |
297 | | |
298 | | // Provides a static member function which is what is called during the first |
299 | | // HWY_DYNAMIC_DISPATCH, where GetIndex is still zero, and instantiations of |
300 | | // this function are the first entry in the tables created by HWY_EXPORT[_T]. |
301 | | template <typename RetType, typename... Args> |
302 | | struct FunctionCache { |
303 | | public: |
304 | | typedef RetType(FuncType)(Args...); |
305 | | using FuncPtr = FuncType*; |
306 | | |
307 | | // A template function that when instantiated has the same signature as the |
308 | | // function being called. This function initializes the bit array of targets |
309 | | // supported by the current CPU and then calls the appropriate entry within |
310 | | // the HWY_EXPORT table. Subsequent calls via HWY_DYNAMIC_DISPATCH to any |
311 | | // exported functions, even those defined by different translation units, |
312 | | // will dispatch directly to the best available target. |
313 | | #if HWY_DISPATCH_MAP |
314 | | template <class ExportsKey, uint64_t kHash> |  // HWY_DISPATCH_MAP variant: the table is found via AllExports, keyed by <FuncPtr, ExportsKey, kHash>.
315 | | static RetType ChooseAndCall(Args... args) { |
316 | | ChosenTarget& chosen_target = GetChosenTarget(); |
317 | | chosen_target.Update(SupportedTargets()); |  // Update the chosen target from SupportedTargets() before indexing the table.
318 | | |
319 | | const FuncPtr* table = AllExports::template GetRefToExportsPtr< |
320 | | FuncPtr, RemoveCvRef<ExportsKey>, kHash>(); |
321 | | HWY_ASSERT(table); |  // The assertion fails if the slot is still nullptr, i.e. no table was stored for this key.
322 | | |
323 | | return (table[chosen_target.GetIndex()])(args...); |  // Call the table entry selected by GetIndex() with the same arguments.
324 | | } |
325 | | |
326 | | #if !HWY_DISPATCH_WORKAROUND |
327 | | template <const FuncPtr* table> |  // Here the table is a compile-time template argument instead of an AllExports lookup.
328 | | static RetType TableChooseAndCall(Args... args) { |  // Only declared when HWY_DISPATCH_WORKAROUND is 0.
329 | | ChosenTarget& chosen_target = GetChosenTarget(); |
330 | | chosen_target.Update(SupportedTargets()); |  // Update the chosen target from SupportedTargets() before indexing the table.
331 | | return (table[chosen_target.GetIndex()])(args...); |  // Call the table entry selected by GetIndex() with the same arguments.
332 | | } |
333 | | #endif // !HWY_DISPATCH_WORKAROUND |
334 | | |
335 | | #else // !HWY_DISPATCH_MAP: zero-overhead, but requires C++17 |
336 | | template <const FuncPtr* table> |
337 | 2 | static RetType ChooseAndCall(Args... args) { |
338 | 2 | ChosenTarget& chosen_target = GetChosenTarget(); |
339 | 2 | chosen_target.Update(SupportedTargets()); |
340 | 2 | return (table[chosen_target.GetIndex()])(args...); |
341 | 2 | } Unexecuted instantiation: enc_cluster.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl9HistogramEEE13ChooseAndCallIXadsoKPFvS4_EL_ZNS1_L36HistogramEntropyHighwayDispatchTableEEEEEEvS4_ Unexecuted instantiation: enc_cluster.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNSt3__16vectorINS1_9HistogramENS3_9allocatorIS5_EEEEmPS8_PNS4_IjNS6_IjEEEEEE13ChooseAndCallIXadsoKPFS2_SA_mSB_SE_EL_ZNS1_L41FastClusterHistogramsHighwayDispatchTableEEEEEES2_SA_mSB_SE_ Unexecuted instantiation: enc_detect_dots.cc:_ZN3hwy13FunctionCacheIN3jxl8StatusOrINS1_5PlaneIfEEEEJRKNS1_6Image3IfEES9_PNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS5_S9_S9_SB_EL_ZNS1_L42SumOfSquareDifferencesHighwayDispatchTableEEEEEES5_S9_S9_SB_ Unexecuted instantiation: enc_convolve_separable5.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEERKNS1_5RectTImEERKNS1_17WeightsSeparable5EPNS1_10ThreadPoolEPS4_EE13ChooseAndCallIXadsoKPFS2_S6_SA_SD_SF_SG_EL_ZNS1_L30Separable5HighwayDispatchTableEEEEEES2_S6_SA_SD_SF_SG_ Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_13ColorEncodingEfPKNS1_5PlaneIfEEPNS1_10ThreadPoolEPNS1_6Image3IfEERK15JxlCmsInterfaceSE_EE13ChooseAndCallIXadsoKPFS2_S5_fS9_SB_SE_SH_SE_EL_ZNS1_L25ToXYBHighwayDispatchTableEEEEEES2_S5_fS9_SB_SE_SH_SE_ Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIvJPfS1_S1_PKfmEE13ChooseAndCallIXadsoKPFvS1_S1_S1_S3_mEL_ZN3jxlL37LinearRGBRowToXYBHighwayDispatchTableEEEEEEvS1_S1_S1_S3_m Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIvJfPfEE13ChooseAndCallIXadsoKPFvfS1_EL_ZN3jxlL39ComputePremulAbsorbHighwayDispatchTableEEEEEEvfS1_ Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEES6_S6_PS4_S7_S7_PNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S6_S6_S6_S7_S7_S7_S9_EL_ZNS1_L30RgbToYcbcrHighwayDispatchTableEEEEEES2_S6_S6_S6_S7_S7_S7_S9_ Unexecuted instantiation: 
butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_6Image3IfEERKNS1_17ButteraugliParamsEPS4_PNS1_8BlurTempESA_EE13ChooseAndCallIXadsoKPFS2_S6_S9_SA_SC_SA_EL_ZNS1_L38OpsinDynamicsImageHighwayDispatchTableEEEEEES2_S6_S9_SA_SC_SA_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJmmRKNS1_17ButteraugliParamsEPNS1_8BlurTempERKNS1_6Image3IfEERNS1_11PsychoImageEEE13ChooseAndCallIXadsoKPFS2_mmS5_S7_SB_SD_EL_ZNS1_L39SeparateFrequenciesHighwayDispatchTableEEEEEES2_mmS5_S7_SB_SD_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_11PsychoImageES5_mmRKNS1_17ButteraugliParamsEPNS1_8BlurTempEPNS1_5PlaneIfEESD_EE13ChooseAndCallIXadsoKPFS2_S5_S5_mmS8_SA_SD_SD_EL_ZNS1_L35MaskPsychoImageHighwayDispatchTableEEEEEES2_S5_S5_mmS8_SA_SD_SD_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEES6_dddPS4_S7_EE13ChooseAndCallIXadsoKPFS2_S6_S6_dddS7_S7_EL_ZNS1_L32MaltaDiffMapHighwayDispatchTableEEEEEES2_S6_S6_dddS7_S7_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEES6_dddPS4_S7_EE13ChooseAndCallIXadsoKPFS2_S6_S6_dddS7_S7_EL_ZNS1_L34MaltaDiffMapLFHighwayDispatchTableEEEEEES2_S6_S6_dddS7_S7_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5PlaneIfEES5_ffPS3_EE13ChooseAndCallIXadsoKPFvS5_S5_ffS6_EL_ZNS1_L36L2DiffAsymmetricHighwayDispatchTableEEEEEEvS5_S5_ffS6_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5PlaneIfEES5_fPS3_EE13ChooseAndCallIXadsoKPFvS5_S5_fS6_EL_ZNS1_L26L2DiffHighwayDispatchTableEEEEEEvS5_S5_fS6_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5PlaneIfEES5_fPS3_EE13ChooseAndCallIXadsoKPFvS5_S5_fS6_EL_ZNS1_L29SetL2DiffHighwayDispatchTableEEEEEEvS5_S5_fS6_ Unexecuted instantiation: 
butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEERKNS1_6Image3IfEESA_fPS4_EE13ChooseAndCallIXadsoKPFS2_S6_SA_SA_fSB_EL_ZNS1_L44CombineChannelsToDiffmapHighwayDispatchTableEEEEEES2_S6_SA_SA_fSB_ Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRNS1_6Image3IfEES5_RKNS1_17ButteraugliParamsERNS1_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS2_S5_S5_S8_SB_EL_ZNS1_L45ButteraugliDiffmapInPlaceHighwayDispatchTableEEEEEES2_S5_S5_S8_SB_ Unexecuted instantiation: enc_adaptive_quantization.cc:_ZN3hwy13FunctionCacheIN3jxl8StatusOrINS1_5PlaneIfEEEEJfRKNS1_6Image3IfEERKNS1_5RectTImEEfPNS1_10ThreadPoolEPS4_SG_EE13ChooseAndCallIXadsoKPFS5_fS9_SD_fSF_SG_SG_EL_ZNS1_L43AdaptiveQuantizationMapHighwayDispatchTableEEEEEES5_fS9_SD_fSF_SG_SG_ Unexecuted instantiation: enc_group.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJmPNS1_18PassesEncoderStateERKNS1_6Image3IfEERKNS1_5RectTImEEPS6_EE13ChooseAndCallIXadsoKPFS2_mS4_S8_SC_SD_EL_ZNS1_L39ComputeCoefficientsHighwayDispatchTableEEEEEES2_mS4_S8_SC_SD_ Unexecuted instantiation: enc_chroma_from_luma.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJP22JxlMemoryManagerStructmPNS1_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS2_S4_mS7_EL_ZNS1_L33InitDCStorageHighwayDispatchTableEEEEEES2_S4_mS7_ Unexecuted instantiation: enc_chroma_from_luma.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_6Image3IfEERKNS1_5RectTImEERKNS1_15DequantMatricesEPKNS1_15AcStrategyImageEPKNS1_5PlaneIiEEPKNS1_9QuantizerESA_bbPNSH_IaEESP_PNSH_IfEENS1_4SpanIfEEEE13ChooseAndCallIXadsoKPFS2_S6_SA_SD_SG_SK_SN_SA_bbSP_SP_SR_ST_EL_ZNS1_L31ComputeTileHighwayDispatchTableEEEEEES2_S6_SA_SD_SG_SK_SN_SA_bbSP_SP_SR_ST_ Unexecuted instantiation: enc_ac_strategy.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_14CompressParamsERKNS1_9ACSConfigERKNS1_5RectTImEERKNS1_19ColorCorrelationMapEPfPjPNS1_15AcStrategyImageEEE13ChooseAndCallIXadsoKPFS2_S5_S8_SC_SF_SG_SH_SJ_EL_ZNS1_L34ProcessRectACSHighwayDispatchTableEEEEEES2_S5_S8_SC_SF_SG_SH_SJ_ Unexecuted instantiation: 
enc_entropy_coder.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJPKjRKNS1_5RectTImEEPrPKiRKNS1_15AcStrategyImageERKNS1_22YCbCrChromaSubsamplingEPNS1_6Image3IiEEPNSt3__16vectorINS1_5TokenENSM_9allocatorISO_EEEERKNS1_5PlaneIhEERKNST_IiEERKNS1_11BlockCtxMapEEE13ChooseAndCallIXadsoKPFS2_S4_S8_SC_SF_SI_SL_SS_SW_SZ_S12_EL_ZNS1_L40TokenizeCoefficientsHighwayDispatchTableEEEEEES2_S4_S8_SC_SF_SI_SL_SS_SW_SZ_S12_ Unexecuted instantiation: jxl_cms.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJPvmPKfPfmEE13ChooseAndCallIXadsoKPFS2_S3_mS5_S6_mEL_ZNS1_12_GLOBAL__N_141DoColorSpaceTransformHighwayDispatchTableEEEEEES2_S3_mS5_S6_m Unexecuted instantiation: enc_ma.cc:_ZN3hwy13FunctionCacheIvJRN3jxl11TreeSamplesEfRKNSt3__16vectorINS1_21ModularMultiplierInfoENS4_9allocatorIS6_EEEENS4_5arrayINSC_IjLm2EEELm2EEEfPNS5_INS1_20PropertyDecisionNodeENS7_ISF_EEEEEE13ChooseAndCallIXadsoKPFvS3_fSB_SE_fSI_EL_ZNS1_L33FindBestSplitHighwayDispatchTableEEEEEEvS3_fSB_SE_fSI_ Unexecuted instantiation: compressed_dc.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJP22JxlMemoryManagerStructPKfPNS1_6Image3IfEEPNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S4_S6_S9_SB_EL_ZNS1_L39AdaptiveDCSmoothingHighwayDispatchTableEEEEEES2_S4_S6_S9_SB_ Unexecuted instantiation: compressed_dc.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5RectTImEEPNS1_6Image3IfEEPNS1_5PlaneIhEERKNS1_5ImageEPKffSG_RKNS1_22YCbCrChromaSubsamplingERKNS1_11BlockCtxMapEEE13ChooseAndCallIXadsoKPFvS5_S8_SB_SE_SG_fSG_SJ_SM_EL_ZNS1_L29DequantDCHighwayDispatchTableEEEEEEvS5_S8_SB_SE_SG_fSG_SJ_SM_ Unexecuted instantiation: convolve_symmetric5.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEERKNS1_5RectTImEERKNS1_17WeightsSymmetric5EPNS1_10ThreadPoolEPS4_SA_EE13ChooseAndCallIXadsoKPFS2_S6_SA_SD_SF_SG_SA_EL_ZNS1_L30Symmetric5HighwayDispatchTableEEEEEES2_S6_SA_SD_SF_SG_SA_ Unexecuted instantiation: dec_external_image.cc:_ZN3hwy13FunctionCacheIvJPKfPNS_9float16_tEmEE13ChooseAndCallIXadsoKPFvS2_S4_mEL_ZN3jxlL30FloatToF16HighwayDispatchTableEEEEEEvS2_S4_m Unexecuted 
instantiation: dec_external_image.cc:_ZN3hwy13FunctionCacheIvJPKfPjmfmEE13ChooseAndCallIXadsoKPFvS2_S3_mfmEL_ZN3jxlL30FloatToU32HighwayDispatchTableEEEEEEvS2_S3_mfm Unexecuted instantiation: dec_group.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_11FrameHeaderEPNS1_8GetBlockEPNS1_13GroupDecCacheEPNS1_18PassesDecoderStateEmmRNS1_19RenderPipelineInputEPNS1_4jpeg8JPEGDataENS1_8DrawModeEEE13ChooseAndCallIXadsoKPFS2_S5_S7_S9_SB_mmSD_SG_SH_EL_ZNS1_12_GLOBAL__N_135DecodeGroupImplHighwayDispatchTableEEEEEES2_S5_S7_S9_SB_mmSD_SG_SH_ Unexecuted instantiation: dec_modular.cc:_ZN3hwy13FunctionCacheIvJmPKifPfEE13ChooseAndCallIXadsoKPFvmS2_fS3_EL_ZN3jxlL36SingleFromSingleHighwayDispatchTableEEEEEEvmS2_fS3_ Unexecuted instantiation: dec_modular.cc:_ZN3hwy13FunctionCacheIvJmPKiS2_fPfEE13ChooseAndCallIXadsoKPFvmS2_S2_fS3_EL_ZN3jxlL31MultiplySumHighwayDispatchTableEEEEEEvmS2_S2_fS3_ Unexecuted instantiation: dec_modular.cc:_ZN3hwy13FunctionCacheIvJmPKifPfS3_S3_EE13ChooseAndCallIXadsoKPFvmS2_fS3_S3_S3_EL_ZN3jxlL33RgbFromSingleHighwayDispatchTableEEEEEEvmS2_fS3_S3_S3_ Unexecuted instantiation: dec_noise.cc:_ZN3hwy13FunctionCacheIvJmmmmRKNSt3__14pairIPN3jxl5PlaneIfEENS3_5RectTImEEEESB_SB_EE13ChooseAndCallIXadsoKPFvmmmmSB_SB_SB_EL_ZNS3_12_GLOBAL__N_133Random3PlanesHighwayDispatchTableEEEEEEvmmmmSB_SB_SB_ Unexecuted instantiation: dec_xyb.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_6Image3IfEERKNS1_5RectTImEEPNS1_10ThreadPoolEPS4_RKNS1_11OpsinParamsEEE13ChooseAndCallIXadsoKPFS2_S6_SA_SC_SD_SG_EL_ZNS1_L33OpsinToLinearHighwayDispatchTableEEEEEES2_S6_SA_SC_SD_SG_ Unexecuted instantiation: squeeze.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRNS1_5ImageERKNSt3__16vectorINS1_13SqueezeParamsENS5_9allocatorIS7_EEEEPNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S4_SC_SE_EL_ZNS1_L30InvSqueezeHighwayDispatchTableEEEEEES2_S4_SC_SE_ Unexecuted instantiation: 
rct.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRNS1_5ImageEmmPNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S4_mmS6_EL_ZNS1_L26InvRCTHighwayDispatchTableEEEEEES2_S4_mmS6_ Unexecuted instantiation: quant_weights.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_13QuantEncodingEPfS6_mNS1_10QuantTableEPmEE13ChooseAndCallIXadsoKPFS2_S5_S6_S6_mS7_S8_EL_ZNS1_12_GLOBAL__N_137ComputeQuantTableHighwayDispatchTableEEEEEES2_S5_S6_S6_mS7_S8_ Unexecuted instantiation: stage_blending.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_11FrameHeaderEPKNS3_18PassesDecoderStateERKNS3_13ColorEncodingEEE13ChooseAndCallIXadsoKPFS7_SA_SD_SG_EL_ZNS3_L36GetBlendingStageHighwayDispatchTableEEEEEES7_SA_SD_SG_ Unexecuted instantiation: stage_chroma_upsampling.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJmbEE13ChooseAndCallIXadsoKPFS7_mbEL_ZNS3_L44GetChromaUpsamplingStageHighwayDispatchTableEEEEEES7_mb Unexecuted instantiation: stage_cms.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L31GetCmsStageHighwayDispatchTableEEEEEES7_SA_ Unexecuted instantiation: stage_epf.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterERKNS3_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS7_SA_SE_EL_ZNS3_L32GetEPFStage0HighwayDispatchTableEEEEEES7_SA_SE_ Unexecuted instantiation: stage_epf.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterERKNS3_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS7_SA_SE_EL_ZNS3_L32GetEPFStage1HighwayDispatchTableEEEEEES7_SA_SE_ Unexecuted instantiation: 
stage_epf.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterERKNS3_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS7_SA_SE_EL_ZNS3_L32GetEPFStage2HighwayDispatchTableEEEEEES7_SA_SE_ Unexecuted instantiation: stage_from_linear.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L38GetFromLinearStageHighwayDispatchTableEEEEEES7_SA_ Unexecuted instantiation: stage_gaborish.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L36GetGaborishStageHighwayDispatchTableEEEEEES7_SA_ Unexecuted instantiation: stage_noise.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_11NoiseParamsERKNS3_16ColorCorrelationEmEE13ChooseAndCallIXadsoKPFS7_SA_SD_mEL_ZNS3_L36GetAddNoiseStageHighwayDispatchTableEEEEEES7_SA_SD_m Unexecuted instantiation: stage_noise.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJmEE13ChooseAndCallIXadsoKPFS7_mEL_ZNS3_L41GetConvolveNoiseStageHighwayDispatchTableEEEEEES7_m Unexecuted instantiation: stage_splines.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJPKNS3_7SplinesEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L34GetSplineStageHighwayDispatchTableEEEEEES7_SA_ Unexecuted instantiation: stage_to_linear.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L36GetToLinearStageHighwayDispatchTableEEEEEES7_SA_ Unexecuted instantiation: 
stage_tone_mapping.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L39GetToneMappingStageHighwayDispatchTableEEEEEES7_SA_ Unexecuted instantiation: stage_upsampling.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_19CustomTransformDataEmmEE13ChooseAndCallIXadsoKPFS7_SA_mmEL_ZNS3_L38GetUpsamplingStageHighwayDispatchTableEEEEEES7_SA_mm Unexecuted instantiation: stage_write.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_11ImageOutputEmmbbmNS3_11OrientationERNS1_6vectorIS8_NS1_9allocatorIS8_EEEEP22JxlMemoryManagerStructEE13ChooseAndCallIXadsoKPFS7_SA_mmbbmSB_SG_SI_EL_ZNS3_L41GetWriteToOutputStageHighwayDispatchTableEEEEEES7_SA_mmbbmSB_SG_SI_ Unexecuted instantiation: stage_xyb.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L31GetXYBStageHighwayDispatchTableEEEEEES7_SA_ Unexecuted instantiation: stage_ycbcr.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJEE13ChooseAndCallIXadsoKPFS7_vEL_ZNS3_L33GetYCbCrStageHighwayDispatchTableEEEEEES7_v simd_util.cc:_ZN3hwy13FunctionCacheImJEE13ChooseAndCallIXadsoKPFmvEL_ZN3jxlL33MaxVectorSizeHighwayDispatchTableEEEEEEmv Line | Count | Source | 337 | 2 | static RetType ChooseAndCall(Args... args) { | 338 | 2 | ChosenTarget& chosen_target = GetChosenTarget(); | 339 | 2 | chosen_target.Update(SupportedTargets()); | 340 | 2 | return (table[chosen_target.GetIndex()])(args...); | 341 | 2 | } |
Unexecuted instantiation: splines.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl6SplineERKNSt3__16vectorINS5_4pairINS2_5PointEfEENS5_9allocatorIS9_EEEEfRNS6_INS1_13SplineSegmentENSA_ISF_EEEERNS6_INS7_ImmEENSA_ISJ_EEEEEE13ChooseAndCallIXadsoKPFvS4_SE_fSI_SM_EL_ZNS1_L38SegmentsFromPointsHighwayDispatchTableEEEEEEvS4_SE_fSI_SM_ Unexecuted instantiation: splines.cc:_ZN3hwy13FunctionCacheIvJPfS1_S1_mmmbPKN3jxl13SplineSegmentEPKmS7_EE13ChooseAndCallIXadsoKPFvS1_S1_S1_mmmbS5_S7_S7_EL_ZNS2_L32DrawSegmentsHighwayDispatchTableEEEEEEvS1_S1_S1_mmmbS5_S7_S7_ |
342 | | #endif // HWY_DISPATCH_MAP |
343 | | }; |
344 | | |
345 | | // Deduces RetType and Args from a function pointer (its value is unused) and returns the matching FunctionCache<RetType, Args...>. |
346 | | template <typename RetType, typename... Args> |
347 | | FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) { |
348 | | return FunctionCache<RetType, Args...>(); |
349 | | } |
350 | | |
351 | | #define HWY_DISPATCH_TABLE(FUNC_NAME) \ |
352 | 8.45M | HWY_CONCAT(FUNC_NAME, HighwayDispatchTable) |
353 | | |
354 | | // HWY_EXPORT(FUNC_NAME); expands to a static array that is used by |
355 | | // HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. |
356 | | // After being exported, it can be called from other parts of the same source |
357 | | // file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper |
358 | | // like in the following example: |
359 | | // |
360 | | // #include "hwy/highway.h" |
361 | | // HWY_BEFORE_NAMESPACE(); |
362 | | // namespace skeleton { |
363 | | // namespace HWY_NAMESPACE { |
364 | | // |
365 | | // void MyFunction(int a, char b, const char* c) { ... } |
366 | | // |
367 | | // // NOLINTNEXTLINE(google-readability-namespace-comments) |
368 | | // } // namespace HWY_NAMESPACE |
369 | | // } // namespace skeleton |
370 | | // HWY_AFTER_NAMESPACE(); |
371 | | // |
372 | | // namespace skeleton { |
373 | | // HWY_EXPORT(MyFunction); // Defines the dispatch table in this scope. |
374 | | // |
375 | | // void MyFunction(int a, char b, const char* c) { |
376 | | // return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c); |
377 | | // } |
378 | | // } // namespace skeleton |
379 | | // |
380 | | // For templated code with a single type parameter, instead use HWY_EXPORT_T and |
381 | | // its HWY_DYNAMIC_DISPATCH_T counterpart: |
382 | | // |
383 | | // template <typename T> |
384 | | // void MyFunctionCaller(T ...) { |
385 | | // // First argument to both HWY_EXPORT_T and HWY_DYNAMIC_DISPATCH_T is an |
386 | | // // arbitrary table name; you must provide the same name for each call. |
387 | | // // It is fine to have multiple HWY_EXPORT_T in a function, but a 64-bit |
388 | | // // FNV hash collision among *any* table names will trigger HWY_ABORT. |
389 | | // HWY_EXPORT_T(Table1, MyFunction<T>); |
390 | | // HWY_DYNAMIC_DISPATCH_T(Table1)(a, b, c); |
391 | | // } |
392 | | // |
393 | | // Note that HWY_EXPORT_T must be invoked inside a template (in the above |
394 | | // example: `MyFunctionCaller`), so that a separate table will be created for |
395 | | // each template instantiation. For convenience, we also provide a macro that |
396 | | // combines both steps and avoids the need to pick a table name: |
397 | | // |
398 | | // template <typename T> |
399 | | // void MyFunctionCaller(T ...) { |
400 | | // // Table name is automatically chosen. Note that this variant must be |
401 | | // // called in statement context; it is not a valid expression. |
402 | | // HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(MyFunction<T>)(a, b, c); |
403 | | // } |
404 | | |
405 | | // Simplified version for IDE or the dynamic dispatch case with only one target. |
406 | | #if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0) |
407 | | |
408 | | // We use a table to provide the same compile error conditions as with the |
409 | | // non-simplified case, but the table only has a single entry. |
410 | | #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \ |
411 | | HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \ |
412 | | HWY_DISPATCH_TABLE(TABLE_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)} |
413 | | |
414 | | // Use the table, not just STATIC_DISPATCH as in DYNAMIC_DISPATCH, because |
415 | | // TABLE_NAME might not match the function name. |
416 | | #define HWY_DYNAMIC_POINTER_T(TABLE_NAME) (HWY_DISPATCH_TABLE(TABLE_NAME)[0]) |
417 | | #define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) \ |
418 | | (*(HWY_DYNAMIC_POINTER_T(TABLE_NAME))) |
419 | | // Non-template variants: POINTER/DISPATCH bypass the table and name the static target directly. HWY_DYNAMIC_POINTER is parenthesized, like the full-table variant, so that HWY_DYNAMIC_POINTER(F)(args) calls through the pointer instead of applying & to the call result. |
420 | | #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME) |
421 | | #define HWY_DYNAMIC_POINTER(FUNC_NAME) (&HWY_STATIC_DISPATCH(FUNC_NAME)) |
422 | | #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME) |
423 | | |
424 | | #else // not simplified: full table |
425 | | |
426 | | // Pre-C++17 workaround: non-type template arguments must have linkage, which |
427 | | // means we cannot pass &table as a template argument to ChooseAndCall. |
428 | | // ChooseAndCall must find a way to access the table in order to dispatch to the |
429 | | // chosen target: |
430 | | // 0) Skipping this by dispatching to the static target would be surprising to |
431 | | // users and may have serious performance implications. |
432 | | // 1) An extra function parameter would be unacceptable because it changes the |
433 | | // user-visible function signature. |
434 | | // 2) Declaring a table, then defining a pointer to it would work, but requires |
435 | | // an additional DECLARE step outside the function so that the pointer has |
436 | | // linkage, which breaks existing code. |
437 | | // 3) We instead associate the function with the table using an instance of an |
438 | | // unnamed struct and the hash of the table name as the key. Because |
439 | | // ChooseAndCall has the type information, it can then cast to the function |
440 | | // pointer type. However, we cannot simply pass the name as a template |
441 | | // argument to ChooseAndCall because this requires char*, which hits the same |
442 | | // linkage problem. We instead hash the table name, which assumes that |
443 | | // distinct table names do not produce colliding hashes. |
444 | | #if HWY_DISPATCH_MAP |
445 | | /* Compile-time 64-bit hash of a NUL-terminated string. The empty string hashes to 0xcbf29ce484222325; otherwise the first byte (as uint8_t) is XORed with 0x100000001b3 times the hash of the rest, so the string is folded starting from its end. Kept as a single return expression, presumably so it remains a valid C++11 constexpr function. */ |
446 | | static constexpr uint64_t FNV(const char* name) { |
447 | | return *name ? static_cast<uint64_t>(static_cast<uint8_t>(*name)) ^ |
448 | | (0x100000001b3ULL * FNV(name + 1)) |
449 | | : 0xcbf29ce484222325ULL; |
450 | | } |
451 | | /* Registers a dispatch table at construction: obtains the table-pointer slot for (FuncPtr type, ExportsKey type, kHash) from AllExports::GetRefToExportsPtr, stores `table` in it, and calls HWY_ABORT if the slot already holds a different table. table_name is only used in the abort message; the ExportsKey argument contributes only its type. */ |
452 | | template <uint64_t kHash> |
453 | | struct AddExport { |
454 | | template <class ExportsKey, class FuncPtr> |
455 | | AddExport(ExportsKey /*exports_key*/, const char* table_name, |
456 | | const FuncPtr* table) { |
457 | | using FuncCache = decltype(DeduceFunctionCache(hwy::DeclVal<FuncPtr>())); |
458 | | static_assert( |
459 | | hwy::IsSame<RemoveCvRef<FuncPtr>, typename FuncCache::FuncPtr>(), |
460 | | "FuncPtr should be same type as FuncCache::FuncPtr"); |
461 | | /* exports_ptr is a reference, so the assignment below updates the slot itself rather than a local copy. */ |
462 | | const FuncPtr*& exports_ptr = AllExports::template GetRefToExportsPtr< |
463 | | RemoveCvRef<FuncPtr>, RemoveCvRef<ExportsKey>, kHash>(); |
464 | | if (exports_ptr && exports_ptr != table) { |
465 | | HWY_ABORT("Hash collision for %s, rename the function\n", table_name); |
466 | | } else { |
467 | | exports_ptr = table; |
468 | | } |
469 | | } |
470 | | }; |
471 | | |
472 | | // Dynamic dispatch (HWY_DISPATCH_MAP variant): defines a static table of function pointers for FUNC_NAME, an empty unnamed-struct object whose type is passed to ChooseAndCall and AddExport as a per-export key, and a static AddExport object that registers the table under hwy::FNV(#TABLE_NAME). This must be invoked |
473 | | // from inside the function template that calls the template we are exporting. |
474 | | // TABLE_NAME must match the one passed to HWY_DYNAMIC_DISPATCH_T. This |
475 | | // argument allows multiple exports within one function. The expansion does not end with a semicolon; the caller supplies it. |
476 | | #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \ |
477 | | static const struct { \ |
478 | | } HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey) = {}; \ |
479 | | static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \ |
480 | | TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \ |
481 | | /* The first entry in the table initializes the global cache and \ |
482 | | * calls the appropriate function. */ \ |
483 | | &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \ |
484 | | template ChooseAndCall<decltype(HWY_CONCAT( \ |
485 | | TABLE_NAME, HighwayDispatchExportsKey)), \ |
486 | | hwy::FNV(#TABLE_NAME)>, \ |
487 | | HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \ |
488 | | HWY_CHOOSE_FALLBACK(FUNC_NAME), \ |
489 | | }; \ |
490 | | HWY_MAYBE_UNUSED static hwy::AddExport<hwy::FNV(#TABLE_NAME)> HWY_CONCAT( \ |
491 | | HighwayAddTable, __LINE__)( \ |
492 | | HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey), #TABLE_NAME, \ |
493 | | HWY_DISPATCH_TABLE(TABLE_NAME)) |
494 | | |
495 | | // For non-template functions. Not necessarily invoked within a function, hence |
496 | | // we derive the string and variable names from FUNC_NAME, not HWY_FUNCTION. When HWY_DISPATCH_WORKAROUND is set this forwards to HWY_EXPORT_T; otherwise it defines the table directly, with TableChooseAndCall instantiated on the table itself as first entry and no AddExport registration. |
497 | | #if HWY_DISPATCH_WORKAROUND |
498 | | #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME) |
499 | | #else |
500 | | #define HWY_EXPORT(FUNC_NAME) \ |
501 | | static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \ |
502 | | FUNC_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \ |
503 | | /* The first entry in the table initializes the global cache and \ |
504 | | * calls the appropriate function. */ \ |
505 | | &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \ |
506 | | template TableChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \ |
507 | | HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \ |
508 | | HWY_CHOOSE_FALLBACK(FUNC_NAME), \ |
509 | | } |
510 | | #endif // HWY_DISPATCH_WORKAROUND |
511 | | |
512 | | #else // !HWY_DISPATCH_MAP |
513 | | |
514 | | // C++17 variant: the table itself is the non-type template argument of ChooseAndCall, so neither a name hash nor an AddExport registration is emitted. Zero-overhead, but requires C++17 for non-type template arguments without |
515 | | // linkage, because HWY_EXPORT_T tables are local static variables. |
516 | | #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \ |
517 | | static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \ |
518 | | TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \ |
519 | | /* The first entry in the table initializes the global cache and \ |
520 | | * calls the appropriate function. */ \ |
521 | | &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \ |
522 | | template ChooseAndCall<HWY_DISPATCH_TABLE(TABLE_NAME)>, \ |
523 | | HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \ |
524 | | HWY_CHOOSE_FALLBACK(FUNC_NAME), \ |
525 | | } |
526 | | /* Non-template functions reuse HWY_EXPORT_T with the function name as the table name. */ |
527 | | #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME) |
528 | | |
529 | | #endif // HWY_DISPATCH_MAP |
530 | | |
531 | | // HWY_DISPATCH_MAP only affects how tables are created, not their usage: the macros below are shared by both settings. |
532 | | |
533 | | // Evaluates to the function pointer for the chosen target, i.e. the entry of FUNC_NAME's dispatch table at index hwy::GetChosenTarget().GetIndex(). |
534 | | #define HWY_DYNAMIC_POINTER(FUNC_NAME) \ |
535 | 8.45M | (HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()]) |
536 | | |
537 | | // Calls the function pointer for the chosen target: dereferences HWY_DYNAMIC_POINTER so the result can be followed directly by an argument list. |
538 | 8.45M | #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) (*(HWY_DYNAMIC_POINTER(FUNC_NAME))) |
539 | | |
540 | | // Same as DISPATCH/POINTER, but provide a different arg name to clarify usage: the argument is the table name given to HWY_EXPORT_T. |
541 | | #define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) HWY_DYNAMIC_DISPATCH(TABLE_NAME) |
542 | | #define HWY_DYNAMIC_POINTER_T(TABLE_NAME) HWY_DYNAMIC_POINTER(TABLE_NAME) |
543 | | |
544 | | #endif // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0) |
545 | | |
546 | | // Returns the name of an anonymous dispatch table that is only shared with |
547 | | // macro invocations coming from the same source line (the name is HighwayDispatchTableT concatenated with __LINE__). |
548 | | #define HWY_DISPATCH_TABLE_T() HWY_CONCAT(HighwayDispatchTableT, __LINE__) |
549 | | |
550 | | // For templated code, combines export and dispatch using an anonymous table. Expands to an HWY_EXPORT_T declaration, a semicolon, and then the HWY_DYNAMIC_DISPATCH_T expression, so it is only usable in statement context and must be followed by the call's argument list. |
551 | | #define HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(FUNC_NAME) \ |
552 | | HWY_EXPORT_T(HWY_DISPATCH_TABLE_T(), FUNC_NAME); \ |
553 | | HWY_DYNAMIC_DISPATCH_T(HWY_DISPATCH_TABLE_T()) |
554 | | |
555 | | // DEPRECATED names; please use HWY_HAVE_* instead. Each alias simply expands to the HWY_HAVE_* macro with the same suffix. |
556 | | #define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64 |
557 | | #define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16 |
558 | | #define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64 |
559 | | |
560 | | } // namespace hwy |
561 | | |
562 | | #endif // HWY_HIGHWAY_INCLUDED |
563 | | |
564 | | //------------------------------------------------------------------------------ |
565 | | |
566 | | // NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want |
567 | | // to include them once per target, which is ensured by the toggle check. |
568 | | // Because ops/*.h are included under it, they do not need their own guard. |
569 | | #if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE) |
570 | | #ifdef HWY_HIGHWAY_PER_TARGET |
571 | | #undef HWY_HIGHWAY_PER_TARGET |
572 | | #else |
573 | | #define HWY_HIGHWAY_PER_TARGET |
574 | | #endif |
575 | | |
576 | | // These define ops inside namespace hwy::HWY_NAMESPACE. |
577 | | #if HWY_TARGET == HWY_SSE2 || HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4 |
578 | | #include "hwy/ops/x86_128-inl.h" |
579 | | #elif HWY_TARGET == HWY_AVX2 |
580 | | #include "hwy/ops/x86_256-inl.h" |
581 | | #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \ |
582 | | HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR |
583 | | #include "hwy/ops/x86_512-inl.h" |
584 | | #elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15 || \ |
585 | | (HWY_TARGET & HWY_ALL_PPC) |
586 | | #include "hwy/ops/ppc_vsx-inl.h" |
587 | | #elif HWY_TARGET & HWY_ALL_NEON |
588 | | #include "hwy/ops/arm_neon-inl.h" |
589 | | #elif HWY_TARGET & HWY_ALL_SVE |
590 | | #include "hwy/ops/arm_sve-inl.h" |
591 | | #elif HWY_TARGET == HWY_WASM_EMU256 |
592 | | #include "hwy/ops/wasm_256-inl.h" |
593 | | #elif HWY_TARGET == HWY_WASM |
594 | | #include "hwy/ops/wasm_128-inl.h" |
595 | | #elif HWY_TARGET == HWY_RVV |
596 | | #include "hwy/ops/rvv-inl.h" |
597 | | #elif HWY_TARGET == HWY_EMU128 |
598 | | #include "hwy/ops/emu128-inl.h" |
599 | | #elif HWY_TARGET == HWY_SCALAR |
600 | | #include "hwy/ops/scalar-inl.h" |
601 | | #else |
602 | | #pragma message("HWY_TARGET does not match any known target") |
603 | | #endif // HWY_TARGET |
604 | | |
605 | | #include "hwy/ops/generic_ops-inl.h" |
606 | | |
607 | | #endif // HWY_HIGHWAY_PER_TARGET |