Coverage Report

Created: 2025-06-16 07:00

/src/libjxl/third_party/highway/hwy/highway.h
Line
Count
Source
1
// Copyright 2020 Google LLC
2
// SPDX-License-Identifier: Apache-2.0
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
//      http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
16
// Main header required before using vector types.
17
18
// IWYU pragma: begin_exports
19
#include "hwy/base.h"
20
#include "hwy/detect_compiler_arch.h"
21
#include "hwy/detect_targets.h"
22
#include "hwy/highway_export.h"
23
#include "hwy/targets.h"
24
// IWYU pragma: end_exports
25
26
#if HWY_CXX_LANG < 201703L
27
#define HWY_DISPATCH_MAP 1
28
#else
29
#define HWY_DISPATCH_MAP 0
30
#endif
31
32
// This include guard is checked by foreach_target, so avoid the usual _H_
33
// suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
34
// after/outside this include guard.
35
#ifndef HWY_HIGHWAY_INCLUDED
36
#define HWY_HIGHWAY_INCLUDED
37
38
namespace hwy {
39
40
//------------------------------------------------------------------------------
41
// Shorthand for tags (defined in shared-inl.h) used to select overloads.
42
// Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over
43
// HWY_CAPPED(T, N).
44
45
// HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of
46
// registers in the group, and is ignored on targets that do not support groups.
47
11.1M
#define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T>
48
#define HWY_FULL2(T, LMUL) \
49
  hwy::HWY_NAMESPACE::ScalableTag<T, hwy::CeilLog2(HWY_MAX(0, LMUL))>
50
11.1M
#define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
51
// Workaround for MSVC grouping __VA_ARGS__ into a single argument
52
11.1M
#define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
53
// Trailing comma avoids -pedantic false alarm
54
#define HWY_CHOOSE_FULL(...) \
55
11.1M
  HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
56
11.1M
#define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
57
58
// Vector of up to MAX_N lanes. It's better to use full vectors where possible.
59
53.4M
#define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>
60
61
//------------------------------------------------------------------------------
62
// Export user functions for static/dynamic dispatch
63
64
// Evaluates to 0 inside a translation unit if it is generating anything but the
65
// static target (the last one if multiple targets are enabled). Used to prevent
66
// redefinitions of HWY_EXPORT. Unless foreach_target.h is included, we only
67
// compile once anyway, so this is 1 unless it is or has been included.
68
#ifndef HWY_ONCE
69
#define HWY_ONCE 1
70
#endif
71
72
// HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for
73
// HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is
74
// defined), and can be used to deduce the return type of Choose*.
75
#if HWY_STATIC_TARGET == HWY_SCALAR
76
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
77
#elif HWY_STATIC_TARGET == HWY_EMU128
78
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_EMU128::FUNC_NAME
79
#elif HWY_STATIC_TARGET == HWY_RVV
80
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
81
#elif HWY_STATIC_TARGET == HWY_WASM_EMU256
82
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM_EMU256::FUNC_NAME
83
#elif HWY_STATIC_TARGET == HWY_WASM
84
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
85
#elif HWY_STATIC_TARGET == HWY_NEON_WITHOUT_AES
86
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_WITHOUT_AES::FUNC_NAME
87
#elif HWY_STATIC_TARGET == HWY_NEON
88
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
89
#elif HWY_STATIC_TARGET == HWY_NEON_BF16
90
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_BF16::FUNC_NAME
91
#elif HWY_STATIC_TARGET == HWY_SVE
92
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
93
#elif HWY_STATIC_TARGET == HWY_SVE2
94
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
95
#elif HWY_STATIC_TARGET == HWY_SVE_256
96
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE_256::FUNC_NAME
97
#elif HWY_STATIC_TARGET == HWY_SVE2_128
98
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2_128::FUNC_NAME
99
#elif HWY_STATIC_TARGET == HWY_PPC8
100
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
101
#elif HWY_STATIC_TARGET == HWY_PPC9
102
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC9::FUNC_NAME
103
#elif HWY_STATIC_TARGET == HWY_PPC10
104
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC10::FUNC_NAME
105
#elif HWY_STATIC_TARGET == HWY_Z14
106
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_Z14::FUNC_NAME
107
#elif HWY_STATIC_TARGET == HWY_Z15
108
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_Z15::FUNC_NAME
109
#elif HWY_STATIC_TARGET == HWY_SSE2
110
94.0k
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE2::FUNC_NAME
111
#elif HWY_STATIC_TARGET == HWY_SSSE3
112
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME
113
#elif HWY_STATIC_TARGET == HWY_SSE4
114
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
115
#elif HWY_STATIC_TARGET == HWY_AVX2
116
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
117
#elif HWY_STATIC_TARGET == HWY_AVX3
118
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
119
#elif HWY_STATIC_TARGET == HWY_AVX3_DL
120
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME
121
#elif HWY_STATIC_TARGET == HWY_AVX3_ZEN4
122
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_ZEN4::FUNC_NAME
123
#elif HWY_STATIC_TARGET == HWY_AVX3_SPR
124
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_SPR::FUNC_NAME
125
#endif
126
127
// HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or
128
// nullptr if that target was not compiled.
129
#if HWY_TARGETS & HWY_EMU128
130
#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME
131
#elif HWY_TARGETS & HWY_SCALAR
132
#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME
133
#else
134
// When HWY_SCALAR/HWY_EMU128 are not present and other targets were disabled at
135
// runtime, fall back to the baseline with HWY_STATIC_DISPATCH().
136
#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
137
#endif
138
139
#if HWY_TARGETS & HWY_WASM_EMU256
140
#define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) &N_WASM_EMU256::FUNC_NAME
141
#else
142
#define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) nullptr
143
#endif
144
145
#if HWY_TARGETS & HWY_WASM
146
#define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
147
#else
148
#define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
149
#endif
150
151
#if HWY_TARGETS & HWY_RVV
152
#define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
153
#else
154
#define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
155
#endif
156
157
#if HWY_TARGETS & HWY_NEON_WITHOUT_AES
158
#define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) &N_NEON_WITHOUT_AES::FUNC_NAME
159
#else
160
#define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) nullptr
161
#endif
162
163
#if HWY_TARGETS & HWY_NEON
164
#define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
165
#else
166
#define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
167
#endif
168
169
#if HWY_TARGETS & HWY_NEON_BF16
170
#define HWY_CHOOSE_NEON_BF16(FUNC_NAME) &N_NEON_BF16::FUNC_NAME
171
#else
172
#define HWY_CHOOSE_NEON_BF16(FUNC_NAME) nullptr
173
#endif
174
175
#if HWY_TARGETS & HWY_SVE
176
#define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
177
#else
178
#define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
179
#endif
180
181
#if HWY_TARGETS & HWY_SVE2
182
#define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
183
#else
184
#define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
185
#endif
186
187
#if HWY_TARGETS & HWY_SVE_256
188
#define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME
189
#else
190
#define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr
191
#endif
192
193
#if HWY_TARGETS & HWY_SVE2_128
194
#define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME
195
#else
196
#define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr
197
#endif
198
199
#if HWY_TARGETS & HWY_PPC8
200
#define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
201
#else
202
#define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
203
#endif
204
205
#if HWY_TARGETS & HWY_PPC9
206
#define HWY_CHOOSE_PPC9(FUNC_NAME) &N_PPC9::FUNC_NAME
207
#else
208
#define HWY_CHOOSE_PPC9(FUNC_NAME) nullptr
209
#endif
210
211
#if HWY_TARGETS & HWY_PPC10
212
#define HWY_CHOOSE_PPC10(FUNC_NAME) &N_PPC10::FUNC_NAME
213
#else
214
#define HWY_CHOOSE_PPC10(FUNC_NAME) nullptr
215
#endif
216
217
#if HWY_TARGETS & HWY_Z14
218
#define HWY_CHOOSE_Z14(FUNC_NAME) &N_Z14::FUNC_NAME
219
#else
220
#define HWY_CHOOSE_Z14(FUNC_NAME) nullptr
221
#endif
222
223
#if HWY_TARGETS & HWY_Z15
224
#define HWY_CHOOSE_Z15(FUNC_NAME) &N_Z15::FUNC_NAME
225
#else
226
#define HWY_CHOOSE_Z15(FUNC_NAME) nullptr
227
#endif
228
229
#if HWY_TARGETS & HWY_SSE2
230
#define HWY_CHOOSE_SSE2(FUNC_NAME) &N_SSE2::FUNC_NAME
231
#else
232
#define HWY_CHOOSE_SSE2(FUNC_NAME) nullptr
233
#endif
234
235
#if HWY_TARGETS & HWY_SSSE3
236
#define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
237
#else
238
#define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
239
#endif
240
241
#if HWY_TARGETS & HWY_SSE4
242
#define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
243
#else
244
#define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
245
#endif
246
247
#if HWY_TARGETS & HWY_AVX2
248
#define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
249
#else
250
#define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
251
#endif
252
253
#if HWY_TARGETS & HWY_AVX3
254
#define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
255
#else
256
#define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
257
#endif
258
259
#if HWY_TARGETS & HWY_AVX3_DL
260
#define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
261
#else
262
#define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
263
#endif
264
265
#if HWY_TARGETS & HWY_AVX3_ZEN4
266
#define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) &N_AVX3_ZEN4::FUNC_NAME
267
#else
268
#define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) nullptr
269
#endif
270
271
#if HWY_TARGETS & HWY_AVX3_SPR
272
#define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) &N_AVX3_SPR::FUNC_NAME
273
#else
274
#define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) nullptr
275
#endif
276
277
// MSVC 2017 workaround: the non-type template parameter to ChooseAndCall
278
// apparently cannot be an array. Use a function pointer instead, which has the
279
// disadvantage that we call the static (not best) target on the first call to
280
// any HWY_DYNAMIC_DISPATCH.
281
#if (HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915) || \
282
    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700)
283
#define HWY_DISPATCH_WORKAROUND 1
284
#else
285
#define HWY_DISPATCH_WORKAROUND 0
286
#endif
287
288
#if HWY_DISPATCH_MAP
289
// Holder for dispatch-table pointers used when HWY_DISPATCH_MAP is enabled,
// i.e. when the table address cannot be passed as a template argument.
struct AllExports {
290
  // Returns a mutable reference to a function-local static pointer that starts
  // out as nullptr. Every distinct (FuncPtr, ExportsKey, kHash) combination is
  // a separate instantiation and therefore owns a separate pointer.
  // FunctionCache::ChooseAndCall reads this pointer and asserts it is non-null.
  // NOTE(review): the code that stores a table address through this reference
  // is not visible here - presumably part of HWY_EXPORT_T; confirm.
  template <class FuncPtr, class ExportsKey, uint64_t kHash>
291
  static const FuncPtr*& GetRefToExportsPtr() {
292
    static const FuncPtr* s_exports = nullptr;
293
    return s_exports;
294
  }
295
};
296
#endif
297
298
// Provides a static member function which is what is called during the first
299
// HWY_DYNAMIC_DISPATCH, where GetIndex is still zero, and instantiations of
300
// this function are the first entry in the tables created by HWY_EXPORT[_T].
301
template <typename RetType, typename... Args>
302
struct FunctionCache {
303
 public:
304
  typedef RetType(FuncType)(Args...);
305
  using FuncPtr = FuncType*;
306
307
  // A template function that when instantiated has the same signature as the
308
  // function being called. This function initializes the bit array of targets
309
  // supported by the current CPU and then calls the appropriate entry within
310
  // the HWY_EXPORT table. Subsequent calls via HWY_DYNAMIC_DISPATCH to any
311
  // exported functions, even those defined by different translation units,
312
  // will dispatch directly to the best available target.
313
#if HWY_DISPATCH_MAP
314
  // HWY_DISPATCH_MAP variant: the table is not a template argument. It is
  // looked up through AllExports, keyed by ExportsKey (with cv/ref qualifiers
  // removed) and kHash. Returns whatever the selected table entry returns.
  template <class ExportsKey, uint64_t kHash>
315
  static RetType ChooseAndCall(Args... args) {
316
    // Hand the result of SupportedTargets() to the shared ChosenTarget so that
    // GetIndex() below reflects it.
    ChosenTarget& chosen_target = GetChosenTarget();
317
    chosen_target.Update(SupportedTargets());
318
319
    // Fetch the table pointer stored for this FuncPtr/key/hash combination.
    const FuncPtr* table = AllExports::template GetRefToExportsPtr<
320
        FuncPtr, RemoveCvRef<ExportsKey>, kHash>();
321
    // The pointer is nullptr until something stores a table address; the
    // assertion fails in that case.
    HWY_ASSERT(table);
322
323
    // Call the entry selected by the chosen target, passing args on as-is.
    return (table[chosen_target.GetIndex()])(args...);
324
  }
325
326
#if !HWY_DISPATCH_WORKAROUND
327
  // Alternative to ChooseAndCall that receives the table address as a non-type
  // template argument instead of looking it up via AllExports. Only declared
  // when HWY_DISPATCH_WORKAROUND is 0.
  template <const FuncPtr* table>
328
  static RetType TableChooseAndCall(Args... args) {
329
    // Hand the result of SupportedTargets() to the shared ChosenTarget.
    ChosenTarget& chosen_target = GetChosenTarget();
330
    chosen_target.Update(SupportedTargets());
331
    // Call the entry selected by the chosen target, passing args on as-is.
    return (table[chosen_target.GetIndex()])(args...);
332
  }
333
#endif  // !HWY_DISPATCH_WORKAROUND
334
335
#else   // !HWY_DISPATCH_MAP: zero-overhead, but requires C++17
336
  // Variant used when HWY_DISPATCH_MAP is 0: the dispatch table address is the
  // non-type template argument `table`, so no lookup is required. Returns
  // whatever the selected table entry returns.
  template <const FuncPtr* table>
337
2
  static RetType ChooseAndCall(Args... args) {
338
2
    // Hand the result of SupportedTargets() to the shared ChosenTarget so that
    // GetIndex() below reflects it.
    ChosenTarget& chosen_target = GetChosenTarget();
339
2
    chosen_target.Update(SupportedTargets());
340
2
    // Call the entry selected by the chosen target, passing args on as-is.
    return (table[chosen_target.GetIndex()])(args...);
341
2
  }
Unexecuted instantiation: enc_cluster.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl9HistogramEEE13ChooseAndCallIXadsoKPFvS4_EL_ZNS1_L36HistogramEntropyHighwayDispatchTableEEEEEEvS4_
Unexecuted instantiation: enc_cluster.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNSt3__16vectorINS1_9HistogramENS3_9allocatorIS5_EEEEmPS8_PNS4_IjNS6_IjEEEEEE13ChooseAndCallIXadsoKPFS2_SA_mSB_SE_EL_ZNS1_L41FastClusterHistogramsHighwayDispatchTableEEEEEES2_SA_mSB_SE_
Unexecuted instantiation: enc_detect_dots.cc:_ZN3hwy13FunctionCacheIN3jxl8StatusOrINS1_5PlaneIfEEEEJRKNS1_6Image3IfEES9_PNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS5_S9_S9_SB_EL_ZNS1_L42SumOfSquareDifferencesHighwayDispatchTableEEEEEES5_S9_S9_SB_
Unexecuted instantiation: enc_convolve_separable5.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEERKNS1_5RectTImEERKNS1_17WeightsSeparable5EPNS1_10ThreadPoolEPS4_EE13ChooseAndCallIXadsoKPFS2_S6_SA_SD_SF_SG_EL_ZNS1_L30Separable5HighwayDispatchTableEEEEEES2_S6_SA_SD_SF_SG_
Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_13ColorEncodingEfPKNS1_5PlaneIfEEPNS1_10ThreadPoolEPNS1_6Image3IfEERK15JxlCmsInterfaceSE_EE13ChooseAndCallIXadsoKPFS2_S5_fS9_SB_SE_SH_SE_EL_ZNS1_L25ToXYBHighwayDispatchTableEEEEEES2_S5_fS9_SB_SE_SH_SE_
Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIvJPfS1_S1_PKfmEE13ChooseAndCallIXadsoKPFvS1_S1_S1_S3_mEL_ZN3jxlL37LinearRGBRowToXYBHighwayDispatchTableEEEEEEvS1_S1_S1_S3_m
Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIvJfPfEE13ChooseAndCallIXadsoKPFvfS1_EL_ZN3jxlL39ComputePremulAbsorbHighwayDispatchTableEEEEEEvfS1_
Unexecuted instantiation: enc_xyb.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEES6_S6_PS4_S7_S7_PNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S6_S6_S6_S7_S7_S7_S9_EL_ZNS1_L30RgbToYcbcrHighwayDispatchTableEEEEEES2_S6_S6_S6_S7_S7_S7_S9_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_6Image3IfEERKNS1_17ButteraugliParamsEPS4_PNS1_8BlurTempESA_EE13ChooseAndCallIXadsoKPFS2_S6_S9_SA_SC_SA_EL_ZNS1_L38OpsinDynamicsImageHighwayDispatchTableEEEEEES2_S6_S9_SA_SC_SA_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJmmRKNS1_17ButteraugliParamsEPNS1_8BlurTempERKNS1_6Image3IfEERNS1_11PsychoImageEEE13ChooseAndCallIXadsoKPFS2_mmS5_S7_SB_SD_EL_ZNS1_L39SeparateFrequenciesHighwayDispatchTableEEEEEES2_mmS5_S7_SB_SD_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_11PsychoImageES5_mmRKNS1_17ButteraugliParamsEPNS1_8BlurTempEPNS1_5PlaneIfEESD_EE13ChooseAndCallIXadsoKPFS2_S5_S5_mmS8_SA_SD_SD_EL_ZNS1_L35MaskPsychoImageHighwayDispatchTableEEEEEES2_S5_S5_mmS8_SA_SD_SD_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEES6_dddPS4_S7_EE13ChooseAndCallIXadsoKPFS2_S6_S6_dddS7_S7_EL_ZNS1_L32MaltaDiffMapHighwayDispatchTableEEEEEES2_S6_S6_dddS7_S7_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEES6_dddPS4_S7_EE13ChooseAndCallIXadsoKPFS2_S6_S6_dddS7_S7_EL_ZNS1_L34MaltaDiffMapLFHighwayDispatchTableEEEEEES2_S6_S6_dddS7_S7_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5PlaneIfEES5_ffPS3_EE13ChooseAndCallIXadsoKPFvS5_S5_ffS6_EL_ZNS1_L36L2DiffAsymmetricHighwayDispatchTableEEEEEEvS5_S5_ffS6_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5PlaneIfEES5_fPS3_EE13ChooseAndCallIXadsoKPFvS5_S5_fS6_EL_ZNS1_L26L2DiffHighwayDispatchTableEEEEEEvS5_S5_fS6_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5PlaneIfEES5_fPS3_EE13ChooseAndCallIXadsoKPFvS5_S5_fS6_EL_ZNS1_L29SetL2DiffHighwayDispatchTableEEEEEEvS5_S5_fS6_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEERKNS1_6Image3IfEESA_fPS4_EE13ChooseAndCallIXadsoKPFS2_S6_SA_SA_fSB_EL_ZNS1_L44CombineChannelsToDiffmapHighwayDispatchTableEEEEEES2_S6_SA_SA_fSB_
Unexecuted instantiation: butteraugli.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRNS1_6Image3IfEES5_RKNS1_17ButteraugliParamsERNS1_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS2_S5_S5_S8_SB_EL_ZNS1_L45ButteraugliDiffmapInPlaceHighwayDispatchTableEEEEEES2_S5_S5_S8_SB_
Unexecuted instantiation: enc_adaptive_quantization.cc:_ZN3hwy13FunctionCacheIN3jxl8StatusOrINS1_5PlaneIfEEEEJfRKNS1_6Image3IfEERKNS1_5RectTImEEfPNS1_10ThreadPoolEPS4_SG_EE13ChooseAndCallIXadsoKPFS5_fS9_SD_fSF_SG_SG_EL_ZNS1_L43AdaptiveQuantizationMapHighwayDispatchTableEEEEEES5_fS9_SD_fSF_SG_SG_
Unexecuted instantiation: enc_group.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJmPNS1_18PassesEncoderStateERKNS1_6Image3IfEERKNS1_5RectTImEEPS6_EE13ChooseAndCallIXadsoKPFS2_mS4_S8_SC_SD_EL_ZNS1_L39ComputeCoefficientsHighwayDispatchTableEEEEEES2_mS4_S8_SC_SD_
Unexecuted instantiation: enc_chroma_from_luma.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJP22JxlMemoryManagerStructmPNS1_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS2_S4_mS7_EL_ZNS1_L33InitDCStorageHighwayDispatchTableEEEEEES2_S4_mS7_
Unexecuted instantiation: enc_chroma_from_luma.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_6Image3IfEERKNS1_5RectTImEERKNS1_15DequantMatricesEPKNS1_15AcStrategyImageEPKNS1_5PlaneIiEEPKNS1_9QuantizerESA_bbPNSH_IaEESP_PNSH_IfEENS1_4SpanIfEEEE13ChooseAndCallIXadsoKPFS2_S6_SA_SD_SG_SK_SN_SA_bbSP_SP_SR_ST_EL_ZNS1_L31ComputeTileHighwayDispatchTableEEEEEES2_S6_SA_SD_SG_SK_SN_SA_bbSP_SP_SR_ST_
Unexecuted instantiation: enc_ac_strategy.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_14CompressParamsERKNS1_9ACSConfigERKNS1_5RectTImEERKNS1_19ColorCorrelationMapEPfPjPNS1_15AcStrategyImageEEE13ChooseAndCallIXadsoKPFS2_S5_S8_SC_SF_SG_SH_SJ_EL_ZNS1_L34ProcessRectACSHighwayDispatchTableEEEEEES2_S5_S8_SC_SF_SG_SH_SJ_
Unexecuted instantiation: enc_entropy_coder.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJPKjRKNS1_5RectTImEEPrPKiRKNS1_15AcStrategyImageERKNS1_22YCbCrChromaSubsamplingEPNS1_6Image3IiEEPNSt3__16vectorINS1_5TokenENSM_9allocatorISO_EEEERKNS1_5PlaneIhEERKNST_IiEERKNS1_11BlockCtxMapEEE13ChooseAndCallIXadsoKPFS2_S4_S8_SC_SF_SI_SL_SS_SW_SZ_S12_EL_ZNS1_L40TokenizeCoefficientsHighwayDispatchTableEEEEEES2_S4_S8_SC_SF_SI_SL_SS_SW_SZ_S12_
Unexecuted instantiation: jxl_cms.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJPvmPKfPfmEE13ChooseAndCallIXadsoKPFS2_S3_mS5_S6_mEL_ZNS1_12_GLOBAL__N_141DoColorSpaceTransformHighwayDispatchTableEEEEEES2_S3_mS5_S6_m
Unexecuted instantiation: enc_ma.cc:_ZN3hwy13FunctionCacheIvJRN3jxl11TreeSamplesEfRKNSt3__16vectorINS1_21ModularMultiplierInfoENS4_9allocatorIS6_EEEENS4_5arrayINSC_IjLm2EEELm2EEEfPNS5_INS1_20PropertyDecisionNodeENS7_ISF_EEEEEE13ChooseAndCallIXadsoKPFvS3_fSB_SE_fSI_EL_ZNS1_L33FindBestSplitHighwayDispatchTableEEEEEEvS3_fSB_SE_fSI_
Unexecuted instantiation: compressed_dc.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJP22JxlMemoryManagerStructPKfPNS1_6Image3IfEEPNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S4_S6_S9_SB_EL_ZNS1_L39AdaptiveDCSmoothingHighwayDispatchTableEEEEEES2_S4_S6_S9_SB_
Unexecuted instantiation: compressed_dc.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl5RectTImEEPNS1_6Image3IfEEPNS1_5PlaneIhEERKNS1_5ImageEPKffSG_RKNS1_22YCbCrChromaSubsamplingERKNS1_11BlockCtxMapEEE13ChooseAndCallIXadsoKPFvS5_S8_SB_SE_SG_fSG_SJ_SM_EL_ZNS1_L29DequantDCHighwayDispatchTableEEEEEEvS5_S8_SB_SE_SG_fSG_SJ_SM_
Unexecuted instantiation: convolve_symmetric5.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_5PlaneIfEERKNS1_5RectTImEERKNS1_17WeightsSymmetric5EPNS1_10ThreadPoolEPS4_SA_EE13ChooseAndCallIXadsoKPFS2_S6_SA_SD_SF_SG_SA_EL_ZNS1_L30Symmetric5HighwayDispatchTableEEEEEES2_S6_SA_SD_SF_SG_SA_
Unexecuted instantiation: dec_external_image.cc:_ZN3hwy13FunctionCacheIvJPKfPNS_9float16_tEmEE13ChooseAndCallIXadsoKPFvS2_S4_mEL_ZN3jxlL30FloatToF16HighwayDispatchTableEEEEEEvS2_S4_m
Unexecuted instantiation: dec_external_image.cc:_ZN3hwy13FunctionCacheIvJPKfPjmfmEE13ChooseAndCallIXadsoKPFvS2_S3_mfmEL_ZN3jxlL30FloatToU32HighwayDispatchTableEEEEEEvS2_S3_mfm
Unexecuted instantiation: dec_group.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_11FrameHeaderEPNS1_8GetBlockEPNS1_13GroupDecCacheEPNS1_18PassesDecoderStateEmmRNS1_19RenderPipelineInputEPNS1_4jpeg8JPEGDataENS1_8DrawModeEEE13ChooseAndCallIXadsoKPFS2_S5_S7_S9_SB_mmSD_SG_SH_EL_ZNS1_12_GLOBAL__N_135DecodeGroupImplHighwayDispatchTableEEEEEES2_S5_S7_S9_SB_mmSD_SG_SH_
Unexecuted instantiation: dec_modular.cc:_ZN3hwy13FunctionCacheIvJmPKifPfEE13ChooseAndCallIXadsoKPFvmS2_fS3_EL_ZN3jxlL36SingleFromSingleHighwayDispatchTableEEEEEEvmS2_fS3_
Unexecuted instantiation: dec_modular.cc:_ZN3hwy13FunctionCacheIvJmPKiS2_fPfEE13ChooseAndCallIXadsoKPFvmS2_S2_fS3_EL_ZN3jxlL31MultiplySumHighwayDispatchTableEEEEEEvmS2_S2_fS3_
Unexecuted instantiation: dec_modular.cc:_ZN3hwy13FunctionCacheIvJmPKifPfS3_S3_EE13ChooseAndCallIXadsoKPFvmS2_fS3_S3_S3_EL_ZN3jxlL33RgbFromSingleHighwayDispatchTableEEEEEEvmS2_fS3_S3_S3_
Unexecuted instantiation: dec_noise.cc:_ZN3hwy13FunctionCacheIvJmmmmRKNSt3__14pairIPN3jxl5PlaneIfEENS3_5RectTImEEEESB_SB_EE13ChooseAndCallIXadsoKPFvmmmmSB_SB_SB_EL_ZNS3_12_GLOBAL__N_133Random3PlanesHighwayDispatchTableEEEEEEvmmmmSB_SB_SB_
Unexecuted instantiation: dec_xyb.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_6Image3IfEERKNS1_5RectTImEEPNS1_10ThreadPoolEPS4_RKNS1_11OpsinParamsEEE13ChooseAndCallIXadsoKPFS2_S6_SA_SC_SD_SG_EL_ZNS1_L33OpsinToLinearHighwayDispatchTableEEEEEES2_S6_SA_SC_SD_SG_
Unexecuted instantiation: squeeze.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRNS1_5ImageERKNSt3__16vectorINS1_13SqueezeParamsENS5_9allocatorIS7_EEEEPNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S4_SC_SE_EL_ZNS1_L30InvSqueezeHighwayDispatchTableEEEEEES2_S4_SC_SE_
Unexecuted instantiation: rct.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRNS1_5ImageEmmPNS1_10ThreadPoolEEE13ChooseAndCallIXadsoKPFS2_S4_mmS6_EL_ZNS1_L26InvRCTHighwayDispatchTableEEEEEES2_S4_mmS6_
Unexecuted instantiation: quant_weights.cc:_ZN3hwy13FunctionCacheIN3jxl6StatusEJRKNS1_13QuantEncodingEPfS6_mNS1_10QuantTableEPmEE13ChooseAndCallIXadsoKPFS2_S5_S6_S6_mS7_S8_EL_ZNS1_12_GLOBAL__N_137ComputeQuantTableHighwayDispatchTableEEEEEES2_S5_S6_S6_mS7_S8_
Unexecuted instantiation: stage_blending.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_11FrameHeaderEPKNS3_18PassesDecoderStateERKNS3_13ColorEncodingEEE13ChooseAndCallIXadsoKPFS7_SA_SD_SG_EL_ZNS3_L36GetBlendingStageHighwayDispatchTableEEEEEES7_SA_SD_SG_
Unexecuted instantiation: stage_chroma_upsampling.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJmbEE13ChooseAndCallIXadsoKPFS7_mbEL_ZNS3_L44GetChromaUpsamplingStageHighwayDispatchTableEEEEEES7_mb
Unexecuted instantiation: stage_cms.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L31GetCmsStageHighwayDispatchTableEEEEEES7_SA_
Unexecuted instantiation: stage_epf.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterERKNS3_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS7_SA_SE_EL_ZNS3_L32GetEPFStage0HighwayDispatchTableEEEEEES7_SA_SE_
Unexecuted instantiation: stage_epf.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterERKNS3_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS7_SA_SE_EL_ZNS3_L32GetEPFStage1HighwayDispatchTableEEEEEES7_SA_SE_
Unexecuted instantiation: stage_epf.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterERKNS3_5PlaneIfEEEE13ChooseAndCallIXadsoKPFS7_SA_SE_EL_ZNS3_L32GetEPFStage2HighwayDispatchTableEEEEEES7_SA_SE_
Unexecuted instantiation: stage_from_linear.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L38GetFromLinearStageHighwayDispatchTableEEEEEES7_SA_
Unexecuted instantiation: stage_gaborish.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_10LoopFilterEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L36GetGaborishStageHighwayDispatchTableEEEEEES7_SA_
Unexecuted instantiation: stage_noise.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_11NoiseParamsERKNS3_16ColorCorrelationEmEE13ChooseAndCallIXadsoKPFS7_SA_SD_mEL_ZNS3_L36GetAddNoiseStageHighwayDispatchTableEEEEEES7_SA_SD_m
Unexecuted instantiation: stage_noise.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJmEE13ChooseAndCallIXadsoKPFS7_mEL_ZNS3_L41GetConvolveNoiseStageHighwayDispatchTableEEEEEES7_m
Unexecuted instantiation: stage_splines.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJPKNS3_7SplinesEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L34GetSplineStageHighwayDispatchTableEEEEEES7_SA_
Unexecuted instantiation: stage_to_linear.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L36GetToLinearStageHighwayDispatchTableEEEEEES7_SA_
Unexecuted instantiation: stage_tone_mapping.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L39GetToneMappingStageHighwayDispatchTableEEEEEES7_SA_
Unexecuted instantiation: stage_upsampling.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_19CustomTransformDataEmmEE13ChooseAndCallIXadsoKPFS7_SA_mmEL_ZNS3_L38GetUpsamplingStageHighwayDispatchTableEEEEEES7_SA_mm
Unexecuted instantiation: stage_write.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_11ImageOutputEmmbbmNS3_11OrientationERNS1_6vectorIS8_NS1_9allocatorIS8_EEEEP22JxlMemoryManagerStructEE13ChooseAndCallIXadsoKPFS7_SA_mmbbmSB_SG_SI_EL_ZNS3_L41GetWriteToOutputStageHighwayDispatchTableEEEEEES7_SA_mmbbmSB_SG_SI_
Unexecuted instantiation: stage_xyb.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJRKNS3_18OutputEncodingInfoEEE13ChooseAndCallIXadsoKPFS7_SA_EL_ZNS3_L31GetXYBStageHighwayDispatchTableEEEEEES7_SA_
Unexecuted instantiation: stage_ycbcr.cc:_ZN3hwy13FunctionCacheINSt3__110unique_ptrIN3jxl19RenderPipelineStageENS1_14default_deleteIS4_EEEEJEE13ChooseAndCallIXadsoKPFS7_vEL_ZNS3_L33GetYCbCrStageHighwayDispatchTableEEEEEES7_v
simd_util.cc:_ZN3hwy13FunctionCacheImJEE13ChooseAndCallIXadsoKPFmvEL_ZN3jxlL33MaxVectorSizeHighwayDispatchTableEEEEEEmv
Line
Count
Source
337
2
  static RetType ChooseAndCall(Args... args) {
338
2
    ChosenTarget& chosen_target = GetChosenTarget();
339
2
    chosen_target.Update(SupportedTargets());
340
2
    return (table[chosen_target.GetIndex()])(args...);
341
2
  }
Unexecuted instantiation: splines.cc:_ZN3hwy13FunctionCacheIvJRKN3jxl6SplineERKNSt3__16vectorINS5_4pairINS2_5PointEfEENS5_9allocatorIS9_EEEEfRNS6_INS1_13SplineSegmentENSA_ISF_EEEERNS6_INS7_ImmEENSA_ISJ_EEEEEE13ChooseAndCallIXadsoKPFvS4_SE_fSI_SM_EL_ZNS1_L38SegmentsFromPointsHighwayDispatchTableEEEEEEvS4_SE_fSI_SM_
Unexecuted instantiation: splines.cc:_ZN3hwy13FunctionCacheIvJPfS1_S1_mmmbPKN3jxl13SplineSegmentEPKmS7_EE13ChooseAndCallIXadsoKPFvS1_S1_S1_mmmbS5_S7_S7_EL_ZNS2_L32DrawSegmentsHighwayDispatchTableEEEEEEvS1_S1_S1_mmmbS5_S7_S7_
342
#endif  // HWY_DISPATCH_MAP
343
};
344
345
// Used to deduce the template parameters RetType and Args from a function.
346
// Takes a function pointer purely so that RetType and Args can be deduced from
// its type; the pointer value itself is unnamed and never used. Returns a
// value-initialized FunctionCache<RetType, Args...>.
// NOTE(review): presumably meant to be used inside decltype() to name the
// matching FunctionCache type - confirm against the HWY_EXPORT_T definition.
template <typename RetType, typename... Args>
347
FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
348
  return FunctionCache<RetType, Args...>();
349
}
350
351
#define HWY_DISPATCH_TABLE(FUNC_NAME) \
352
8.45M
  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
353
354
// HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
355
// HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime.
356
// After being exported, it can be called from other parts of the same source
357
// file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
358
// like in the following example:
359
//
360
//   #include "hwy/highway.h"
361
//   HWY_BEFORE_NAMESPACE();
362
//   namespace skeleton {
363
//   namespace HWY_NAMESPACE {
364
//
365
//   void MyFunction(int a, char b, const char* c) { ... }
366
//
367
//   // NOLINTNEXTLINE(google-readability-namespace-comments)
368
//   }  // namespace HWY_NAMESPACE
369
//   }  // namespace skeleton
370
//   HWY_AFTER_NAMESPACE();
371
//
372
//   namespace skeleton {
373
//   HWY_EXPORT(MyFunction);  // Defines the dispatch table in this scope.
374
//
375
//   void MyFunction(int a, char b, const char* c) {
376
//     return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
377
//   }
378
//   }  // namespace skeleton
379
//
380
// For templated code with a single type parameter, instead use HWY_EXPORT_T and
381
// its HWY_DYNAMIC_DISPATCH_T counterpart:
382
//
383
//   template <typename T>
384
//   void MyFunctionCaller(T ...) {
385
//     // First argument to both HWY_EXPORT_T and HWY_DYNAMIC_DISPATCH_T is an
386
//     // arbitrary table name; you must provide the same name for each call.
387
//     // It is fine to have multiple HWY_EXPORT_T in a function, but a 64-bit
388
//     // FNV hash collision among *any* table names will trigger HWY_ABORT.
389
//     HWY_EXPORT_T(Table1, MyFunction<T>);
390
//     HWY_DYNAMIC_DISPATCH_T(Table1)(a, b, c);
391
//   }
392
//
393
// Note that HWY_EXPORT_T must be invoked inside a template (in the above
394
// example: `MyFunctionCaller`), so that a separate table will be created for
395
// each template instantiation. For convenience, we also provide a macro that
396
// combines both steps and avoids the need to pick a table name:
397
//
398
//   template <typename T>
399
//   void MyFunctionCaller(T ...) {
400
//     // Table name is automatically chosen. Note that this variant must be
401
//     // called in statement context; it is not a valid expression.
402
//     HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(MyFunction<T>)(a, b, c);
403
//   }
404
405
// Simplified version for IDE or the dynamic dispatch case with only one target.
406
#if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
407
408
// We use a table to provide the same compile error conditions as with the
409
// non-simplified case, but the table only has a single entry.
410
#define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME)                               \
411
  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
412
  HWY_DISPATCH_TABLE(TABLE_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
413
414
// Use the table, not just STATIC_DISPATCH as in DYNAMIC_DISPATCH, because
415
// TABLE_NAME might not match the function name.
416
// Evaluates to the function pointer stored in the table's single slot.
#define HWY_DYNAMIC_POINTER_T(TABLE_NAME) (HWY_DISPATCH_TABLE(TABLE_NAME)[0])
417
// Dereferences that pointer; the parenthesized result can be followed directly
// by an argument list.
#define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) \
418
  (*(HWY_DYNAMIC_POINTER_T(TABLE_NAME)))
419
420
// Non-template export: the function name is also used as the table name.
#define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
421
// Evaluates to the address of the statically dispatched function. The whole
// expansion is parenthesized, as in the full-table variant, so that it remains
// a single operand when followed by a call or combined with other operators.
#define HWY_DYNAMIC_POINTER(FUNC_NAME) (&HWY_STATIC_DISPATCH(FUNC_NAME))
422
// In this simplified branch, expands directly to the statically dispatched
// function rather than going through the one-entry table.
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
423
424
#else  // not simplified: full table
425
426
// Pre-C++17 workaround: non-type template arguments must have linkage, which
427
// means we cannot pass &table as a template argument to ChooseAndCall.
428
// ChooseAndCall must find a way to access the table in order to dispatch to the
429
// chosen target:
430
// 0) Skipping this by dispatching to the static target would be surprising to
431
//    users and may have serious performance implications.
432
// 1) An extra function parameter would be unacceptable because it changes the
433
//    user-visible function signature.
434
// 2) Declaring a table, then defining a pointer to it would work, but requires
435
//    an additional DECLARE step outside the function so that the pointer has
436
//    linkage, which breaks existing code.
437
// 3) We instead associate the function with the table using an instance of an
438
//    unnamed struct and the hash of the table name as the key. Because
439
//    ChooseAndCall has the type information, it can then cast to the function
440
//    pointer type. However, we cannot simply pass the name as a template
441
//    argument to ChooseAndCall because this requires char*, which hits the same
442
//    linkage problem. We instead hash the table name, which assumes the
443
//    table names do not have hash collisions.
444
#if HWY_DISPATCH_MAP
445
446
// Compile-time 64-bit hash of the NUL-terminated string `name`, defined
// recursively: the empty string hashes to 0xcbf29ce484222325; otherwise the
// first character (converted via uint8_t) is XORed with 0x100000001b3 times the
// hash of the remaining characters.
// NOTE(review): presumably kept as a single return expression so that it is a
// valid constexpr function before C++14 - confirm.
static constexpr uint64_t FNV(const char* name) {
447
  return *name ? static_cast<uint64_t>(static_cast<uint8_t>(*name)) ^
448
                     (0x100000001b3ULL * FNV(name + 1))
449
               : 0xcbf29ce484222325ULL;
450
}
451
452
// Constructing an AddExport<kHash> records `table` in the pointer slot that
// AllExports::GetRefToExportsPtr returns for (FuncPtr, ExportsKey, kHash).
// The HWY_EXPORT_T macro defined below passes kHash = hwy::FNV(#TABLE_NAME).
template <uint64_t kHash>
453
struct AddExport {
454
  // exports_key: unnamed parameter; only its type is used, as a template
  //   argument to GetRefToExportsPtr.
  // table_name: only used in the abort message.
  // table: pointer to the dispatch table to record.
  template <class ExportsKey, class FuncPtr>
455
  AddExport(ExportsKey /*exports_key*/, const char* table_name,
456
            const FuncPtr* table) {
457
    // Compile-time check that FuncPtr (without cv/ref) is exactly the pointer
    // type that DeduceFunctionCache derives for it.
    using FuncCache = decltype(DeduceFunctionCache(hwy::DeclVal<FuncPtr>()));
458
    static_assert(
459
        hwy::IsSame<RemoveCvRef<FuncPtr>, typename FuncCache::FuncPtr>(),
460
        "FuncPtr should be same type as FuncCache::FuncPtr");
461
462
    // Reference to the stored table pointer, so the assignment below updates
    // the slot itself.
    const FuncPtr*& exports_ptr = AllExports::template GetRefToExportsPtr<
463
        RemoveCvRef<FuncPtr>, RemoveCvRef<ExportsKey>, kHash>();
464
    // A non-null slot that points to a different table is reported as a hash
    // collision; a null slot, or one already holding `table`, is (re)assigned.
    if (exports_ptr && exports_ptr != table) {
465
      HWY_ABORT("Hash collision for %s, rename the function\n", table_name);
466
    } else {
467
      exports_ptr = table;
468
    }
469
  }
470
};
471
472
// Dynamic dispatch: defines table of function pointers. This must be invoked
473
// from inside the function template that calls the template we are exporting.
474
// TABLE_NAME must match the one passed to HWY_DYNAMIC_DISPATCH_T. This
475
// argument allows multiple exports within one function.
476
// Expands to three static declarations:
// 1) an empty unnamed-struct object, named via
//    HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey), whose type is passed as
//    a template argument to ChooseAndCall and deduced by AddExport;
// 2) the function pointer table HWY_DISPATCH_TABLE(TABLE_NAME) with
//    HWY_MAX_DYNAMIC_TARGETS + 2 slots: ChooseAndCall first, then the entries
//    from HWY_CHOOSE_TARGET_LIST, then HWY_CHOOSE_FALLBACK;
// 3) an hwy::AddExport<hwy::FNV(#TABLE_NAME)> object (its name includes
//    __LINE__) constructed from the key object, the table name string and the
//    table.
// The invocation supplies the final semicolon.
#define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME)                                   \
477
  static const struct {                                                       \
478
  } HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey) = {};                   \
479
  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE(  \
480
      TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = {       \
481
      /* The first entry in the table initializes the global cache and        \
482
       * calls the appropriate function. */                                   \
483
      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME)))::  \
484
          template ChooseAndCall<decltype(HWY_CONCAT(                         \
485
                                     TABLE_NAME, HighwayDispatchExportsKey)), \
486
                                 hwy::FNV(#TABLE_NAME)>,                      \
487
      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                      \
488
      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                         \
489
  };                                                                          \
490
  HWY_MAYBE_UNUSED static hwy::AddExport<hwy::FNV(#TABLE_NAME)> HWY_CONCAT(   \
491
      HighwayAddTable, __LINE__)(                                             \
492
      HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey), #TABLE_NAME,         \
493
      HWY_DISPATCH_TABLE(TABLE_NAME))
494
495
// For non-template functions. Not necessarily invoked within a function, hence
496
// we derive the string and variable names from FUNC_NAME, not HWY_FUNCTION.
497
// With HWY_DISPATCH_WORKAROUND set, non-template exports reuse the hash-keyed
// HWY_EXPORT_T path, with the function name as table name.
#if HWY_DISPATCH_WORKAROUND
498
#define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
499
#else
500
// Otherwise only the table is defined (no key object, no AddExport); its first
// entry is TableChooseAndCall instantiated with the table itself as the
// template argument.
#define HWY_EXPORT(FUNC_NAME)                                                \
501
  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
502
      FUNC_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = {       \
503
      /* The first entry in the table initializes the global cache and       \
504
       * calls the appropriate function. */                                  \
505
      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
506
          template TableChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>,        \
507
      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                     \
508
      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                        \
509
  }
510
#endif  // HWY_DISPATCH_WORKAROUND
511
512
#else  // !HWY_DISPATCH_MAP
513
514
// Zero-overhead, but requires C++17 for non-type template arguments without
515
// linkage, because HWY_EXPORT_T tables are local static variables.
516
// Defines the static const table HWY_DISPATCH_TABLE(TABLE_NAME) with
// HWY_MAX_DYNAMIC_TARGETS + 2 slots. The first slot is ChooseAndCall
// instantiated with the table itself as template argument, followed by the
// entries from HWY_CHOOSE_TARGET_LIST and then HWY_CHOOSE_FALLBACK.
#define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME)                                  \
517
  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
518
      TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = {      \
519
      /* The first entry in the table initializes the global cache and       \
520
       * calls the appropriate function. */                                  \
521
      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
522
          template ChooseAndCall<HWY_DISPATCH_TABLE(TABLE_NAME)>,            \
523
      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                     \
524
      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                        \
525
  }
526
527
// Non-template export: the function name is also used as the table name.
#define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
528
529
#endif  // HWY_DISPATCH_MAP
530
531
// HWY_DISPATCH_MAP only affects how tables are created, not their usage.
532
533
// Evaluates to the function pointer for the chosen target: the entry of
// HWY_DISPATCH_TABLE(FUNC_NAME) at index hwy::GetChosenTarget().GetIndex().
534
#define HWY_DYNAMIC_POINTER(FUNC_NAME) \
535
8.45M
  (HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()])
536
537
// Calls the function pointer for the chosen target: dereferences
// HWY_DYNAMIC_POINTER so that an argument list can follow directly.
538
8.45M
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) (*(HWY_DYNAMIC_POINTER(FUNC_NAME)))
539
540
// Same as HWY_DYNAMIC_DISPATCH / HWY_DYNAMIC_POINTER, but the argument is named
// TABLE_NAME to clarify that the table name given to HWY_EXPORT_T is expected.
541
#define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) HWY_DYNAMIC_DISPATCH(TABLE_NAME)
542
#define HWY_DYNAMIC_POINTER_T(TABLE_NAME) HWY_DYNAMIC_POINTER(TABLE_NAME)
543
544
#endif  // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
545
546
// Returns the name of an anonymous dispatch table that is only shared with
547
// macro invocations coming from the same source line.
548
// The name is built from __LINE__, so two expansions refer to the same table
// only when they originate from the same source line.
#define HWY_DISPATCH_TABLE_T() HWY_CONCAT(HighwayDispatchTableT, __LINE__)
549
550
// For templated code, combines export and dispatch using an anonymous table.
// Expands to the HWY_EXPORT_T declaration, a semicolon, and then the dispatch
// expression, i.e. two statements; it therefore cannot be used as an
// expression and must appear where a declaration is permitted.
551
#define HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(FUNC_NAME) \
552
  HWY_EXPORT_T(HWY_DISPATCH_TABLE_T(), FUNC_NAME);   \
553
  HWY_DYNAMIC_DISPATCH_T(HWY_DISPATCH_TABLE_T())
554
555
// DEPRECATED names; please use HWY_HAVE_* instead. Each HWY_CAP_* macro below
// simply expands to the HWY_HAVE_* macro with the same suffix.
556
#define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
557
#define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
558
#define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64
559
560
}  // namespace hwy
561
562
#endif  // HWY_HIGHWAY_INCLUDED
563
564
//------------------------------------------------------------------------------
565
566
// NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want
567
// to include them once per target, which is ensured by the toggle check.
568
// Because ops/*.h are included under it, they do not need their own guard.
569
// True only while HWY_HIGHWAY_PER_TARGET and HWY_TARGET_TOGGLE are in the same
// defined/undefined state. The #ifdef/#else below then flips
// HWY_HIGHWAY_PER_TARGET, so a later inclusion skips this section until
// HWY_TARGET_TOGGLE changes state again.
// NOTE(review): HWY_TARGET_TOGGLE is not modified in this file; presumably the
// per-target re-inclusion machinery flips it - confirm.
#if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
570
#ifdef HWY_HIGHWAY_PER_TARGET
571
#undef HWY_HIGHWAY_PER_TARGET
572
#else
573
#define HWY_HIGHWAY_PER_TARGET
574
#endif
575
576
// These define ops inside namespace hwy::HWY_NAMESPACE.
// Exactly one target-specific header is selected from the value of HWY_TARGET.
577
#if HWY_TARGET == HWY_SSE2 || HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
578
#include "hwy/ops/x86_128-inl.h"
579
#elif HWY_TARGET == HWY_AVX2
580
#include "hwy/ops/x86_256-inl.h"
581
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \
582
    HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
583
#include "hwy/ops/x86_512-inl.h"
584
#elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15 || \
585
    (HWY_TARGET & HWY_ALL_PPC)
586
#include "hwy/ops/ppc_vsx-inl.h"
587
#elif HWY_TARGET & HWY_ALL_NEON
588
#include "hwy/ops/arm_neon-inl.h"
589
#elif HWY_TARGET & HWY_ALL_SVE
590
#include "hwy/ops/arm_sve-inl.h"
591
#elif HWY_TARGET == HWY_WASM_EMU256
592
#include "hwy/ops/wasm_256-inl.h"
593
#elif HWY_TARGET == HWY_WASM
594
#include "hwy/ops/wasm_128-inl.h"
595
#elif HWY_TARGET == HWY_RVV
596
#include "hwy/ops/rvv-inl.h"
597
#elif HWY_TARGET == HWY_EMU128
598
#include "hwy/ops/emu128-inl.h"
599
#elif HWY_TARGET == HWY_SCALAR
600
#include "hwy/ops/scalar-inl.h"
601
#else
602
// No branch matched: emit a compile-time message via #pragma message.
#pragma message("HWY_TARGET does not match any known target")
603
#endif  // HWY_TARGET
604
605
// Included for every target, after the target-specific header above.
#include "hwy/ops/generic_ops-inl.h"
606
607
#endif  // HWY_HIGHWAY_PER_TARGET