Coverage Report

Created: 2025-06-16 07:00

/src/libjxl/third_party/highway/hwy/targets.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2020 Google LLC
2
// SPDX-License-Identifier: Apache-2.0
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
//      http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
16
#ifndef HIGHWAY_HWY_TARGETS_H_
17
#define HIGHWAY_HWY_TARGETS_H_
18
19
// Allows opting out of C++ standard library usage, which is not available in
20
// some Compiler Explorer environments.
21
#ifndef HWY_NO_LIBCXX
22
#include <vector>
23
#endif
24
25
// For SIMD module implementations and their callers. Defines which targets to
26
// generate and call.
27
28
#include "hwy/base.h"
29
#include "hwy/detect_targets.h"
30
#include "hwy/highway_export.h"
31
32
#if !defined(HWY_NO_LIBCXX)
33
#include <atomic>
34
#endif
35
36
namespace hwy {
37
38
// Returns bitfield of enabled targets that are supported on this CPU; there is
39
// always at least one such target, hence the return value is never 0. The
40
// targets returned may change after calling DisableTargets. This function is
41
// always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
42
// calls to it if there is only a single target enabled.
43
HWY_DLLEXPORT int64_t SupportedTargets();
44
45
// Evaluates to a function call, or literal if there is a single target.
46
#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
47
#define HWY_SUPPORTED_TARGETS HWY_TARGETS
48
#else
49
#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
50
#endif
51
52
// Subsequent SupportedTargets will not return targets whose bit(s) are set in
53
// `disabled_targets`. Exception: if SupportedTargets would return 0, it will
54
// instead return HWY_STATIC_TARGET (there must always be one target to call).
55
//
56
// This function is useful for disabling targets known to be buggy, or if the
57
// best available target is undesirable (perhaps due to throttling or memory
58
// bandwidth limitations). Use SetSupportedTargetsForTest instead of this
59
// function for iteratively enabling specific targets for testing.
60
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);
61
62
// Subsequent SupportedTargets will return the given set of targets, except
63
// those disabled via DisableTargets. Call with a mask of 0 to disable the mock
64
// and return to the normal SupportedTargets behavior. Used to run tests for
65
// all targets.
66
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);
67
68
#ifndef HWY_NO_LIBCXX
69
70
// Return the list of targets in HWY_TARGETS supported by the CPU as a list of
71
// individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
72
// is affected by the current SetSupportedTargetsForTest() mock if any.
73
0
HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() {
74
0
  std::vector<int64_t> ret;
75
0
  for (int64_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
76
0
       targets = targets & (targets - 1)) {
77
0
    int64_t current_target = targets & ~(targets - 1);
78
0
    ret.push_back(current_target);
79
0
  }
80
0
  return ret;
81
0
}
82
83
#endif  // HWY_NO_LIBCXX
84
85
0
static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
86
0
  switch (target) {
87
0
#if HWY_ARCH_X86
88
0
    case HWY_SSE2:
89
0
      return "SSE2";
90
0
    case HWY_SSSE3:
91
0
      return "SSSE3";
92
0
    case HWY_SSE4:
93
0
      return "SSE4";
94
0
    case HWY_AVX2:
95
0
      return "AVX2";
96
0
    case HWY_AVX3:
97
0
      return "AVX3";
98
0
    case HWY_AVX3_DL:
99
0
      return "AVX3_DL";
100
0
    case HWY_AVX3_ZEN4:
101
0
      return "AVX3_ZEN4";
102
0
    case HWY_AVX3_SPR:
103
0
      return "AVX3_SPR";
104
0
#endif
105
0
106
0
#if HWY_ARCH_ARM
107
0
    case HWY_SVE2_128:
108
0
      return "SVE2_128";
109
0
    case HWY_SVE_256:
110
0
      return "SVE_256";
111
0
    case HWY_SVE2:
112
0
      return "SVE2";
113
0
    case HWY_SVE:
114
0
      return "SVE";
115
0
    case HWY_NEON_BF16:
116
0
      return "NEON_BF16";
117
0
    case HWY_NEON:
118
0
      return "NEON";
119
0
    case HWY_NEON_WITHOUT_AES:
120
0
      return "NEON_WITHOUT_AES";
121
0
#endif
122
0
123
0
#if HWY_ARCH_PPC
124
0
    case HWY_PPC8:
125
0
      return "PPC8";
126
0
    case HWY_PPC9:
127
0
      return "PPC9";
128
0
    case HWY_PPC10:
129
0
      return "PPC10";
130
0
#endif
131
0
132
0
#if HWY_ARCH_S390X
133
0
    case HWY_Z14:
134
0
      return "Z14";
135
0
    case HWY_Z15:
136
0
      return "Z15";
137
0
#endif
138
0
139
0
#if HWY_ARCH_WASM
140
0
    case HWY_WASM:
141
0
      return "WASM";
142
0
    case HWY_WASM_EMU256:
143
0
      return "WASM_EMU256";
144
0
#endif
145
0
146
0
#if HWY_ARCH_RISCV
147
0
    case HWY_RVV:
148
0
      return "RVV";
149
0
#endif
150
0
151
0
    case HWY_EMU128:
152
0
      return "EMU128";
153
0
    case HWY_SCALAR:
154
0
      return "SCALAR";
155
0
156
0
    default:
157
0
      return "Unknown";  // must satisfy gtest IsValidParamName()
158
0
  }
159
0
}
Unexecuted instantiation: enc_cluster.cc:hwy::TargetName(long)
Unexecuted instantiation: targets.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_lz77.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_detect_dots.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_convolve_separable5.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_xyb.cc:hwy::TargetName(long)
Unexecuted instantiation: butteraugli.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_adaptive_quantization.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_group.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_chroma_from_luma.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_ac_strategy.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_entropy_coder.cc:hwy::TargetName(long)
Unexecuted instantiation: jxl_cms.cc:hwy::TargetName(long)
Unexecuted instantiation: enc_ma.cc:hwy::TargetName(long)
Unexecuted instantiation: compressed_dc.cc:hwy::TargetName(long)
Unexecuted instantiation: convolve_symmetric5.cc:hwy::TargetName(long)
Unexecuted instantiation: dec_context_map.cc:hwy::TargetName(long)
Unexecuted instantiation: dec_external_image.cc:hwy::TargetName(long)
Unexecuted instantiation: dec_group.cc:hwy::TargetName(long)
Unexecuted instantiation: dec_modular.cc:hwy::TargetName(long)
Unexecuted instantiation: dec_noise.cc:hwy::TargetName(long)
Unexecuted instantiation: dec_xyb.cc:hwy::TargetName(long)
Unexecuted instantiation: squeeze.cc:hwy::TargetName(long)
Unexecuted instantiation: rct.cc:hwy::TargetName(long)
Unexecuted instantiation: quant_weights.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_blending.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_chroma_upsampling.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_cms.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_epf.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_from_linear.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_gaborish.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_noise.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_splines.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_to_linear.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_tone_mapping.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_upsampling.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_write.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_xyb.cc:hwy::TargetName(long)
Unexecuted instantiation: stage_ycbcr.cc:hwy::TargetName(long)
Unexecuted instantiation: simd_util.cc:hwy::TargetName(long)
Unexecuted instantiation: splines.cc:hwy::TargetName(long)
160
161
// The maximum number of dynamic targets on any architecture is defined by
162
// HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
163
164
// For the ChosenTarget mask and index we use a different bit arrangement than
165
// in the HWY_TARGETS mask. Only the targets involved in the current
166
// architecture are used in this mask, and therefore only the least significant
167
// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least
168
// significant bit is set when the mask is not initialized, the next
169
// HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
170
// HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
171
// that position and the next more significant bit is used for HWY_SCALAR (if
172
// HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
173
// define equivalent values for HWY_TARGETS in this representation.
174
// This mask representation allows using ctz() on this mask to obtain a small
175
// number that's used as an index of the table for dynamic dispatch. In this
176
// way the first entry is used when the mask is uninitialized, the following
177
// HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
178
// scalar.
179
180
// The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
181
8.46M
#define HWY_CHOSEN_TARGET_MASK_SCALAR (1LL << (HWY_MAX_DYNAMIC_TARGETS + 1))
182
183
// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
184
// current architecture.
185
#define HWY_CHOSEN_TARGET_SHIFT(X)                                    \
186
8.46M
  ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
187
8.46M
    ((1LL << HWY_MAX_DYNAMIC_TARGETS) - 1))                           \
188
8.46M
   << 1)
189
190
// The HWY_TARGETS mask in the ChosenTarget mask format.
191
#define HWY_CHOSEN_TARGET_MASK_TARGETS \
192
8.46M
  (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1LL)
193
194
#if HWY_ARCH_X86
195
// Maximum number of dynamic targets; changing this value is an ABI-incompatible
196
// change
197
25.4M
#define HWY_MAX_DYNAMIC_TARGETS 15
198
8.46M
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
199
// These must match the order in which the HWY_TARGETS are defined
200
// starting from the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
201
// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
202
// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
203
// corresponds to the best target. Don't include a "," at the end of the list.
204
#define HWY_CHOOSE_TARGET_LIST(func_name)                     \
205
  nullptr,                             /* reserved */         \
206
      nullptr,                         /* reserved */         \
207
      nullptr,                         /* reserved */         \
208
      nullptr,                         /* reserved */         \
209
      HWY_CHOOSE_AVX3_SPR(func_name),  /* AVX3_SPR */         \
210
      nullptr,                         /* reserved */         \
211
      HWY_CHOOSE_AVX3_ZEN4(func_name), /* AVX3_ZEN4 */        \
212
      HWY_CHOOSE_AVX3_DL(func_name),   /* AVX3_DL */          \
213
      HWY_CHOOSE_AVX3(func_name),      /* AVX3 */             \
214
      HWY_CHOOSE_AVX2(func_name),      /* AVX2 */             \
215
      nullptr,                         /* AVX */              \
216
      HWY_CHOOSE_SSE4(func_name),      /* SSE4 */             \
217
      HWY_CHOOSE_SSSE3(func_name),     /* SSSE3 */            \
218
      nullptr,                         /* reserved - SSE3? */ \
219
      HWY_CHOOSE_SSE2(func_name)       /* SSE2 */
220
221
#elif HWY_ARCH_ARM
222
// See HWY_ARCH_X86 above for details.
223
#define HWY_MAX_DYNAMIC_TARGETS 15
224
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
225
#define HWY_CHOOSE_TARGET_LIST(func_name)                              \
226
  nullptr,                                   /* reserved */            \
227
      nullptr,                               /* reserved */            \
228
      nullptr,                               /* reserved */            \
229
      HWY_CHOOSE_SVE2_128(func_name),        /* SVE2 128-bit */        \
230
      HWY_CHOOSE_SVE_256(func_name),         /* SVE 256-bit */         \
231
      nullptr,                               /* reserved */            \
232
      nullptr,                               /* reserved */            \
233
      nullptr,                               /* reserved */            \
234
      HWY_CHOOSE_SVE2(func_name),            /* SVE2 */                \
235
      HWY_CHOOSE_SVE(func_name),             /* SVE */                 \
236
      nullptr,                               /* reserved */            \
237
      HWY_CHOOSE_NEON_BF16(func_name),       /* NEON + f16/dot/bf16 */ \
238
      nullptr,                               /* reserved */            \
239
      HWY_CHOOSE_NEON(func_name),            /* NEON */                \
240
      HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */
241
242
#elif HWY_ARCH_RISCV
243
// See HWY_ARCH_X86 above for details.
244
#define HWY_MAX_DYNAMIC_TARGETS 9
245
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
246
#define HWY_CHOOSE_TARGET_LIST(func_name)       \
247
  nullptr,                       /* reserved */ \
248
      nullptr,                   /* reserved */ \
249
      nullptr,                   /* reserved */ \
250
      nullptr,                   /* reserved */ \
251
      nullptr,                   /* reserved */ \
252
      nullptr,                   /* reserved */ \
253
      nullptr,                   /* reserved */ \
254
      HWY_CHOOSE_RVV(func_name), /* RVV */      \
255
      nullptr                    /* reserved */
256
257
#elif HWY_ARCH_PPC || HWY_ARCH_S390X
258
// See HWY_ARCH_X86 above for details.
259
#define HWY_MAX_DYNAMIC_TARGETS 9
260
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
261
#define HWY_CHOOSE_TARGET_LIST(func_name)         \
262
  nullptr,                         /* reserved */ \
263
      nullptr,                     /* reserved */ \
264
      nullptr,                     /* reserved */ \
265
      nullptr,                     /* reserved */ \
266
      HWY_CHOOSE_PPC10(func_name), /* PPC10 */    \
267
      HWY_CHOOSE_PPC9(func_name),  /* PPC9 */     \
268
      HWY_CHOOSE_PPC8(func_name),  /* PPC8 */     \
269
      HWY_CHOOSE_Z15(func_name),   /* Z15 */      \
270
      HWY_CHOOSE_Z14(func_name)    /* Z14 */
271
272
#elif HWY_ARCH_WASM
273
// See HWY_ARCH_X86 above for details.
274
#define HWY_MAX_DYNAMIC_TARGETS 9
275
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
276
#define HWY_CHOOSE_TARGET_LIST(func_name)                  \
277
  nullptr,                               /* reserved */    \
278
      nullptr,                           /* reserved */    \
279
      nullptr,                           /* reserved */    \
280
      nullptr,                           /* reserved */    \
281
      nullptr,                           /* reserved */    \
282
      nullptr,                           /* reserved */    \
283
      HWY_CHOOSE_WASM_EMU256(func_name), /* WASM_EMU256 */ \
284
      HWY_CHOOSE_WASM(func_name),        /* WASM */        \
285
      nullptr                            /* reserved */
286
287
#else
288
// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
289
// still creating single-entry tables in HWY_EXPORT to ensure portability.
290
#define HWY_MAX_DYNAMIC_TARGETS 1
291
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
292
#endif
293
294
// Bitfield of supported and enabled targets. The format differs from that of
295
// HWY_TARGETS; the lowest bit governs the first function pointer (which is
296
// special in that it calls FunctionCache, then Update, then dispatches to the
297
// actual implementation) in the tables created by HWY_EXPORT. Monostate (see
298
// GetChosenTarget), thread-safe unless HWY_NO_LIBCXX is defined.
299
struct ChosenTarget {
300
 public:
301
  // Reset bits according to `targets` (typically the return value of
302
  // SupportedTargets()). Postcondition: IsInitialized() == true.
303
4
  void Update(int64_t targets) {
304
    // These are `targets` shifted downwards, see above. Also include SCALAR
305
    // (corresponds to the last entry in the function table) as fallback.
306
4
    StoreMask(HWY_CHOSEN_TARGET_SHIFT(targets) | HWY_CHOSEN_TARGET_MASK_SCALAR);
307
4
  }
308
309
  // Reset to the uninitialized state, so that FunctionCache will call Update
310
  // during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false.
311
0
  void DeInit() { StoreMask(1); }
312
313
  // Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH
314
  // function was called, which we check in tests.
315
0
  bool IsInitialized() const { return LoadMask() != 1; }
316
317
  // Return the index in the dynamic dispatch table to be used by the current
318
  // CPU. Note that this method must be in the header file so it uses the value
319
  // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
320
  // calls it, which may be different from others. This means we only enable
321
  // those targets that were actually compiled in this module.
322
8.46M
  size_t HWY_INLINE GetIndex() const {
323
8.46M
    return hwy::Num0BitsBelowLS1Bit_Nonzero64(
324
8.46M
        static_cast<uint64_t>(LoadMask() & HWY_CHOSEN_TARGET_MASK_TARGETS));
325
8.46M
  }
326
327
 private:
328
#if defined(HWY_NO_LIBCXX)
329
  int64_t LoadMask() const { return mask_; }
330
  void StoreMask(int64_t mask) { mask_ = mask; }
331
332
  int64_t mask_{1};  // Initialized to 1 so GetIndex() returns 0.
333
#else
334
8.46M
  int64_t LoadMask() const { return mask_.load(); }
335
4
  void StoreMask(int64_t mask) { mask_.store(mask); }
336
337
  std::atomic<int64_t> mask_{1};  // Initialized to 1 so GetIndex() returns 0.
338
#endif  // HWY_NO_LIBCXX
339
};
340
341
// For internal use (e.g. by FunctionCache and DisableTargets).
342
HWY_DLLEXPORT ChosenTarget& GetChosenTarget();
343
344
}  // namespace hwy
345
346
#endif  // HIGHWAY_HWY_TARGETS_H_