/src/libjxl/lib/jxl/simd_util.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/simd_util.h" |
7 | | |
8 | | #include <cstddef> |
9 | | |
10 | | #undef HWY_TARGET_INCLUDE |
11 | | #define HWY_TARGET_INCLUDE "lib/jxl/simd_util.cc" |
12 | | #include <hwy/foreach_target.h> |
13 | | #include <hwy/highway.h> |
14 | | |
15 | | HWY_BEFORE_NAMESPACE(); |
16 | | namespace jxl { |
17 | | namespace HWY_NAMESPACE { |
18 | | |
19 | | using hwy::HWY_NAMESPACE::GetLane; |
20 | | using hwy::HWY_NAMESPACE::IfThenElseZero; |
21 | | using hwy::HWY_NAMESPACE::Iota; |
22 | | using hwy::HWY_NAMESPACE::LoadU; |
23 | | using hwy::HWY_NAMESPACE::Lt; |
24 | | using hwy::HWY_NAMESPACE::Max; |
25 | | using hwy::HWY_NAMESPACE::MaxOfLanes; |
26 | | using hwy::HWY_NAMESPACE::Set; |
27 | | |
28 | 12.2M | size_t MaxVectorSize() { |
29 | 12.2M | HWY_FULL(float) df; |
30 | 12.2M | return Lanes(df) * sizeof(float); |
31 | 12.2M | } Unexecuted instantiation: jxl::N_SSE4::MaxVectorSize() jxl::N_AVX2::MaxVectorSize() Line | Count | Source | 28 | 12.2M | size_t MaxVectorSize() { | 29 | 12.2M | HWY_FULL(float) df; | 30 | 12.2M | return Lanes(df) * sizeof(float); | 31 | 12.2M | } |
Unexecuted instantiation: jxl::N_AVX3::MaxVectorSize() Unexecuted instantiation: jxl::N_AVX3_ZEN4::MaxVectorSize() Unexecuted instantiation: jxl::N_AVX3_SPR::MaxVectorSize() Unexecuted instantiation: jxl::N_SSE2::MaxVectorSize() |
32 | | |
33 | 169k | uint32_t MaxValue(uint32_t* JXL_RESTRICT data, size_t len) { |
34 | 169k | HWY_FULL(uint32_t) du; |
35 | 169k | size_t last_full = Lanes(du) * (len / Lanes(du)); |
36 | 169k | auto max = Set(du, 0); |
37 | 5.60M | for (size_t i = 0; i < last_full; i += Lanes(du)) { |
38 | 5.43M | max = Max(max, LoadU(du, data + i)); |
39 | 5.43M | } |
40 | 169k | if (last_full < len) { |
41 | 49.9k | const auto stop = Set(du, len); |
42 | 49.9k | const auto fence = Iota(du, last_full); |
43 | 49.9k | const auto take = Lt(fence, stop); |
44 | 49.9k | max = Max(max, IfThenElseZero(take, LoadU(du, data + last_full))); |
45 | 49.9k | } |
46 | 169k | return GetLane(MaxOfLanes(du, max)); |
47 | 169k | } Unexecuted instantiation: jxl::N_SSE4::MaxValue(unsigned int*, unsigned long) jxl::N_AVX2::MaxValue(unsigned int*, unsigned long) Line | Count | Source | 33 | 169k | uint32_t MaxValue(uint32_t* JXL_RESTRICT data, size_t len) { | 34 | 169k | HWY_FULL(uint32_t) du; | 35 | 169k | size_t last_full = Lanes(du) * (len / Lanes(du)); | 36 | 169k | auto max = Set(du, 0); | 37 | 5.60M | for (size_t i = 0; i < last_full; i += Lanes(du)) { | 38 | 5.43M | max = Max(max, LoadU(du, data + i)); | 39 | 5.43M | } | 40 | 169k | if (last_full < len) { | 41 | 49.9k | const auto stop = Set(du, len); | 42 | 49.9k | const auto fence = Iota(du, last_full); | 43 | 49.9k | const auto take = Lt(fence, stop); | 44 | 49.9k | max = Max(max, IfThenElseZero(take, LoadU(du, data + last_full))); | 45 | 49.9k | } | 46 | 169k | return GetLane(MaxOfLanes(du, max)); | 47 | 169k | } |
Unexecuted instantiation: jxl::N_AVX3::MaxValue(unsigned int*, unsigned long) Unexecuted instantiation: jxl::N_AVX3_ZEN4::MaxValue(unsigned int*, unsigned long) Unexecuted instantiation: jxl::N_AVX3_SPR::MaxValue(unsigned int*, unsigned long) Unexecuted instantiation: jxl::N_SSE2::MaxValue(unsigned int*, unsigned long) |
48 | | |
49 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
50 | | } // namespace HWY_NAMESPACE |
51 | | } // namespace jxl |
52 | | HWY_AFTER_NAMESPACE(); |
53 | | |
54 | | #if HWY_ONCE |
55 | | namespace jxl { |
56 | | |
57 | | HWY_EXPORT(MaxVectorSize); |
58 | | HWY_EXPORT(MaxValue); |
59 | | |
60 | 12.2M | size_t MaxVectorSize() { |
61 | | // Ideally HWY framework should provide us this value. |
62 | | // Less than ideal is to check all available targets and choose maximal. |
63 | | // As for now, we just ask current active target, assuming it won't change. |
64 | 12.2M | return HWY_DYNAMIC_DISPATCH(MaxVectorSize)(); |
65 | 12.2M | } |
66 | | |
67 | 169k | uint32_t MaxValue(uint32_t* JXL_RESTRICT data, size_t len) { |
68 | 169k | return HWY_DYNAMIC_DISPATCH(MaxValue)(data, len); |
69 | 169k | } |
70 | | |
71 | | } // namespace jxl |
72 | | #endif |