/src/libjxl/lib/jxl/xorshift128plus-inl.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | // Fast but weak random generator. |
7 | | |
8 | | #if defined(LIB_JXL_XORSHIFT128PLUS_INL_H_) == defined(HWY_TARGET_TOGGLE) |
9 | | #ifdef LIB_JXL_XORSHIFT128PLUS_INL_H_ |
10 | | #undef LIB_JXL_XORSHIFT128PLUS_INL_H_ |
11 | | #else |
12 | | #define LIB_JXL_XORSHIFT128PLUS_INL_H_ |
13 | | #endif |
14 | | |
15 | | #include <cstddef> |
16 | | #include <cstdint> |
17 | | #include <hwy/highway.h> |
18 | | HWY_BEFORE_NAMESPACE(); |
19 | | namespace jxl { |
20 | | namespace HWY_NAMESPACE { |
21 | | namespace { |
22 | | |
23 | | // These templates are not found via ADL. |
24 | | using hwy::HWY_NAMESPACE::Add; |
25 | | using hwy::HWY_NAMESPACE::ShiftLeft; |
26 | | using hwy::HWY_NAMESPACE::ShiftRight; |
27 | | using hwy::HWY_NAMESPACE::Xor; |
28 | | |
29 | | // Adapted from https://github.com/vpxyz/xorshift/blob/master/xorshift128plus/ |
30 | | // (MIT-license) |
// Xorshift128Plus bundles N generators whose state lives in the two member
// arrays s0_[N] and s1_[N] (one (s0_[i], s1_[i]) pair per generator). The
// constructors fill that state via SplitMix64(); Fill() emits one 64-bit value
// per generator and advances all of them by one step.
31 | | class Xorshift128Plus { |
32 | | public: |
33 | | // 8 independent generators (= single iteration for AVX-512) |
34 | | enum { N = 8 }; |
35 | | |
// Single-seed constructor. All 2*N state words form one chain:
// s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15), s1_[0] = SplitMix64(s0_[0]),
// then s0_[i] = SplitMix64(s1_[i - 1]) and s1_[i] = SplitMix64(s0_[i]).
// NOTE(review): the count column is 0 and all three listed instantiations
// (N_SSE4, N_AVX2, N_SSE2) are reported as unexecuted, so this overload is
// not exercised by the run this report was produced from.
36 | 0 | explicit HWY_MAYBE_UNUSED Xorshift128Plus(const uint64_t seed) { |
37 | 0 | // Init state using SplitMix64 generator |
38 | 0 | s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15ull); |
39 | 0 | s1_[0] = SplitMix64(s0_[0]); |
40 | 0 | for (size_t i = 1; i < N; ++i) { |
41 | 0 | s0_[i] = SplitMix64(s1_[i - 1]); |
42 | 0 | s1_[i] = SplitMix64(s0_[i]); |
43 | 0 | } |
44 | 0 | } Unexecuted instantiation: dec_noise.cc:jxl::N_SSE4::(anonymous namespace)::Xorshift128Plus::Xorshift128Plus(unsigned long) Unexecuted instantiation: dec_noise.cc:jxl::N_AVX2::(anonymous namespace)::Xorshift128Plus::Xorshift128Plus(unsigned long) Unexecuted instantiation: dec_noise.cc:jxl::N_SSE2::(anonymous namespace)::Xorshift128Plus::Xorshift128Plus(unsigned long) |
45 | | |
// Four-seed constructor. (seed1 << 32) + seed2 starts the s0_ chain and
// (seed3 << 32) + seed4 starts the s1_ chain; both get the same additive
// constant 0x9E3779B97F4A7C15 before the first SplitMix64() call.
// Unlike the single-seed constructor, the two chains never feed each other:
// s0_[i] depends only on s0_[i - 1], and s1_[i] only on s1_[i - 1].
46 | | HWY_MAYBE_UNUSED Xorshift128Plus(const uint32_t seed1, const uint32_t seed2, |
47 | 18.1k | const uint32_t seed3, const uint32_t seed4) { |
48 | | // Init state using SplitMix64 generator |
49 | 18.1k | s0_[0] = SplitMix64(((static_cast<uint64_t>(seed1) << 32) + seed2) + |
50 | 18.1k | 0x9E3779B97F4A7C15ull); |
51 | 18.1k | s1_[0] = SplitMix64(((static_cast<uint64_t>(seed3) << 32) + seed4) + |
52 | 18.1k | 0x9E3779B97F4A7C15ull); |
53 | 145k | for (size_t i = 1; i < N; ++i) { |
54 | 127k | s0_[i] = SplitMix64(s0_[i - 1]); |
55 | 127k | s1_[i] = SplitMix64(s1_[i - 1]); |
56 | 127k | } |
57 | 18.1k | } Unexecuted instantiation: dec_noise.cc:jxl::N_SSE4::(anonymous namespace)::Xorshift128Plus::Xorshift128Plus(unsigned int, unsigned int, unsigned int, unsigned int) dec_noise.cc:jxl::N_AVX2::(anonymous namespace)::Xorshift128Plus::Xorshift128Plus(unsigned int, unsigned int, unsigned int, unsigned int) Line | Count | Source | 47 | 18.1k | const uint32_t seed3, const uint32_t seed4) { | 48 | | // Init state using SplitMix64 generator | 49 | 18.1k | s0_[0] = SplitMix64(((static_cast<uint64_t>(seed1) << 32) + seed2) + | 50 | 18.1k | 0x9E3779B97F4A7C15ull); | 51 | 18.1k | s1_[0] = SplitMix64(((static_cast<uint64_t>(seed3) << 32) + seed4) + | 52 | 18.1k | 0x9E3779B97F4A7C15ull); | 53 | 145k | for (size_t i = 1; i < N; ++i) { | 54 | 127k | s0_[i] = SplitMix64(s0_[i - 1]); | 55 | 127k | s1_[i] = SplitMix64(s1_[i - 1]); | 56 | 127k | } | 57 | 18.1k | } |
Unexecuted instantiation: dec_noise.cc:jxl::N_SSE4::(anonymous namespace)::Xorshift128Plus::Xorshift128Plus(unsigned int, unsigned int, unsigned int, unsigned int) |
58 | | |
// Fill(): writes N 64-bit outputs to random_bits[0..N) and advances every
// generator by one step. Per generator, with a = s0_[i] and b = s1_[i] on entry:
//   output      = a + b
//   new s0_[i]  = b
//   t           = a ^ (a << 23)
//   new s1_[i]  = t ^ b ^ (t >> 18) ^ (b >> 5)
// Naming note: the local called `s1` is loaded from s0_ and the local called
// `s0` from s1_, so the local names are swapped relative to the member arrays.
// When HWY_CAP_INTEGER64 is set the work is done Lanes(d) generators at a time
// with vector Load/Store; otherwise the #else branch performs the same update
// one generator at a time with scalar code.
// NOTE(review): random_bits is written with Store() in the vector branch, which
// presumably requires the caller's buffer to be vector-aligned - confirm
// against the Highway documentation and the call site in dec_noise.cc.
// NOTE(review): in the N_AVX2 run shown, the loop body count (66.0M) is twice
// the call count (33.0M), i.e. two loop iterations per Fill() call.
59 | 33.0M | HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) { |
60 | 33.0M | #if HWY_CAP_INTEGER64 |
61 | 33.0M | const HWY_CAPPED(uint64_t, N) d; |
62 | 99.0M | for (size_t i = 0; i < N; i += Lanes(d)) { |
63 | 66.0M | auto s1 = Load(d, s0_ + i); |
64 | 66.0M | const auto s0 = Load(d, s1_ + i); |
65 | 66.0M | const auto bits = Add(s1, s0); // b, c |
66 | 66.0M | Store(s0, d, s0_ + i); |
67 | 66.0M | s1 = Xor(s1, ShiftLeft<23>(s1)); |
68 | 66.0M | Store(bits, d, random_bits + i); |
69 | 66.0M | s1 = Xor(s1, Xor(s0, Xor(ShiftRight<18>(s1), ShiftRight<5>(s0)))); |
70 | 66.0M | Store(s1, d, s1_ + i); |
71 | 66.0M | } |
72 | | #else |
73 | | for (size_t i = 0; i < N; ++i) { |
74 | | auto s1 = s0_[i]; |
75 | | const auto s0 = s1_[i]; |
76 | | const auto bits = s1 + s0; // b, c |
77 | | s0_[i] = s0; |
78 | | s1 ^= s1 << 23; |
79 | | random_bits[i] = bits; |
80 | | s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); |
81 | | s1_[i] = s1; |
82 | | } |
83 | | #endif |
84 | 33.0M | } Unexecuted instantiation: dec_noise.cc:jxl::N_SSE4::(anonymous namespace)::Xorshift128Plus::Fill(unsigned long*) dec_noise.cc:jxl::N_AVX2::(anonymous namespace)::Xorshift128Plus::Fill(unsigned long*) Line | Count | Source | 59 | 33.0M | HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) { | 60 | 33.0M | #if HWY_CAP_INTEGER64 | 61 | 33.0M | const HWY_CAPPED(uint64_t, N) d; | 62 | 99.0M | for (size_t i = 0; i < N; i += Lanes(d)) { | 63 | 66.0M | auto s1 = Load(d, s0_ + i); | 64 | 66.0M | const auto s0 = Load(d, s1_ + i); | 65 | 66.0M | const auto bits = Add(s1, s0); // b, c | 66 | 66.0M | Store(s0, d, s0_ + i); | 67 | 66.0M | s1 = Xor(s1, ShiftLeft<23>(s1)); | 68 | 66.0M | Store(bits, d, random_bits + i); | 69 | 66.0M | s1 = Xor(s1, Xor(s0, Xor(ShiftRight<18>(s1), ShiftRight<5>(s0)))); | 70 | 66.0M | Store(s1, d, s1_ + i); | 71 | 66.0M | } | 72 | | #else | 73 | | for (size_t i = 0; i < N; ++i) { | 74 | | auto s1 = s0_[i]; | 75 | | const auto s0 = s1_[i]; | 76 | | const auto bits = s1 + s0; // b, c | 77 | | s0_[i] = s0; | 78 | | s1 ^= s1 << 23; | 79 | | random_bits[i] = bits; | 80 | | s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); | 81 | | s1_[i] = s1; | 82 | | } | 83 | | #endif | 84 | 33.0M | } |
Unexecuted instantiation: dec_noise.cc:jxl::N_SSE2::(anonymous namespace)::Xorshift128Plus::Fill(unsigned long*) |
85 | | |
86 | | private: |
// SplitMix64(): stateless 64-bit mixing step, called only from the
// constructors. It applies two xor-shift-then-multiply rounds (shifts of 30
// and 27 bits) and returns the result xored with itself shifted right by 31.
// The 0x9E3779B97F4A7C15 increment is NOT applied here; the constructors add
// it once to the initial seed before the first call.
87 | 290k | static uint64_t SplitMix64(uint64_t z) { |
88 | 290k | z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull; |
89 | 290k | z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; |
90 | 290k | return z ^ (z >> 31); |
91 | 290k | } Unexecuted instantiation: dec_noise.cc:jxl::N_SSE4::(anonymous namespace)::Xorshift128Plus::SplitMix64(unsigned long) dec_noise.cc:jxl::N_AVX2::(anonymous namespace)::Xorshift128Plus::SplitMix64(unsigned long) Line | Count | Source | 87 | 290k | static uint64_t SplitMix64(uint64_t z) { | 88 | 290k | z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull; | 89 | 290k | z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; | 90 | 290k | return z ^ (z >> 31); | 91 | 290k | } |
Unexecuted instantiation: dec_noise.cc:jxl::N_SSE2::(anonymous namespace)::Xorshift128Plus::SplitMix64(unsigned long) |
92 | | |
// Generator state: s0_[i] and s1_[i] together hold the two 64-bit state words
// of generator i. Both arrays are read with Load() and written with Store() in
// the vector branch of Fill(); HWY_ALIGN is presumably what makes those
// accesses valid - confirm against the Highway documentation.
93 | | HWY_ALIGN uint64_t s0_[N]; |
94 | | HWY_ALIGN uint64_t s1_[N]; |
95 | | }; |
96 | | |
97 | | } // namespace |
98 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
99 | | } // namespace HWY_NAMESPACE |
100 | | } // namespace jxl |
101 | | HWY_AFTER_NAMESPACE(); |
102 | | |
103 | | #endif // LIB_JXL_XORSHIFT128PLUS_INL_H_ |