/src/libjxl/lib/jxl/quantizer-inl.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include <cstddef> |
7 | | #include <cstdint> |
8 | | |
9 | | #if defined(LIB_JXL_QUANTIZER_INL_H_) == defined(HWY_TARGET_TOGGLE) |
10 | | #ifdef LIB_JXL_QUANTIZER_INL_H_ |
11 | | #undef LIB_JXL_QUANTIZER_INL_H_ |
12 | | #else |
13 | | #define LIB_JXL_QUANTIZER_INL_H_ |
14 | | #endif |
15 | | |
16 | | #include <hwy/highway.h> |
17 | | HWY_BEFORE_NAMESPACE(); |
18 | | namespace jxl { |
19 | | namespace HWY_NAMESPACE { |
20 | | namespace { |
21 | | |
22 | | // These templates are not found via ADL. |
23 | | using hwy::HWY_NAMESPACE::And; |
24 | | using hwy::HWY_NAMESPACE::AndNot; |
25 | | using hwy::HWY_NAMESPACE::ApproximateReciprocal; |
26 | | using hwy::HWY_NAMESPACE::Gt; |
27 | | using hwy::HWY_NAMESPACE::IfThenElse; |
28 | | using hwy::HWY_NAMESPACE::IfThenElseZero; |
29 | | using hwy::HWY_NAMESPACE::Lt; |
30 | | using hwy::HWY_NAMESPACE::Rebind; |
31 | | using hwy::HWY_NAMESPACE::Vec; |
32 | | using hwy::HWY_NAMESPACE::Xor; |
33 | | |
// AdjustQuantBias: converts the integer lanes of `quant_i` to float and applies
// a per-lane bias. Lanes equal to 0 yield 0, lanes equal to +-1 yield
// +-biases[c], and every other lane q yields approximately q - biases[3] / q
// (the division is done with ApproximateReciprocal).
//   di      - integer descriptor tag; the float tag used here is Rebind<float, DI>.
//   c       - index into `biases` for the +-1 case (a channel, per the comment in the body).
//   quant_i - integer vector to adjust.
//   biases  - array from which biases[c] and biases[3] are read.
// Returns the adjusted values as a Vec<Rebind<float, DI>>.
34 | | template <class DI> |
35 | | HWY_INLINE HWY_MAYBE_UNUSED Vec<Rebind<float, DI>> AdjustQuantBias( |
36 | | DI di, const size_t c, const Vec<DI> quant_i, |
37 | 34.1M | const float* HWY_RESTRICT biases) { |
38 | 34.1M | const Rebind<float, DI> df; |
39 | | |
40 | 34.1M | const auto quant = ConvertTo(df, quant_i); |
41 | | |
42 | | // Split quant into magnitude and sign bit: |quant| is compared below, the sign bit is kept for negating the result. |
43 | 34.1M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); |
44 | 34.1M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig, i.e. Xor(abs_quant, quant) |
45 | 34.1M | const auto abs_quant = AndNot(kSign, quant); |
46 | | |
47 | | // If |quant| is 1, a different bias is used for each channel c (kZeroBias in the original note; it is not defined in this header). |
48 | | // We're implementing the following: |
49 | | // if (quant == 0) return 0; |
50 | | // if (quant == 1) return biases[c]; |
51 | | // if (quant == -1) return -biases[c]; |
52 | | // return quant - biases[3] / quant; |
53 | | |
54 | | // Integer comparison is not helpful because Clang incurs bypass penalties |
55 | | // from unnecessarily mixing integer and float. quant holds integer values, so |quant| < 1.125 is true exactly for 0 and +-1. |
56 | 34.1M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); |
57 | 34.1M | const auto not_0 = Gt(abs_quant, Zero(df)); |
58 | | |
59 | | // Bitwise logic is faster than quant * biases[c]: XOR with the sign bit negates biases[c] in negative lanes; lanes with quant == 0 are zeroed. |
60 | 34.1M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); |
61 | | |
62 | | // About 2E-5 worse than ReciprocalNR or division. Lanes with quant == 0 satisfy is_01, so their reciprocal is discarded by the final IfThenElse. |
63 | 34.1M | const auto bias = |
64 | 34.1M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); |
65 | | |
66 | 34.1M | return IfThenElse(is_01, one_bias, bias); |
67 | 34.1M | } Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*) enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*) Line | Count | Source | 37 | 3.18M | const float* HWY_RESTRICT biases) { | 38 | 3.18M | const Rebind<float, DI> df; | 39 | | | 40 | 3.18M | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 3.18M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 3.18M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 3.18M | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. | 56 | 3.18M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 3.18M | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 3.18M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. 
| 63 | 3.18M | const auto bias = | 64 | 3.18M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 3.18M | return IfThenElse(is_01, one_bias, bias); | 67 | 3.18M | } |
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*) Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*) dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*) Line | Count | Source | 37 | 30.9M | const float* HWY_RESTRICT biases) { | 38 | 30.9M | const Rebind<float, DI> df; | 39 | | | 40 | 30.9M | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 30.9M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 30.9M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 30.9M | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. 
| 56 | 30.9M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 30.9M | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 30.9M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. | 63 | 30.9M | const auto bias = | 64 | 30.9M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 30.9M | return IfThenElse(is_01, one_bias, bias); | 67 | 30.9M | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*) |
68 | | |
69 | | } // namespace |
70 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
71 | | } // namespace HWY_NAMESPACE |
72 | | } // namespace jxl |
73 | | HWY_AFTER_NAMESPACE(); |
74 | | |
75 | | #endif // LIB_JXL_QUANTIZER_INL_H_ |