Coverage Report

Created: 2022-08-24 06:33

/src/libjxl/lib/jxl/quantizer-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#if defined(LIB_JXL_QUANTIZER_INL_H_) == defined(HWY_TARGET_TOGGLE)
7
#ifdef LIB_JXL_QUANTIZER_INL_H_
8
#undef LIB_JXL_QUANTIZER_INL_H_
9
#else
10
#define LIB_JXL_QUANTIZER_INL_H_
11
#endif
12
13
#include <stddef.h>
14
15
#include <hwy/highway.h>
16
HWY_BEFORE_NAMESPACE();
17
namespace jxl {
18
namespace HWY_NAMESPACE {
19
namespace {
20
21
// These templates are not found via ADL.
22
using hwy::HWY_NAMESPACE::And;
23
using hwy::HWY_NAMESPACE::AndNot;
24
using hwy::HWY_NAMESPACE::ApproximateReciprocal;
25
using hwy::HWY_NAMESPACE::Gt;
26
using hwy::HWY_NAMESPACE::IfThenElse;
27
using hwy::HWY_NAMESPACE::IfThenElseZero;
28
using hwy::HWY_NAMESPACE::Lt;
29
using hwy::HWY_NAMESPACE::Rebind;
30
using hwy::HWY_NAMESPACE::Vec;
31
using hwy::HWY_NAMESPACE::Xor;
32
33
// Converts the integer quantized values in `quant_i` to float and applies a
// bias correction, per lane:
//   quant == 0   ->  0
//   quant == +1  ->  +biases[c]
//   quant == -1  ->  -biases[c]
//   otherwise    ->  quant - biases[3] / quant  (via an approximate reciprocal)
//
// di:      integer descriptor (tag) for `quant_i`; the float descriptor used
//          internally is derived from it with Rebind.
// c:       index into `biases` for the |quant| == 1 case. Presumably a channel
//          index in 0..2, given that index 3 is used for the general case -
//          TODO confirm against callers.
// quant_i: vector of integer quantized values.
// biases:  must be readable at indices `c` and 3.
// Returns a float vector (Vec<Rebind<float, DI>>) holding the biased values.
template <class DI>
34
HWY_INLINE HWY_MAYBE_UNUSED Vec<Rebind<float, DI>> AdjustQuantBias(
35
    DI di, const size_t c, const Vec<DI> quant_i,
36
34.1M
    const float* HWY_RESTRICT biases) {
37
34.1M
  const Rebind<float, DI> df;  // float descriptor derived from di
38
39
34.1M
  const auto quant = ConvertTo(df, quant_i);
40
41
  // Compare |quant|, keep sign bit for negating result.
42
34.1M
  // INT32_MIN reinterpreted as float: a mask with only the sign bit set
  // (assuming 32-bit lanes).
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
43
34.1M
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
44
34.1M
  // AndNot(a, b) is ~a & b, so this clears the sign bit of quant.
  const auto abs_quant = AndNot(kSign, quant);
45
46
  // If |x| is 1, kZeroBias creates a different bias for each channel.
47
  // We're implementing the following:
48
  // if (quant == 0) return 0;
49
  // if (quant == 1) return biases[c];
50
  // if (quant == -1) return -biases[c];
51
  // return quant - biases[3] / quant;
52
53
  // Integer comparison is not helpful because Clang incurs bypass penalties
54
  // from unnecessarily mixing integer and float.
55
34.1M
  // quant holds converted integers, so |quant| < 1.125 selects exactly the
  // lanes where quant is -1, 0 or +1.
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
56
34.1M
  const auto not_0 = Gt(abs_quant, Zero(df));
57
58
  // Bitwise logic is faster than quant * biases[c].
59
34.1M
  // XOR with `sign` flips the sign bit of biases[c] where quant is negative;
  // lanes with quant == 0 are forced to zero.
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
60
61
  // About 2E-5 worse than ReciprocalNR or division.
62
34.1M
  // NegMulAdd(a, b, c) is c - a * b, i.e. quant - biases[3] * (1 / quant).
  // This is evaluated for every lane, including quant == 0; those lanes are
  // replaced by one_bias in the select below.
  const auto bias =
63
34.1M
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
64
65
34.1M
  // Lanes with |quant| <= 1 take the special-case value, the rest the
  // general formula.
  return IfThenElse(is_01, one_bias, bias);
66
34.1M
}
dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*)
Line
Count
Source
36
10.5M
    const float* HWY_RESTRICT biases) {
37
10.5M
  const Rebind<float, DI> df;
38
39
10.5M
  const auto quant = ConvertTo(df, quant_i);
40
41
  // Compare |quant|, keep sign bit for negating result.
42
10.5M
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
43
10.5M
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
44
10.5M
  const auto abs_quant = AndNot(kSign, quant);
45
46
  // If |x| is 1, kZeroBias creates a different bias for each channel.
47
  // We're implementing the following:
48
  // if (quant == 0) return 0;
49
  // if (quant == 1) return biases[c];
50
  // if (quant == -1) return -biases[c];
51
  // return quant - biases[3] / quant;
52
53
  // Integer comparison is not helpful because Clang incurs bypass penalties
54
  // from unnecessarily mixing integer and float.
55
10.5M
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
56
10.5M
  const auto not_0 = Gt(abs_quant, Zero(df));
57
58
  // Bitwise logic is faster than quant * biases[c].
59
10.5M
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
60
61
  // About 2E-5 worse than ReciprocalNR or division.
62
10.5M
  const auto bias =
63
10.5M
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
64
65
10.5M
  return IfThenElse(is_01, one_bias, bias);
66
10.5M
}
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*)
Line
Count
Source
36
8.17M
    const float* HWY_RESTRICT biases) {
37
8.17M
  const Rebind<float, DI> df;
38
39
8.17M
  const auto quant = ConvertTo(df, quant_i);
40
41
  // Compare |quant|, keep sign bit for negating result.
42
8.17M
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
43
8.17M
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
44
8.17M
  const auto abs_quant = AndNot(kSign, quant);
45
46
  // If |x| is 1, kZeroBias creates a different bias for each channel.
47
  // We're implementing the following:
48
  // if (quant == 0) return 0;
49
  // if (quant == 1) return biases[c];
50
  // if (quant == -1) return -biases[c];
51
  // return quant - biases[3] / quant;
52
53
  // Integer comparison is not helpful because Clang incurs bypass penalties
54
  // from unnecessarily mixing integer and float.
55
8.17M
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
56
8.17M
  const auto not_0 = Gt(abs_quant, Zero(df));
57
58
  // Bitwise logic is faster than quant * biases[c].
59
8.17M
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
60
61
  // About 2E-5 worse than ReciprocalNR or division.
62
8.17M
  const auto bias =
63
8.17M
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
64
65
8.17M
  return IfThenElse(is_01, one_bias, bias);
66
8.17M
}
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3::Simd<int, 16ul, 0> >(hwy::N_AVX3::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>)())), float const*)
dec_group.cc:decltype (Zero((hwy::N_EMU128::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_EMU128::(anonymous namespace)::AdjustQuantBias<hwy::N_EMU128::Simd<int, 4ul, 0> >(hwy::N_EMU128::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_EMU128::Simd<int, 4ul, 0>)())), float const*)
Line
Count
Source
36
15.4M
    const float* HWY_RESTRICT biases) {
37
15.4M
  const Rebind<float, DI> df;
38
39
15.4M
  const auto quant = ConvertTo(df, quant_i);
40
41
  // Compare |quant|, keep sign bit for negating result.
42
15.4M
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
43
15.4M
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
44
15.4M
  const auto abs_quant = AndNot(kSign, quant);
45
46
  // If |x| is 1, kZeroBias creates a different bias for each channel.
47
  // We're implementing the following:
48
  // if (quant == 0) return 0;
49
  // if (quant == 1) return biases[c];
50
  // if (quant == -1) return -biases[c];
51
  // return quant - biases[3] / quant;
52
53
  // Integer comparison is not helpful because Clang incurs bypass penalties
54
  // from unnecessarily mixing integer and float.
55
15.4M
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
56
15.4M
  const auto not_0 = Gt(abs_quant, Zero(df));
57
58
  // Bitwise logic is faster than quant * biases[c].
59
15.4M
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
60
61
  // About 2E-5 worse than ReciprocalNR or division.
62
15.4M
  const auto bias =
63
15.4M
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
64
65
15.4M
  return IfThenElse(is_01, one_bias, bias);
66
15.4M
}
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*)
enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*)
Line
Count
Source
36
736
    const float* HWY_RESTRICT biases) {
37
736
  const Rebind<float, DI> df;
38
39
736
  const auto quant = ConvertTo(df, quant_i);
40
41
  // Compare |quant|, keep sign bit for negating result.
42
736
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
43
736
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
44
736
  const auto abs_quant = AndNot(kSign, quant);
45
46
  // If |x| is 1, kZeroBias creates a different bias for each channel.
47
  // We're implementing the following:
48
  // if (quant == 0) return 0;
49
  // if (quant == 1) return biases[c];
50
  // if (quant == -1) return -biases[c];
51
  // return quant - biases[3] / quant;
52
53
  // Integer comparison is not helpful because Clang incurs bypass penalties
54
  // from unnecessarily mixing integer and float.
55
736
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
56
736
  const auto not_0 = Gt(abs_quant, Zero(df));
57
58
  // Bitwise logic is faster than quant * biases[c].
59
736
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
60
61
  // About 2E-5 worse than ReciprocalNR or division.
62
736
  const auto bias =
63
736
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
64
65
736
  return IfThenElse(is_01, one_bias, bias);
66
736
}
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3::Simd<int, 16ul, 0> >(hwy::N_AVX3::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>)())), float const*)
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_EMU128::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_EMU128::(anonymous namespace)::AdjustQuantBias<hwy::N_EMU128::Simd<int, 4ul, 0> >(hwy::N_EMU128::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_EMU128::Simd<int, 4ul, 0>)())), float const*)
67
68
}  // namespace
69
// NOLINTNEXTLINE(google-readability-namespace-comments)
70
}  // namespace HWY_NAMESPACE
71
}  // namespace jxl
72
HWY_AFTER_NAMESPACE();
73
74
#endif  // LIB_JXL_QUANTIZER_INL_H_