Coverage Report

Created: 2025-07-23 08:18

/src/libjxl/lib/jxl/quantizer-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstddef>
7
#include <cstdint>
8
9
#if defined(LIB_JXL_QUANTIZER_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_QUANTIZER_INL_H_
11
#undef LIB_JXL_QUANTIZER_INL_H_
12
#else
13
#define LIB_JXL_QUANTIZER_INL_H_
14
#endif
15
16
#include <hwy/highway.h>
17
HWY_BEFORE_NAMESPACE();
18
namespace jxl {
19
namespace HWY_NAMESPACE {
20
namespace {
21
22
// These templates are not found via ADL.
23
using hwy::HWY_NAMESPACE::And;
24
using hwy::HWY_NAMESPACE::AndNot;
25
using hwy::HWY_NAMESPACE::ApproximateReciprocal;
26
using hwy::HWY_NAMESPACE::Gt;
27
using hwy::HWY_NAMESPACE::IfThenElse;
28
using hwy::HWY_NAMESPACE::IfThenElseZero;
29
using hwy::HWY_NAMESPACE::Lt;
30
using hwy::HWY_NAMESPACE::Rebind;
31
using hwy::HWY_NAMESPACE::Vec;
32
using hwy::HWY_NAMESPACE::Xor;
33
34
template <class DI>
35
HWY_INLINE HWY_MAYBE_UNUSED Vec<Rebind<float, DI>> AdjustQuantBias(
36
    DI di, const size_t c, const Vec<DI> quant_i,
37
235M
    const float* HWY_RESTRICT biases) {
38
235M
  const Rebind<float, DI> df;
39
40
235M
  const auto quant = ConvertTo(df, quant_i);
41
42
  // Compare |quant|, keep sign bit for negating result.
43
235M
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
44
235M
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
45
235M
  const auto abs_quant = AndNot(kSign, quant);
46
47
  // If |x| is 1, kZeroBias creates a different bias for each channel.
48
  // We're implementing the following:
49
  // if (quant == 0) return 0;
50
  // if (quant == 1) return biases[c];
51
  // if (quant == -1) return -biases[c];
52
  // return quant - biases[3] / quant;
53
54
  // Integer comparison is not helpful because Clang incurs bypass penalties
55
  // from unnecessarily mixing integer and float.
56
235M
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
57
235M
  const auto not_0 = Gt(abs_quant, Zero(df));
58
59
  // Bitwise logic is faster than quant * biases[c].
60
235M
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
61
62
  // About 2E-5 worse than ReciprocalNR or division.
63
235M
  const auto bias =
64
235M
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
65
66
235M
  return IfThenElse(is_01, one_bias, bias);
67
235M
}
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*)
enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*)
Line
Count
Source
37
23.1M
    const float* HWY_RESTRICT biases) {
38
23.1M
  const Rebind<float, DI> df;
39
40
23.1M
  const auto quant = ConvertTo(df, quant_i);
41
42
  // Compare |quant|, keep sign bit for negating result.
43
23.1M
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
44
23.1M
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
45
23.1M
  const auto abs_quant = AndNot(kSign, quant);
46
47
  // If |x| is 1, kZeroBias creates a different bias for each channel.
48
  // We're implementing the following:
49
  // if (quant == 0) return 0;
50
  // if (quant == 1) return biases[c];
51
  // if (quant == -1) return -biases[c];
52
  // return quant - biases[3] / quant;
53
54
  // Integer comparison is not helpful because Clang incurs bypass penalties
55
  // from unnecessarily mixing integer and float.
56
23.1M
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
57
23.1M
  const auto not_0 = Gt(abs_quant, Zero(df));
58
59
  // Bitwise logic is faster than quant * biases[c].
60
23.1M
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
61
62
  // About 2E-5 worse than ReciprocalNR or division.
63
23.1M
  const auto bias =
64
23.1M
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
65
66
23.1M
  return IfThenElse(is_01, one_bias, bias);
67
23.1M
}
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3::Simd<int, 16ul, 0> >(hwy::N_AVX3::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>)())), float const*)
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_ZEN4::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0> >(hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>)())), float const*)
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_SPR::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_SPR::Simd<int, 16ul, 0> >(hwy::N_AVX3_SPR::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>)())), float const*)
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*)
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*)
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*)
Line
Count
Source
37
211M
    const float* HWY_RESTRICT biases) {
38
211M
  const Rebind<float, DI> df;
39
40
211M
  const auto quant = ConvertTo(df, quant_i);
41
42
  // Compare |quant|, keep sign bit for negating result.
43
211M
  const auto kSign = BitCast(df, Set(di, INT32_MIN));
44
211M
  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
45
211M
  const auto abs_quant = AndNot(kSign, quant);
46
47
  // If |x| is 1, kZeroBias creates a different bias for each channel.
48
  // We're implementing the following:
49
  // if (quant == 0) return 0;
50
  // if (quant == 1) return biases[c];
51
  // if (quant == -1) return -biases[c];
52
  // return quant - biases[3] / quant;
53
54
  // Integer comparison is not helpful because Clang incurs bypass penalties
55
  // from unnecessarily mixing integer and float.
56
211M
  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
57
211M
  const auto not_0 = Gt(abs_quant, Zero(df));
58
59
  // Bitwise logic is faster than quant * biases[c].
60
211M
  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
61
62
  // About 2E-5 worse than ReciprocalNR or division.
63
211M
  const auto bias =
64
211M
      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
65
66
211M
  return IfThenElse(is_01, one_bias, bias);
67
211M
}
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3::Simd<int, 16ul, 0> >(hwy::N_AVX3::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>)())), float const*)
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_ZEN4::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0> >(hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>)())), float const*)
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_SPR::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_SPR::Simd<int, 16ul, 0> >(hwy::N_AVX3_SPR::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>)())), float const*)
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*)
68
69
}  // namespace
70
// NOLINTNEXTLINE(google-readability-namespace-comments)
71
}  // namespace HWY_NAMESPACE
72
}  // namespace jxl
73
HWY_AFTER_NAMESPACE();
74
75
#endif  // LIB_JXL_QUANTIZER_INL_H_