Coverage Report

Created: 2025-11-16 07:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/splines.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/splines.h"
7
8
#include <jxl/memory_manager.h>
9
10
#include <algorithm>
11
#include <cinttypes>  // PRIu64
12
#include <cmath>
13
#include <cstddef>
14
#include <cstdint>
15
#include <cstring>
16
#include <limits>
17
#include <utility>
18
#include <vector>
19
20
#include "lib/jxl/base/bits.h"
21
#include "lib/jxl/base/common.h"
22
#include "lib/jxl/base/printf_macros.h"
23
#include "lib/jxl/base/rect.h"
24
#include "lib/jxl/base/status.h"
25
#include "lib/jxl/chroma_from_luma.h"
26
#include "lib/jxl/common.h"  // JXL_HIGH_PRECISION
27
#include "lib/jxl/dct_scales.h"
28
#include "lib/jxl/dec_ans.h"
29
#include "lib/jxl/dec_bit_reader.h"
30
#include "lib/jxl/image.h"
31
#include "lib/jxl/pack_signed.h"
32
33
#undef HWY_TARGET_INCLUDE
34
#define HWY_TARGET_INCLUDE "lib/jxl/splines.cc"
35
#include <hwy/foreach_target.h>
36
#include <hwy/highway.h>
37
38
#include "lib/jxl/base/fast_math-inl.h"
39
HWY_BEFORE_NAMESPACE();
40
namespace jxl {
41
namespace HWY_NAMESPACE {
42
namespace {
43
44
// These templates are not found via ADL.
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::MulAdd;
47
using hwy::HWY_NAMESPACE::MulSub;
48
using hwy::HWY_NAMESPACE::Sqrt;
49
using hwy::HWY_NAMESPACE::Sub;
50
51
// Given a set of DCT coefficients, this returns the result of performing cosine
52
// interpolation on the original samples.
53
85.6M
float ContinuousIDCT(const Dct32& dct, const float t) {
54
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
55
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
56
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
57
  // treatment of all the coefficients.
58
85.6M
  constexpr float kMultipliers[32] = {
59
85.6M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
60
85.6M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
61
85.6M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
62
85.6M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
63
85.6M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
64
85.6M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
65
85.6M
      kPi / 32 * 30, kPi / 32 * 31,
66
85.6M
  };
67
85.6M
  HWY_CAPPED(float, 32) df;
68
85.6M
  auto result = Zero(df);
69
85.6M
  const auto tandhalf = Set(df, t + 0.5f);
70
428M
  for (int i = 0; i < 32; i += Lanes(df)) {
71
342M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
72
342M
    auto cos = FastCosf(df, cos_arg);
73
342M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
74
342M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
75
342M
  }
76
85.6M
  return GetLane(SumOfLanes(df, result));
77
85.6M
}
Unexecuted instantiation: splines.cc:jxl::N_AVX3_SPR::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Unexecuted instantiation: splines.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Unexecuted instantiation: splines.cc:jxl::N_AVX3::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
splines.cc:jxl::N_AVX2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Line
Count
Source
53
85.6M
float ContinuousIDCT(const Dct32& dct, const float t) {
54
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
55
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
56
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
57
  // treatment of all the coefficients.
58
85.6M
  constexpr float kMultipliers[32] = {
59
85.6M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
60
85.6M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
61
85.6M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
62
85.6M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
63
85.6M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
64
85.6M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
65
85.6M
      kPi / 32 * 30, kPi / 32 * 31,
66
85.6M
  };
67
85.6M
  HWY_CAPPED(float, 32) df;
68
85.6M
  auto result = Zero(df);
69
85.6M
  const auto tandhalf = Set(df, t + 0.5f);
70
428M
  for (int i = 0; i < 32; i += Lanes(df)) {
71
342M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
72
342M
    auto cos = FastCosf(df, cos_arg);
73
342M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
74
342M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
75
342M
  }
76
85.6M
  return GetLane(SumOfLanes(df, result));
77
85.6M
}
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
78
79
template <typename DF>
80
void DrawSegment(DF df, const SplineSegment& segment, const bool add,
81
                 const size_t y, const size_t x, const size_t x0,
82
1.05M
                 float* JXL_RESTRICT rows[3]) {
83
1.05M
  Rebind<int32_t, DF> di;
84
1.05M
  const auto inv_sigma = Set(df, segment.inv_sigma);
85
1.05M
  const auto half = Set(df, 0.5f);
86
1.05M
  const auto one_over_2s2 = Set(df, 0.353553391f);
87
1.05M
  const auto sigma_over_4_times_intensity =
88
1.05M
      Set(df, segment.sigma_over_4_times_intensity);
89
1.05M
  const auto dx =
90
1.05M
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
91
1.05M
  const auto dy = Set(df, y - segment.center_y);
92
1.05M
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
93
1.05M
  const auto distance = Sqrt(sqd);
94
1.05M
  const auto one_dimensional_factor =
95
1.05M
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
96
1.05M
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
97
1.05M
  auto local_intensity =
98
1.05M
      Mul(sigma_over_4_times_intensity,
99
1.05M
          Mul(one_dimensional_factor, one_dimensional_factor));
100
4.20M
  for (size_t c = 0; c < 3; ++c) {
101
3.15M
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
102
3.15M
    const auto in = LoadU(df, rows[c] + x);
103
3.15M
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
104
3.15M
  }
105
1.05M
}
Unexecuted instantiation: splines.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DrawSegment<hwy::N_AVX3_SPR::Simd<float, 16ul, 0> >(hwy::N_AVX3_SPR::Simd<float, 16ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DrawSegment<hwy::N_AVX3_SPR::Simd<float, 1ul, 0> >(hwy::N_AVX3_SPR::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DrawSegment<hwy::N_AVX3_ZEN4::Simd<float, 16ul, 0> >(hwy::N_AVX3_ZEN4::Simd<float, 16ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DrawSegment<hwy::N_AVX3_ZEN4::Simd<float, 1ul, 0> >(hwy::N_AVX3_ZEN4::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_AVX3::(anonymous namespace)::DrawSegment<hwy::N_AVX3::Simd<float, 16ul, 0> >(hwy::N_AVX3::Simd<float, 16ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_AVX3::(anonymous namespace)::DrawSegment<hwy::N_AVX3::Simd<float, 1ul, 0> >(hwy::N_AVX3::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
82
861k
                 float* JXL_RESTRICT rows[3]) {
83
861k
  Rebind<int32_t, DF> di;
84
861k
  const auto inv_sigma = Set(df, segment.inv_sigma);
85
861k
  const auto half = Set(df, 0.5f);
86
861k
  const auto one_over_2s2 = Set(df, 0.353553391f);
87
861k
  const auto sigma_over_4_times_intensity =
88
861k
      Set(df, segment.sigma_over_4_times_intensity);
89
861k
  const auto dx =
90
861k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
91
861k
  const auto dy = Set(df, y - segment.center_y);
92
861k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
93
861k
  const auto distance = Sqrt(sqd);
94
861k
  const auto one_dimensional_factor =
95
861k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
96
861k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
97
861k
  auto local_intensity =
98
861k
      Mul(sigma_over_4_times_intensity,
99
861k
          Mul(one_dimensional_factor, one_dimensional_factor));
100
3.44M
  for (size_t c = 0; c < 3; ++c) {
101
2.58M
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
102
2.58M
    const auto in = LoadU(df, rows[c] + x);
103
2.58M
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
104
2.58M
  }
105
861k
}
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
82
189k
                 float* JXL_RESTRICT rows[3]) {
83
189k
  Rebind<int32_t, DF> di;
84
189k
  const auto inv_sigma = Set(df, segment.inv_sigma);
85
189k
  const auto half = Set(df, 0.5f);
86
189k
  const auto one_over_2s2 = Set(df, 0.353553391f);
87
189k
  const auto sigma_over_4_times_intensity =
88
189k
      Set(df, segment.sigma_over_4_times_intensity);
89
189k
  const auto dx =
90
189k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
91
189k
  const auto dy = Set(df, y - segment.center_y);
92
189k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
93
189k
  const auto distance = Sqrt(sqd);
94
189k
  const auto one_dimensional_factor =
95
189k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
96
189k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
97
189k
  auto local_intensity =
98
189k
      Mul(sigma_over_4_times_intensity,
99
189k
          Mul(one_dimensional_factor, one_dimensional_factor));
100
759k
  for (size_t c = 0; c < 3; ++c) {
101
569k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
102
569k
    const auto in = LoadU(df, rows[c] + x);
103
569k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
104
569k
  }
105
189k
}
Unexecuted instantiation: splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
106
107
void DrawSegment(const SplineSegment& segment, const bool add, const size_t y,
108
                 const size_t x0, const size_t x1,
109
1.68M
                 float* JXL_RESTRICT rows[3]) {
110
1.68M
  ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance);
111
1.68M
  ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance);
112
1.68M
  if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) {
113
1.57M
    return;  // span does not intersect scan
114
1.57M
  }
115
112k
  size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0;
116
112k
  size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0;  // exclusive
117
112k
  HWY_FULL(float) df;
118
112k
  size_t x = span_x0;
119
973k
  for (; x + Lanes(df) <= span_x1; x += Lanes(df)) {
120
861k
    DrawSegment(df, segment, add, y, x, x0, rows);
121
861k
  }
122
302k
  for (; x < span_x1; ++x) {
123
189k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows);
124
189k
  }
125
112k
}
Unexecuted instantiation: splines.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:jxl::N_AVX3::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
109
1.68M
                 float* JXL_RESTRICT rows[3]) {
110
1.68M
  ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance);
111
1.68M
  ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance);
112
1.68M
  if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) {
113
1.57M
    return;  // span does not intersect scan
114
1.57M
  }
115
112k
  size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0;
116
112k
  size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0;  // exclusive
117
112k
  HWY_FULL(float) df;
118
112k
  size_t x = span_x0;
119
973k
  for (; x + Lanes(df) <= span_x1; x += Lanes(df)) {
120
861k
    DrawSegment(df, segment, add, y, x, x0, rows);
121
861k
  }
122
302k
  for (; x < span_x1; ++x) {
123
189k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows);
124
189k
  }
125
112k
}
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
126
127
void ComputeSegments(const Spline::Point& center, const float intensity,
128
                     const float color[3], const float sigma,
129
                     std::vector<SplineSegment>& segments,
130
21.4M
                     std::vector<std::pair<size_t, size_t>>& segments_by_y) {
131
  // Sanity check sigma, inverse sigma and intensity
132
21.4M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
133
21.3M
        std::isfinite(intensity))) {
134
30.7k
    return;
135
30.7k
  }
136
21.3M
#if JXL_HIGH_PRECISION
137
21.3M
  constexpr float kDistanceExp = 5;
138
#else
139
  // About 30% faster.
140
  constexpr float kDistanceExp = 3;
141
#endif
142
  // We cap from below colors to at least 0.01.
143
21.3M
  float max_color = 0.01f;
144
85.5M
  for (size_t c = 0; c < 3; c++) {
145
64.1M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
146
64.1M
  }
147
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
148
  // below 10^-kDistanceExp.
149
21.3M
  const float maximum_distance =
150
21.3M
      std::sqrt(-2.0f * sigma * sigma *
151
21.3M
                (std::log(0.1f) * kDistanceExp - std::log(max_color)));
152
21.3M
  SplineSegment segment;
153
21.3M
  segment.center_y = center.y;
154
21.3M
  segment.center_x = center.x;
155
21.3M
  memcpy(segment.color, color, sizeof(segment.color));
156
21.3M
  segment.inv_sigma = 1.0f / sigma;
157
21.3M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
158
21.3M
  segment.maximum_distance = maximum_distance;
159
21.3M
  ptrdiff_t y0 = std::llround(center.y - maximum_distance);
160
21.3M
  ptrdiff_t y1 =
161
21.3M
      std::llround(center.y + maximum_distance) + 1;  // one-past-the-end
162
128M
  for (ptrdiff_t y = std::max<ptrdiff_t>(y0, 0); y < y1; y++) {
163
106M
    segments_by_y.emplace_back(y, segments.size());
164
106M
  }
165
21.3M
  segments.push_back(segment);
166
21.3M
}
Unexecuted instantiation: splines.cc:jxl::N_AVX3_SPR::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Unexecuted instantiation: splines.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Unexecuted instantiation: splines.cc:jxl::N_AVX3::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
splines.cc:jxl::N_AVX2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
130
21.4M
                     std::vector<std::pair<size_t, size_t>>& segments_by_y) {
131
  // Sanity check sigma, inverse sigma and intensity
132
21.4M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
133
21.3M
        std::isfinite(intensity))) {
134
30.7k
    return;
135
30.7k
  }
136
21.3M
#if JXL_HIGH_PRECISION
137
21.3M
  constexpr float kDistanceExp = 5;
138
#else
139
  // About 30% faster.
140
  constexpr float kDistanceExp = 3;
141
#endif
142
  // We cap from below colors to at least 0.01.
143
21.3M
  float max_color = 0.01f;
144
85.5M
  for (size_t c = 0; c < 3; c++) {
145
64.1M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
146
64.1M
  }
147
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
148
  // below 10^-kDistanceExp.
149
21.3M
  const float maximum_distance =
150
21.3M
      std::sqrt(-2.0f * sigma * sigma *
151
21.3M
                (std::log(0.1f) * kDistanceExp - std::log(max_color)));
152
21.3M
  SplineSegment segment;
153
21.3M
  segment.center_y = center.y;
154
21.3M
  segment.center_x = center.x;
155
21.3M
  memcpy(segment.color, color, sizeof(segment.color));
156
21.3M
  segment.inv_sigma = 1.0f / sigma;
157
21.3M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
158
21.3M
  segment.maximum_distance = maximum_distance;
159
21.3M
  ptrdiff_t y0 = std::llround(center.y - maximum_distance);
160
21.3M
  ptrdiff_t y1 =
161
21.3M
      std::llround(center.y + maximum_distance) + 1;  // one-past-the-end
162
128M
  for (ptrdiff_t y = std::max<ptrdiff_t>(y0, 0); y < y1; y++) {
163
106M
    segments_by_y.emplace_back(y, segments.size());
164
106M
  }
165
21.3M
  segments.push_back(segment);
166
21.3M
}
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
167
168
void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
169
                  float* JXL_RESTRICT row_b, size_t y, size_t x0, size_t x1,
170
                  const bool add, const SplineSegment* segments,
171
                  const size_t* segment_indices,
172
12.1k
                  const size_t* segment_y_start) {
173
12.1k
  float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b};
174
1.70M
  for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
175
1.68M
    DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows);
176
1.68M
  }
177
12.1k
}
Unexecuted instantiation: splines.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
Unexecuted instantiation: splines.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
Unexecuted instantiation: splines.cc:jxl::N_AVX3::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
Line
Count
Source
172
12.1k
                  const size_t* segment_y_start) {
173
12.1k
  float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b};
174
1.70M
  for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
175
1.68M
    DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows);
176
1.68M
  }
177
12.1k
}
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
178
179
void SegmentsFromPoints(
180
    const Spline& spline,
181
    const std::vector<std::pair<Spline::Point, float>>& points_to_draw,
182
    const float arc_length, std::vector<SplineSegment>& segments,
183
41.3k
    std::vector<std::pair<size_t, size_t>>& segments_by_y) {
184
41.3k
  const float inv_arc_length = 1.0f / arc_length;
185
41.3k
  int k = 0;
186
21.4M
  for (const auto& point_to_draw : points_to_draw) {
187
21.4M
    const Spline::Point& point = point_to_draw.first;
188
21.4M
    const float multiplier = point_to_draw.second;
189
21.4M
    const float progress_along_arc =
190
21.4M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
191
21.4M
    ++k;
192
21.4M
    float color[3];
193
85.6M
    for (size_t c = 0; c < 3; ++c) {
194
64.2M
      color[c] =
195
64.2M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
196
64.2M
    }
197
21.4M
    const float sigma =
198
21.4M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
199
21.4M
    ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y);
200
21.4M
  }
201
41.3k
}
Unexecuted instantiation: splines.cc:jxl::N_AVX3_SPR::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Unexecuted instantiation: splines.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Unexecuted instantiation: splines.cc:jxl::N_AVX3::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
splines.cc:jxl::N_AVX2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
183
41.3k
    std::vector<std::pair<size_t, size_t>>& segments_by_y) {
184
41.3k
  const float inv_arc_length = 1.0f / arc_length;
185
41.3k
  int k = 0;
186
21.4M
  for (const auto& point_to_draw : points_to_draw) {
187
21.4M
    const Spline::Point& point = point_to_draw.first;
188
21.4M
    const float multiplier = point_to_draw.second;
189
21.4M
    const float progress_along_arc =
190
21.4M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
191
21.4M
    ++k;
192
21.4M
    float color[3];
193
85.6M
    for (size_t c = 0; c < 3; ++c) {
194
64.2M
      color[c] =
195
64.2M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
196
64.2M
    }
197
21.4M
    const float sigma =
198
21.4M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
199
21.4M
    ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y);
200
21.4M
  }
201
41.3k
}
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
202
}  // namespace
203
// NOLINTNEXTLINE(google-readability-namespace-comments)
204
}  // namespace HWY_NAMESPACE
205
}  // namespace jxl
206
HWY_AFTER_NAMESPACE();
207
208
#if HWY_ONCE
209
namespace jxl {
210
HWY_EXPORT(SegmentsFromPoints);
211
HWY_EXPORT(DrawSegments);
212
213
namespace {
214
215
// It is not in spec, but reasonable limit to avoid overflows.
216
template <typename T>
217
1.30M
Status ValidateSplinePointPos(const T& x, const T& y) {
218
1.30M
  constexpr T kSplinePosLimit = 1u << 23;
219
1.30M
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
220
1.30M
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
221
77
    return JXL_FAILURE("Spline coordinates out of bounds");
222
77
  }
223
1.30M
  return true;
224
1.30M
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<long>(long const&, long const&)
Line
Count
Source
217
1.07M
Status ValidateSplinePointPos(const T& x, const T& y) {
218
1.07M
  constexpr T kSplinePosLimit = 1u << 23;
219
1.07M
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
220
1.07M
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
221
45
    return JXL_FAILURE("Spline coordinates out of bounds");
222
45
  }
223
1.07M
  return true;
224
1.07M
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<float>(float const&, float const&)
Line
Count
Source
217
47.6k
Status ValidateSplinePointPos(const T& x, const T& y) {
218
47.6k
  constexpr T kSplinePosLimit = 1u << 23;
219
47.6k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
220
47.6k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
221
0
    return JXL_FAILURE("Spline coordinates out of bounds");
222
0
  }
223
47.6k
  return true;
224
47.6k
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<int>(int const&, int const&)
Line
Count
Source
217
186k
Status ValidateSplinePointPos(const T& x, const T& y) {
218
186k
  constexpr T kSplinePosLimit = 1u << 23;
219
186k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
220
186k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
221
32
    return JXL_FAILURE("Spline coordinates out of bounds");
222
32
  }
223
186k
  return true;
224
186k
}
225
226
// Maximum number of spline control points per frame is
227
//   std::min(kMaxNumControlPoints, xsize * ysize / 2)
228
constexpr size_t kMaxNumControlPoints = 1u << 20u;
229
constexpr size_t kMaxNumControlPointsPerPixelRatio = 2;
230
231
0
float AdjustedQuant(const int32_t adjustment) {
232
0
  return (adjustment >= 0) ? (1.f + .125f * adjustment)
233
0
                           : 1.f / (1.f - .125f * adjustment);
234
0
}
235
236
47.6k
float InvAdjustedQuant(const int32_t adjustment) {
237
47.6k
  return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment)
238
47.6k
                           : (1.f - .125f * adjustment);
239
47.6k
}
240
241
// X, Y, B, sigma.
242
constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f};
243
244
Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points,
245
                               BitReader* const br, ANSSymbolReader* reader,
246
                               const std::vector<uint8_t>& context_map,
247
32.7k
                               const size_t num_splines) {
248
32.7k
  points->clear();
249
32.7k
  points->reserve(num_splines);
250
32.7k
  int64_t last_x = 0;
251
32.7k
  int64_t last_y = 0;
252
1.10M
  for (size_t i = 0; i < num_splines; i++) {
253
1.07M
    size_t dx =
254
1.07M
        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
255
1.07M
    size_t dy =
256
1.07M
        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
257
1.07M
    int64_t x;
258
1.07M
    int64_t y;
259
1.07M
    if (i != 0) {
260
1.04M
      x = UnpackSigned(dx) + last_x;
261
1.04M
      y = UnpackSigned(dy) + last_y;
262
1.04M
    } else {
263
32.7k
      x = dx;
264
32.7k
      y = dy;
265
32.7k
    }
266
1.07M
    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y));
267
1.07M
    points->emplace_back(static_cast<float>(x), static_cast<float>(y));
268
1.07M
    last_x = x;
269
1.07M
    last_y = y;
270
1.07M
  }
271
32.6k
  return true;
272
32.7k
}
273
274
struct Vector {
275
  float x, y;
276
0
  Vector operator-() const { return {-x, -y}; }
277
0
  Vector operator+(const Vector& other) const {
278
0
    return {x + other.x, y + other.y};
279
0
  }
280
22.7M
  float SquaredNorm() const { return x * x + y * y; }
281
};
282
29.1M
Vector operator*(const float k, const Vector& vec) {
283
29.1M
  return {k * vec.x, k * vec.y};
284
29.1M
}
285
286
29.2M
Spline::Point operator+(const Spline::Point& p, const Vector& vec) {
287
29.2M
  return {p.x + vec.x, p.y + vec.y};
288
29.2M
}
289
52.0M
Vector operator-(const Spline::Point& a, const Spline::Point& b) {
290
52.0M
  return {a.x - b.x, a.y - b.y};
291
52.0M
}
292
293
// TODO(eustas): avoid making a copy of "points".
294
void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points,
295
47.1k
                                     std::vector<Spline::Point>& result) {
296
47.1k
  if (points.empty()) return;
297
47.1k
  if (points.size() == 1) {
298
5.74k
    result.push_back(points[0]);
299
5.74k
    return;
300
5.74k
  }
301
  // Number of points to compute between each control point.
302
41.3k
  static constexpr int kNumPoints = 16;
303
41.3k
  result.reserve((points.size() - 1) * kNumPoints + 1);
304
41.3k
  points.insert(points.begin(), points[0] + (points[0] - points[1]));
305
41.3k
  points.push_back(points[points.size() - 1] +
306
41.3k
                   (points[points.size() - 1] - points[points.size() - 2]));
307
  // points has at least 4 elements at this point.
308
128k
  for (size_t start = 0; start < points.size() - 3; ++start) {
309
    // 4 of them are used, and we draw from p[1] to p[2].
310
87.4k
    const Spline::Point* const p = &points[start];
311
87.4k
    result.push_back(p[1]);
312
87.4k
    float d[3];
313
87.4k
    float t[4];
314
87.4k
    t[0] = 0;
315
349k
    for (int k = 0; k < 3; ++k) {
316
      // TODO(eustas): for each segment delta is calculated 3 times...
317
      // TODO(eustas): restrict d[k] with reasonable limit and spec it.
318
262k
      d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y));
319
262k
      t[k + 1] = t[k] + d[k];
320
262k
    }
321
1.39M
    for (int i = 1; i < kNumPoints; ++i) {
322
1.31M
      const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1];
323
1.31M
      Spline::Point a[3];
324
5.24M
      for (int k = 0; k < 3; ++k) {
325
        // TODO(eustas): reciprocal multiplication would be faster.
326
3.93M
        a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]);
327
3.93M
      }
328
1.31M
      Spline::Point b[2];
329
3.93M
      for (int k = 0; k < 2; ++k) {
330
2.62M
        b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]);
331
2.62M
      }
332
1.31M
      result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0]));
333
1.31M
    }
334
87.4k
  }
335
41.3k
  result.push_back(points[points.size() - 2]);
336
41.3k
}
337
338
// Move along the line segments defined by `points`, `kDesiredRenderingDistance`
339
// pixels at a time, and call `functor` with each point and the actual distance
340
// to the previous point (which will always be kDesiredRenderingDistance except
341
// possibly for the very last point).
342
// TODO(eustas): this method always adds the last point, but never the first
343
//               (unless those are one); I believe both ends matter.
344
template <typename Points, typename Functor>
345
47.1k
Status ForEachEquallySpacedPoint(const Points& points, const Functor& functor) {
346
47.1k
  JXL_ENSURE(!points.empty());
347
47.1k
  Spline::Point current = points.front();
348
47.1k
  functor(current, kDesiredRenderingDistance);
349
47.1k
  auto next = points.begin();
350
21.3M
  while (next != points.end()) {
351
21.3M
    const Spline::Point* previous = &current;
352
21.3M
    float arclength_from_previous = 0.f;
353
22.8M
    for (;;) {
354
22.8M
      if (next == points.end()) {
355
47.1k
        functor(*previous, arclength_from_previous);
356
47.1k
        return true;
357
47.1k
      }
358
22.7M
      const float arclength_to_next =
359
22.7M
          std::sqrt((*next - *previous).SquaredNorm());
360
22.7M
      if (arclength_from_previous + arclength_to_next >=
361
22.7M
          kDesiredRenderingDistance) {
362
21.3M
        current =
363
21.3M
            *previous + ((kDesiredRenderingDistance - arclength_from_previous) /
364
21.3M
                         arclength_to_next) *
365
21.3M
                            (*next - *previous);
366
21.3M
        functor(current, kDesiredRenderingDistance);
367
21.3M
        break;
368
21.3M
      }
369
1.44M
      arclength_from_previous += arclength_to_next;
370
1.44M
      previous = &*next;
371
1.44M
      ++next;
372
1.44M
    }
373
21.3M
  }
374
0
  return true;
375
47.1k
}
376
377
}  // namespace
378
379
StatusOr<QuantizedSpline> QuantizedSpline::Create(
380
    const Spline& original, const int32_t quantization_adjustment,
381
0
    const float y_to_x, const float y_to_b) {
382
0
  JXL_ENSURE(!original.control_points.empty());
383
0
  QuantizedSpline result;
384
0
  result.control_points_.reserve(original.control_points.size() - 1);
385
0
  const Spline::Point& starting_point = original.control_points.front();
386
0
  int previous_x = static_cast<int>(std::round(starting_point.x));
387
0
  int previous_y = static_cast<int>(std::round(starting_point.y));
388
0
  int previous_delta_x = 0;
389
0
  int previous_delta_y = 0;
390
0
  for (auto it = original.control_points.begin() + 1;
391
0
       it != original.control_points.end(); ++it) {
392
0
    const int new_x = static_cast<int>(std::round(it->x));
393
0
    const int new_y = static_cast<int>(std::round(it->y));
394
0
    const int new_delta_x = new_x - previous_x;
395
0
    const int new_delta_y = new_y - previous_y;
396
0
    result.control_points_.emplace_back(new_delta_x - previous_delta_x,
397
0
                                        new_delta_y - previous_delta_y);
398
0
    previous_delta_x = new_delta_x;
399
0
    previous_delta_y = new_delta_y;
400
0
    previous_x = new_x;
401
0
    previous_y = new_y;
402
0
  }
403
404
0
  const auto to_int = [](float v) -> int {
405
    // Maximal int representable with float.
406
0
    constexpr float kMax = std::numeric_limits<int>::max() - 127;
407
0
    constexpr float kMin = -kMax;
408
0
    return static_cast<int>(std::round(Clamp1(v, kMin, kMax)));
409
0
  };
410
411
0
  const auto quant = AdjustedQuant(quantization_adjustment);
412
0
  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
413
0
  for (int c : {1, 0, 2}) {
414
0
    float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b;
415
0
    for (int i = 0; i < 32; ++i) {
416
0
      const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
417
0
      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
418
0
      auto restored_y = result.color_dct_[1][i] * inv_dct_factor *
419
0
                        kChannelWeight[1] * inv_quant;
420
0
      auto decorrelated = original.color_dct[c][i] - factor * restored_y;
421
0
      result.color_dct_[c][i] =
422
0
          to_int(decorrelated * dct_factor * quant / kChannelWeight[c]);
423
0
    }
424
0
  }
425
0
  for (int i = 0; i < 32; ++i) {
426
0
    const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
427
0
    result.sigma_dct_[i] =
428
0
        to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]);
429
0
  }
430
0
  return result;
431
0
}
432
433
Status QuantizedSpline::Dequantize(const Spline::Point& starting_point,
434
                                   const int32_t quantization_adjustment,
435
                                   const float y_to_x, const float y_to_b,
436
                                   const uint64_t image_size,
437
                                   uint64_t* total_estimated_area_reached,
438
47.6k
                                   Spline& result) const {
439
47.6k
  constexpr uint64_t kOne = static_cast<uint64_t>(1);
440
47.6k
  const uint64_t area_limit =
441
47.6k
      std::min(1024 * image_size + (kOne << 32), kOne << 42);
442
443
47.6k
  result.control_points.clear();
444
47.6k
  result.control_points.reserve(control_points_.size() + 1);
445
47.6k
  float px = std::round(starting_point.x);
446
47.6k
  float py = std::round(starting_point.y);
447
47.6k
  JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py));
448
47.6k
  int current_x = static_cast<int>(px);
449
47.6k
  int current_y = static_cast<int>(py);
450
47.6k
  result.control_points.emplace_back(static_cast<float>(current_x),
451
47.6k
                                     static_cast<float>(current_y));
452
47.6k
  int current_delta_x = 0;
453
47.6k
  int current_delta_y = 0;
454
47.6k
  uint64_t manhattan_distance = 0;
455
93.3k
  for (const auto& point : control_points_) {
456
93.3k
    current_delta_x += point.first;
457
93.3k
    current_delta_y += point.second;
458
93.3k
    manhattan_distance += std::abs(current_delta_x) + std::abs(current_delta_y);
459
93.3k
    if (manhattan_distance > area_limit) {
460
0
      return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64,
461
0
                         manhattan_distance);
462
0
    }
463
93.3k
    JXL_RETURN_IF_ERROR(
464
93.3k
        ValidateSplinePointPos(current_delta_x, current_delta_y));
465
93.3k
    current_x += current_delta_x;
466
93.3k
    current_y += current_delta_y;
467
93.3k
    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y));
468
93.3k
    result.control_points.emplace_back(static_cast<float>(current_x),
469
93.3k
                                       static_cast<float>(current_y));
470
93.3k
  }
471
472
47.6k
  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
473
190k
  for (int c = 0; c < 3; ++c) {
474
4.71M
    for (int i = 0; i < 32; ++i) {
475
4.57M
      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
476
4.57M
      result.color_dct[c][i] =
477
4.57M
          color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant;
478
4.57M
    }
479
142k
  }
480
1.57M
  for (int i = 0; i < 32; ++i) {
481
1.52M
    result.color_dct[0][i] += y_to_x * result.color_dct[1][i];
482
1.52M
    result.color_dct[2][i] += y_to_b * result.color_dct[1][i];
483
1.52M
  }
484
47.6k
  uint64_t width_estimate = 0;
485
486
47.6k
  uint64_t color[3] = {};
487
190k
  for (int c = 0; c < 3; ++c) {
488
4.71M
    for (int i = 0; i < 32; ++i) {
489
4.57M
      color[c] += static_cast<uint64_t>(
490
4.57M
          std::ceil(inv_quant * std::abs(color_dct_[c][i])));
491
4.57M
    }
492
142k
  }
493
47.6k
  color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1];
494
47.6k
  color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1];
495
  // This is not taking kChannelWeight into account, but up to constant factors
496
  // it gives an indication of the influence of the color values on the area
497
  // that will need to be rendered.
498
47.6k
  const uint64_t max_color = std::max({color[1], color[0], color[2]});
499
47.6k
  uint64_t logcolor =
500
47.6k
      std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color)));
501
502
47.6k
  const float weight_limit =
503
47.6k
      std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) /
504
47.6k
                          std::max<size_t>(1, manhattan_distance)));
505
506
1.57M
  for (int i = 0; i < 32; ++i) {
507
1.52M
    const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
508
1.52M
    result.sigma_dct[i] =
509
1.52M
        sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant;
510
    // If we include the factor kChannelWeight[3]=.3333f here, we get a
511
    // realistic area estimate. We leave it out to simplify the calculations,
512
    // and understand that this way we underestimate the area by a factor of
513
    // 1/(0.3333*0.3333). This is taken into account in the limits below.
514
1.52M
    float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i]));
515
1.52M
    uint64_t weight =
516
1.52M
        static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f)));
517
1.52M
    width_estimate += weight * weight * logcolor;
518
1.52M
  }
519
47.6k
  *total_estimated_area_reached += (width_estimate * manhattan_distance);
520
47.6k
  if (*total_estimated_area_reached > area_limit) {
521
37
    return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64,
522
37
                       *total_estimated_area_reached);
523
37
  }
524
525
47.6k
  return true;
526
47.6k
}
527
528
Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map,
529
                               ANSSymbolReader* const decoder,
530
                               BitReader* const br,
531
                               const size_t max_control_points,
532
1.04M
                               size_t* total_num_control_points) {
533
1.04M
  const size_t num_control_points =
534
1.04M
      decoder->ReadHybridUint(kNumControlPointsContext, br, context_map);
535
1.04M
  if (num_control_points > max_control_points) {
536
71
    return JXL_FAILURE("Too many control points: %" PRIuS, num_control_points);
537
71
  }
538
1.04M
  *total_num_control_points += num_control_points;
539
1.04M
  if (*total_num_control_points > max_control_points) {
540
346
    return JXL_FAILURE("Too many control points: %" PRIuS,
541
346
                       *total_num_control_points);
542
346
  }
543
1.04M
  control_points_.resize(num_control_points);
544
  // Maximal image dimension.
545
1.04M
  constexpr int64_t kDeltaLimit = 1u << 30;
546
1.48M
  for (std::pair<int64_t, int64_t>& control_point : control_points_) {
547
1.48M
    control_point.first = UnpackSigned(
548
1.48M
        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
549
1.48M
    control_point.second = UnpackSigned(
550
1.48M
        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
551
    // Check delta-deltas are not outrageous; it is not in spec, but there is
552
    // no reason to allow larger values.
553
1.48M
    if ((control_point.first >= kDeltaLimit) ||
554
1.48M
        (control_point.first <= -kDeltaLimit) ||
555
1.48M
        (control_point.second >= kDeltaLimit) ||
556
1.48M
        (control_point.second <= -kDeltaLimit)) {
557
21
      return JXL_FAILURE("Spline delta-delta is out of bounds");
558
21
    }
559
1.48M
  }
560
561
4.19M
  const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status {
562
4.19M
    constexpr int kWeirdNumber = std::numeric_limits<int>::min();
563
138M
    for (int i = 0; i < 32; ++i) {
564
134M
      dct[i] =
565
134M
          UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map));
566
134M
      if (dct[i] == kWeirdNumber) {
567
12
        return JXL_FAILURE("The weird number in spline DCT");
568
12
      }
569
134M
    }
570
4.19M
    return true;
571
4.19M
  };
572
3.14M
  for (auto& dct : color_dct_) {
573
3.14M
    JXL_RETURN_IF_ERROR(decode_dct(dct));
574
3.14M
  }
575
1.04M
  JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_));
576
1.04M
  return true;
577
1.04M
}
578
579
105k
void Splines::Clear() {
580
105k
  quantization_adjustment_ = 0;
581
105k
  splines_.clear();
582
105k
  starting_points_.clear();
583
105k
  segments_.clear();
584
105k
  segment_indices_.clear();
585
105k
  segment_y_start_.clear();
586
105k
}
587
588
Status Splines::Decode(JxlMemoryManager* memory_manager, jxl::BitReader* br,
589
53.4k
                       const size_t num_pixels) {
590
53.4k
  std::vector<uint8_t> context_map;
591
53.4k
  ANSCode code;
592
53.4k
  JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager, br, kNumSplineContexts,
593
53.4k
                                       &code, &context_map));
594
66.5k
  JXL_ASSIGN_OR_RETURN(ANSSymbolReader decoder,
595
66.5k
                       ANSSymbolReader::Create(&code, br));
596
66.5k
  size_t num_splines =
597
66.5k
      decoder.ReadHybridUint(kNumSplinesContext, br, context_map);
598
66.5k
  size_t max_control_points = std::min(
599
66.5k
      kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio);
600
66.5k
  if (num_splines > max_control_points ||
601
33.1k
      num_splines + 1 > max_control_points) {
602
520
    return JXL_FAILURE("Too many splines: %" PRIuS, num_splines);
603
520
  }
604
32.7k
  num_splines++;
605
32.7k
  JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder,
606
32.7k
                                              context_map, num_splines));
607
608
32.6k
  quantization_adjustment_ = UnpackSigned(
609
32.6k
      decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map));
610
611
32.6k
  splines_.clear();
612
32.6k
  splines_.reserve(num_splines);
613
32.6k
  size_t num_control_points = num_splines;
614
1.08M
  for (size_t i = 0; i < num_splines; ++i) {
615
1.04M
    QuantizedSpline spline;
616
1.04M
    JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br,
617
1.04M
                                      max_control_points, &num_control_points));
618
1.04M
    splines_.push_back(std::move(spline));
619
1.04M
  }
620
621
32.2k
  JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState());
622
623
32.2k
  if (!HasAny()) {
624
0
    return JXL_FAILURE("Decoded splines but got none");
625
0
  }
626
627
32.2k
  return true;
628
32.2k
}
629
630
0
void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect) const {
631
0
  Apply</*add=*/true>(opsin, opsin_rect);
632
0
}
633
void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
634
                       float* JXL_RESTRICT row_b, size_t y, size_t x0,
635
21.9k
                       size_t x1) const {
636
21.9k
  ApplyToRow</*add=*/true>(row_x, row_y, row_b, y, x0, x1);
637
21.9k
}
638
639
3.48k
void Splines::SubtractFrom(Image3F* const opsin) const {
640
3.48k
  Apply</*add=*/false>(opsin, Rect(*opsin));
641
3.48k
}
642
643
Status Splines::InitializeDrawCache(const size_t image_xsize,
644
                                    const size_t image_ysize,
645
31.1k
                                    const ColorCorrelation& color_correlation) {
646
  // TODO(veluca): avoid storing segments that are entirely outside image
647
  // boundaries.
648
31.1k
  segments_.clear();
649
31.1k
  segment_indices_.clear();
650
31.1k
  segment_y_start_.clear();
651
31.1k
  std::vector<std::pair<size_t, size_t>> segments_by_y;
652
31.1k
  std::vector<Spline::Point> intermediate_points;
653
31.1k
  uint64_t total_estimated_area_reached = 0;
654
31.1k
  std::vector<Spline> splines;
655
78.7k
  for (size_t i = 0; i < splines_.size(); ++i) {
656
47.6k
    Spline spline;
657
47.6k
    JXL_RETURN_IF_ERROR(splines_[i].Dequantize(
658
47.6k
        starting_points_[i], quantization_adjustment_,
659
47.6k
        color_correlation.YtoXRatio(0), color_correlation.YtoBRatio(0),
660
47.6k
        image_xsize * image_ysize, &total_estimated_area_reached, spline));
661
47.6k
    if (std::adjacent_find(spline.control_points.begin(),
662
47.6k
                           spline.control_points.end()) !=
663
47.6k
        spline.control_points.end()) {
664
      // Otherwise division by zero might occur. Once control points coincide,
665
      // the direction of curve is undefined...
666
74
      return JXL_FAILURE(
667
74
          "identical successive control points in spline %" PRIuS, i);
668
74
    }
669
47.5k
    splines.push_back(spline);
670
47.5k
  }
671
  // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams.
672
31.0k
  if (total_estimated_area_reached >
673
31.0k
      std::min(
674
31.0k
          (8 * image_xsize * image_ysize + (static_cast<uint64_t>(1) << 25)),
675
31.0k
          (static_cast<uint64_t>(1) << 30))) {
676
23
    JXL_WARNING(
677
23
        "Large total_estimated_area_reached, expect slower decoding: %" PRIu64,
678
23
        total_estimated_area_reached);
679
23
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
680
23
    return JXL_FAILURE("Total spline area is too large");
681
23
#endif
682
23
  }
683
684
47.1k
  for (Spline& spline : splines) {
685
47.1k
    std::vector<std::pair<Spline::Point, float>> points_to_draw;
686
21.4M
    auto add_point = [&](const Spline::Point& point, const float multiplier) {
687
21.4M
      points_to_draw.emplace_back(point, multiplier);
688
21.4M
    };
689
47.1k
    intermediate_points.clear();
690
47.1k
    DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points);
691
47.1k
    JXL_RETURN_IF_ERROR(
692
47.1k
        ForEachEquallySpacedPoint(intermediate_points, add_point));
693
47.1k
    const float arc_length =
694
47.1k
        (points_to_draw.size() - 2) * kDesiredRenderingDistance +
695
47.1k
        points_to_draw.back().second;
696
47.1k
    if (arc_length <= 0.f) {
697
      // This spline wouldn't have any effect.
698
5.74k
      continue;
699
5.74k
    }
700
41.3k
    HWY_DYNAMIC_DISPATCH(SegmentsFromPoints)
701
41.3k
    (spline, points_to_draw, arc_length, segments_, segments_by_y);
702
41.3k
  }
703
704
  // TODO(eustas): consider linear sorting here.
705
30.9k
  std::sort(segments_by_y.begin(), segments_by_y.end());
706
30.9k
  segment_indices_.resize(segments_by_y.size());
707
30.9k
  segment_y_start_.resize(image_ysize + 1);
708
106M
  for (size_t i = 0; i < segments_by_y.size(); i++) {
709
106M
    segment_indices_[i] = segments_by_y[i].second;
710
106M
    size_t y = segments_by_y[i].first;
711
106M
    if (y < image_ysize) {
712
25.8M
      segment_y_start_[y + 1]++;
713
25.8M
    }
714
106M
  }
715
4.13M
  for (size_t y = 0; y < image_ysize; y++) {
716
4.10M
    segment_y_start_[y + 1] += segment_y_start_[y];
717
4.10M
  }
718
30.9k
  return true;
719
30.9k
}
720
721
template <bool add>
722
void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
723
                         float* JXL_RESTRICT row_b, size_t y, size_t x0,
724
21.9k
                         size_t x1) const {
725
21.9k
  if (segments_.empty()) return;
726
12.1k
  HWY_DYNAMIC_DISPATCH(DrawSegments)
727
12.1k
  (row_x, row_y, row_b, y, x0, x1, add, segments_.data(),
728
12.1k
   segment_indices_.data(), segment_y_start_.data());
729
12.1k
}
void jxl::Splines::ApplyToRow<true>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const
Line
Count
Source
724
21.9k
                         size_t x1) const {
725
21.9k
  if (segments_.empty()) return;
726
12.1k
  HWY_DYNAMIC_DISPATCH(DrawSegments)
727
12.1k
  (row_x, row_y, row_b, y, x0, x1, add, segments_.data(),
728
12.1k
   segment_indices_.data(), segment_y_start_.data());
729
12.1k
}
Unexecuted instantiation: void jxl::Splines::ApplyToRow<false>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const
730
731
template <bool add>
732
3.48k
void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const {
733
3.48k
  if (segments_.empty()) return;
734
0
  const size_t y0 = opsin_rect.y0();
735
0
  const size_t x0 = opsin_rect.x0();
736
0
  const size_t x1 = opsin_rect.x1();
737
0
  for (size_t y = 0; y < opsin_rect.ysize(); y++) {
738
0
    ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0,
739
0
                    opsin->PlaneRow(1, y0 + y) + x0,
740
0
                    opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1);
741
0
  }
742
0
}
Unexecuted instantiation: void jxl::Splines::Apply<true>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const
void jxl::Splines::Apply<false>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const
Line
Count
Source
732
3.48k
void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const {
733
3.48k
  if (segments_.empty()) return;
734
0
  const size_t y0 = opsin_rect.y0();
735
0
  const size_t x0 = opsin_rect.x0();
736
0
  const size_t x1 = opsin_rect.x1();
737
0
  for (size_t y = 0; y < opsin_rect.ysize(); y++) {
738
0
    ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0,
739
0
                    opsin->PlaneRow(1, y0 + y) + x0,
740
0
                    opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1);
741
0
  }
742
0
}
743
744
}  // namespace jxl
745
#endif  // HWY_ONCE